1 # Make the VFS call down into the FS on flock calls.
2 diff -urN -p linux-2.6.7/fs/locks.c linux/fs/locks.c
3 --- linux-2.6.7/fs/locks.c 2004-06-16 12:00:44.567463632 -0500
4 +++ linux/fs/locks.c 2004-06-16 12:01:58.844205936 -0500
5 @@ -1294,6 +1294,27 @@ out_unlock:
10 + * Wrapper function around the file_operations lock routine when called for
11 + * flock(). The lock routine is called for both fcntl() and flock(), so
12 + * the flock parameters must be translated to an equivalent fcntl()-like
15 + * Don't use locks_alloc_lock() (or flock_make_lock()) here, as
16 + * this is just a temporary lock structure. We especially don't
17 + * want to fail because we couldn't allocate a lock structure if
18 + * this is an unlock operation.
20 +int flock_fs_file(struct file *filp, int type, int wait)
22 + struct file_lock fl = { .fl_flags = FL_FLOCK,
25 + return filp->f_op->lock(filp,
26 + (wait) ? F_SETLKW : F_SETLK,
31 * sys_flock: - flock() system call.
32 * @fd: the file descriptor to lock.
33 @@ -1342,6 +1363,50 @@ asmlinkage long sys_flock(unsigned int f
38 + * Execute any filesystem-specific flock routines. The filesystem may
39 + * maintain supplemental locks. This code allows the supplemental locks
40 + * to be kept in sync with the vfs flock lock. If flock() is called on
41 + * a lock already held for the given filp, the current flock lock is
42 + * dropped before obtaining the requested lock. This unlock operation
43 + * must be completed for any filesystem-specific locks and the vfs
44 + * flock lock before proceeding with obtaining the requested lock. When
45 + * the filesystem routine drops a lock for such a request, it must
46 + * return -EDEADLK, allowing the vfs lock to be dropped, and the
47 + * filesystem code is then re-executed to obtain the lock.
49 + * A non-blocking request that returns EWOULDBLOCK also causes any vfs
50 + * flock lock to be released, but then returns the error to the caller.
52 + if (filp->f_op && filp->f_op->lock) {
54 + error = flock_fs_file(filp, lock->fl_type, can_sleep);
57 + * We may have dropped a lock. We need to
58 + * finish unlocking before returning or
59 + * continuing with lock acquisition.
61 + if (error != -ENOLCK)
62 + flock_lock_file(filp, &(struct file_lock){.fl_type = F_UNLCK});
65 + * We already held the lock in some mode, and
66 + * had to drop filesystem-specific locks before
67 + * proceeding. We come back through this
68 + * routine to unlock the vfs flock lock. Now go
69 + * back and try again. Using EAGAIN as the
70 + * error here would be better, but the one valid
71 + * error value defined for flock(), EWOULDBLOCK,
72 + * is defined as EAGAIN.
74 + if (error == -EDEADLK)
82 error = flock_lock_file(filp, lock);
83 if ((error != -EAGAIN) || !can_sleep)
84 @@ -1354,6 +1419,13 @@ asmlinkage long sys_flock(unsigned int f
89 + * If we failed to get the vfs flock, we need to clean up any
90 + * filesystem-specific lock state that we previously obtained.
92 + if (error && filp->f_op && filp->f_op->lock)
93 + flock_fs_file(filp, F_UNLCK, 1);
96 if (list_empty(&lock->fl_link)) {
97 locks_free_lock(lock);
98 @@ -1714,6 +1786,8 @@ void locks_remove_flock(struct file *fil
99 if (fl->fl_file == filp) {
101 locks_delete_lock(before);
102 + if (filp->f_op && filp->f_op->lock)
103 + flock_fs_file(filp, F_UNLCK, 1);
107 # Add lock harness to the build system.
108 diff -urN -p linux-2.6.7/fs/Kconfig linux/fs/Kconfig
109 --- linux-2.6.7/fs/Kconfig 2004-06-16 12:00:44.558465722 -0500
110 +++ linux/fs/Kconfig 2004-06-16 12:02:02.401379449 -0500
111 @@ -1669,6 +1669,14 @@ config AFS_FS
116 + tristate "GFS Lock Harness"
118 + The module that connects GFS to the modules that provide
121 + If you want to use GFS (a cluster filesystem) say Y here.
125 menu "Partition Types"
126 diff -urN -p linux-2.6.7/fs/Makefile linux/fs/Makefile
127 --- linux-2.6.7/fs/Makefile 2004-06-16 12:00:44.558465722 -0500
128 +++ linux/fs/Makefile 2004-06-16 12:02:02.402379216 -0500
129 @@ -91,3 +91,4 @@ obj-$(CONFIG_JFS_FS) += jfs/
130 obj-$(CONFIG_XFS_FS) += xfs/
131 obj-$(CONFIG_AFS_FS) += afs/
132 obj-$(CONFIG_BEFS_FS) += befs/
133 +obj-$(CONFIG_LOCK_HARNESS) += gfs_locking/
134 diff -urN -p linux-2.6.7/fs/gfs_locking/Makefile linux/fs/gfs_locking/Makefile
135 --- linux-2.6.7/fs/gfs_locking/Makefile 1969-12-31 18:00:00.000000000 -0600
136 +++ linux/fs/gfs_locking/Makefile 2004-06-16 12:02:02.402379216 -0500
138 +###############################################################################
139 +###############################################################################
141 +## Copyright (C) 2004 Red Hat, Inc. All rights reserved.
143 +## This copyrighted material is made available to anyone wishing to use,
144 +## modify, copy, or redistribute it subject to the terms and conditions
145 +## of the GNU General Public License v.2.
147 +###############################################################################
148 +###############################################################################
150 +obj-$(CONFIG_LOCK_HARNESS) += lock_harness/
152 diff -urN -p linux-2.6.7/fs/gfs_locking/lock_harness/Makefile linux/fs/gfs_locking/lock_harness/Makefile
153 --- linux-2.6.7/fs/gfs_locking/lock_harness/Makefile 1969-12-31 18:00:00.000000000 -0600
154 +++ linux/fs/gfs_locking/lock_harness/Makefile 2004-06-16 12:02:02.402379216 -0500
156 +###############################################################################
157 +###############################################################################
159 +## Copyright (C) 2004 Red Hat, Inc. All rights reserved.
161 +## This copyrighted material is made available to anyone wishing to use,
162 +## modify, copy, or redistribute it subject to the terms and conditions
163 +## of the GNU General Public License v.2.
165 +###############################################################################
166 +###############################################################################
168 +obj-$(CONFIG_LOCK_HARNESS) += lock_harness.o
170 +lock_harness-y := main.o
172 # Add GFS to the build system.
173 diff -urN -p linux-2.6.7/fs/Kconfig linux/fs/Kconfig
174 --- linux-2.6.7/fs/Kconfig 2004-06-25 13:57:24.435829621 -0500
175 +++ linux/fs/Kconfig 2004-06-25 13:59:16.786347614 -0500
176 @@ -316,13 +316,13 @@ config JFS_STATISTICS
177 to be made available to the user in the /proc/fs/jfs/ directory.
180 -# Posix ACL utility routines (for now, only ext2/ext3/jfs/reiserfs)
181 +# Posix ACL utility routines (for now, only ext2/ext3/jfs/reiserfs/GFS)
183 # NOTE: you can implement Posix ACLs without these helpers (XFS does).
184 # Never use this symbol for ifdefs.
187 - depends on EXT2_FS_POSIX_ACL || EXT3_FS_POSIX_ACL || JFS_POSIX_ACL || REISERFS_FS_POSIX_ACL
188 + depends on EXT2_FS_POSIX_ACL || EXT3_FS_POSIX_ACL || JFS_POSIX_ACL || REISERFS_FS_POSIX_ACL || GFS_FS
192 @@ -1677,6 +1677,20 @@ config LOCK_HARNESS
194 If you want to use GFS (a cluster filesystem) say Y here.
197 + tristate "GFS file system support"
198 + depends on LOCK_HARNESS
200 + A cluster filesystem.
202 + Allows a cluster of computers to simultaneously use a block device
203 + that is shared between them (with FC, iSCSI, NBD, etc...). GFS reads
204 + and writes to the block device like a local filesystem, but also uses
205 + a lock module to allow the computers to coordinate their I/O so
206 + filesystem consistency is maintained. One of the nifty features of
207 + GFS is perfect consistency -- changes made to the filesystem on one
208 + machine show up immediately on all other machines in the cluster.
212 menu "Partition Types"
213 diff -urN -p linux-2.6.7/fs/Makefile linux/fs/Makefile
214 --- linux-2.6.7/fs/Makefile 2004-06-25 13:57:24.436829391 -0500
215 +++ linux/fs/Makefile 2004-06-25 13:57:24.447826863 -0500
216 @@ -92,3 +92,4 @@ obj-$(CONFIG_XFS_FS) += xfs/
217 obj-$(CONFIG_AFS_FS) += afs/
218 obj-$(CONFIG_BEFS_FS) += befs/
219 obj-$(CONFIG_LOCK_HARNESS) += gfs_locking/
220 +obj-$(CONFIG_GFS_FS) += gfs/
221 diff -urN -p linux-2.6.7/fs/gfs/Makefile linux/fs/gfs/Makefile
222 --- linux-2.6.7/fs/gfs/Makefile 1969-12-31 18:00:00.000000000 -0600
223 +++ linux/fs/gfs/Makefile 2004-06-25 13:57:24.448826633 -0500
225 +###############################################################################
226 +###############################################################################
228 +## Copyright (C) 2004 Red Hat, Inc. All rights reserved.
230 +## This copyrighted material is made available to anyone wishing to use,
231 +## modify, copy, or redistribute it subject to the terms and conditions
232 +## of the GNU General Public License v.2.
234 +###############################################################################
235 +###############################################################################
237 +obj-$(CONFIG_GFS_FS) += gfs.o
276 # Add lock_nolock to the build system.
277 diff -urN -p linux-2.6.7/fs/Kconfig linux/fs/Kconfig
278 --- linux-2.6.7/fs/Kconfig 2004-06-16 12:02:09.563715325 -0500
279 +++ linux/fs/Kconfig 2004-06-16 12:02:09.574712769 -0500
280 @@ -1691,6 +1691,12 @@ config GFS_FS
281 GFS is perfect consistency -- changes made to the filesystem on one
282 machine show up immediately on all other machines in the cluster.
285 + tristate "Lock Nolock"
286 + depends on LOCK_HARNESS
288 + A "fake" lock module that allows GFS to run as a local filesystem.
292 menu "Partition Types"
293 diff -urN -p linux-2.6.7/fs/gfs_locking/Makefile linux/fs/gfs_locking/Makefile
294 --- linux-2.6.7/fs/gfs_locking/Makefile 2004-06-16 12:02:05.985546690 -0500
295 +++ linux/fs/gfs_locking/Makefile 2004-06-16 12:02:09.574712769 -0500
297 ###############################################################################
299 obj-$(CONFIG_LOCK_HARNESS) += lock_harness/
300 +obj-$(CONFIG_LOCK_NOLOCK) += lock_nolock/
302 diff -urN -p linux-2.6.7/fs/gfs_locking/lock_nolock/Makefile linux/fs/gfs_locking/lock_nolock/Makefile
303 --- linux-2.6.7/fs/gfs_locking/lock_nolock/Makefile 1969-12-31 18:00:00.000000000 -0600
304 +++ linux/fs/gfs_locking/lock_nolock/Makefile 2004-06-16 12:02:09.575712537 -0500
306 +###############################################################################
307 +###############################################################################
309 +## Copyright (C) 2004 Red Hat, Inc. All rights reserved.
311 +## This copyrighted material is made available to anyone wishing to use,
312 +## modify, copy, or redistribute it subject to the terms and conditions
313 +## of the GNU General Public License v.2.
315 +###############################################################################
316 +###############################################################################
318 +obj-$(CONFIG_LOCK_NOLOCK) += lock_nolock.o
320 +lock_nolock-y := main.o
322 # Add lock_dlm to the build system.
323 diff -urN -p linux-2.6.7/fs/Kconfig linux/fs/Kconfig
324 --- linux-2.6.7/fs/Kconfig 2004-06-16 12:02:13.145883030 -0500
325 +++ linux/fs/Kconfig 2004-06-16 12:02:13.157880243 -0500
326 @@ -1697,6 +1697,12 @@ config LOCK_NOLOCK
328 A "fake" lock module that allows GFS to run as a local filesystem.
331 + tristate "Lock DLM"
332 + depends on LOCK_HARNESS
334 + A lock module that allows GFS to use a Distributed Lock Manager.
338 menu "Partition Types"
339 diff -urN -p linux-2.6.7/fs/gfs_locking/Makefile linux/fs/gfs_locking/Makefile
340 --- linux-2.6.7/fs/gfs_locking/Makefile 2004-06-16 12:02:13.146882798 -0500
341 +++ linux/fs/gfs_locking/Makefile 2004-06-16 12:02:13.157880243 -0500
344 obj-$(CONFIG_LOCK_HARNESS) += lock_harness/
345 obj-$(CONFIG_LOCK_NOLOCK) += lock_nolock/
346 +obj-$(CONFIG_LOCK_DLM) += lock_dlm/
348 diff -urN -p linux-2.6.7/fs/gfs_locking/lock_dlm/Makefile linux/fs/gfs_locking/lock_dlm/Makefile
349 --- linux-2.6.7/fs/gfs_locking/lock_dlm/Makefile 1969-12-31 18:00:00.000000000 -0600
350 +++ linux/fs/gfs_locking/lock_dlm/Makefile 2004-06-16 12:02:13.157880243 -0500
352 +###############################################################################
353 +###############################################################################
355 +## Copyright (C) 2004 Red Hat, Inc. All rights reserved.
357 +## This copyrighted material is made available to anyone wishing to use,
358 +## modify, copy, or redistribute it subject to the terms and conditions
359 +## of the GNU General Public License v.2.
361 +###############################################################################
362 +###############################################################################
364 +obj-$(CONFIG_LOCK_DLM) += lock_dlm.o
366 +lock_dlm-y := main.o group.o lock.o mount.o thread.o plock.o
368 # Add lock_gulm to the build system.
369 diff -urN -p linux-2.6.7/fs/Kconfig linux/fs/Kconfig
370 --- linux-2.6.7/fs/Kconfig 2004-06-16 12:02:16.816030294 -0500
371 +++ linux/fs/Kconfig 2004-06-16 12:02:16.827027739 -0500
372 @@ -1703,6 +1703,12 @@ config LOCK_DLM
374 A lock module that allows GFS to use a Distributed Lock Manager.
377 + tristate "Lock GULM"
378 + depends on LOCK_HARNESS
380 + A lock module that allows GFS to use a Failover Lock Manager.
384 menu "Partition Types"
385 diff -urN -p linux-2.6.7/fs/gfs_locking/Makefile linux/fs/gfs_locking/Makefile
386 --- linux-2.6.7/fs/gfs_locking/Makefile 2004-06-16 12:02:16.817030062 -0500
387 +++ linux/fs/gfs_locking/Makefile 2004-06-16 12:02:16.828027507 -0500
389 obj-$(CONFIG_LOCK_HARNESS) += lock_harness/
390 obj-$(CONFIG_LOCK_NOLOCK) += lock_nolock/
391 obj-$(CONFIG_LOCK_DLM) += lock_dlm/
392 +obj-$(CONFIG_LOCK_GULM) += lock_gulm/
394 diff -urN -p linux-2.6.7/fs/gfs_locking/lock_gulm/Makefile linux/fs/gfs_locking/lock_gulm/Makefile
395 --- linux-2.6.7/fs/gfs_locking/lock_gulm/Makefile 1969-12-31 18:00:00.000000000 -0600
396 +++ linux/fs/gfs_locking/lock_gulm/Makefile 2004-06-16 12:02:16.828027507 -0500
398 +###############################################################################
399 +###############################################################################
401 +## Copyright (C) 2004 Red Hat, Inc. All rights reserved.
403 +## This copyrighted material is made available to anyone wishing to use,
404 +## modify, copy, or redistribute it subject to the terms and conditions
405 +## of the GNU General Public License v.2.
407 +###############################################################################
408 +###############################################################################
410 +obj-$(CONFIG_LOCK_GULM) += lock_gulm.o
412 +lock_gulm-y := gulm_core.o \
421 + linux_gulm_main.o \
426 + utils_verb_flags.o \
431 diff -urN linux-orig/fs/gfs/acl.c linux-patched/fs/gfs/acl.c
432 --- linux-orig/fs/gfs/acl.c 1969-12-31 18:00:00.000000000 -0600
433 +++ linux-patched/fs/gfs/acl.c 2004-06-30 13:27:49.332713682 -0500
435 +/******************************************************************************
436 +*******************************************************************************
438 +** Copyright (C) Sistina Software, Inc. 2003 All rights reserved.
439 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
441 +** This copyrighted material is made available to anyone wishing to use,
442 +** modify, copy, or redistribute it subject to the terms and conditions
443 +** of the GNU General Public License v.2.
445 +*******************************************************************************
446 +******************************************************************************/
448 +#include <linux/sched.h>
449 +#include <linux/slab.h>
450 +#include <linux/smp_lock.h>
451 +#include <linux/spinlock.h>
452 +#include <asm/semaphore.h>
453 +#include <linux/completion.h>
454 +#include <linux/buffer_head.h>
455 +#include <linux/xattr_acl.h>
466 + * Check to make sure that the acl is actually valid
469 +gfs_validate_acl(struct gfs_inode *ip, const char *value, int size, int access)
472 + struct posix_acl *acl = NULL;
473 + struct gfs_sbd *sdp = ip->i_sbd;
475 + if ((current->fsuid != ip->i_di.di_uid) && !capable(CAP_FOWNER))
477 + if (ip->i_di.di_type == GFS_FILE_LNK)
478 + return -EOPNOTSUPP;
479 + if (!access && ip->i_di.di_type != GFS_FILE_DIR)
481 + if (!sdp->sd_args.ar_posixacls)
482 + return -EOPNOTSUPP;
485 + acl = posix_acl_from_xattr(value, size);
487 + return PTR_ERR(acl);
489 + err = posix_acl_valid(acl);
490 + posix_acl_release(acl);
497 +gfs_acl_set_mode(struct gfs_inode *ip, struct posix_acl *acl)
499 + struct inode *inode;
502 + inode = gfs_iget(ip, NO_CREATE);
503 + mode = inode->i_mode;
504 + posix_acl_equiv_mode(acl, &mode);
505 + inode->i_mode = mode;
507 + gfs_inode_attr_out(ip);
512 + * gfs_replace_acl - replace the value of the ea to the value of the acl
514 + * NOTE: The new value must be the same size as the old one.
517 +gfs_replace_acl(struct inode *inode, struct posix_acl *acl, int access,
518 + struct gfs_ea_location location)
520 + struct gfs_inode *ip = vn2ip(inode);
521 + struct gfs_easet_io req;
526 + size = posix_acl_to_xattr(acl, NULL, 0);
527 + GFS_ASSERT(size == GFS_EA_DATA_LEN(location.ea),
528 + printk("new acl size = %d, ea size = %u\n", size,
529 + GFS_EA_DATA_LEN(location.ea)););
531 + data = gmalloc(size);
533 + posix_acl_to_xattr(acl, data, size);
535 + req.es_data = data;
536 + req.es_name = (access) ? GFS_POSIX_ACL_ACCESS : GFS_POSIX_ACL_DEFAULT;
537 + req.es_data_len = size;
538 + req.es_name_len = (access) ? GFS_POSIX_ACL_ACCESS_LEN : GFS_POSIX_ACL_DEFAULT_LEN;
539 + req.es_cmd = GFS_EACMD_REPLACE;
540 + req.es_type = GFS_EATYPE_SYS;
542 + error = replace_ea(ip->i_sbd, ip, location.ea, &req);
544 + gfs_trans_add_bh(ip->i_gl, location.bh);
552 + * gfs_findacl - returns the requested posix acl
554 + * this function does not log the inode. It assumes that a lock is already
558 +gfs_findacl(struct gfs_inode *ip, int access, struct posix_acl **acl_ptr,
559 + struct gfs_ea_location *location)
561 + struct gfs_sbd *sdp = ip->i_sbd;
562 + struct posix_acl *acl;
563 + uint32_t avail_size;
567 + avail_size = sdp->sd_sb.sb_bsize - sizeof(struct gfs_meta_header);
570 + if (!ip->i_di.di_eattr)
573 + error = find_eattr(ip,
574 + (access) ? GFS_POSIX_ACL_ACCESS : GFS_POSIX_ACL_DEFAULT,
575 + (access) ? GFS_POSIX_ACL_ACCESS_LEN : GFS_POSIX_ACL_DEFAULT_LEN,
576 + GFS_EATYPE_SYS, location);
580 + data = gmalloc(GFS_EA_DATA_LEN(location->ea));
583 + if (GFS_EA_IS_UNSTUFFED(location->ea))
584 + error = read_unstuffed(data, ip, sdp, location->ea, avail_size,
587 + gfs_ea_memcpy(data, GFS_EA_DATA(location->ea),
588 + GFS_EA_DATA_LEN(location->ea));
592 + acl = posix_acl_from_xattr(data, GFS_EA_DATA_LEN(location->ea));
594 + error = PTR_ERR(acl);
601 + brelse(location->bh);
607 +gfs_getacl(struct inode *inode, int access, struct posix_acl **acl_ptr)
609 + struct gfs_inode *ip = vn2ip(inode);
610 + struct gfs_sbd *sdp = ip->i_sbd;
611 + struct gfs_eaget_io req;
612 + struct posix_acl *acl;
619 + if (!sdp->sd_args.ar_posixacls)
622 + req.eg_name = (access) ? GFS_POSIX_ACL_ACCESS : GFS_POSIX_ACL_DEFAULT;
623 + req.eg_name_len = (access) ? GFS_POSIX_ACL_ACCESS_LEN : GFS_POSIX_ACL_DEFAULT_LEN;
624 + req.eg_type = GFS_EATYPE_SYS;
626 + req.eg_data = NULL;
627 + req.eg_data_len = 0;
629 + error = gfs_ea_read_permission(&req, ip);
633 + if (!ip->i_di.di_eattr)
636 + size = get_ea(sdp, ip, &req, gfs_ea_memcpy);
638 + if (size != -ENODATA)
643 + data = gmalloc(size);
645 + req.eg_data = data;
646 + req.eg_data_len = size;
648 + size = get_ea(sdp, ip, &req, gfs_ea_memcpy);
654 + acl = posix_acl_from_xattr(data, size);
656 + error = PTR_ERR(acl);
667 +gfs_setup_new_acl(struct gfs_inode *dip,
668 + unsigned int type, unsigned int *mode,
669 + struct posix_acl **acl_ptr)
671 + struct gfs_ea_location location;
672 + struct posix_acl *acl = NULL;
673 + mode_t access_mode = *mode;
676 + if (type == GFS_FILE_LNK)
679 + error = gfs_findacl(dip, FALSE, &acl, &location);
683 + (*mode) &= ~current->fs->umask;
686 + brelse(location.bh);
688 + if (type == GFS_FILE_DIR) {
693 + error = posix_acl_create_masq(acl, &access_mode);
694 + *mode = access_mode;
700 + posix_acl_release(acl);
706 + * gfs_create_default_acl - initializes the default acl
708 + * NOTE: gfs_init_access_acl must be called first
711 +gfs_create_default_acl(struct gfs_inode *dip, struct gfs_inode *ip, void *data,
714 + struct gfs_easet_io req;
715 + struct gfs_ea_location avail;
718 + memset(&avail, 0, sizeof(struct gfs_ea_location));
720 + req.es_data = data;
721 + req.es_name = GFS_POSIX_ACL_DEFAULT;
722 + req.es_data_len = size;
723 + req.es_name_len = GFS_POSIX_ACL_DEFAULT_LEN;
724 + req.es_cmd = GFS_EACMD_CREATE;
725 + req.es_type = GFS_EATYPE_SYS;
727 + error = find_sys_space(dip, ip, size, &avail);
731 + avail.ea = prep_ea(avail.ea);
733 + error = write_ea(ip->i_sbd, dip, ip, avail.ea, &req);
735 + gfs_trans_add_bh(ip->i_gl, avail.bh); /* Huh!?! */
743 + * gfs_init_access_acl - initializes the access acl
745 + * NOTE: This must be the first extended attribute that is created for
749 +gfs_init_access_acl(struct gfs_inode *dip, struct gfs_inode *ip, void *data,
752 + struct gfs_easet_io req;
754 + req.es_data = data;
755 + req.es_name = GFS_POSIX_ACL_ACCESS;
756 + req.es_data_len = size;
757 + req.es_name_len = GFS_POSIX_ACL_ACCESS_LEN;
758 + req.es_cmd = GFS_EACMD_CREATE;
759 + req.es_type = GFS_EATYPE_SYS;
761 + return init_new_inode_eattr(dip, ip, &req);
765 +gfs_init_acl(struct gfs_inode *dip, struct gfs_inode *ip, unsigned int type,
766 + struct posix_acl *acl)
768 + struct buffer_head *dibh;
773 + size = posix_acl_to_xattr(acl, NULL, 0);
775 + data = gmalloc(size);
777 + posix_acl_to_xattr(acl, data, size);
779 + error = gfs_get_inode_buffer(ip, &dibh);
783 + error = gfs_init_access_acl(dip, ip, data, size);
787 + if (type == GFS_FILE_DIR) {
788 + error = gfs_create_default_acl(dip, ip, data, size);
793 + gfs_trans_add_bh(ip->i_gl, dibh);
794 + gfs_dinode_out(&ip->i_di, dibh->b_data);
801 + posix_acl_release(acl);
807 +gfs_acl_setattr(struct inode *inode)
809 + struct gfs_inode *ip = vn2ip(inode);
810 + struct posix_acl *acl;
811 + struct gfs_ea_location location;
814 + if (S_ISLNK(inode->i_mode))
817 + memset(&location, 0, sizeof(struct gfs_ea_location));
819 + error = gfs_findacl(ip, TRUE, &acl, &location); /* Check error here? */
823 + error = posix_acl_chmod_masq(acl, inode->i_mode);
825 + error = gfs_replace_acl(inode, acl, TRUE, location);
827 + posix_acl_release(acl);
828 + brelse(location.bh);
832 diff -urN linux-orig/fs/gfs/acl.h linux-patched/fs/gfs/acl.h
833 --- linux-orig/fs/gfs/acl.h 1969-12-31 18:00:00.000000000 -0600
834 +++ linux-patched/fs/gfs/acl.h 2004-06-30 13:27:49.332713682 -0500
836 +/******************************************************************************
837 +*******************************************************************************
839 +** Copyright (C) Sistina Software, Inc. 2003 All rights reserved.
840 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
842 +** This copyrighted material is made available to anyone wishing to use,
843 +** modify, copy, or redistribute it subject to the terms and conditions
844 +** of the GNU General Public License v.2.
846 +*******************************************************************************
847 +******************************************************************************/
849 +#ifndef __ACL_DOT_H__
850 +#define __ACL_DOT_H__
852 +int gfs_setup_new_acl(struct gfs_inode *dip,
853 + unsigned int type, unsigned int *mode,
854 + struct posix_acl **acl_ptr);
855 +int gfs_getacl(struct inode *inode, int access, struct posix_acl **acl_ptr);
856 +int gfs_init_acl(struct gfs_inode *dip, struct gfs_inode *ip, unsigned int type,
857 + struct posix_acl *acl);
858 +int gfs_acl_setattr(struct inode *inode);
859 +int gfs_validate_acl(struct gfs_inode *ip, const char *value, int size,
861 +void gfs_acl_set_mode(struct gfs_inode *ip, struct posix_acl *acl);
863 +#endif /* __ACL_DOT_H__ */
864 diff -urN linux-orig/fs/gfs/bits.c linux-patched/fs/gfs/bits.c
865 --- linux-orig/fs/gfs/bits.c 1969-12-31 18:00:00.000000000 -0600
866 +++ linux-patched/fs/gfs/bits.c 2004-06-30 13:27:49.332713682 -0500
868 +/******************************************************************************
869 +*******************************************************************************
871 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
872 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
874 +** This copyrighted material is made available to anyone wishing to use,
875 +** modify, copy, or redistribute it subject to the terms and conditions
876 +** of the GNU General Public License v.2.
878 +*******************************************************************************
879 +******************************************************************************/
882 + * These routines are used by the resource group routines (rgrp.c)
883 + * to keep track of block allocation. Each block is represented by two
884 + * bits. One bit indicates whether or not the block is used. (1=used,
885 + * 0=free) The other bit indicates whether or not the block contains a
886 + * dinode or not. (1=dinode, 0=data block) So, each byte represents
887 + * GFS_NBBY (i.e. 4) blocks.
890 +#include <linux/sched.h>
891 +#include <linux/slab.h>
892 +#include <linux/smp_lock.h>
893 +#include <linux/spinlock.h>
894 +#include <asm/semaphore.h>
895 +#include <linux/completion.h>
896 +#include <linux/buffer_head.h>
901 +static const char valid_change[16] = {
904 + /* n */ 0, 1, 1, 1,
905 + /* e */ 1, 0, 0, 0,
906 + /* w */ 1, 0, 0, 1,
911 + * gfs_setbit - Set a bit in the bitmaps
912 + * @buffer: the buffer that holds the bitmaps
913 + * @buflen: the length (in bytes) of the buffer
914 + * @block: the block to set
915 + * @new_state: the new state of the block
920 +gfs_setbit(struct gfs_rgrpd *rgd,
921 + unsigned char *buffer, unsigned int buflen,
922 + uint32_t block, unsigned char new_state)
924 + unsigned char *byte, *end, cur_state;
927 + byte = buffer + (block / GFS_NBBY);
928 + bit = (block % GFS_NBBY) * GFS_BIT_SIZE;
929 + end = buffer + buflen;
931 + GFS_ASSERT_RGRPD(byte < end, rgd,);
933 + cur_state = (*byte >> bit) & GFS_BIT_MASK;
934 + GFS_ASSERT_RGRPD(valid_change[new_state * 4 + cur_state], rgd,
935 + printk("cur_state = %u, new_state = %u\n",
936 + cur_state, new_state););
938 + *byte ^= cur_state << bit;
939 + *byte |= new_state << bit;
943 + * gfs_testbit - test a bit in the bitmaps
944 + * @buffer: the buffer that holds the bitmaps
945 + * @buflen: the length (in bytes) of the buffer
946 + * @block: the block to read
951 +gfs_testbit(struct gfs_rgrpd *rgd,
952 + unsigned char *buffer, unsigned int buflen, uint32_t block)
954 + unsigned char *byte, *end, cur_state;
957 + byte = buffer + (block / GFS_NBBY);
958 + bit = (block % GFS_NBBY) * GFS_BIT_SIZE;
959 + end = buffer + buflen;
961 + GFS_ASSERT_RGRPD(byte < end, rgd,);
963 + cur_state = (*byte >> bit) & GFS_BIT_MASK;
969 + * gfs_bitfit - Find a free block in the bitmaps
970 + * @buffer: the buffer that holds the bitmaps
971 + * @buflen: the length (in bytes) of the buffer
972 + * @goal: the block to try to allocate
973 + * @old_state: the state of the block we're looking for
975 + * Returns: the block number that was allocated
979 +gfs_bitfit(struct gfs_rgrpd *rgd,
980 + unsigned char *buffer, unsigned int buflen,
981 + uint32_t goal, unsigned char old_state)
983 + unsigned char *byte, *end, alloc;
984 + uint32_t blk = goal;
987 + byte = buffer + (goal / GFS_NBBY);
988 + bit = (goal % GFS_NBBY) * GFS_BIT_SIZE;
989 + end = buffer + buflen;
990 + alloc = (old_state & 1) ? 0 : 0x55;
992 + while (byte < end) {
993 + if ((*byte & 0x55) == alloc) {
994 + blk += (8 - bit) >> 1;
1002 + if (((*byte >> bit) & GFS_BIT_MASK) == old_state)
1005 + bit += GFS_BIT_SIZE;
1018 + * gfs_bitcount - count the number of bits in a certain state
1019 + * @buffer: the buffer that holds the bitmaps
1020 + * @buflen: the length (in bytes) of the buffer
1021 + * @state: the state of the block we're looking for
1023 + * Returns: The number of bits
1027 +gfs_bitcount(struct gfs_rgrpd *rgd,
1028 + unsigned char *buffer, unsigned int buflen,
1029 + unsigned char state)
1031 + unsigned char *byte = buffer;
1032 + unsigned char *end = buffer + buflen;
1033 + unsigned char state1 = state << 2;
1034 + unsigned char state2 = state << 4;
1035 + unsigned char state3 = state << 6;
1036 + uint32_t count = 0;
1038 + for (; byte < end; byte++) {
1039 + if (((*byte) & 0x03) == state)
1041 + if (((*byte) & 0x0C) == state1)
1043 + if (((*byte) & 0x30) == state2)
1045 + if (((*byte) & 0xC0) == state3)
1051 diff -urN linux-orig/fs/gfs/bits.h linux-patched/fs/gfs/bits.h
1052 --- linux-orig/fs/gfs/bits.h 1969-12-31 18:00:00.000000000 -0600
1053 +++ linux-patched/fs/gfs/bits.h 2004-06-30 13:27:49.332713682 -0500
1055 +/******************************************************************************
1056 +*******************************************************************************
1058 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
1059 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
1061 +** This copyrighted material is made available to anyone wishing to use,
1062 +** modify, copy, or redistribute it subject to the terms and conditions
1063 +** of the GNU General Public License v.2.
1065 +*******************************************************************************
1066 +******************************************************************************/
1068 +#ifndef __BITS_DOT_H__
1069 +#define __BITS_DOT_H__
1071 +#define BFITNOENT (0xFFFFFFFF)
1073 +void gfs_setbit(struct gfs_rgrpd *rgd,
1074 + unsigned char *buffer, unsigned int buflen,
1075 + uint32_t block, unsigned char new_state);
1076 +unsigned char gfs_testbit(struct gfs_rgrpd *rgd,
1077 + unsigned char *buffer, unsigned int buflen,
1079 +uint32_t gfs_bitfit(struct gfs_rgrpd *rgd,
1080 + unsigned char *buffer, unsigned int buflen,
1081 + uint32_t goal, unsigned char old_state);
1082 +uint32_t gfs_bitcount(struct gfs_rgrpd *rgd,
1083 + unsigned char *buffer, unsigned int buflen,
1084 + unsigned char state);
1086 +#endif /* __BITS_DOT_H__ */
1087 diff -urN linux-orig/fs/gfs/bmap.c linux-patched/fs/gfs/bmap.c
1088 --- linux-orig/fs/gfs/bmap.c 1969-12-31 18:00:00.000000000 -0600
1089 +++ linux-patched/fs/gfs/bmap.c 2004-06-30 13:27:49.333713450 -0500
1091 +/******************************************************************************
1092 +*******************************************************************************
1094 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
1095 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
1097 +** This copyrighted material is made available to anyone wishing to use,
1098 +** modify, copy, or redistribute it subject to the terms and conditions
1099 +** of the GNU General Public License v.2.
1101 +*******************************************************************************
1102 +******************************************************************************/
1104 +#include <linux/sched.h>
1105 +#include <linux/slab.h>
1106 +#include <linux/smp_lock.h>
1107 +#include <linux/spinlock.h>
1108 +#include <asm/semaphore.h>
1109 +#include <linux/completion.h>
1110 +#include <linux/buffer_head.h>
1123 + unsigned int mp_list[GFS_MAX_META_HEIGHT];
1126 +typedef int (*block_call_t) (struct gfs_inode *ip, struct buffer_head *dibh,
1127 + struct buffer_head *bh, uint64_t *top,
1128 + uint64_t *bottom, unsigned int height,
1131 +struct strip_mine {
1133 + unsigned int sm_height;
1137 + * gfs_unstuffer_sync - unstuff a dinode synchronously
1139 + * @dibh: the dinode buffer
1140 + * @block: the block number that was allocated
1141 + * @private: not used
1143 + * Returns: 0 on success, -EXXX on failure
1147 +gfs_unstuffer_sync(struct gfs_inode *ip, struct buffer_head *dibh,
1148 + uint64_t block, void *private)
1150 + struct gfs_sbd *sdp = ip->i_sbd;
1151 + struct buffer_head *bh;
1154 + error = gfs_get_data_buffer(ip, block, TRUE, &bh);
1158 + gfs_buffer_copy_tail(bh, 0, dibh, sizeof(struct gfs_dinode));
1160 + error = gfs_dwrite(sdp, bh, DIO_DIRTY | DIO_START | DIO_WAIT);
1168 + * gfs_unstuffer_async - unstuff a dinode asynchronously
1170 + * @dibh: the dinode buffer
1171 + * @block: the block number that was allocated
1172 + * @private: not used
1174 + * Returns: 0 on success, -EXXX on failure
1178 +gfs_unstuffer_async(struct gfs_inode *ip, struct buffer_head *dibh,
1179 + uint64_t block, void *private)
1181 + struct gfs_sbd *sdp = ip->i_sbd;
1182 + struct buffer_head *bh;
1185 + error = gfs_get_data_buffer(ip, block, TRUE, &bh);
1189 + gfs_buffer_copy_tail(bh, 0, dibh, sizeof(struct gfs_dinode));
1191 + error = gfs_dwrite(sdp, bh, DIO_DIRTY);
1199 + * gfs_unstuff_dinode - Unstuff a dinode when the data has grown too big
1200 + * @ip: The GFS inode to unstuff
1201 + * @unstuffer: the routine that handles unstuffing a non-zero length file
1202 + * @private: private data for the unstuffer
1204 + * This routine unstuffs a dinode and returns it to a "normal" state such
1205 + * that the height can be grown in the traditional way.
1207 + * Returns: 0 on success, -EXXXX on failure
1211 +gfs_unstuff_dinode(struct gfs_inode *ip, gfs_unstuffer_t unstuffer,
1214 + struct buffer_head *bh, *dibh;
1215 + uint64_t block = 0;
1216 + int journaled = gfs_is_jdata(ip);
1219 + GFS_ASSERT_INODE(gfs_is_stuffed(ip), ip,);
1221 + error = gfs_get_inode_buffer(ip, &dibh);
1225 + if (ip->i_di.di_size) {
1226 + /* Get a free block, fill it with the stuffed data,
1227 + and write it out to disk */
1230 + error = gfs_metaalloc(ip, &block);
1234 + error = gfs_get_data_buffer(ip, block, TRUE, &bh);
1238 + gfs_buffer_copy_tail(bh, sizeof(struct gfs_meta_header),
1239 + dibh, sizeof(struct gfs_dinode));
1243 + gfs_blkalloc(ip, &block);
1245 + error = unstuffer(ip, dibh, block, private);
1251 + /* Set up the pointer to the new block */
1253 + gfs_trans_add_bh(ip->i_gl, dibh);
1255 + gfs_buffer_clear_tail(dibh, sizeof(struct gfs_dinode));
1257 + if (ip->i_di.di_size) {
1258 + *(uint64_t *)(dibh->b_data + sizeof(struct gfs_dinode)) = cpu_to_gfs64(block);
1259 + ip->i_di.di_blocks++;
1262 + ip->i_di.di_height = 1;
1264 + gfs_dinode_out(&ip->i_di, dibh->b_data);
1276 + * calc_tree_height - Calculate the height of a metadata tree
1277 + * @ip: The GFS inode
1278 + * @size: The proposed size of the file
1280 + * Work out how tall a metadata tree needs to be in order to accommodate a
1281 + * file of a particular size. If size is less than the current size of
1282 + * the inode, then the current size of the inode is used instead of the
1285 + * Returns: the height the tree should be
1288 +static unsigned int
1289 +calc_tree_height(struct gfs_inode *ip, uint64_t size)
1291 + struct gfs_sbd *sdp = ip->i_sbd;
1293 + unsigned int max, height;
1295 + if (ip->i_di.di_size > size)
1296 + size = ip->i_di.di_size;
1298 + if (gfs_is_jdata(ip)) {
1299 + arr = sdp->sd_jheightsize;
1300 + max = sdp->sd_max_jheight;
1302 + arr = sdp->sd_heightsize;
1303 + max = sdp->sd_max_height;
1306 + for (height = 0; height < max; height++)
1307 + if (arr[height] >= size)
1314 + * build_height - Build a metadata tree of the requested height
1315 + * @ip: The GFS inode
1316 + * @height: The height to build to
1318 + * This routine makes sure that the metadata tree is tall enough to hold
1319 + * "size" bytes of data.
1321 + * Returns: 0 on success, -EXXXX on failure
1325 +build_height(struct gfs_inode *ip, int height)
1327 + struct gfs_sbd *sdp = ip->i_sbd;
1328 + struct buffer_head *bh, *dibh;
1329 + uint64_t block, *bp;
1334 + while (ip->i_di.di_height < height) {
1335 + error = gfs_get_inode_buffer(ip, &dibh);
1339 + new_block = FALSE;
1340 + bp = (uint64_t *)(dibh->b_data + sizeof(struct gfs_dinode));
1341 + for (x = 0; x < sdp->sd_diptrs; x++, bp++)
1348 + /* Get a new block, fill it with the old direct pointers,
1349 + and write it out */
1351 + error = gfs_metaalloc(ip, &block);
1355 + error = gfs_dread(sdp, block, ip->i_gl,
1356 + DIO_NEW | DIO_START | DIO_WAIT, &bh);
1360 + gfs_trans_add_bh(ip->i_gl, bh);
1361 + gfs_metatype_set(sdp, bh, GFS_METATYPE_IN,
1363 + memset(bh->b_data + sizeof(struct gfs_meta_header),
1365 + sizeof(struct gfs_indirect) -
1366 + sizeof(struct gfs_meta_header));
1367 + gfs_buffer_copy_tail(bh, sizeof(struct gfs_indirect),
1368 + dibh, sizeof(struct gfs_dinode));
1373 + /* Set up the new direct pointer and write it out to disk */
1375 + gfs_trans_add_bh(ip->i_gl, dibh);
1377 + gfs_buffer_clear_tail(dibh, sizeof(struct gfs_dinode));
1380 + *(uint64_t *)(dibh->b_data + sizeof(struct gfs_dinode)) = cpu_to_gfs64(block);
1381 + ip->i_di.di_blocks++;
1384 + ip->i_di.di_height++;
1386 + gfs_dinode_out(&ip->i_di, dibh->b_data);
1399 + * find_metapath - Find path through the metadata tree
1400 + * @ip: The inode pointer
1401 + * @mp: The metapath to return the result in
1402 + * @block: The disk block to look up
1404 + * This routine returns a struct metapath structure that defines a path through
1405 + * the metadata of inode "ip" to get to block "block".
1408 + * Given: "ip" is a height 3 file, "offset" is 101342453, and this is a
1409 + * filesystem with a blocksize of 4096.
1411 + * find_metapath() would return a struct metapath structure set to:
1412 + * mp_offset = 101342453, mp_height = 3, mp_list[0] = 0, mp_list[1] = 48,
1413 + * and mp_list[2] = 165.
1415 + * That means that in order to get to the block containing the byte at
1416 + * offset 101342453, we would load the indirect block pointed to by pointer
1417 + * 0 in the dinode. We would then load the indirect block pointed to by
1418 + * pointer 48 in that indirect block. We would then load the data block
1419 + * pointed to by pointer 165 in that indirect block.
1421 + * ----------------------------------------
1424 + * | |0 1 2 3 4 5 9|
1426 + * ----------------------------------------
1430 + * ----------------------------------------
1431 + * | Indirect Block |
1433 + * | 4 4 4 4 4 5 5 1|
1434 + * |0 5 6 7 8 9 0 1 2|
1435 + * ----------------------------------------
1439 + * ----------------------------------------
1440 + * | Indirect Block |
1444 + * ----------------------------------------
1448 + * ----------------------------------------
1449 + * | Data block containing offset |
1453 + * ----------------------------------------
1457 +static struct metapath *
1458 +find_metapath(struct gfs_inode *ip, uint64_t block)
1460 + struct gfs_sbd *sdp = ip->i_sbd;
1461 + struct metapath *mp;
1462 + uint64_t b = block;
1465 + mp = gmalloc(sizeof(struct metapath));
1466 + memset(mp, 0, sizeof(struct metapath));
1468 + for (i = ip->i_di.di_height; i--;)
1469 + mp->mp_list[i] = do_div(b, sdp->sd_inptrs);
1475 + * metapointer - Return pointer to start of metadata in a buffer
1477 + * @height: The metadata height (0 = dinode)
1478 + * @mp: The metapath
1480 + * Return a pointer to the block number of the next height of the metadata
1481 + * tree given a buffer containing the pointer to the current height of the
1485 +static __inline__ uint64_t *
1486 +metapointer(struct buffer_head *bh, unsigned int height, struct metapath *mp)
1488 + unsigned int head_size = (height > 0) ?
1489 + sizeof(struct gfs_indirect) : sizeof(struct gfs_dinode);
1491 + return ((uint64_t *)(bh->b_data + head_size)) + mp->mp_list[height];
1495 + * get_metablock - Get the next metadata block in metadata tree
1496 + * @ip: The GFS inode
1497 + * @bh: Buffer containing the pointers to metadata blocks
1498 + * @height: The height of the tree (0 = dinode)
1499 + * @mp: The metapath
1500 + * @create: Non-zero if we may create a new metadata block
1501 + * @new: Used to indicate if we did create a new metadata block
1502 + * @block: the returned disk block number
1504 + * Given a metatree, complete to a particular height, checks to see if the next
1505 + * height of the tree exists. If not the next height of the tree is created.
1506 + * The block number of the next height of the metadata tree is returned.
1508 + * Returns: 0 on success, -EXXX on failure
1512 +get_metablock(struct gfs_inode *ip,
1513 + struct buffer_head *bh, unsigned int height, struct metapath *mp,
1514 + int create, int *new, uint64_t *block)
1516 + uint64_t *ptr = metapointer(bh, height, mp);
1520 + *block = gfs64_to_cpu(*ptr);
1529 + error = gfs_metaalloc(ip, block);
1533 + gfs_trans_add_bh(ip->i_gl, bh);
1535 + *ptr = cpu_to_gfs64(*block);
1536 + ip->i_di.di_blocks++;
1544 + * get_datablock - Get datablock number from metadata block
1545 + * @ip: The GFS inode
1546 + * @bh: The buffer containing pointers to datablocks
1547 + * @mp: The metapath
1548 + * @create: Non-zero if we may create a new data block
1549 + * @new: Used to indicate if we created a new data block
1550 + * @block: the returned disk block number
1552 + * Given a fully built metadata tree, checks to see if a particular data
1553 + * block exists. It is created if it does not exist and the block number
1554 + * on disk is returned.
1556 + * Returns: 0 on success, -EXXX on failure
1560 +get_datablock(struct gfs_inode *ip,
1561 + struct buffer_head *bh, struct metapath *mp,
1562 + int create, int *new, uint64_t *block)
1564 + uint64_t *ptr = metapointer(bh, ip->i_di.di_height - 1, mp);
1567 + *block = gfs64_to_cpu(*ptr);
1576 + if (gfs_is_jdata(ip)) {
1578 + error = gfs_metaalloc(ip, block);
1582 + gfs_blkalloc(ip, block);
1584 + gfs_trans_add_bh(ip->i_gl, bh);
1586 + *ptr = cpu_to_gfs64(*block);
1587 + ip->i_di.di_blocks++;
1595 + * gfs_block_map - Map a block from an inode to a disk block
1596 + * @ip: The GFS inode
1597 + * @lblock: The logical block number
1598 + * @new: Value/Result argument (1 = may create/did create new blocks)
1599 + * @dblock: the disk block number of the start of an extent
1600 + * @extlen: the size of the extent
1602 + * Find the block number on the current device which corresponds to an
1603 + * inode's block. If the block had to be created, "new" will be set.
1605 + * Returns: 0 on success, -EXXX on failure
1609 +gfs_block_map(struct gfs_inode *ip,
1610 + uint64_t lblock, int *new,
1611 + uint64_t *dblock, uint32_t *extlen)
1613 + struct gfs_sbd *sdp = ip->i_sbd;
1614 + struct buffer_head *bh;
1615 + struct metapath *mp;
1616 + int create = *new;
1617 + unsigned int bsize;
1618 + unsigned int height;
1619 + unsigned int end_of_metadata;
1628 + if (gfs_is_stuffed(ip)) {
1630 + *dblock = ip->i_num.no_addr;
1637 + bsize = (gfs_is_jdata(ip)) ? sdp->sd_jbsize : sdp->sd_sb.sb_bsize;
1639 + height = calc_tree_height(ip, (lblock + 1) * bsize);
1640 + if (ip->i_di.di_height < height) {
1644 + error = build_height(ip, height);
1649 + mp = find_metapath(ip, lblock);
1650 + end_of_metadata = ip->i_di.di_height - 1;
1652 + error = gfs_get_inode_buffer(ip, &bh);
1656 + for (x = 0; x < end_of_metadata; x++) {
1657 + error = get_metablock(ip, bh, x, mp, create, new, dblock);
1659 + if (error || !*dblock)
1662 + error = gfs_get_meta_buffer(ip, x + 1, *dblock, *new, &bh);
1667 + error = get_datablock(ip, bh, mp, create, new, dblock);
1673 + if (extlen && *dblock) {
1677 + uint64_t tmp_dblock;
1679 + unsigned int nptrs;
1681 + nptrs = (end_of_metadata) ? sdp->sd_inptrs : sdp->sd_diptrs;
1683 + while (++mp->mp_list[end_of_metadata] < nptrs) {
1684 + get_datablock(ip, bh, mp,
1688 + if (*dblock + *extlen != tmp_dblock)
1699 + error = gfs_get_inode_buffer(ip, &bh);
1701 + gfs_trans_add_bh(ip->i_gl, bh);
1702 + gfs_dinode_out(&ip->i_di, bh->b_data);
1714 + * do_grow - Make a file look bigger than it is
1716 + * @size: the size to set the file to
1718 + * Called with an exclusive lock on @ip.
1720 + * Returns: 0 on success, -EXXX on failure
1724 +do_grow(struct gfs_inode *ip, uint64_t size)
1726 + struct gfs_sbd *sdp = ip->i_sbd;
1727 + struct gfs_alloc *al;
1728 + struct buffer_head *dibh;
1730 + int journaled = gfs_is_jdata(ip);
1733 + al = gfs_alloc_get(ip);
1735 + error = gfs_quota_lock_m(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
1739 + error = gfs_quota_check(ip, ip->i_di.di_uid, ip->i_di.di_gid);
1741 + goto fail_gunlock_q;
1744 + al->al_requested_meta = sdp->sd_max_height + 1;
1746 + al->al_requested_meta = sdp->sd_max_height;
1747 + al->al_requested_data = 1;
1750 + error = gfs_inplace_reserve(ip);
1752 + goto fail_gunlock_q;
1754 + /* Trans may require:
1755 + Full extension of the metadata tree, block allocation,
1756 + a dinode modification, and a quota change */
1758 + error = gfs_trans_begin(sdp,
1759 + sdp->sd_max_height + al->al_rgd->rd_ri.ri_length +
1765 + if (size > sdp->sd_sb.sb_bsize - sizeof(struct gfs_dinode)) {
1766 + if (gfs_is_stuffed(ip)) {
1767 + error = gfs_unstuff_dinode(ip, gfs_unstuffer_sync, NULL);
1769 + goto fail_end_trans;
1772 + h = calc_tree_height(ip, size);
1773 + if (ip->i_di.di_height < h) {
1774 + error = build_height(ip, h);
1776 + goto fail_end_trans;
1780 + ip->i_di.di_size = size;
1781 + ip->i_di.di_mtime = ip->i_di.di_ctime = get_seconds();
1783 + error = gfs_get_inode_buffer(ip, &dibh);
1785 + goto fail_end_trans;
1787 + gfs_trans_add_bh(ip->i_gl, dibh);
1788 + gfs_dinode_out(&ip->i_di, dibh->b_data);
1791 + gfs_trans_end(sdp);
1793 + gfs_inplace_release(ip);
1794 + gfs_quota_unlock_m(ip);
1795 + gfs_alloc_put(ip);
1800 + gfs_trans_end(sdp);
1803 + gfs_inplace_release(ip);
1806 + gfs_quota_unlock_m(ip);
1809 + gfs_alloc_put(ip);
1815 + * recursive_scan - recursively scan through the end of a file
1817 + * @dibh: the dinode buffer
1818 + * @mp: the path through the metadata to the point to start
1819 + * @height: the height the recursion is at
1820 + * @block: the indirect block to look at
1821 + * @first: TRUE if this is the first block
1822 + * @bc: the call to make for each piece of metadata
1823 + * @data: data opaque to this function to pass to @bc
1825 + * When this is first called @height and @block should be zero and
1826 + * @first should be TRUE.
1828 + * Returns: 0 on success, -EXXX on failure
1832 +recursive_scan(struct gfs_inode *ip, struct buffer_head *dibh,
1833 + struct metapath *mp, unsigned int height, uint64_t block,
1834 + int first, block_call_t bc, void *data)
1836 + struct gfs_sbd *sdp = ip->i_sbd;
1837 + struct buffer_head *bh = NULL;
1838 + uint64_t *top, *bottom;
1843 + error = gfs_get_inode_buffer(ip, &bh);
1848 + top = (uint64_t *)(bh->b_data + sizeof(struct gfs_dinode)) +
1850 + bottom = (uint64_t *)(bh->b_data + sizeof(struct gfs_dinode)) +
1853 + error = gfs_get_meta_buffer(ip, height, block, FALSE, &bh);
1857 + top = (uint64_t *)(bh->b_data + sizeof(struct gfs_indirect)) +
1858 + ((first) ? mp->mp_list[height] : 0);
1859 + bottom = (uint64_t *)(bh->b_data + sizeof(struct gfs_indirect)) +
1863 + error = bc(ip, dibh, bh, top, bottom, height, data);
1867 + if (height < ip->i_di.di_height - 1)
1868 + for (; top < bottom; top++, first = FALSE) {
1872 + bn = gfs64_to_cpu(*top);
1874 + error = recursive_scan(ip, dibh, mp,
1875 + height + 1, bn, first,
1893 + * do_strip - Look for a particular layer of the file and strip it off
1895 + * @dibh: the dinode buffer
1896 + * @bh: A buffer of pointers
1897 + * @top: The first pointer in the buffer
1898 + * @bottom: One more than the last pointer
1899 + * @height: the height this buffer is at
1900 + * @data: a pointer to a struct strip_mine
1902 + * Returns: 0 on success, -EXXX on failure
1906 +do_strip(struct gfs_inode *ip, struct buffer_head *dibh,
1907 + struct buffer_head *bh, uint64_t *top, uint64_t *bottom,
1908 + unsigned int height, void *data)
1910 + struct strip_mine *sm = (struct strip_mine *)data;
1911 + struct gfs_sbd *sdp = ip->i_sbd;
1912 + struct gfs_holder ri_gh;
1913 + struct gfs_rgrp_list rlist;
1914 + uint64_t bn, bstart;
1917 + unsigned int rg_blocks = 0;
1923 + sm->sm_first = FALSE;
1925 + if (height != sm->sm_height)
1928 + if (sm->sm_first) {
1930 + sm->sm_first = FALSE;
1933 + metadata = (height != ip->i_di.di_height - 1) || gfs_is_jdata(ip);
1935 + error = gfs_rindex_hold(sdp, &ri_gh);
1939 + memset(&rlist, 0, sizeof(struct gfs_rgrp_list));
1943 + for (p = top; p < bottom; p++) {
1947 + bn = gfs64_to_cpu(*p);
1949 + if (bstart + blen == bn)
1953 + gfs_rlist_add(sdp, &rlist, bstart);
1961 + gfs_rlist_add(sdp, &rlist, bstart);
1963 + goto out; /* Nothing to do */
1965 + gfs_rlist_alloc(&rlist, LM_ST_EXCLUSIVE, 0);
1967 + error = gfs_glock_nq_m(rlist.rl_rgrps, rlist.rl_ghs);
1971 + for (x = 0; x < rlist.rl_rgrps; x++) {
1972 + struct gfs_rgrpd *rgd;
1973 + rgd = gl2rgd(rlist.rl_ghs[x].gh_gl);
1974 + rg_blocks += rgd->rd_ri.ri_length;
1977 + /* Trans may require:
1978 + All the bitmaps that were reserved.
1979 + One block for the dinode.
1980 + One block for the indirect block being cleared.
1981 + One block for a quota change. */
1983 + error = gfs_trans_begin(sdp, rg_blocks + 2, 1);
1985 + goto fail_rg_gunlock;
1987 + gfs_trans_add_bh(ip->i_gl, dibh);
1988 + gfs_trans_add_bh(ip->i_gl, bh);
1993 + for (p = top; p < bottom; p++) {
1997 + bn = gfs64_to_cpu(*p);
1999 + if (bstart + blen == bn)
2004 + gfs_metafree(ip, bstart, blen);
2006 + gfs_blkfree(ip, bstart, blen);
2014 + ip->i_di.di_blocks--;
2019 + gfs_metafree(ip, bstart, blen);
2021 + gfs_blkfree(ip, bstart, blen);
2024 + ip->i_di.di_mtime = ip->i_di.di_ctime = get_seconds();
2026 + gfs_dinode_out(&ip->i_di, dibh->b_data);
2028 + gfs_trans_end(sdp);
2030 + gfs_glock_dq_m(rlist.rl_rgrps, rlist.rl_ghs);
2031 + gfs_rlist_free(&rlist);
2034 + gfs_glock_dq_uninit(&ri_gh);
2039 + gfs_glock_dq_m(rlist.rl_rgrps, rlist.rl_ghs);
2042 + gfs_rlist_free(&rlist);
2044 + gfs_glock_dq_uninit(&ri_gh);
2050 + * gfs_truncator_default - truncate a partial data block
2052 + * @size: the size the file should be
2054 + * Returns: 0 on success, -EXXX on failure
2058 +gfs_truncator_default(struct gfs_inode *ip, uint64_t size)
2060 + struct gfs_sbd *sdp = ip->i_sbd;
2061 + struct buffer_head *bh;
2066 + error = gfs_block_map(ip, size >> sdp->sd_sb.sb_bsize_shift, ¬_new,
2073 + error = gfs_get_data_buffer(ip, bn, FALSE, &bh);
2077 + gfs_buffer_clear_tail(bh, size & (sdp->sd_sb.sb_bsize - 1));
2079 + error = gfs_dwrite(sdp, bh, DIO_DIRTY);
2087 + * truncator_journaled - truncate a partial data block
2089 + * @size: the size the file should be
2091 + * Returns: 0 on success, -EXXX on failure
2095 +truncator_journaled(struct gfs_inode *ip, uint64_t size)
2097 + struct gfs_sbd *sdp = ip->i_sbd;
2098 + struct buffer_head *bh;
2099 + uint64_t lbn, dbn;
2105 + off = do_div(lbn, sdp->sd_jbsize);
2107 + error = gfs_block_map(ip, lbn, ¬_new, &dbn, NULL);
2113 + error = gfs_trans_begin(sdp, 1, 0);
2117 + error = gfs_get_data_buffer(ip, dbn, FALSE, &bh);
2119 + gfs_trans_add_bh(ip->i_gl, bh);
2120 + gfs_buffer_clear_tail(bh,
2121 + sizeof(struct gfs_meta_header) +
2126 + gfs_trans_end(sdp);
2132 + * gfs_shrink - make a file smaller
2134 + * @size: the size to make the file
2135 + * @truncator: function to truncate the last partial block
2137 + * Called with an exclusive lock on @ip.
2139 + * Returns: 0 on success, -EXXX on failure
2143 +gfs_shrink(struct gfs_inode *ip, uint64_t size, gfs_truncator_t truncator)
2145 + struct gfs_sbd *sdp = ip->i_sbd;
2146 + struct gfs_holder ri_gh;
2147 + struct gfs_rgrpd *rgd;
2148 + struct buffer_head *dibh;
2150 + unsigned int height;
2151 + int journaled = gfs_is_jdata(ip);
2156 + else if (journaled) {
2158 + do_div(block, sdp->sd_jbsize);
2161 + block = (size - 1) >> sdp->sd_sb.sb_bsize_shift;
2163 + /* Get rid of all the data/metadata blocks */
2165 + height = ip->i_di.di_height;
2167 + struct metapath *mp = find_metapath(ip, block);
2168 + gfs_alloc_get(ip);
2170 + error = gfs_quota_hold_m(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
2172 + gfs_alloc_put(ip);
2177 + while (height--) {
2178 + struct strip_mine sm;
2180 + sm.sm_first = (size) ? TRUE : FALSE;
2181 + sm.sm_height = height;
2183 + error = recursive_scan(ip, NULL, mp, 0, 0, TRUE,
2186 + gfs_quota_unhold_m(ip);
2187 + gfs_alloc_put(ip);
2193 + gfs_quota_unhold_m(ip);
2194 + gfs_alloc_put(ip);
2198 + /* If we truncated in the middle of a block, zero out the leftovers. */
2200 + if (gfs_is_stuffed(ip)) {
2202 + } else if (journaled) {
2203 + if (do_mod(size, sdp->sd_jbsize)) {
2204 + error = truncator_journaled(ip, size);
2208 + } else if (size & (uint64_t)(sdp->sd_sb.sb_bsize - 1)) {
2209 + error = truncator(ip, size);
2214 + /* Set the new size (and possibly the height) */
2217 + error = gfs_rindex_hold(sdp, &ri_gh);
2222 + error = gfs_trans_begin(sdp, 1, 0);
2226 + error = gfs_get_inode_buffer(ip, &dibh);
2228 + goto out_end_trans;
2231 + ip->i_di.di_height = 0;
2233 + rgd = gfs_blk2rgrpd(sdp, ip->i_num.no_addr);
2234 + GFS_ASSERT_INODE(rgd, ip,);
2236 + ip->i_di.di_goal_rgrp = rgd->rd_ri.ri_addr;
2237 + ip->i_di.di_goal_dblk =
2238 + ip->i_di.di_goal_mblk =
2239 + ip->i_num.no_addr - rgd->rd_ri.ri_data1;
2242 + ip->i_di.di_size = size;
2243 + ip->i_di.di_mtime = ip->i_di.di_ctime = get_seconds();
2245 + gfs_trans_add_bh(ip->i_gl, dibh);
2247 + if (!ip->i_di.di_height &&
2248 + size < sdp->sd_sb.sb_bsize - sizeof(struct gfs_dinode))
2249 + gfs_buffer_clear_tail(dibh, sizeof(struct gfs_dinode) + size);
2251 + gfs_dinode_out(&ip->i_di, dibh->b_data);
2255 + gfs_trans_end(sdp);
2259 + gfs_glock_dq_uninit(&ri_gh);
2265 + * do_same - truncate to same size (update time stamps)
2272 +do_same(struct gfs_inode *ip)
2274 + struct gfs_sbd *sdp = ip->i_sbd;
2275 + struct buffer_head *dibh;
2278 + error = gfs_trans_begin(sdp, 1, 0);
2282 + error = gfs_get_inode_buffer(ip, &dibh);
2286 + ip->i_di.di_mtime = ip->i_di.di_ctime = get_seconds();
2288 + gfs_trans_add_bh(ip->i_gl, dibh);
2289 + gfs_dinode_out(&ip->i_di, dibh->b_data);
2294 + gfs_trans_end(sdp);
2300 + * gfs_truncatei - make a file a given size
2302 + * @size: the size to make the file
2303 + * @truncator: function to truncate the last partial block
2305 + * The file size can grow, shrink, or stay the same size.
2307 + * Returns: 0 on success, -EXXX on failure
2311 +gfs_truncatei(struct gfs_inode *ip, uint64_t size,
2312 + gfs_truncator_t truncator)
2314 + GFS_ASSERT_INODE(ip->i_di.di_type == GFS_FILE_REG, ip,);
2316 + if (size == ip->i_di.di_size)
2317 + return do_same(ip);
2318 + else if (size > ip->i_di.di_size)
2319 + return do_grow(ip, size);
2321 + return gfs_shrink(ip, size, truncator);
2325 + * gfs_write_calc_reserv - calculate the number of blocks needed to write to a file
2327 + * @len: the number of bytes to be written to the file
2328 + * @data_blocks: returns the number of data blocks required
2329 + * @ind_blocks: returns the number of indirect blocks required
2334 +gfs_write_calc_reserv(struct gfs_inode *ip, unsigned int len,
2335 + unsigned int *data_blocks, unsigned int *ind_blocks)
2337 + struct gfs_sbd *sdp = ip->i_sbd;
2340 + if (gfs_is_jdata(ip)) {
2341 + *data_blocks = DIV_RU(len, sdp->sd_jbsize) + 2;
2342 + *ind_blocks = 3 * (sdp->sd_max_jheight - 1);
2344 + *data_blocks = (len >> sdp->sd_sb.sb_bsize_shift) + 3;
2345 + *ind_blocks = 3 * (sdp->sd_max_height - 1);
2348 + for (tmp = *data_blocks; tmp > sdp->sd_diptrs;) {
2349 + tmp = DIV_RU(tmp, sdp->sd_inptrs);
2350 + *ind_blocks += tmp;
2355 + * gfs_write_alloc_required - figure out if a write is going to require an allocation
2356 + * @ip: the file being written to
2357 + * @offset: the offset to write to
2358 + * @len: the number of bytes being written
2359 + * @alloc_required: the int is set to TRUE if an alloc is required, FALSE otherwise
2361 + * Returns: 0 on success, -EXXX on error
2365 +gfs_write_alloc_required(struct gfs_inode *ip,
2366 + uint64_t offset, unsigned int len,
2367 + int *alloc_required)
2369 + struct gfs_sbd *sdp = ip->i_sbd;
2370 + uint64_t lblock, lblock_stop, dblock;
2372 + int not_new = FALSE;
2375 + *alloc_required = FALSE;
2380 + if (gfs_is_stuffed(ip)) {
2381 + if (offset + len > sdp->sd_sb.sb_bsize - sizeof(struct gfs_dinode))
2382 + *alloc_required = TRUE;
2386 + if (gfs_is_jdata(ip)) {
2387 + unsigned int bsize = sdp->sd_jbsize;
2389 + do_div(lblock, bsize);
2390 + lblock_stop = offset + len + bsize - 1;
2391 + do_div(lblock_stop, bsize);
2393 + unsigned int shift = sdp->sd_sb.sb_bsize_shift;
2394 + lblock = offset >> shift;
2395 + lblock_stop = (offset + len + sdp->sd_sb.sb_bsize - 1) >> shift;
2398 + for (; lblock < lblock_stop; lblock += extlen) {
2399 + error = gfs_block_map(ip, lblock, ¬_new, &dblock, &extlen);
2404 + *alloc_required = TRUE;
2413 + * do_gfm - Copy out the dinode/indirect blocks of a file
2415 + * @dibh: the dinode buffer
2416 + * @bh: the indirect buffer we're looking at
2417 + * @top: the first pointer in the block
2418 + * @bottom: one more than the last pointer in the block
2419 + * @height: the height the block is at
2420 + * @data: a pointer to a struct gfs_user_buffer structure
2422 + * If this is a journaled file, copy out the data too.
2424 + * Returns: 0 on success, -EXXX on failure
2428 +do_gfm(struct gfs_inode *ip, struct buffer_head *dibh,
2429 + struct buffer_head *bh, uint64_t *top, uint64_t *bottom,
2430 + unsigned int height, void *data)
2432 + struct gfs_sbd *sdp = ip->i_sbd;
2433 + struct gfs_user_buffer *ub = (struct gfs_user_buffer *)data;
2434 + struct buffer_head *data_bh;
2438 + error = gfs_add_bh_to_ub(ub, bh);
2442 + if (ip->i_di.di_type != GFS_FILE_DIR ||
2443 + height + 1 != ip->i_di.di_height)
2446 + for (bp = top; bp < bottom; bp++)
2448 + bn = gfs64_to_cpu(*bp);
2450 + error = gfs_dread(sdp, bn, ip->i_gl,
2451 + DIO_START | DIO_WAIT, &data_bh);
2455 + error = gfs_add_bh_to_ub(ub, data_bh);
2467 + * gfs_get_file_meta - return all the metadata for a file
2469 + * @ub: the structure representing the meta
2471 + * Returns: 0 on success, -EXXX on failure
2475 +gfs_get_file_meta(struct gfs_inode *ip, struct gfs_user_buffer *ub)
2477 + struct buffer_head *dibh;
2478 + struct metapath *mp;
2481 + if (gfs_is_stuffed(ip)) {
2482 + error = gfs_get_inode_buffer(ip, &dibh);
2484 + error = gfs_add_bh_to_ub(ub, dibh);
2488 + mp = find_metapath(ip, 0);
2489 + error = recursive_scan(ip, NULL, mp, 0, 0, TRUE, do_gfm, ub);
2495 diff -urN linux-orig/fs/gfs/bmap.h linux-patched/fs/gfs/bmap.h
2496 --- linux-orig/fs/gfs/bmap.h 1969-12-31 18:00:00.000000000 -0600
2497 +++ linux-patched/fs/gfs/bmap.h 2004-06-30 13:27:49.333713450 -0500
2499 +/******************************************************************************
2500 +*******************************************************************************
2502 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
2503 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
2505 +** This copyrighted material is made available to anyone wishing to use,
2506 +** modify, copy, or redistribute it subject to the terms and conditions
2507 +** of the GNU General Public License v.2.
2509 +*******************************************************************************
2510 +******************************************************************************/
2512 +#ifndef __BMAP_DOT_H__
2513 +#define __BMAP_DOT_H__
2515 +typedef int (*gfs_unstuffer_t) (struct gfs_inode * ip,
2516 + struct buffer_head * dibh, uint64_t block,
2519 +int gfs_unstuffer_sync(struct gfs_inode *ip, struct buffer_head *dibh,
2520 + uint64_t block, void *private);
2521 +int gfs_unstuffer_async(struct gfs_inode *ip, struct buffer_head *dibh,
2522 + uint64_t block, void *private);
2524 +int gfs_unstuff_dinode(struct gfs_inode *ip, gfs_unstuffer_t unstuffer,
2527 +int gfs_block_map(struct gfs_inode *ip,
2528 + uint64_t lblock, int *new,
2529 + uint64_t *dblock, uint32_t *extlen);
2531 +typedef int (*gfs_truncator_t) (struct gfs_inode * ip, uint64_t size);
2533 +int gfs_truncator_default(struct gfs_inode *ip, uint64_t size);
2535 +int gfs_shrink(struct gfs_inode *ip, uint64_t size, gfs_truncator_t truncator);
2536 +int gfs_truncatei(struct gfs_inode *ip, uint64_t size,
2537 + gfs_truncator_t truncator);
2539 +void gfs_write_calc_reserv(struct gfs_inode *ip, unsigned int len,
2540 + unsigned int *data_blocks, unsigned int *ind_blocks);
2541 +int gfs_write_alloc_required(struct gfs_inode *ip, uint64_t offset,
2542 + unsigned int len, int *alloc_required);
2544 +int gfs_get_file_meta(struct gfs_inode *ip, struct gfs_user_buffer *ub);
2546 +#endif /* __BMAP_DOT_H__ */
2547 diff -urN linux-orig/fs/gfs/daemon.c linux-patched/fs/gfs/daemon.c
2548 --- linux-orig/fs/gfs/daemon.c 1969-12-31 18:00:00.000000000 -0600
2549 +++ linux-patched/fs/gfs/daemon.c 2004-06-30 13:27:49.333713450 -0500
2551 +/******************************************************************************
2552 +*******************************************************************************
2554 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
2555 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
2557 +** This copyrighted material is made available to anyone wishing to use,
2558 +** modify, copy, or redistribute it subject to the terms and conditions
2559 +** of the GNU General Public License v.2.
2561 +*******************************************************************************
2562 +******************************************************************************/
2564 +#include <linux/sched.h>
2565 +#include <linux/slab.h>
2566 +#include <linux/smp_lock.h>
2567 +#include <linux/spinlock.h>
2568 +#include <asm/semaphore.h>
2569 +#include <linux/completion.h>
2570 +#include <linux/buffer_head.h>
2573 +#include "daemon.h"
2577 +#include "recovery.h"
2579 +#include "unlinked.h"
2582 + * gfs_scand - Writing of cached scan changes into the scan file
2583 + * @sdp: Pointer to GFS superblock
2588 +gfs_scand(void *data)
2590 + struct gfs_sbd *sdp = (struct gfs_sbd *)data;
2592 + daemonize("gfs_scand");
2593 + sdp->sd_scand_process = current;
2594 + set_bit(SDF_SCAND_RUN, &sdp->sd_flags);
2595 + complete(&sdp->sd_thread_completion);
2598 + gfs_scand_internal(sdp);
2600 + if (!test_bit(SDF_SCAND_RUN, &sdp->sd_flags))
2603 + current->state = TASK_INTERRUPTIBLE;
2604 + schedule_timeout(sdp->sd_tune.gt_scand_secs * HZ);
2607 + down(&sdp->sd_thread_lock);
2608 + up(&sdp->sd_thread_lock);
2610 + complete(&sdp->sd_thread_completion);
2616 + * gfs_glockd - Reclaim unused glock structures
2617 + * @sdp: Pointer to GFS superblock
2622 +gfs_glockd(void *data)
2624 + struct gfs_sbd *sdp = (struct gfs_sbd *)data;
2626 + daemonize("gfs_glockd");
2627 + set_bit(SDF_GLOCKD_RUN, &sdp->sd_flags);
2628 + complete(&sdp->sd_thread_completion);
2631 + while (atomic_read(&sdp->sd_reclaim_count))
2632 + gfs_reclaim_glock(sdp);
2634 + if (!test_bit(SDF_GLOCKD_RUN, &sdp->sd_flags))
2638 + DECLARE_WAITQUEUE(__wait_chan, current);
2639 + current->state = TASK_INTERRUPTIBLE;
2640 + add_wait_queue(&sdp->sd_reclaim_wchan, &__wait_chan);
2641 + if (!atomic_read(&sdp->sd_reclaim_count)
2642 + && test_bit(SDF_GLOCKD_RUN, &sdp->sd_flags))
2644 + remove_wait_queue(&sdp->sd_reclaim_wchan, &__wait_chan);
2645 + current->state = TASK_RUNNING;
2649 + complete(&sdp->sd_thread_completion);
2655 + * gfs_recoverd - Recovery of dead machine's journals
2656 + * @sdp: Pointer to GFS superblock
2661 +gfs_recoverd(void *data)
2663 + struct gfs_sbd *sdp = (struct gfs_sbd *)data;
2665 + daemonize("gfs_recoverd");
2666 + sdp->sd_recoverd_process = current;
2667 + set_bit(SDF_RECOVERD_RUN, &sdp->sd_flags);
2668 + complete(&sdp->sd_thread_completion);
2671 + gfs_check_journals(sdp);
2673 + if (!test_bit(SDF_RECOVERD_RUN, &sdp->sd_flags))
2676 + current->state = TASK_INTERRUPTIBLE;
2677 + schedule_timeout(sdp->sd_tune.gt_recoverd_secs * HZ);
2680 + down(&sdp->sd_thread_lock);
2681 + up(&sdp->sd_thread_lock);
2683 + complete(&sdp->sd_thread_completion);
2689 + * gfs_logd - Writing of cached log changes into the log file
2690 + * @sdp: Pointer to GFS superblock
2695 +gfs_logd(void *data)
2697 + struct gfs_sbd *sdp = (struct gfs_sbd *)data;
2698 + struct gfs_holder ji_gh;
2700 + daemonize("gfs_logd");
2701 + sdp->sd_logd_process = current;
2702 + set_bit(SDF_LOGD_RUN, &sdp->sd_flags);
2703 + complete(&sdp->sd_thread_completion);
2706 + gfs_ail_empty(sdp);
2708 + if (time_after_eq(jiffies,
2709 + sdp->sd_jindex_refresh_time +
2710 + sdp->sd_tune.gt_jindex_refresh_secs * HZ)) {
2711 + if (test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags) &&
2712 + !gfs_jindex_hold(sdp, &ji_gh))
2713 + gfs_glock_dq_uninit(&ji_gh);
2714 + sdp->sd_jindex_refresh_time = jiffies;
2717 + if (!test_bit(SDF_LOGD_RUN, &sdp->sd_flags))
2720 + current->state = TASK_INTERRUPTIBLE;
2721 + schedule_timeout(sdp->sd_tune.gt_logd_secs * HZ);
2724 + down(&sdp->sd_thread_lock);
2725 + up(&sdp->sd_thread_lock);
2727 + complete(&sdp->sd_thread_completion);
2733 + * gfs_quotad - Writing of cached quota changes into the quota file
2734 + * @sdp: Pointer to GFS superblock
2739 +gfs_quotad(void *data)
2741 + struct gfs_sbd *sdp = (struct gfs_sbd *)data;
2744 + daemonize("gfs_quotad");
2745 + sdp->sd_quotad_process = current;
2746 + set_bit(SDF_QUOTAD_RUN, &sdp->sd_flags);
2747 + complete(&sdp->sd_thread_completion);
2750 + if (time_after_eq(jiffies,
2751 + sdp->sd_quota_sync_time +
2752 + sdp->sd_tune.gt_quota_quantum * HZ)) {
2753 + error = gfs_quota_sync(sdp);
2754 + if (error && error != -EROFS)
2755 + printk("GFS: fsid=%s: quotad: error = %d\n",
2756 + sdp->sd_fsname, error);
2757 + sdp->sd_quota_sync_time = jiffies;
2760 + gfs_quota_scan(sdp);
2762 + if (!test_bit(SDF_QUOTAD_RUN, &sdp->sd_flags))
2765 + current->state = TASK_INTERRUPTIBLE;
2766 + schedule_timeout(sdp->sd_tune.gt_quotad_secs * HZ);
2769 + down(&sdp->sd_thread_lock);
2770 + up(&sdp->sd_thread_lock);
2772 + complete(&sdp->sd_thread_completion);
2778 + * gfs_inoded - Deallocation of unlinked inodes
2779 + * @sdp: Pointer to GFS superblock
2784 +gfs_inoded(void *data)
2786 + struct gfs_sbd *sdp = (struct gfs_sbd *)data;
2788 + daemonize("gfs_inoded");
2789 + sdp->sd_inoded_process = current;
2790 + set_bit(SDF_INODED_RUN, &sdp->sd_flags);
2791 + complete(&sdp->sd_thread_completion);
2794 + gfs_unlinked_dealloc(sdp);
2796 + if (!test_bit(SDF_INODED_RUN, &sdp->sd_flags))
2799 + current->state = TASK_INTERRUPTIBLE;
2800 + schedule_timeout(sdp->sd_tune.gt_inoded_secs * HZ);
2803 + down(&sdp->sd_thread_lock);
2804 + up(&sdp->sd_thread_lock);
2806 + complete(&sdp->sd_thread_completion);
2810 diff -urN linux-orig/fs/gfs/daemon.h linux-patched/fs/gfs/daemon.h
2811 --- linux-orig/fs/gfs/daemon.h 1969-12-31 18:00:00.000000000 -0600
2812 +++ linux-patched/fs/gfs/daemon.h 2004-06-30 13:27:49.334713218 -0500
2814 +/******************************************************************************
2815 +*******************************************************************************
2817 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
2818 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
2820 +** This copyrighted material is made available to anyone wishing to use,
2821 +** modify, copy, or redistribute it subject to the terms and conditions
2822 +** of the GNU General Public License v.2.
2824 +*******************************************************************************
2825 +******************************************************************************/
2827 +#ifndef __DAEMON_DOT_H__
2828 +#define __DAEMON_DOT_H__
2830 +int gfs_scand(void *data);
2831 +int gfs_glockd(void *data);
2832 +int gfs_recoverd(void *data);
2833 +int gfs_logd(void *data);
2834 +int gfs_quotad(void *data);
2835 +int gfs_inoded(void *data);
2837 +#endif /* __DAEMON_DOT_H__ */
2838 diff -urN linux-orig/fs/gfs/dio.c linux-patched/fs/gfs/dio.c
2839 --- linux-orig/fs/gfs/dio.c 1969-12-31 18:00:00.000000000 -0600
2840 +++ linux-patched/fs/gfs/dio.c 2004-06-30 13:27:49.334713218 -0500
2842 +/******************************************************************************
2843 +*******************************************************************************
2845 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
2846 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
2848 +** This copyrighted material is made available to anyone wishing to use,
2849 +** modify, copy, or redistribute it subject to the terms and conditions
2850 +** of the GNU General Public License v.2.
2852 +*******************************************************************************
2853 +******************************************************************************/
2855 +#include <linux/sched.h>
2856 +#include <linux/slab.h>
2857 +#include <linux/smp_lock.h>
2858 +#include <linux/spinlock.h>
2859 +#include <asm/semaphore.h>
2860 +#include <linux/completion.h>
2861 +#include <linux/buffer_head.h>
2862 +#include <linux/mm.h>
2863 +#include <linux/pagemap.h>
2864 +#include <linux/writeback.h>
2876 +#define buffer_busy(bh) ((bh)->b_state & ((1ul << BH_Dirty) | (1ul << BH_Lock)))
2879 + * aspace_get_block -
2885 + * Returns: 0 on success, -EXXX on failure
2889 +aspace_get_block(struct inode *inode, sector_t lblock,
2890 + struct buffer_head *bh_result, int create)
2892 + struct gfs_sbd *sdp = vfs2sdp(inode->i_sb);
2893 + GFS_ASSERT_SBD(FALSE, sdp,);
2897 + * gfs_aspace_writepage - write an aspace page
2901 + * Returns: 0 on success, -EXXX on failure
2905 +gfs_aspace_writepage(struct page *page, struct writeback_control *wbc)
2907 + return block_write_full_page(page, aspace_get_block, wbc);
2911 + * stuck_releasepage - We're stuck in gfs_releasepage(). Print stuff out.
2912 + * @bh: the buffer we're stuck on
2917 +stuck_releasepage(struct buffer_head *bh)
2919 + struct gfs_sbd *sdp = vfs2sdp(bh->b_page->mapping->host->i_sb);
2920 + struct gfs_bufdata *bd = bh2bd(bh);
2922 + printk("GFS: fsid=%s: stuck in gfs_releasepage()...\n", sdp->sd_fsname);
2923 + printk("GFS: fsid=%s: blkno = %"PRIu64", bh->b_count = %d\n",
2925 + (uint64_t)bh->b_blocknr,
2926 + atomic_read(&bh->b_count));
2927 + printk("GFS: fsid=%s: bh2bd(bh) = %s\n",
2929 + (bd) ? "!NULL" : "NULL");
2932 + struct gfs_glock *gl = bd->bd_gl;
2934 + printk("GFS: fsid=%s: gl = (%u, %"PRIu64")\n",
2936 + gl->gl_name.ln_type,
2937 + gl->gl_name.ln_number);
2939 + printk("GFS: fsid=%s: bd_new_le.le_trans = %s\n",
2941 + (bd->bd_new_le.le_trans) ? "!NULL" : "NULL");
2942 + printk("GFS: fsid=%s: bd_incore_le.le_trans = %s\n",
2944 + (bd->bd_incore_le.le_trans) ? "!NULL" : "NULL");
2945 + printk("GFS: fsid=%s: bd_frozen = %s\n",
2947 + (bd->bd_frozen) ? "!NULL" : "NULL");
2948 + printk("GFS: fsid=%s: bd_pinned = %u\n",
2949 + sdp->sd_fsname, bd->bd_pinned);
2950 + printk("GFS: fsid=%s: bd_ail_tr_list = %s\n",
2952 + (list_empty(&bd->bd_ail_tr_list)) ? "Empty" : "!Empty");
2954 + if (gl->gl_ops == &gfs_inode_glops) {
2955 + struct gfs_inode *ip = gl2ip(gl);
2960 + printk("GFS: fsid=%s: ip = %"PRIu64"/%"PRIu64"\n",
2962 + ip->i_num.no_formal_ino,
2963 + ip->i_num.no_addr);
2964 + printk("GFS: fsid=%s: ip->i_count = %d, ip->i_vnode = %s\n",
2966 + atomic_read(&ip->i_count),
2967 + (ip->i_vnode) ? "!NULL" : "NULL");
2968 + for (x = 0; x < GFS_MAX_META_HEIGHT; x++)
2969 + printk("GFS: fsid=%s: ip->i_cache[%u] = %s\n",
2970 + sdp->sd_fsname, x,
2971 + (ip->i_cache[x]) ? "!NULL" : "NULL");
2978 + * gfs_aspace_releasepage - free the metadata associated with a page
2979 + * @page: the page that's being released
2980 + * @gfp_mask: the memory allocation flags (appears unused in this implementation)
2982 + * Call try_to_free_buffers() if the buffers in this page can be
2989 +gfs_aspace_releasepage(struct page *page, int gfp_mask)
2991 + struct inode *aspace = page->mapping->host;
2992 + struct gfs_sbd *sdp = vfs2sdp(aspace->i_sb);
2993 + struct buffer_head *bh, *head;
2994 + struct gfs_bufdata *bd;
2997 + if (!page_has_buffers(page))
3000 + head = bh = page_buffers(page);
3004 + while (atomic_read(&bh->b_count)) {
3005 + if (atomic_read(&aspace->i_writecount)) {
3006 + if (time_after_eq(jiffies,
3008 + sdp->sd_tune.gt_stall_secs * HZ)) {
3009 + stuck_releasepage(bh);
3022 + GFS_ASSERT_SBD(bd->bd_bh == bh, sdp,);
3023 + GFS_ASSERT_SBD(!bd->bd_new_le.le_trans, sdp,);
3024 + GFS_ASSERT_SBD(!bd->bd_incore_le.le_trans, sdp,);
3025 + GFS_ASSERT_SBD(!bd->bd_frozen, sdp,);
3026 + GFS_ASSERT_SBD(!bd->bd_pinned, sdp,);
3027 + GFS_ASSERT_SBD(list_empty(&bd->bd_ail_tr_list), sdp,);
3028 + kmem_cache_free(gfs_bufdata_cachep, bd);
3029 + atomic_dec(&sdp->sd_bufdata_count);
3033 + bh = bh->b_this_page;
3035 + while (bh != head);
3038 + return try_to_free_buffers(page);
3041 +static struct address_space_operations aspace_aops = {
3042 + .writepage = gfs_aspace_writepage,
3043 + .releasepage = gfs_aspace_releasepage,
3047 + * gfs_aspace_get - Get and initialize a struct inode structure
3048 + * @sdp: the filesystem the aspace is in
3050 + * Right now a struct inode is just a struct inode. Maybe Linux
3051 + * will supply a more lightweight address space construct (that works)
3054 + * Make sure pages/buffers in this aspace aren't in high memory.
3056 + * Returns: the aspace
3060 +gfs_aspace_get(struct gfs_sbd *sdp)
3062 + struct inode *aspace;
3064 + aspace = new_inode(sdp->sd_vfs);
3066 + mapping_set_gfp_mask(aspace->i_mapping, GFP_KERNEL);
3067 + aspace->i_mapping->a_ops = &aspace_aops;
3068 + aspace->i_size = ~0ULL;
3069 + vn2ip(aspace) = NULL;
3070 + insert_inode_hash(aspace);
3077 + * gfs_aspace_put - get rid of an aspace
3083 +gfs_aspace_put(struct inode *aspace)
3085 + remove_inode_hash(aspace);
3090 + * gfs_ail_start_trans - Start I/O on a part of the AIL
3091 + * @sdp: the filesystem
3092 + * @tr: the part of the AIL
3097 +gfs_ail_start_trans(struct gfs_sbd *sdp, struct gfs_trans *tr)
3099 + struct list_head *head, *tmp, *prev;
3100 + struct gfs_bufdata *bd;
3101 + struct buffer_head *bh;
3107 + spin_lock(&sdp->sd_ail_lock);
3109 + for (head = &tr->tr_ail_bufs, tmp = head->prev, prev = tmp->prev;
3111 + tmp = prev, prev = tmp->prev) {
3112 + bd = list_entry(tmp, struct gfs_bufdata, bd_ail_tr_list);
3115 + if (gfs_trylock_buffer(bh))
3118 + if (bd->bd_pinned) {
3119 + gfs_unlock_buffer(bh);
3123 + if (!buffer_busy(bh)) {
3124 + if (!buffer_uptodate(bh))
3125 + gfs_io_error_bh(sdp, bh);
3127 + list_del_init(&bd->bd_ail_tr_list);
3128 + list_del(&bd->bd_ail_gl_list);
3130 + gfs_unlock_buffer(bh);
3135 + if (buffer_dirty(bh)) {
3136 + list_move(&bd->bd_ail_tr_list, head);
3138 + spin_unlock(&sdp->sd_ail_lock);
3139 + wait_on_buffer(bh);
3140 + ll_rw_block(WRITE, 1, &bh);
3141 + spin_lock(&sdp->sd_ail_lock);
3143 + gfs_unlock_buffer(bh);
3148 + gfs_unlock_buffer(bh);
3151 + spin_unlock(&sdp->sd_ail_lock);
3156 + * gfs_ail_empty_trans - Check whether or not a trans in the AIL has been synced
3157 + * @sdp: the filesystem
3158 + * @tr: the transaction
3163 +gfs_ail_empty_trans(struct gfs_sbd *sdp, struct gfs_trans *tr)
3165 + struct list_head *head, *tmp, *prev;
3166 + struct gfs_bufdata *bd;
3167 + struct buffer_head *bh;
3170 + spin_lock(&sdp->sd_ail_lock);
3172 + for (head = &tr->tr_ail_bufs, tmp = head->prev, prev = tmp->prev;
3174 + tmp = prev, prev = tmp->prev) {
3175 + bd = list_entry(tmp, struct gfs_bufdata, bd_ail_tr_list);
3178 + if (gfs_trylock_buffer(bh))
3181 + if (bd->bd_pinned || buffer_busy(bh)) {
3182 + gfs_unlock_buffer(bh);
3186 + if (!buffer_uptodate(bh))
3187 + gfs_io_error_bh(sdp, bh);
3189 + list_del_init(&bd->bd_ail_tr_list);
3190 + list_del(&bd->bd_ail_gl_list);
3192 + gfs_unlock_buffer(bh);
3196 + ret = list_empty(head);
3198 + spin_unlock(&sdp->sd_ail_lock);
3204 + * ail_empty_gl - remove all buffers for a given lock from the AIL
3207 + * None of the buffers should be dirty, locked, or pinned.
3211 +ail_empty_gl(struct gfs_glock *gl)
3213 + struct gfs_sbd *sdp = gl->gl_sbd;
3214 + struct gfs_bufdata *bd;
3215 + struct buffer_head *bh;
3217 + spin_lock(&sdp->sd_ail_lock);
3219 + while (!list_empty(&gl->gl_ail_bufs)) {
3220 + bd = list_entry(gl->gl_ail_bufs.next,
3221 + struct gfs_bufdata, bd_ail_gl_list);
3224 + GFS_ASSERT_GLOCK(!bd->bd_pinned && !buffer_busy(bh), gl,
3225 + printk("%u %.8lX\n", bd->bd_pinned, bh->b_state););
3226 + if (!buffer_uptodate(bh))
3227 + gfs_io_error_bh(sdp, bh);
3229 + list_del_init(&bd->bd_ail_tr_list);
3230 + list_del(&bd->bd_ail_gl_list);
3235 + spin_unlock(&sdp->sd_ail_lock);
3239 + * gfs_inval_buf - Invalidate all buffers associated with a glock
3245 +gfs_inval_buf(struct gfs_glock *gl)
3247 + struct inode *aspace = gl->gl_aspace;
3248 + struct address_space *mapping = gl->gl_aspace->i_mapping;
3252 + atomic_inc(&aspace->i_writecount);
3253 + truncate_inode_pages(mapping, 0);
3254 + atomic_dec(&aspace->i_writecount);
3256 + GFS_ASSERT_GLOCK(!mapping->nrpages, gl,);
3260 + * gfs_sync_buf - Sync all buffers associated with a glock
3262 + * @flags: DIO_START | DIO_WAIT
3267 +gfs_sync_buf(struct gfs_glock *gl, int flags)
3269 + struct address_space *mapping = gl->gl_aspace->i_mapping;
3272 + if (flags & DIO_START)
3273 + error = filemap_fdatawrite(mapping);
3274 + if (!error && (flags & DIO_WAIT))
3275 + error = filemap_fdatawait(mapping);
3276 + if (!error && (flags & (DIO_INVISIBLE | DIO_CHECK)) == DIO_CHECK)
3280 + gfs_io_error(gl->gl_sbd);
3284 + * getbuf - Get a buffer with a given address space
3285 + * @sdp: the filesystem
3286 + * @aspace: the address space
3287 + * @blkno: the block number
3288 + * @create: TRUE if the buffer should be created
3290 + * Returns: the buffer
3293 +static struct buffer_head *
3294 +getbuf(struct gfs_sbd *sdp, struct inode *aspace, uint64_t blkno, int create)
3296 + struct page *page;
3297 + struct buffer_head *bh;
3298 + unsigned int shift;
3299 + unsigned long index;
3300 + unsigned int bufnum;
3302 + shift = PAGE_CACHE_SHIFT - sdp->sd_sb.sb_bsize_shift;
3303 + index = blkno >> shift;
3304 + bufnum = blkno - (index << shift);
3307 + RETRY_MALLOC(page = grab_cache_page(aspace->i_mapping, index), page);
3309 + page = find_lock_page(aspace->i_mapping, index);
3314 + if (!page_has_buffers(page))
3315 + create_empty_buffers(page, sdp->sd_sb.sb_bsize, 0);
3317 + for (bh = page_buffers(page); bufnum--; bh = bh->b_this_page)
3321 + if (!buffer_mapped(bh))
3322 + map_bh(bh, sdp->sd_vfs, blkno);
3324 + GFS_ASSERT_SBD(bh->b_bdev == sdp->sd_vfs->s_bdev &&
3325 + bh->b_blocknr == blkno,
3328 + unlock_page(page);
3329 + page_cache_release(page);
3335 + * gfs_dgetblk - Get a block
3336 + * @sdp: The GFS superblock
3337 + * @blkno: The block number
3338 + * @gl: The glock associated with this block
3340 + * Returns: The buffer
3343 +struct buffer_head *
3344 +gfs_dgetblk(struct gfs_sbd *sdp, uint64_t blkno, struct gfs_glock *gl)
3346 + struct buffer_head *bh;
3349 + bh = getbuf(sdp, gl->gl_aspace, blkno, CREATE);
3351 + bh = sb_getblk(sdp->sd_vfs, blkno);
3357 + * gfs_dread - Read a block from disk
3358 + * @sdp: The GFS superblock
3359 + * @blkno: The block number
3360 + * @gl: The glock covering the block
3361 + * @flags: flags to gfs_dreread()
3362 + * @bhp: the place where the buffer is returned
3364 + * Returns: 0 on success, -EXXX on failure
3368 +gfs_dread(struct gfs_sbd *sdp, uint64_t blkno, struct gfs_glock *gl, int flags,
3369 + struct buffer_head **bhp)
3373 + *bhp = gfs_dgetblk(sdp, blkno, gl);
3374 + error = gfs_dreread(sdp, *bhp, flags);
3382 + * gfs_prep_new_buffer - Mark a new buffer we just gfs_dgetblk()ed uptodate
3388 +gfs_prep_new_buffer(struct buffer_head *bh)
3390 + wait_on_buffer(bh);
3391 + clear_buffer_dirty(bh);
3392 + set_buffer_uptodate(bh);
3396 + * gfs_dreread - Reread a block from disk
3397 + * @sdp: the filesystem
3398 + * @bh: The block to read
3399 + * @flags: Flags that control the read
3401 + * Returns: 0 on success, -EXXX on failure
3405 +gfs_dreread(struct gfs_sbd *sdp, struct buffer_head *bh, int flags)
3409 + if (flags & DIO_NEW) {
3410 + if (gfs_mhc_fish(sdp, bh))
3412 + clear_buffer_uptodate(bh);
3415 + if (flags & DIO_FORCE)
3416 + clear_buffer_uptodate(bh);
3418 + if ((flags & DIO_START) && !buffer_uptodate(bh))
3419 + ll_rw_block(READ, 1, &bh);
3421 + if (flags & DIO_WAIT) {
3422 + wait_on_buffer(bh);
3424 + if (!buffer_uptodate(bh)) {
3425 + gfs_io_error_bh(sdp, bh);
3434 + * gfs_dwrite - Write a buffer
3435 + * @sdp: the filesystem
3436 + * @bh: The buffer to write
3437 + * @flags: The type of write operation to do
3439 + * Returns: 0 on success, -EXXX on failure
3443 +gfs_dwrite(struct gfs_sbd *sdp, struct buffer_head *bh, int flags)
3447 + GFS_ASSERT_SBD(buffer_uptodate(bh), sdp,);
3448 + GFS_ASSERT_SBD(!test_bit(SDF_ROFS, &sdp->sd_flags), sdp,);
3450 + if (flags & DIO_CLEAN) {
3452 + clear_buffer_dirty(bh);
3453 + unlock_buffer(bh);
3456 + if (flags & DIO_DIRTY)
3457 + mark_buffer_dirty(bh);
3459 + if ((flags & DIO_START) && buffer_dirty(bh)) {
3460 + wait_on_buffer(bh);
3461 + ll_rw_block(WRITE, 1, &bh);
3464 + if (flags & DIO_WAIT) {
3465 + wait_on_buffer(bh);
3467 + if (!buffer_uptodate(bh) || buffer_dirty(bh)) {
3468 + gfs_io_error_bh(sdp, bh);
3477 + * gfs_attach_bufdata - attach a struct gfs_bufdata structure to a buffer
3478 + * @bh: The buffer to be attached to
3479 + * @gl: the glock the buffer belongs to
3484 +gfs_attach_bufdata(struct buffer_head *bh, struct gfs_glock *gl)
3486 + struct gfs_bufdata *bd;
3488 + lock_page(bh->b_page);
3491 + unlock_page(bh->b_page);
3495 + RETRY_MALLOC(bd = kmem_cache_alloc(gfs_bufdata_cachep, GFP_KERNEL), bd);
3496 + atomic_inc(&gl->gl_sbd->sd_bufdata_count);
3498 + memset(bd, 0, sizeof(struct gfs_bufdata));
3503 + INIT_LE(&bd->bd_new_le, &gfs_buf_lops);
3504 + INIT_LE(&bd->bd_incore_le, &gfs_buf_lops);
3506 + init_MUTEX(&bd->bd_lock);
3508 + INIT_LIST_HEAD(&bd->bd_ail_tr_list);
3512 + unlock_page(bh->b_page);
3516 + * gfs_is_pinned - Figure out if a buffer is pinned or not
3517 + * @sdp: the filesystem the buffer belongs to
3518 + * @bh: The buffer to be pinned
3520 + * Returns: TRUE if the buffer is pinned, FALSE otherwise
3524 +gfs_is_pinned(struct gfs_sbd *sdp, struct buffer_head *bh)
3526 + struct gfs_bufdata *bd = bh2bd(bh);
3530 + gfs_lock_buffer(bh);
3531 + if (bd->bd_pinned)
3533 + gfs_unlock_buffer(bh);
3540 + * gfs_dpin - Pin a metadata buffer in memory
3541 + * @sdp: the filesystem the buffer belongs to
3542 + * @bh: The buffer to be pinned
3547 +gfs_dpin(struct gfs_sbd *sdp, struct buffer_head *bh)
3549 + struct gfs_bufdata *bd;
3552 + GFS_ASSERT_SBD(buffer_uptodate(bh), sdp,);
3553 + GFS_ASSERT_SBD(!test_bit(SDF_ROFS, &sdp->sd_flags), sdp,);
3556 + GFS_ASSERT_SBD(bd, sdp,);
3558 + gfs_lock_buffer(bh);
3560 + GFS_ASSERT_GLOCK(!bd->bd_frozen, bd->bd_gl,);
3562 + if (!bd->bd_pinned++) {
3563 + wait_on_buffer(bh);
3565 + /* If this buffer is in the AIL and it has already been written,
3566 + remove it from the AIL. */
3568 + spin_lock(&sdp->sd_ail_lock);
3569 + if (!list_empty(&bd->bd_ail_tr_list) && !buffer_busy(bh)) {
3570 + list_del_init(&bd->bd_ail_tr_list);
3571 + list_del(&bd->bd_ail_gl_list);
3574 + spin_unlock(&sdp->sd_ail_lock);
3576 + clear_buffer_dirty(bh);
3577 + wait_on_buffer(bh);
3579 + if (!buffer_uptodate(bh))
3580 + gfs_io_error_bh(sdp, bh);
3582 + gfs_unlock_buffer(bh);
3584 + data = gmalloc(sdp->sd_sb.sb_bsize);
3586 + gfs_lock_buffer(bh);
3587 + if (bd->bd_pinned > 1) {
3588 + memcpy(data, bh->b_data, sdp->sd_sb.sb_bsize);
3589 + bd->bd_frozen = data;
3594 + gfs_unlock_buffer(bh);
3600 + * gfs_dunpin - Unpin a buffer
3601 + * @sdp: the filesystem the buffer belongs to
3602 + * @bh: The buffer to unpin
3603 + * @tr: The transaction in the AIL that contains this buffer
3608 +gfs_dunpin(struct gfs_sbd *sdp, struct buffer_head *bh, struct gfs_trans *tr)
3610 + struct gfs_bufdata *bd;
3612 + GFS_ASSERT_SBD(buffer_uptodate(bh), sdp,);
3615 + GFS_ASSERT_SBD(bd, sdp,);
3617 + gfs_lock_buffer(bh);
3619 + GFS_ASSERT_GLOCK(bd->bd_pinned, bd->bd_gl,);
3621 + if (bd->bd_pinned == 1)
3622 + mark_buffer_dirty(bh);
3626 + gfs_unlock_buffer(bh);
3628 + /* Add the buffer to the AIL
3629 + and get rid of an old reference if there is one */
3632 + spin_lock(&sdp->sd_ail_lock);
3634 + if (list_empty(&bd->bd_ail_tr_list))
3635 + list_add(&bd->bd_ail_gl_list, &bd->bd_gl->gl_ail_bufs);
3637 + list_del_init(&bd->bd_ail_tr_list);
3640 + list_add(&bd->bd_ail_tr_list, &tr->tr_ail_bufs);
3642 + spin_unlock(&sdp->sd_ail_lock);
3648 + * logbh_end_io - called at the end of a logbh write
3650 + * @uptodate: whether or not the write succeeded
3652 + * Don't do ENTER() AND EXIT() here.
3657 +logbh_end_io(struct buffer_head *bh, int uptodate)
3660 + set_buffer_uptodate(bh);
3662 + clear_buffer_uptodate(bh);
3663 + unlock_buffer(bh);
3667 + * gfs_logbh_init - Initialize a fake buffer head
3668 + * @sdp: the filesystem
3669 + * @bh: the buffer to initialize
3670 + * @blkno: the block address of the buffer
3671 + * @data: the data to be written
3676 +gfs_logbh_init(struct gfs_sbd *sdp, struct buffer_head *bh,
3677 + uint64_t blkno, char *data)
3679 + memset(bh, 0, sizeof(struct buffer_head));
3680 + bh->b_state = (1 << BH_Mapped) | (1 << BH_Uptodate) | (1 << BH_Lock);
3681 + atomic_set(&bh->b_count, 1);
3682 + set_bh_page(bh, virt_to_page(data), ((unsigned long)data) & (PAGE_SIZE - 1));
3683 + bh->b_blocknr = blkno;
3684 + bh->b_size = sdp->sd_sb.sb_bsize;
3685 + bh->b_bdev = sdp->sd_vfs->s_bdev;
3686 + init_buffer(bh, logbh_end_io, NULL);
3687 + INIT_LIST_HEAD(&bh->b_assoc_buffers);
3691 + * gfs_logbh_uninit - Clean up a fake buffer head
3692 + * @sdp: the filesystem
3693 + * @bh: the buffer to clean
3698 +gfs_logbh_uninit(struct gfs_sbd *sdp, struct buffer_head *bh)
3700 + GFS_ASSERT_SBD(!buffer_busy(bh) &&
3701 + atomic_read(&bh->b_count) == 1,
3706 + * gfs_logbh_start - Start writing a fake buffer head
3707 + * @sdp: the filesystem
3708 + * @bh: the buffer to write
3710 + * Returns: 0 on success, -EXXX on error;
3714 +gfs_logbh_start(struct gfs_sbd *sdp, struct buffer_head *bh)
3716 + submit_bh(WRITE, bh);
3721 + * gfs_logbh_wait - Wait for the write of a fake buffer head to complete
3722 + * @sdp: the filesystem
3723 + * @bh: the buffer to write
3725 + * Returns: 0 on success, -EXXX on error;
3729 +gfs_logbh_wait(struct gfs_sbd *sdp, struct buffer_head *bh)
3733 + wait_on_buffer(bh);
3735 + if (!buffer_uptodate(bh) || buffer_dirty(bh)) {
3736 + gfs_io_error_bh(sdp, bh);
3744 + * gfs_replay_buf - write a log buffer to its inplace location
3745 + * @gl: the journal's glock
3748 + * Returns: 0 on success, -EXXX on failure
3752 +gfs_replay_buf(struct gfs_glock *gl, struct buffer_head *bh)
3754 + struct gfs_sbd *sdp = gl->gl_sbd;
3755 + struct gfs_bufdata *bd;
3759 + gfs_attach_bufdata(bh, gl);
3763 + mark_buffer_dirty(bh);
3765 + if (list_empty(&bd->bd_ail_tr_list)) {
3767 + list_add(&bd->bd_ail_tr_list, &sdp->sd_recovery_bufs);
3774 + * gfs_replay_check - Check up on journal replay
3775 + * @sdp: the filesystem
3780 +gfs_replay_check(struct gfs_sbd *sdp)
3782 + struct buffer_head *bh;
3783 + struct gfs_bufdata *bd;
3785 + while (!list_empty(&sdp->sd_recovery_bufs)) {
3786 + bd = list_entry(sdp->sd_recovery_bufs.prev,
3787 + struct gfs_bufdata, bd_ail_tr_list);
3790 + if (buffer_busy(bh)) {
3791 + list_move(&bd->bd_ail_tr_list,
3792 + &sdp->sd_recovery_bufs);
3795 + list_del_init(&bd->bd_ail_tr_list);
3796 + if (!buffer_uptodate(bh))
3797 + gfs_io_error_bh(sdp, bh);
3804 + * gfs_replay_wait - Wait for all replayed buffers to hit the disk
3805 + * @sdp: the filesystem
3810 +gfs_replay_wait(struct gfs_sbd *sdp)
3812 + struct list_head *head, *tmp, *prev;
3813 + struct buffer_head *bh;
3814 + struct gfs_bufdata *bd;
3816 + for (head = &sdp->sd_recovery_bufs, tmp = head->prev, prev = tmp->prev;
3818 + tmp = prev, prev = tmp->prev) {
3819 + bd = list_entry(tmp, struct gfs_bufdata, bd_ail_tr_list);
3822 + if (!buffer_busy(bh)) {
3823 + list_del_init(&bd->bd_ail_tr_list);
3824 + if (!buffer_uptodate(bh))
3825 + gfs_io_error_bh(sdp, bh);
3830 + if (buffer_dirty(bh)) {
3831 + wait_on_buffer(bh);
3832 + ll_rw_block(WRITE, 1, &bh);
3836 + while (!list_empty(head)) {
3837 + bd = list_entry(head->prev, struct gfs_bufdata, bd_ail_tr_list);
3840 + wait_on_buffer(bh);
3842 + GFS_ASSERT_SBD(!buffer_busy(bh), sdp,);
3844 + list_del_init(&bd->bd_ail_tr_list);
3845 + if (!buffer_uptodate(bh))
3846 + gfs_io_error_bh(sdp, bh);
3852 + * gfs_wipe_buffers - make buffers so they aren't dirty/pinned anymore
3853 + * @ip: the inode who owns the buffers
3854 + * @bstart: the first buffer in the run
3855 + * @blen: the number of buffers in the run
3860 +gfs_wipe_buffers(struct gfs_inode *ip, struct gfs_rgrpd *rgd,
3861 + uint64_t bstart, uint32_t blen)
3863 + struct gfs_sbd *sdp = ip->i_sbd;
3864 + struct inode *aspace = ip->i_gl->gl_aspace;
3865 + struct buffer_head *bh;
3866 + struct gfs_bufdata *bd;
3871 + bh = getbuf(sdp, aspace, bstart, NO_CREATE);
3876 + if (buffer_uptodate(bh)) {
3878 + gfs_lock_buffer(bh);
3879 + gfs_mhc_add(rgd, &bh, 1);
3880 + busy = bd->bd_pinned || buffer_busy(bh);
3881 + gfs_unlock_buffer(bh);
3886 + spin_lock(&sdp->sd_ail_lock);
3887 + if (!list_empty(&bd->bd_ail_tr_list)) {
3888 + list_del_init(&bd->bd_ail_tr_list);
3889 + list_del(&bd->bd_ail_gl_list);
3892 + spin_unlock(&sdp->sd_ail_lock);
3895 + GFS_ASSERT_INODE(!buffer_dirty(bh), ip,);
3896 + wait_on_buffer(bh);
3897 + GFS_ASSERT_INODE(!buffer_busy(bh), ip,);
3898 + gfs_mhc_add(rgd, &bh, 1);
3901 + GFS_ASSERT_INODE(!bd || !bd->bd_pinned, ip,);
3902 + GFS_ASSERT_INODE(!buffer_dirty(bh), ip,);
3903 + wait_on_buffer(bh);
3904 + GFS_ASSERT_INODE(!buffer_busy(bh), ip,);
3915 + gfs_depend_add(rgd, ip->i_num.no_formal_ino);
3919 + * gfs_sync_meta - sync all the buffers in a filesystem
3920 + * @sdp: the filesystem
3925 +gfs_sync_meta(struct gfs_sbd *sdp)
3927 + gfs_log_flush(sdp);
3929 + gfs_ail_start(sdp, DIO_ALL);
3930 + if (gfs_ail_empty(sdp))
3933 + current->state = TASK_UNINTERRUPTIBLE;
3934 + schedule_timeout(HZ / 10);
3939 + * gfs_flush_meta_cache - get rid of any references on buffers for this inode
3940 + * @ip: The GFS inode
3945 +gfs_flush_meta_cache(struct gfs_inode *ip)
3947 + struct buffer_head **bh_slot;
3950 + spin_lock(&ip->i_lock);
3952 + for (x = 0; x < GFS_MAX_META_HEIGHT; x++) {
3953 + bh_slot = &ip->i_cache[x];
3960 + spin_unlock(&ip->i_lock);
3964 + * gfs_get_meta_buffer - Get a metadata buffer
3965 + * @ip: The GFS inode
3966 + * @depth: The depth in the metadata tree
3967 + * @num: The block number (device relative) of the buffer
3968 + * @new: Non-zero if we may create a new buffer
3969 + * @bhp: the buffer is returned here
3971 + * Returns: 0 on success, -EXXX on failure
3975 +gfs_get_meta_buffer(struct gfs_inode *ip, int height, uint64_t num, int new,
3976 + struct buffer_head **bhp)
3978 + struct gfs_sbd *sdp = ip->i_sbd;
3979 + struct buffer_head *bh, **bh_slot = &ip->i_cache[height];
3980 + int flags = ((new) ? DIO_NEW : 0) | DIO_START | DIO_WAIT;
3983 + spin_lock(&ip->i_lock);
3986 + if (bh->b_blocknr == num)
3991 + spin_unlock(&ip->i_lock);
3994 + error = gfs_dreread(sdp, bh, flags);
4000 + error = gfs_dread(sdp, num, ip->i_gl, flags, &bh);
4004 + spin_lock(&ip->i_lock);
4005 + if (*bh_slot != bh) {
4011 + spin_unlock(&ip->i_lock);
4015 + GFS_ASSERT_INODE(height, ip,);
4017 + gfs_trans_add_bh(ip->i_gl, bh);
4018 + gfs_metatype_set(sdp, bh, GFS_METATYPE_IN, GFS_FORMAT_IN);
4019 + gfs_buffer_clear_tail(bh, sizeof(struct gfs_meta_header));
4021 + gfs_metatype_check(sdp, bh,
4022 + (height) ? GFS_METATYPE_IN : GFS_METATYPE_DI);
4030 + * gfs_get_data_buffer - Get a data buffer
4031 + * @ip: The GFS inode
4032 + * @num: The block number (device relative) of the data block
4033 + * @new: Non-zero if this is a new allocation
4034 + * @bhp: the buffer is returned here
4036 + * Returns: 0 on success, -EXXX on failure
4040 +gfs_get_data_buffer(struct gfs_inode *ip, uint64_t block, int new,
4041 + struct buffer_head **bhp)
4043 + struct gfs_sbd *sdp = ip->i_sbd;
4044 + struct buffer_head *bh;
4047 + if (block == ip->i_num.no_addr) {
4048 + GFS_ASSERT_INODE(!new, ip,);
4050 + error = gfs_dread(sdp, block, ip->i_gl, DIO_START | DIO_WAIT, &bh);
4053 + gfs_metatype_check(sdp, bh, GFS_METATYPE_DI);
4054 + } else if (gfs_is_jdata(ip)) {
4056 + error = gfs_dread(sdp, block, ip->i_gl,
4057 + DIO_NEW | DIO_START | DIO_WAIT, &bh);
4060 + gfs_trans_add_bh(ip->i_gl, bh);
4061 + gfs_metatype_set(sdp, bh, GFS_METATYPE_JD, GFS_FORMAT_JD);
4062 + gfs_buffer_clear_tail(bh, sizeof(struct gfs_meta_header));
4064 + error = gfs_dread(sdp, block, ip->i_gl,
4065 + DIO_START | DIO_WAIT, &bh);
4068 + gfs_metatype_check(sdp, bh, GFS_METATYPE_JD);
4072 + bh = gfs_dgetblk(sdp, block, ip->i_gl);
4073 + gfs_prep_new_buffer(bh);
4075 + error = gfs_dread(sdp, block, ip->i_gl,
4076 + DIO_START | DIO_WAIT, &bh);
4088 + * gfs_start_ra - start readahead on an extent of a file
4089 + * @gl: the glock the blocks belong to
4090 + * @dblock: the starting disk block
4091 + * @extlen: the number of blocks in the extent
4096 +gfs_start_ra(struct gfs_glock *gl, uint64_t dblock, uint32_t extlen)
4098 + struct gfs_sbd *sdp = gl->gl_sbd;
4099 + struct inode *aspace = gl->gl_aspace;
4100 + struct buffer_head *first_bh, *bh;
4101 + uint32_t max_ra = sdp->sd_tune.gt_max_readahead >> sdp->sd_sb.sb_bsize_shift;
4104 + GFS_ASSERT_GLOCK(extlen, gl,);
4107 + if (extlen > max_ra)
4110 + first_bh = getbuf(sdp, aspace, dblock, CREATE);
4112 + if (buffer_uptodate(first_bh))
4114 + if (!buffer_locked(first_bh)) {
4115 + error = gfs_dreread(sdp, first_bh, DIO_START);
4124 + bh = getbuf(sdp, aspace, dblock, CREATE);
4126 + if (!buffer_uptodate(bh) && !buffer_locked(bh)) {
4127 + error = gfs_dreread(sdp, bh, DIO_START);
4137 + if (buffer_uptodate(first_bh))
4144 diff -urN linux-orig/fs/gfs/dio.h linux-patched/fs/gfs/dio.h
4145 --- linux-orig/fs/gfs/dio.h 1969-12-31 18:00:00.000000000 -0600
4146 +++ linux-patched/fs/gfs/dio.h 2004-06-30 13:27:49.335712986 -0500
4148 +/******************************************************************************
4149 +*******************************************************************************
4151 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
4152 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
4154 +** This copyrighted material is made available to anyone wishing to use,
4155 +** modify, copy, or redistribute it subject to the terms and conditions
4156 +** of the GNU General Public License v.2.
4158 +*******************************************************************************
4159 +******************************************************************************/
4161 +#ifndef __DIO_DOT_H__
4162 +#define __DIO_DOT_H__
4164 +void gfs_ail_start_trans(struct gfs_sbd *sdp, struct gfs_trans *tr);
4165 +int gfs_ail_empty_trans(struct gfs_sbd *sdp, struct gfs_trans *tr);
4167 +/* Asynchronous I/O Routines */
4169 +struct buffer_head *gfs_dgetblk(struct gfs_sbd *sdp, uint64_t blkno,
4170 + struct gfs_glock *gl);
4171 +int gfs_dread(struct gfs_sbd *sdp, uint64_t blkno, struct gfs_glock *gl,
4172 + int flags, struct buffer_head **bhp);
4174 +void gfs_prep_new_buffer(struct buffer_head *bh);
4175 +int gfs_dreread(struct gfs_sbd *sdp, struct buffer_head *bh, int flags);
4176 +int gfs_dwrite(struct gfs_sbd *sdp, struct buffer_head *bh, int flags);
4178 +void gfs_attach_bufdata(struct buffer_head *bh, struct gfs_glock *gl);
4179 +int gfs_is_pinned(struct gfs_sbd *sdp, struct buffer_head *bh);
4180 +void gfs_dpin(struct gfs_sbd *sdp, struct buffer_head *bh);
4181 +void gfs_dunpin(struct gfs_sbd *sdp, struct buffer_head *bh,
4182 + struct gfs_trans *tr);
4185 +void gfs_lock_buffer(struct buffer_head *bh)
4187 + struct gfs_bufdata *bd = bh2bd(bh);
4188 + down(&bd->bd_lock);
4191 +int gfs_trylock_buffer(struct buffer_head *bh)
4193 + struct gfs_bufdata *bd = bh2bd(bh);
4194 + return down_trylock(&bd->bd_lock);
4197 +void gfs_unlock_buffer(struct buffer_head *bh)
4199 + struct gfs_bufdata *bd = bh2bd(bh);
4203 +void gfs_logbh_init(struct gfs_sbd *sdp, struct buffer_head *bh, uint64_t blkno,
4205 +void gfs_logbh_uninit(struct gfs_sbd *sdp, struct buffer_head *bh);
4206 +int gfs_logbh_start(struct gfs_sbd *sdp, struct buffer_head *bh);
4207 +int gfs_logbh_wait(struct gfs_sbd *sdp, struct buffer_head *bh);
4209 +int gfs_replay_buf(struct gfs_glock *gl, struct buffer_head *bh);
4210 +void gfs_replay_check(struct gfs_sbd *sdp);
4211 +void gfs_replay_wait(struct gfs_sbd *sdp);
4213 +void gfs_wipe_buffers(struct gfs_inode *ip, struct gfs_rgrpd *rgd,
4214 + uint64_t bstart, uint32_t blen);
4216 +void gfs_sync_meta(struct gfs_sbd *sdp);
4218 +/* Buffer Caching routines */
4220 +int gfs_get_meta_buffer(struct gfs_inode *ip, int height, uint64_t num, int new,
4221 + struct buffer_head **bhp);
4222 +int gfs_get_data_buffer(struct gfs_inode *ip, uint64_t block, int new,
4223 + struct buffer_head **bhp);
4224 +void gfs_start_ra(struct gfs_glock *gl, uint64_t dblock, uint32_t extlen);
4226 +static __inline__ int
4227 +gfs_get_inode_buffer(struct gfs_inode *ip, struct buffer_head **bhp)
4229 + return gfs_get_meta_buffer(ip, 0, ip->i_num.no_addr, FALSE, bhp);
4232 +struct inode *gfs_aspace_get(struct gfs_sbd *sdp);
4233 +void gfs_aspace_put(struct inode *aspace);
4235 +void gfs_inval_buf(struct gfs_glock *gl);
4236 +void gfs_sync_buf(struct gfs_glock *gl, int flags);
4238 +void gfs_flush_meta_cache(struct gfs_inode *ip);
4240 +/* Buffer Content Functions */
4243 + * gfs_buffer_clear - Zeros out a buffer
4244 + * @ip: The GFS inode
4245 + * @bh: The buffer to zero
4249 +static __inline__ void
4250 +gfs_buffer_clear(struct buffer_head *bh)
4252 + memset(bh->b_data, 0, bh->b_size);
4256 + * gfs_buffer_clear_tail - Clear buffer beyond the dinode
4257 + * @bh: The buffer containing the on-disk inode
4258 + * @head: the size of the head of the buffer
4260 + * Clears the remaining part of an on-disk inode that is not a dinode.
4261 + * i.e. The data part of a stuffed inode, or the top level of metadata
4262 + * of a non-stuffed inode.
4265 +static __inline__ void
4266 +gfs_buffer_clear_tail(struct buffer_head *bh, int head)
4268 + memset(bh->b_data + head, 0, bh->b_size - head);
4272 + * gfs_buffer_clear_ends - Zero out any bits of a buffer which are not being written
4274 + * @offset: Offset in buffer where write starts
4275 + * @amount: Amount of data being written
4276 + * @journaled: TRUE if this is a journaled buffer
4280 +static __inline__ void
4281 +gfs_buffer_clear_ends(struct buffer_head *bh, int offset, int amount,
4284 + int z_off1 = (journaled) ? sizeof(struct gfs_meta_header) : 0;
4285 + int z_len1 = offset - z_off1;
4286 + int z_off2 = offset + amount;
4287 + int z_len2 = (bh)->b_size - z_off2;
4290 + memset(bh->b_data + z_off1, 0, z_len1);
4293 + memset(bh->b_data + z_off2, 0, z_len2);
4297 + * gfs_buffer_copy_tail - copies the tail of one buffer to another
4298 + * @to_bh: the buffer to copy to
4299 + * @to_head: the size of the head of to_bh
4300 + * @from_bh: the buffer to copy from
4301 + * @from_head: the size of the head of from_bh
 4303 + * from_head is guaranteed to be bigger than to_head
4306 +static __inline__ void
4307 +gfs_buffer_copy_tail(struct buffer_head *to_bh, int to_head,
4308 + struct buffer_head *from_bh, int from_head)
4310 + memcpy(to_bh->b_data + to_head,
4311 + from_bh->b_data + from_head,
4312 + from_bh->b_size - from_head);
4313 + memset(to_bh->b_data + to_bh->b_size + to_head - from_head,
4315 + from_head - to_head);
4319 + * gfs_buffer_print - print a buffer to the debug console
4321 + * @string: what to print before the contents of the buffer
4325 +static __inline__ void
4326 +gfs_buffer_print(struct buffer_head *bh, char *string)
4328 + unsigned int x, size = (bh)->b_size;
4329 + unsigned char *c = (bh)->b_data;
4331 + printk("%s\n", string);
4333 + for (x = 0; x < size; x++) {
4334 + printk("%.2X ", c[x]);
4342 +#endif /* __DIO_DOT_H__ */
4343 diff -urN linux-orig/fs/gfs/dir.c linux-patched/fs/gfs/dir.c
4344 --- linux-orig/fs/gfs/dir.c 1969-12-31 18:00:00.000000000 -0600
4345 +++ linux-patched/fs/gfs/dir.c 2004-06-30 13:27:49.335712986 -0500
4347 +/******************************************************************************
4348 +*******************************************************************************
4350 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
4351 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
4353 +** This copyrighted material is made available to anyone wishing to use,
4354 +** modify, copy, or redistribute it subject to the terms and conditions
4355 +** of the GNU General Public License v.2.
4357 +*******************************************************************************
4358 +******************************************************************************/
4361 +* Implements Extendible Hashing as described in:
4362 +* "Extendible Hashing" by Fagin, et al in
4363 +* __ACM Trans. on Database Systems__, Sept 1979.
4366 +* Here's the layout of dirents which is essentially the same as that of ext2
4367 +* within a single block. The field de_name_len is the number of bytes
4368 +* actually required for the name (no null terminator). The field de_rec_len
4369 +* is the number of bytes allocated to the dirent. The offset of the next
4370 +* dirent in the block is (dirent + dirent->de_rec_len). When a dirent is
4371 +* deleted, the preceding dirent inherits its allocated space, ie
4372 +* prev->de_rec_len += deleted->de_rec_len. Since the next dirent is obtained
4373 +* by adding de_rec_len to the current dirent, this essentially causes the
4374 +* deleted dirent to get jumped over when iterating through all the dirents.
4375 +* When deleting the first dirent in a block, there is no previous dirent so
4376 +* the field de_ino is set to zero to designate it as deleted. When allocating
4377 +* a dirent, gfs_dirent_alloc iterates through the dirents in a block. If the
4378 +* first dirent has (de_ino == 0) and de_rec_len is large enough, this first
4379 +* dirent is allocated. Otherwise it must go through all the 'used' dirents
4380 +* searching for one in which the amount of total space minus the amount of
4381 +* used space will provide enough space for the new dirent.
4382 +* There are two types of blocks in which dirents reside. In a stuffed dinode,
4383 +* the dirents begin at offset sizeof(struct gfs_dinode) from the beginning of the block.
4384 +* In leaves, they begin at offset sizeof (struct gfs_leaf) from the beginning of the
4385 +* leaf block. The dirents reside in leaves when
4387 +* dip->i_di.di_regime == GFS_DIR_EXHASH.
4389 +* The dirents are in the stuffed dinode when dip->i_di.di_regime == GFS_DIR_LINEAR.
4390 +* When the dirents are in leaves, the actual contents of the directory file are
4391 +* used as an array of 64-bit block pointers pointing to the leaf blocks. The
4392 +* dirents are NOT in the directory file itself. There can be more than one block
4393 +* pointer in the array that points to the same leaf. In fact, when a directory is
4394 +* first converted from linear to exhash, all of the pointers point to the same
4395 +* leaf. When a leaf is completely full, the size of the hash table can be doubled
4396 +* unless it is already at the maximum size which is hard coded into
4397 +* GFS_DIR_MAX_DEPTH. After that, leaves are chained together in a linked list but
4398 +* never before the maximum hash table size has been reached.
4401 +#include <linux/sched.h>
4402 +#include <linux/slab.h>
4403 +#include <linux/smp_lock.h>
4404 +#include <linux/spinlock.h>
4405 +#include <asm/semaphore.h>
4406 +#include <linux/completion.h>
4407 +#include <linux/buffer_head.h>
4420 +#define IS_LEAF (1)
4421 +#define IS_DINODE (2)
4424 +#define gfs_dir_hash2offset(h) (((uint64_t)(h)) >> 1)
4425 +#define gfs_dir_offset2hash(p) ((uint32_t)(((uint64_t)(p)) << 1))
4427 +#define gfs_dir_hash2offset(h) (((uint64_t)(h)))
4428 +#define gfs_dir_offset2hash(p) ((uint32_t)(((uint64_t)(p))))
4431 +typedef int (*leaf_call_t) (struct gfs_inode *dip,
4432 + uint32_t index, uint32_t len, uint64_t leaf_no,
4436 + * int gfs_filecmp - Compare two filenames
4437 + * @file1: The first filename
4438 + * @file2: The second filename
4439 + * @len_of_file2: The length of the second file
4441 + * This routine compares two filenames and returns TRUE if they are equal.
4443 + * Returns: TRUE (!=0) if the files are the same, otherwise FALSE (0).
4447 +gfs_filecmp(struct qstr *file1, char *file2, int len_of_file2)
4449 + if (file1->len != len_of_file2)
4451 + if (memcmp(file1->name, file2, file1->len))
4457 + * dirent_first - Return the first dirent
4458 + * @dip: the directory
4460 + * @dent: Pointer to list of dirents
4462 + * return first dirent whether bh points to leaf or stuffed dinode
4464 + * Returns: IS_LEAF or IS_DINODE
4468 +dirent_first(struct gfs_inode *dip, struct buffer_head *bh,
4469 + struct gfs_dirent **dent)
4471 + struct gfs_meta_header *h = (struct gfs_meta_header *)bh->b_data;
4473 + if (gfs32_to_cpu(h->mh_type) == GFS_METATYPE_LF) {
4474 + gfs_meta_check(dip->i_sbd, bh);
4475 + *dent = (struct gfs_dirent *)(bh->b_data + sizeof(struct gfs_leaf));
4478 + gfs_metatype_check(dip->i_sbd, bh, GFS_METATYPE_DI);
4479 + *dent = (struct gfs_dirent *)(bh->b_data + sizeof(struct gfs_dinode));
4485 + * dirent_next - Next dirent
4486 + * @dip: the directory
4488 + * @dent: Pointer to list of dirents
4490 + * Returns: 0 on success, error code otherwise
4494 +dirent_next(struct gfs_inode *dip, struct buffer_head *bh,
4495 + struct gfs_dirent **dent)
4497 + struct gfs_dirent *tmp, *cur;
4499 + uint32_t cur_rec_len;
4502 + bh_end = bh->b_data + bh->b_size;
4504 + cur_rec_len = gfs16_to_cpu(cur->de_rec_len);
4506 + if ((char *)cur + cur_rec_len >= bh_end) {
4507 + GFS_ASSERT_INODE((char *)cur + cur_rec_len == bh_end, dip,);
4511 + tmp = (struct gfs_dirent *)((char *)cur + cur_rec_len);
4513 + GFS_ASSERT_INODE((char *)tmp + gfs16_to_cpu(tmp->de_rec_len) <= bh_end,
4515 + /* Only the first dent could ever have de_ino == 0 */
4516 + GFS_ASSERT_INODE(tmp->de_inum.no_formal_ino, dip,);
4524 + * dirent_del - Delete a dirent
4525 + * @dip: The GFS inode
4527 + * @prev: The previous dirent
4528 + * @cur: The current dirent
4533 +dirent_del(struct gfs_inode *dip, struct buffer_head *bh,
4534 + struct gfs_dirent *prev, struct gfs_dirent *cur)
4536 + uint32_t cur_rec_len, prev_rec_len;
4538 + GFS_ASSERT_INODE(cur->de_inum.no_formal_ino, dip,);
4540 + gfs_trans_add_bh(dip->i_gl, bh);
4542 + /* If there is no prev entry, this is the first entry in the block.
4543 + The de_rec_len is already as big as it needs to be. Just zero
4544 + out the inode number and return. */
4547 + cur->de_inum.no_formal_ino = 0; /* No endianess worries */
4551 + /* Combine this dentry with the previous one. */
4553 + prev_rec_len = gfs16_to_cpu(prev->de_rec_len);
4554 + cur_rec_len = gfs16_to_cpu(cur->de_rec_len);
4556 + GFS_ASSERT_INODE((char *)prev + prev_rec_len == (char *)cur, dip,);
4557 + GFS_ASSERT_INODE((char *)cur + cur_rec_len <=
4558 + bh->b_data + bh->b_size, dip,);
4560 + prev_rec_len += cur_rec_len;
4561 + prev->de_rec_len = cpu_to_gfs16(prev_rec_len);
4565 + * gfs_dirent_alloc - Allocate a directory entry
4566 + * @dip: The GFS inode
4568 + * @name_len: The length of the name
4569 + * @dent_out: Pointer to list of dirents
4571 + * Returns: 0 on success, error code otherwise
4575 +gfs_dirent_alloc(struct gfs_inode *dip, struct buffer_head *bh, int name_len,
4576 + struct gfs_dirent **dent_out)
4578 + struct gfs_dirent *dent, *new;
4579 + unsigned int rec_len = GFS_DIRENT_SIZE(name_len);
4580 + unsigned int entries = 0, offset = 0, x = 0;
4583 + type = dirent_first(dip, bh, &dent);
4585 + if (type == IS_LEAF) {
4586 + struct gfs_leaf *leaf = (struct gfs_leaf *)bh->b_data;
4587 + entries = gfs16_to_cpu(leaf->lf_entries);
4588 + offset = sizeof(struct gfs_leaf);
4590 + struct gfs_dinode *dinode = (struct gfs_dinode *)bh->b_data;
4591 + entries = gfs32_to_cpu(dinode->di_entries);
4592 + offset = sizeof(struct gfs_dinode);
4596 + gfs_trans_add_bh(dip->i_gl, bh);
4598 + dent->de_rec_len = bh->b_size - offset;
4599 + dent->de_rec_len = cpu_to_gfs16(dent->de_rec_len);
4600 + dent->de_name_len = cpu_to_gfs16(name_len);
4607 + uint32_t cur_rec_len, cur_name_len;
4609 + cur_rec_len = gfs16_to_cpu(dent->de_rec_len);
4610 + cur_name_len = gfs16_to_cpu(dent->de_name_len);
4612 + if ((!dent->de_inum.no_formal_ino && cur_rec_len >= rec_len) ||
4613 + (cur_rec_len >= GFS_DIRENT_SIZE(cur_name_len) + rec_len)) {
4614 + gfs_trans_add_bh(dip->i_gl, bh);
4616 + if (dent->de_inum.no_formal_ino) {
4617 + new = (struct gfs_dirent *)((char *)dent +
4618 + GFS_DIRENT_SIZE(cur_name_len));
4619 + memset(new, 0, sizeof(struct gfs_dirent));
4621 + new->de_rec_len = cpu_to_gfs16(cur_rec_len -
4622 + GFS_DIRENT_SIZE(cur_name_len));
4623 + new->de_name_len = cpu_to_gfs16(name_len);
4625 + dent->de_rec_len = cur_rec_len - gfs16_to_cpu(new->de_rec_len);
4626 + dent->de_rec_len = cpu_to_gfs16(dent->de_rec_len);
4632 + dent->de_name_len = cpu_to_gfs16(name_len);
4638 + GFS_ASSERT_INODE(x < entries, dip,);
4640 + if (dent->de_inum.no_formal_ino)
4643 + while (dirent_next(dip, bh, &dent) == 0);
 4649 + * dirent_fits - See if we can fit an entry in this buffer
4650 + * @dip: The GFS inode
4652 + * @name_len: The length of the name
4654 + * Returns: TRUE if it can fit, FALSE otherwise
4658 +dirent_fits(struct gfs_inode *dip, struct buffer_head *bh, int name_len)
4660 + struct gfs_dirent *dent;
4661 + unsigned int rec_len = GFS_DIRENT_SIZE(name_len);
4662 + unsigned int entries = 0, x = 0;
4665 + type = dirent_first(dip, bh, &dent);
4667 + if (type == IS_LEAF) {
4668 + struct gfs_leaf *leaf = (struct gfs_leaf *)bh->b_data;
4669 + entries = gfs16_to_cpu(leaf->lf_entries);
4671 + struct gfs_dinode *dinode = (struct gfs_dinode *)bh->b_data;
4672 + entries = gfs32_to_cpu(dinode->di_entries);
4679 + uint32_t cur_rec_len, cur_name_len;
4681 + cur_rec_len = gfs16_to_cpu(dent->de_rec_len);
4682 + cur_name_len = gfs16_to_cpu(dent->de_name_len);
4684 + if ((!dent->de_inum.no_formal_ino && cur_rec_len >= rec_len) ||
4685 + (cur_rec_len >= GFS_DIRENT_SIZE(cur_name_len) + rec_len))
4688 + GFS_ASSERT_INODE(x < entries, dip,);
4690 + if (dent->de_inum.no_formal_ino)
4693 + while (dirent_next(dip, bh, &dent) == 0);
4709 +leaf_search(struct gfs_inode *dip,
4710 + struct buffer_head *bh, struct qstr *filename,
4711 + struct gfs_dirent **dent_out, struct gfs_dirent **dent_prev)
4714 + struct gfs_dirent *dent, *prev = NULL;
4715 + unsigned int entries = 0, x = 0;
4718 + type = dirent_first(dip, bh, &dent);
4720 + if (type == IS_LEAF) {
4721 + struct gfs_leaf *leaf = (struct gfs_leaf *)bh->b_data;
4722 + entries = gfs16_to_cpu(leaf->lf_entries);
4723 + } else if (type == IS_DINODE) {
4724 + struct gfs_dinode *dinode = (struct gfs_dinode *)bh->b_data;
4725 + entries = gfs32_to_cpu(dinode->di_entries);
4728 + hash = gfs_dir_hash(filename->name, filename->len);
4731 + if (!dent->de_inum.no_formal_ino) {
4736 + if (gfs32_to_cpu(dent->de_hash) == hash &&
4737 + gfs_filecmp(filename, (char *)(dent + 1),
4738 + gfs16_to_cpu(dent->de_name_len))) {
4741 + *dent_prev = prev;
4746 + GFS_ASSERT_INODE(x < entries, dip,);
4750 + while (dirent_next(dip, bh, &dent) == 0);
4756 + * get_leaf - Get leaf
4761 + * Returns: 0 on success, error code otherwise
4765 +get_leaf(struct gfs_inode *dip, uint64_t leaf_no, struct buffer_head **bhp)
4767 + struct gfs_sbd *sdp = dip->i_sbd;
4770 + error = gfs_dread(sdp, leaf_no, dip->i_gl, DIO_START | DIO_WAIT, bhp);
4772 + gfs_metatype_check(sdp, *bhp, GFS_METATYPE_LF);
4778 + * get_leaf_nr - Get a leaf number associated with the index
4779 + * @dip: The GFS inode
4783 + * Returns: 0 on success, error code otherwise
4787 +get_leaf_nr(struct gfs_inode *dip, uint32_t index, uint64_t *leaf_out)
4792 + error = gfs_internal_read(dip, (char *)&leaf_no,
4793 + index * sizeof(uint64_t),
4794 + sizeof(uint64_t));
4795 + if (error != sizeof(uint64_t))
4796 + return (error < 0) ? error : -EIO;
4798 + *leaf_out = gfs64_to_cpu(leaf_no);
4804 + * get_first_leaf - Get first leaf
4805 + * @dip: The GFS inode
4809 + * Returns: 0 on success, error code otherwise
4813 +get_first_leaf(struct gfs_inode *dip, uint32_t index,
4814 + struct buffer_head **bh_out)
4819 + error = get_leaf_nr(dip, index, &leaf_no);
4821 + error = get_leaf(dip, leaf_no, bh_out);
4827 + * get_next_leaf - Get next leaf
4828 + * @dip: The GFS inode
4829 + * @bh_in: The buffer
4832 + * Returns: 0 on success, error code otherwise
4836 +get_next_leaf(struct gfs_inode *dip, struct buffer_head *bh_in,
4837 + struct buffer_head **bh_out)
4839 + struct gfs_leaf *leaf;
4842 + leaf = (struct gfs_leaf *)bh_in->b_data;
4844 + if (!leaf->lf_next)
4847 + error = get_leaf(dip, gfs64_to_cpu(leaf->lf_next), bh_out);
4853 + * linked_leaf_search - Linked leaf search
4854 + * @dip: The GFS inode
4855 + * @filename: The filename to search for
 4860 + * Returns: 0 on success, error code otherwise
4864 +linked_leaf_search(struct gfs_inode *dip, struct qstr *filename,
4865 + struct gfs_dirent **dent_out, struct gfs_dirent **dent_prev,
4866 + struct buffer_head **bh_out)
4868 + struct buffer_head *bh = NULL, *bh_next;
4869 + uint32_t hsize, index;
4873 + hsize = 1 << dip->i_di.di_depth;
4874 + GFS_ASSERT_INODE(hsize * sizeof(uint64_t) == dip->i_di.di_size, dip,);
4876 + /* Figure out the address of the leaf node. */
4878 + hash = gfs_dir_hash(filename->name, filename->len);
4879 + index = hash >> (32 - dip->i_di.di_depth);
4881 + error = get_first_leaf(dip, index, &bh_next);
4885 + /* Find the entry */
4893 + error = leaf_search(dip, bh, filename, dent_out, dent_prev);
4907 + error = get_next_leaf(dip, bh, &bh_next);
 4917 + * dir_make_exhash - Convert a stuffed directory into an ExHash directory
4918 + * @dip: The GFS inode
4920 + * Returns: 0 on success, error code otherwise
4924 +dir_make_exhash(struct gfs_inode *dip)
4926 + struct gfs_sbd *sdp = dip->i_sbd;
4927 + struct gfs_dirent *dent;
4928 + struct buffer_head *bh, *dibh;
4929 + struct gfs_leaf *leaf;
4935 + error = gfs_get_inode_buffer(dip, &dibh);
4939 + /* Allocate a new block for the first leaf node */
4941 + error = gfs_metaalloc(dip, &bn);
4945 + /* Turn over a new leaf */
4947 + error = gfs_dread(sdp, bn, dip->i_gl, DIO_NEW | DIO_START | DIO_WAIT, &bh);
4951 + gfs_trans_add_bh(dip->i_gl, bh);
4952 + gfs_metatype_set(sdp, bh, GFS_METATYPE_LF, GFS_FORMAT_LF);
4953 + gfs_buffer_clear_tail(bh, sizeof(struct gfs_meta_header));
4955 + /* Fill in the leaf structure */
4957 + leaf = (struct gfs_leaf *)bh->b_data;
4959 + GFS_ASSERT_INODE(dip->i_di.di_entries < (1 << 16), dip,);
4961 + leaf->lf_dirent_format = cpu_to_gfs32(GFS_FORMAT_DE);
4962 + leaf->lf_entries = cpu_to_gfs16(dip->i_di.di_entries);
4964 + /* Copy dirents */
4966 + gfs_buffer_copy_tail(bh, sizeof(struct gfs_leaf), dibh,
4967 + sizeof(struct gfs_dinode));
4969 + /* Find last entry */
4972 + dirent_first(dip, bh, &dent);
4975 + if (!dent->de_inum.no_formal_ino)
4977 + if (++x == dip->i_di.di_entries)
4980 + while (dirent_next(dip, bh, &dent) == 0);
4982 + /* Adjust the last dirent's record length
4983 + (Remember that dent still points to the last entry.) */
4985 + dent->de_rec_len = gfs16_to_cpu(dent->de_rec_len) +
4986 + sizeof(struct gfs_dinode) -
4987 + sizeof(struct gfs_leaf);
4988 + dent->de_rec_len = cpu_to_gfs16(dent->de_rec_len);
4992 + /* We're done with the new leaf block, now setup the new
4995 + gfs_trans_add_bh(dip->i_gl, dibh);
4996 + gfs_buffer_clear_tail(dibh, sizeof (struct gfs_dinode));
4998 + lp = (uint64_t *)(dibh->b_data + sizeof(struct gfs_dinode));
5000 + for (x = sdp->sd_hash_ptrs; x--; lp++)
5001 + *lp = cpu_to_gfs64(bn);
5003 + dip->i_di.di_size = sdp->sd_sb.sb_bsize / 2;
5004 + dip->i_di.di_blocks++;
5005 + dip->i_di.di_flags |= GFS_DIF_EXHASH;
5006 + dip->i_di.di_payload_format = 0;
5008 + for (x = sdp->sd_hash_ptrs, y = -1; x; x >>= 1, y++) ;
5009 + dip->i_di.di_depth = y;
5011 + gfs_dinode_out(&dip->i_di, dibh->b_data);
5023 + * dir_split_leaf - Split a leaf block into two
5024 + * @dip: The GFS inode
5028 + * Returns: 0 on success, error code on failure
5032 +dir_split_leaf(struct gfs_inode *dip, uint32_t index, uint64_t leaf_no)
5034 + struct gfs_sbd *sdp = dip->i_sbd;
5035 + struct buffer_head *nbh, *obh, *dibh;
5036 + struct gfs_leaf *nleaf, *oleaf;
5037 + struct gfs_dirent *dent, *prev = NULL, *next = NULL, *new;
5038 + uint32_t start, len, half_len, divider;
5040 + uint32_t name_len;
5041 + int x, moved = FALSE;
5044 + /* Allocate the new leaf block */
5046 + error = gfs_metaalloc(dip, &bn);
5050 + /* Get the new leaf block */
5052 + error = gfs_dread(sdp, bn, dip->i_gl,
5053 + DIO_NEW | DIO_START | DIO_WAIT, &nbh);
5057 + gfs_trans_add_bh(dip->i_gl, nbh);
5058 + gfs_metatype_set(sdp, nbh, GFS_METATYPE_LF, GFS_FORMAT_LF);
5059 + gfs_buffer_clear_tail(nbh, sizeof (struct gfs_meta_header));
5061 + nleaf = (struct gfs_leaf *)nbh->b_data;
5063 + nleaf->lf_dirent_format = cpu_to_gfs32(GFS_FORMAT_DE);
5065 + /* Get the old leaf block */
5067 + error = get_leaf(dip, leaf_no, &obh);
5071 + gfs_trans_add_bh(dip->i_gl, obh);
5073 + oleaf = (struct gfs_leaf *)obh->b_data;
5075 + /* Compute the start and len of leaf pointers in the hash table. */
5077 + len = 1 << (dip->i_di.di_depth - gfs16_to_cpu(oleaf->lf_depth));
5078 + GFS_ASSERT_INODE(len != 1, dip,);
5079 + half_len = len >> 1;
5081 + start = (index & ~(len - 1));
5083 + /* Change the pointers.
5084 + Don't bother distinguishing stuffed from non-stuffed.
5085 + This code is complicated enough already. */
5087 + lp = gmalloc(half_len * sizeof(uint64_t));
5089 + error = gfs_internal_read(dip, (char *)lp, start * sizeof(uint64_t),
5090 + half_len * sizeof(uint64_t));
5091 + if (error != half_len * sizeof(uint64_t)) {
5097 + /* Change the pointers */
5099 + for (x = 0; x < half_len; x++)
5100 + lp[x] = cpu_to_gfs64(bn);
5102 + error = gfs_internal_write(dip, (char *)lp, start * sizeof(uint64_t),
5103 + half_len * sizeof(uint64_t));
5104 + if (error != half_len * sizeof(uint64_t)) {
5112 + /* Compute the divider */
5114 + divider = (start + half_len) << (32 - dip->i_di.di_depth);
5116 + /* Copy the entries */
5118 + dirent_first(dip, obh, &dent);
5122 + if (dirent_next(dip, obh, &next))
5125 + if (dent->de_inum.no_formal_ino &&
5126 + gfs32_to_cpu(dent->de_hash) < divider) {
5127 + name_len = gfs16_to_cpu(dent->de_name_len);
5129 + error = gfs_dirent_alloc(dip, nbh, name_len, &new);
5130 + GFS_ASSERT_INODE(!error, dip,);
5132 + new->de_inum = dent->de_inum; /* No endianness worries */
5133 + new->de_hash = dent->de_hash; /* No endianness worries */
5134 + new->de_type = dent->de_type; /* No endianness worries */
5135 + memcpy((char *)(new + 1), (char *)(dent + 1),
5138 + nleaf->lf_entries = gfs16_to_cpu(nleaf->lf_entries) + 1;
5139 + nleaf->lf_entries = cpu_to_gfs16(nleaf->lf_entries);
5141 + dirent_del(dip, obh, prev, dent);
5143 + GFS_ASSERT_INODE(gfs16_to_cpu(oleaf->lf_entries), dip,);
5144 + oleaf->lf_entries = gfs16_to_cpu(oleaf->lf_entries) - 1;
5145 + oleaf->lf_entries = cpu_to_gfs16(oleaf->lf_entries);
5158 + /* If none of the entries got moved into the new leaf,
5159 + artificially fill in the first entry. */
5162 + error = gfs_dirent_alloc(dip, nbh, 0, &new);
5163 + GFS_ASSERT_INODE(!error, dip,);
5164 + new->de_inum.no_formal_ino = 0;
5167 + oleaf->lf_depth = gfs16_to_cpu(oleaf->lf_depth) + 1;
5168 + oleaf->lf_depth = cpu_to_gfs16(oleaf->lf_depth);
5169 + nleaf->lf_depth = oleaf->lf_depth;
5171 + error = gfs_get_inode_buffer(dip, &dibh);
5172 + GFS_ASSERT_INODE(!error, dip,); /* Pinned in gfs_internal_write() */
5174 + dip->i_di.di_blocks++;
5176 + gfs_dinode_out(&dip->i_di, dibh->b_data);
5195 + * dir_double_exhash - Double size of ExHash table
5196 + * @dip: The GFS dinode
5198 + * Returns: 0 on success, error code on failure
5202 +dir_double_exhash(struct gfs_inode *dip)
5204 + struct gfs_sbd *sdp = dip->i_sbd;
5205 + struct buffer_head *dibh;
5208 + uint64_t *from, *to;
5213 + hsize = 1 << dip->i_di.di_depth;
5214 + GFS_ASSERT_INODE(hsize * sizeof(uint64_t) == dip->i_di.di_size, dip,);
5216 + /* Allocate both the "from" and "to" buffers in one big chunk */
5218 + buf = gmalloc(3 * sdp->sd_hash_bsize);
5220 + for (block = dip->i_di.di_size >> sdp->sd_hash_bsize_shift; block--;) {
5221 + error = gfs_internal_read(dip, (char *)buf,
5222 + block * sdp->sd_hash_bsize,
5223 + sdp->sd_hash_bsize);
5224 + if (error != sdp->sd_hash_bsize) {
5231 + to = (uint64_t *)((char *)buf + sdp->sd_hash_bsize);
5233 + for (x = sdp->sd_hash_ptrs; x--; from++) {
5234 + *to++ = *from; /* No endianess worries */
5238 + error = gfs_internal_write(dip, (char *)buf + sdp->sd_hash_bsize,
5239 + block * sdp->sd_sb.sb_bsize,
5240 + sdp->sd_sb.sb_bsize);
5241 + if (error != sdp->sd_sb.sb_bsize) {
5250 + error = gfs_get_inode_buffer(dip, &dibh);
5251 + GFS_ASSERT_INODE(!error, dip,); /* Pinned in gfs_internal_write() */
5253 + dip->i_di.di_depth++;
5255 + gfs_dinode_out(&dip->i_di, dibh->b_data);
5267 + * compare_dents - compare directory entries by hash value
5271 + * When comparing the hash entries of @a to @b:
5278 +compare_dents(const void *a, const void *b)
5280 + struct gfs_dirent *dent_a, *dent_b;
5281 + uint32_t hash_a, hash_b;
5284 + dent_a = *(struct gfs_dirent **)a;
5285 + hash_a = dent_a->de_hash;
5286 + hash_a = gfs32_to_cpu(hash_a);
5288 + dent_b = *(struct gfs_dirent **)b;
5289 + hash_b = dent_b->de_hash;
5290 + hash_b = gfs32_to_cpu(hash_b);
5292 + if (hash_a > hash_b)
5294 + else if (hash_a < hash_b)
5297 + unsigned int len_a = gfs16_to_cpu(dent_a->de_name_len);
5298 + unsigned int len_b = gfs16_to_cpu(dent_b->de_name_len);
5300 + if (len_a > len_b)
5302 + else if (len_a < len_b)
5305 + ret = memcmp((char *)(dent_a + 1),
5306 + (char *)(dent_b + 1),
5314 + * do_filldir_main - read out directory entries
5315 + * @dip: The GFS inode
5316 + * @offset: The offset in the file to read from
5317 + * @opaque: opaque data to pass to filldir
5318 + * @filldir: The function to pass entries to
5319 + * @darr: an array of struct gfs_dirent pointers to read
5320 + * @entries: the number of entries in darr
 5321 + * @copied: pointer to int that's non-zero if an entry has been copied out
 5323 + * Jump through some hoops to make sure that if there are hash collisions,
5324 + * they are read out at the beginning of a buffer. We want to minimize
5325 + * the possibility that they will fall into different readdir buffers or
5326 + * that someone will want to seek to that location.
5328 + * Returns: 0 on success, -EXXX on failure, >0 on exception from filldir
5332 +do_filldir_main(struct gfs_inode *dip, uint64_t *offset,
5333 + void *opaque, gfs_filldir_t filldir,
5334 + struct gfs_dirent **darr, uint32_t entries, int *copied)
5336 + struct gfs_dirent *dent, *dent_next;
5337 + struct gfs_inum inum;
5338 + uint64_t off, off_next;
5339 + unsigned int x, y;
5343 + gfs_sort(darr, entries, sizeof(struct gfs_dirent *), compare_dents);
5345 + dent_next = darr[0];
5346 + off_next = gfs32_to_cpu(dent_next->de_hash);
5347 + off_next = gfs_dir_hash2offset(off_next);
5349 + for (x = 0, y = 1; x < entries; x++, y++) {
5353 + if (y < entries) {
5354 + dent_next = darr[y];
5355 + off_next = gfs32_to_cpu(dent_next->de_hash);
5356 + off_next = gfs_dir_hash2offset(off_next);
5358 + if (off < *offset)
5362 + if (off_next == off) {
5363 + if (*copied && !run)
5369 + if (off < *offset)
5374 + gfs_inum_in(&inum, (char *)&dent->de_inum);
5376 + error = filldir(opaque, (char *)(dent + 1),
5377 + gfs16_to_cpu(dent->de_name_len),
5379 + gfs16_to_cpu(dent->de_type));
5386 + /* Increment the *offset by one, so the next time we come into the do_filldir fxn,
5387 + we get the next entry instead of the last one in the current leaf */
5395 + * do_filldir_single - Read directory entries out of a single block
5396 + * @dip: The GFS inode
5397 + * @offset: The offset in the file to read from
5398 + * @opaque: opaque data to pass to filldir
5399 + * @filldir: The function to pass entries to
5401 + * @entries: the number of entries in the block
 5402 + * @copied: pointer to int that's non-zero if an entry has been copied out
5404 + * Returns: 0 on success, -EXXX on failure, >0 on exception from filldir
5408 +do_filldir_single(struct gfs_inode *dip, uint64_t *offset,
5409 + void *opaque, gfs_filldir_t filldir,
5410 + struct buffer_head *bh, uint32_t entries, int *copied)
5412 + struct gfs_dirent **darr;
5413 + struct gfs_dirent *de;
5414 + unsigned int e = 0;
5420 + darr = gmalloc(entries * sizeof(struct gfs_dirent *));
5422 + dirent_first(dip, bh, &de);
5424 + if (!de->de_inum.no_formal_ino)
5428 + while (dirent_next(dip, bh, &de) == 0);
5430 + GFS_ASSERT_INODE(e == entries, dip,);
5432 + error = do_filldir_main(dip, offset, opaque, filldir, darr,
5441 + * do_filldir_multi - Read directory entries out of a linked leaf list
5442 + * @dip: The GFS inode
5443 + * @offset: The offset in the file to read from
5444 + * @opaque: opaque data to pass to filldir
5445 + * @filldir: The function to pass entries to
5446 + * @bh: the first leaf in the list
 5447 + * @copied: pointer to int that's non-zero if an entry has been copied out
5449 + * Returns: 0 on success, -EXXX on failure, >0 on exception from filldir
5453 +do_filldir_multi(struct gfs_inode *dip, uint64_t *offset,
5454 + void *opaque, gfs_filldir_t filldir,
5455 + struct buffer_head *bh, int *copied)
5457 + struct buffer_head **larr = NULL;
5458 + struct gfs_dirent **darr;
5459 + struct gfs_leaf *leaf;
5460 + struct buffer_head *tmp_bh;
5461 + struct gfs_dirent *de;
5462 + unsigned int entries, e = 0;
5463 + unsigned int leaves = 0, l = 0;
5468 + /* Count leaves and entries */
5470 + leaf = (struct gfs_leaf *)bh->b_data;
5471 + entries = gfs16_to_cpu(leaf->lf_entries);
5472 + ln = leaf->lf_next;
5475 + ln = gfs64_to_cpu(ln);
5477 + error = get_leaf(dip, ln, &tmp_bh);
5481 + leaf = (struct gfs_leaf *)tmp_bh->b_data;
5482 + if (leaf->lf_entries) {
5483 + entries += gfs16_to_cpu(leaf->lf_entries);
5486 + ln = leaf->lf_next;
5491 + /* Bail out if there's nothing to do */
5496 + /* Alloc arrays */
5499 + larr = gmalloc(leaves * sizeof(struct buffer_head *));
5501 + darr = gmalloc(entries * sizeof(struct gfs_dirent *));
5503 + /* Fill in arrays */
5505 + leaf = (struct gfs_leaf *)bh->b_data;
5506 + if (leaf->lf_entries) {
5507 + dirent_first(dip, bh, &de);
5509 + if (!de->de_inum.no_formal_ino)
5513 + while (dirent_next(dip, bh, &de) == 0);
5515 + ln = leaf->lf_next;
5518 + ln = gfs64_to_cpu(ln);
5520 + error = get_leaf(dip, ln, &tmp_bh);
5524 + leaf = (struct gfs_leaf *)tmp_bh->b_data;
5525 + if (leaf->lf_entries) {
5526 + dirent_first(dip, tmp_bh, &de);
5528 + if (!de->de_inum.no_formal_ino)
5532 + while (dirent_next(dip, tmp_bh, &de) == 0);
5534 + larr[l++] = tmp_bh;
5536 + ln = leaf->lf_next;
5538 + ln = leaf->lf_next;
5543 + GFS_ASSERT_INODE(l == leaves, dip,);
5544 + GFS_ASSERT_INODE(e == entries, dip,);
5548 + error = do_filldir_main(dip, offset, opaque, filldir, darr,
5556 + for (x = 0; x < l; x++)
5567 + * @dip: The GFS inode
5575 +dir_e_search(struct gfs_inode *dip, struct qstr *filename,
5576 + struct gfs_inum *inum, unsigned int *type)
5578 + struct buffer_head *bh;
5579 + struct gfs_dirent *dent;
5582 + error = linked_leaf_search(dip, filename, &dent, NULL, &bh);
5587 + gfs_inum_in(inum, (char *)&dent->de_inum);
5589 + *type = gfs16_to_cpu(dent->de_type);
5598 + * @dip: The GFS inode
5606 +dir_e_add(struct gfs_inode *dip, struct qstr *filename,
5607 + struct gfs_inum *inum, unsigned int type)
5609 + struct gfs_sbd *sdp = dip->i_sbd;
5610 + struct buffer_head *bh, *nbh, *dibh;
5611 + struct gfs_leaf *leaf, *nleaf;
5612 + struct gfs_dirent *dent;
5613 + uint32_t hsize, index;
5615 + uint64_t leaf_no, bn;
5619 + hsize = 1 << dip->i_di.di_depth;
5620 + GFS_ASSERT_INODE(hsize * sizeof(uint64_t) == dip->i_di.di_size, dip,);
5622 + /* Figure out the address of the leaf node. */
5624 + hash = gfs_dir_hash(filename->name, filename->len);
5625 + index = hash >> (32 - dip->i_di.di_depth);
5627 + error = get_leaf_nr(dip, index, &leaf_no);
5631 + /* Add entry to the leaf */
5634 + error = get_leaf(dip, leaf_no, &bh);
5638 + leaf = (struct gfs_leaf *)bh->b_data;
5640 + if (gfs_dirent_alloc(dip, bh, filename->len, &dent)) {
5642 + if (gfs16_to_cpu(leaf->lf_depth) < dip->i_di.di_depth) {
5643 + /* Can we split the leaf? */
5647 + error = dir_split_leaf(dip, index, leaf_no);
5653 + } else if (dip->i_di.di_depth < GFS_DIR_MAX_DEPTH) {
5654 + /* Can we double the hash table? */
5658 + error = dir_double_exhash(dip);
5664 + } else if (leaf->lf_next) {
5665 + /* Can we try the next leaf in the list? */
5666 + leaf_no = gfs64_to_cpu(leaf->lf_next);
5671 + /* Create a new leaf and add it to the list. */
5673 + error = gfs_metaalloc(dip, &bn);
5679 + error = gfs_dread(sdp, bn, dip->i_gl,
5680 + DIO_NEW | DIO_START | DIO_WAIT,
5687 + gfs_trans_add_bh(dip->i_gl, nbh);
5688 + gfs_metatype_set(sdp, nbh, GFS_METATYPE_LF,
5690 + gfs_buffer_clear_tail(nbh,
5691 + sizeof(struct gfs_meta_header));
5693 + gfs_trans_add_bh(dip->i_gl, bh);
5694 + leaf->lf_next = cpu_to_gfs64(bn);
5696 + nleaf = (struct gfs_leaf *)nbh->b_data;
5697 + nleaf->lf_depth = leaf->lf_depth;
5698 + nleaf->lf_dirent_format = cpu_to_gfs32(GFS_FORMAT_DE);
5700 + if (gfs_dirent_alloc(dip, nbh, filename->len, &dent))
5701 + GFS_ASSERT_INODE(FALSE, dip,);
5703 + dip->i_di.di_blocks++;
5712 + /* If the gfs_dirent_alloc() succeeded, it pinned the "bh". */
5714 + gfs_inum_out(inum, (char *)&dent->de_inum);
5715 + dent->de_hash = cpu_to_gfs32(hash);
5716 + dent->de_type = cpu_to_gfs16(type);
5717 + memcpy((char *)(dent + 1), filename->name, filename->len);
5719 + leaf->lf_entries = gfs16_to_cpu(leaf->lf_entries) + 1;
5720 + leaf->lf_entries = cpu_to_gfs16(leaf->lf_entries);
5724 + error = gfs_get_inode_buffer(dip, &dibh);
5728 + dip->i_di.di_entries++;
5729 + dip->i_di.di_mtime = dip->i_di.di_ctime = get_seconds();
5731 + gfs_trans_add_bh(dip->i_gl, dibh);
5732 + gfs_dinode_out(&dip->i_di, dibh->b_data);
5743 + * @dip: The GFS inode
5750 +dir_e_del(struct gfs_inode *dip, struct qstr *filename)
5752 + struct buffer_head *bh, *dibh;
5753 + struct gfs_dirent *dent, *prev;
5754 + struct gfs_leaf *leaf;
5755 + unsigned int entries;
5758 + error = linked_leaf_search(dip, filename, &dent, &prev, &bh);
5759 + GFS_ASSERT_INODE(error != -ENOENT, dip,);
5763 + dirent_del(dip, bh, prev, dent); /* Pins bh */
5765 + leaf = (struct gfs_leaf *)bh->b_data;
5766 + entries = gfs16_to_cpu(leaf->lf_entries);
5767 + GFS_ASSERT_INODE(entries, dip,);
5769 + leaf->lf_entries = cpu_to_gfs16(entries);
5773 + error = gfs_get_inode_buffer(dip, &dibh);
5777 + GFS_ASSERT_INODE(dip->i_di.di_entries, dip,);
5778 + dip->i_di.di_entries--;
5779 + dip->i_di.di_mtime = dip->i_di.di_ctime = get_seconds();
5781 + gfs_trans_add_bh(dip->i_gl, dibh);
5782 + gfs_dinode_out(&dip->i_di, dibh->b_data);
5789 + * dir_e_read - Reads the entries from a directory into a filldir buffer
5790 + * @dip: dinode pointer
5791 + * @offset: the hash of the last entry read shifted to the right once
5792 + * @opaque: buffer for the filldir function to fill
5793 + * @filldir: points to the filldir function to use
5798 +dir_e_read(struct gfs_inode *dip, uint64_t *offset, void *opaque,
5799 + gfs_filldir_t filldir)
5801 + struct gfs_sbd *sdp = dip->i_sbd;
5802 + struct buffer_head *bh;
5803 + struct gfs_leaf leaf;
5804 + uint32_t hsize, len;
5805 + uint32_t ht_offset, lp_offset, ht_offset_cur = -1;
5806 + uint32_t hash, index;
5808 + int copied = FALSE;
5811 + hsize = 1 << dip->i_di.di_depth;
5812 + GFS_ASSERT_INODE(hsize * sizeof(uint64_t) == dip->i_di.di_size, dip,);
5814 + hash = gfs_dir_offset2hash(*offset);
5815 + index = hash >> (32 - dip->i_di.di_depth);
5817 + lp = gmalloc(sdp->sd_hash_bsize);
5819 + while (index < hsize) {
5820 + lp_offset = index & (sdp->sd_hash_ptrs - 1);
5821 + ht_offset = index - lp_offset;
5823 + if (ht_offset_cur != ht_offset) {
5824 + error = gfs_internal_read(dip, (char *)lp,
5825 + ht_offset * sizeof(uint64_t),
5826 + sdp->sd_hash_bsize);
5827 + if (error != sdp->sd_hash_bsize) {
5832 + ht_offset_cur = ht_offset;
5835 + error = get_leaf(dip, gfs64_to_cpu(lp[lp_offset]), &bh);
5839 + gfs_leaf_in(&leaf, bh->b_data);
5842 + error = do_filldir_multi(dip, offset,
5846 + error = do_filldir_single(dip, offset,
5848 + bh, leaf.lf_entries,
5859 + len = 1 << (dip->i_di.di_depth - leaf.lf_depth);
5860 + index = (index & ~(len - 1)) + len;
5871 + * @dip: The GFS inode
5879 +dir_e_mvino(struct gfs_inode *dip, struct qstr *filename,
5880 + struct gfs_inum *inum, unsigned int new_type)
5882 + struct buffer_head *bh, *dibh;
5883 + struct gfs_dirent *dent;
5886 + error = linked_leaf_search(dip, filename, &dent, NULL, &bh);
5887 + GFS_ASSERT_INODE(error != -ENOENT, dip,);
5891 + gfs_trans_add_bh(dip->i_gl, bh);
5893 + gfs_inum_out(inum, (char *)&dent->de_inum);
5894 + dent->de_type = cpu_to_gfs16(new_type);
5898 + error = gfs_get_inode_buffer(dip, &dibh);
5902 + dip->i_di.di_mtime = dip->i_di.di_ctime = get_seconds();
5904 + gfs_trans_add_bh(dip->i_gl, dibh);
5905 + gfs_dinode_out(&dip->i_di, dibh->b_data);
5913 + * @dip: The GFS inode
5921 +dir_l_search(struct gfs_inode *dip, struct qstr *filename,
5922 + struct gfs_inum *inum, unsigned int *type)
5924 + struct buffer_head *dibh;
5925 + struct gfs_dirent *dent;
5928 + GFS_ASSERT_INODE(gfs_is_stuffed(dip), dip,);
5930 + error = gfs_get_inode_buffer(dip, &dibh);
5934 + error = leaf_search(dip, dibh, filename, &dent, NULL);
5937 + gfs_inum_in(inum, (char *)&dent->de_inum);
5939 + *type = gfs16_to_cpu(dent->de_type);
5949 + * @dip: The GFS inode
5958 +dir_l_add(struct gfs_inode *dip, struct qstr *filename,
5959 + struct gfs_inum *inum, unsigned int type)
5961 + struct buffer_head *dibh;
5962 + struct gfs_dirent *dent;
5965 + GFS_ASSERT_INODE(gfs_is_stuffed(dip), dip,);
5967 + error = gfs_get_inode_buffer(dip, &dibh);
5971 + if (gfs_dirent_alloc(dip, dibh, filename->len, &dent)) {
5974 + error = dir_make_exhash(dip);
5976 + error = dir_e_add(dip, filename, inum, type);
5981 + /* gfs_dirent_alloc() pins */
5983 + gfs_inum_out(inum, (char *)&dent->de_inum);
5984 + dent->de_hash = gfs_dir_hash(filename->name, filename->len);
5985 + dent->de_hash = cpu_to_gfs32(dent->de_hash);
5986 + dent->de_type = cpu_to_gfs16(type);
5987 + memcpy((char *)(dent + 1), filename->name, filename->len);
5989 + dip->i_di.di_entries++;
5990 + dip->i_di.di_mtime = dip->i_di.di_ctime = get_seconds();
5992 + gfs_dinode_out(&dip->i_di, dibh->b_data);
6000 + * @dip: The GFS inode
6007 +dir_l_del(struct gfs_inode *dip, struct qstr *filename)
6009 + struct buffer_head *dibh;
6010 + struct gfs_dirent *dent, *prev;
6013 + GFS_ASSERT_INODE(gfs_is_stuffed(dip), dip,);
6015 + error = gfs_get_inode_buffer(dip, &dibh);
6019 + error = leaf_search(dip, dibh, filename, &dent, &prev);
6020 + GFS_ASSERT_INODE(!error, dip,);
6022 + dirent_del(dip, dibh, prev, dent);
6024 + /* dirent_del() pins */
6026 + GFS_ASSERT_INODE(dip->i_di.di_entries, dip,);
6027 + dip->i_di.di_entries--;
6029 + dip->i_di.di_mtime = dip->i_di.di_ctime = get_seconds();
6031 + gfs_dinode_out(&dip->i_di, dibh->b_data);
6049 +dir_l_read(struct gfs_inode *dip, uint64_t *offset, void *opaque,
6050 + gfs_filldir_t filldir)
6052 + struct buffer_head *dibh;
6053 + int copied = FALSE;
6056 + GFS_ASSERT_INODE(gfs_is_stuffed(dip), dip,);
6058 + if (!dip->i_di.di_entries)
6061 + error = gfs_get_inode_buffer(dip, &dibh);
6065 + error = do_filldir_single(dip, offset,
6067 + dibh, dip->i_di.di_entries,
6087 +dir_l_mvino(struct gfs_inode *dip, struct qstr *filename,
6088 + struct gfs_inum *inum, unsigned int new_type)
6090 + struct buffer_head *dibh;
6091 + struct gfs_dirent *dent;
6094 + GFS_ASSERT_INODE(gfs_is_stuffed(dip), dip,);
6096 + error = gfs_get_inode_buffer(dip, &dibh);
6100 + error = leaf_search(dip, dibh, filename, &dent, NULL);
6101 + GFS_ASSERT_INODE(!error, dip,);
6103 + gfs_trans_add_bh(dip->i_gl, dibh);
6105 + gfs_inum_out(inum, (char *)&dent->de_inum);
6106 + dent->de_type = cpu_to_gfs16(new_type);
6108 + dip->i_di.di_mtime = dip->i_di.di_ctime = get_seconds();
6110 + gfs_dinode_out(&dip->i_di, dibh->b_data);
6118 + * gfs_dir_search - Search a directory
6119 + * @dip: The GFS inode
6123 + * This routine searches a directory for a file or another directory.
6124 + * Assumes a glock is held on dip.
6126 + * Returns: 0 if found (inum/type filled in), -EXXXX on failure.
6130 +gfs_dir_search(struct gfs_inode *dip, struct qstr *filename,
6131 + struct gfs_inum *inum, unsigned int *type)
6135 + GFS_ASSERT_INODE(dip->i_di.di_type == GFS_FILE_DIR, dip,);
6137 + if (dip->i_di.di_flags & GFS_DIF_EXHASH)
6138 + error = dir_e_search(dip, filename, inum, type);
6140 + error = dir_l_search(dip, filename, inum, type);
6146 + * gfs_dir_add - Add new filename into directory
6147 + * @dip: The GFS inode
6148 + * @filename: The new name
6149 + * @inode: The inode number of the entry
6150 + * @type: The type of the entry
6152 + * Returns: 0 on success, error code on failure
6156 +gfs_dir_add(struct gfs_inode *dip, struct qstr *filename,
6157 + struct gfs_inum *inum, unsigned int type)
6161 + GFS_ASSERT_INODE(dip->i_di.di_type == GFS_FILE_DIR, dip,);
6163 + if (dip->i_di.di_flags & GFS_DIF_EXHASH)
6164 + error = dir_e_add(dip, filename, inum, type);
6166 + error = dir_l_add(dip, filename, inum, type);
6172 + * gfs_dir_del - Delete a directory entry
6173 + * @dip: The GFS inode
6174 + * @filename: The filename
6176 + * Returns: 0 on success, error code on failure
6180 +gfs_dir_del(struct gfs_inode *dip, struct qstr *filename)
6184 + GFS_ASSERT_INODE(dip->i_di.di_type == GFS_FILE_DIR, dip,);
6186 + if (dip->i_di.di_flags & GFS_DIF_EXHASH)
6187 + error = dir_e_del(dip, filename);
6189 + error = dir_l_del(dip, filename);
6195 + * gfs_dir_read - Read entries from a directory into a filldir buffer
6196 + * @dip: The GFS inode
6201 + * Returns: 0 on success, error code otherwise
6205 +gfs_dir_read(struct gfs_inode *dip, uint64_t * offset, void *opaque,
6206 + gfs_filldir_t filldir)
6210 + GFS_ASSERT_INODE(dip->i_di.di_type == GFS_FILE_DIR, dip,);
6212 + if (dip->i_di.di_flags & GFS_DIF_EXHASH)
6213 + error = dir_e_read(dip, offset, opaque, filldir);
6215 + error = dir_l_read(dip, offset, opaque, filldir);
6221 + * gfs_dir_mvino - Change inode number of directory entry
6222 + * @dip: The GFS inode
6226 + * This routine changes the inode number of a directory entry. It's used
6227 + * by rename to change ".." when a directory is moved.
6228 + * Assumes a glock is held on dvp.
6230 + * Returns: 0 on success, -EXXXX on failure
6234 +gfs_dir_mvino(struct gfs_inode *dip, struct qstr *filename,
6235 + struct gfs_inum *inum, unsigned int new_type)
6239 + GFS_ASSERT_INODE(dip->i_di.di_type == GFS_FILE_DIR, dip,);
6241 + if (dip->i_di.di_flags & GFS_DIF_EXHASH)
6242 + error = dir_e_mvino(dip, filename, inum, new_type);
6244 + error = dir_l_mvino(dip, filename, inum, new_type);
6250 + * foreach_leaf - call a function for each leaf in a directory
6251 + * @dip: the directory
6252 + * @lc: the function to call for each leaf
6253 + * @data: private data to pass to it
6255 + * Returns: 0 on success, -EXXX on failure
6259 +foreach_leaf(struct gfs_inode *dip, leaf_call_t lc, void *data)
6261 + struct gfs_sbd *sdp = dip->i_sbd;
6262 + struct buffer_head *bh;
6263 + struct gfs_leaf leaf;
6264 + uint32_t hsize, len;
6265 + uint32_t ht_offset, lp_offset, ht_offset_cur = -1;
6266 + uint32_t index = 0;
6271 + GFS_ASSERT_INODE(dip->i_di.di_flags & GFS_DIF_EXHASH, dip,);
6272 + hsize = 1 << dip->i_di.di_depth;
6273 + GFS_ASSERT_INODE(hsize * sizeof(uint64_t) == dip->i_di.di_size, dip,);
6275 + lp = gmalloc(sdp->sd_hash_bsize);
6277 + while (index < hsize) {
6278 + lp_offset = index & (sdp->sd_hash_ptrs - 1);
6279 + ht_offset = index - lp_offset;
6281 + if (ht_offset_cur != ht_offset) {
6282 + error = gfs_internal_read(dip, (char *)lp,
6283 + ht_offset * sizeof(uint64_t),
6284 + sdp->sd_hash_bsize);
6285 + if (error != sdp->sd_hash_bsize) {
6290 + ht_offset_cur = ht_offset;
6293 + leaf_no = gfs64_to_cpu(lp[lp_offset]);
6295 + error = get_leaf(dip, leaf_no, &bh);
6298 + gfs_leaf_in(&leaf, bh->b_data);
6301 + len = 1 << (dip->i_di.di_depth - leaf.lf_depth);
6303 + error = lc(dip, index, len, leaf_no, data);
6307 + index = (index & ~(len - 1)) + len;
6312 + GFS_ASSERT_INODE(index == hsize, dip,);
6321 + * leaf_free - Deallocate a directory leaf
6322 + * @dip: the directory
6323 + * @index: the hash table offset in the directory
6324 + * @len: the number of pointers to this leaf
6325 + * @leaf_no: the leaf number
6328 + * Returns: 0 on success, -EXXX on failure
6332 +leaf_free(struct gfs_inode *dip,
6333 + uint32_t index, uint32_t len,
6334 + uint64_t leaf_no, void *data)
6336 + struct gfs_sbd *sdp = dip->i_sbd;
6337 + struct gfs_holder ri_gh;
6338 + struct gfs_leaf tmp_leaf;
6339 + struct gfs_rgrp_list rlist;
6340 + struct buffer_head *bh, *dibh;
6342 + unsigned int rg_blocks = 0;
6344 + unsigned int x, size = len * sizeof(uint64_t);
6347 + memset(&rlist, 0, sizeof(struct gfs_rgrp_list));
6349 + ht = gmalloc(size);
6350 + memset(ht, 0, size);
6352 + gfs_alloc_get(dip);
6354 + error = gfs_quota_hold_m(dip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
6358 + error = gfs_rindex_hold(sdp, &ri_gh);
6362 + /* Count the number of leaves */
6364 + for (blk = leaf_no; blk; blk = tmp_leaf.lf_next) {
6365 + error = get_leaf(dip, blk, &bh);
6368 + gfs_leaf_in(&tmp_leaf, (bh)->b_data);
6371 + gfs_rlist_add(sdp, &rlist, blk);
6374 + gfs_rlist_alloc(&rlist, LM_ST_EXCLUSIVE, 0);
6376 + error = gfs_glock_nq_m(rlist.rl_rgrps, rlist.rl_ghs);
6380 + for (x = 0; x < rlist.rl_rgrps; x++) {
6381 + struct gfs_rgrpd *rgd;
6382 + rgd = gl2rgd(rlist.rl_ghs[x].gh_gl);
6383 + rg_blocks += rgd->rd_ri.ri_length;
6386 + /* Trans may require:
6387 + All the bitmaps that were reserved.
6388 + One block for the dinode.
6389 + All the hash blocks that will be changed.
6390 + One block for a quota change. */
6392 + error = gfs_trans_begin(sdp,
6393 + rg_blocks + 1 + (DIV_RU(size, sdp->sd_jbsize) + 1),
6396 + goto fail_rg_gunlock;
6398 + for (blk = leaf_no; blk; blk = tmp_leaf.lf_next) {
6399 + error = get_leaf(dip, blk, &bh);
6401 + goto fail_end_trans;
6402 + gfs_leaf_in(&tmp_leaf, bh->b_data);
6405 + gfs_metafree(dip, blk, 1);
6407 + dip->i_di.di_blocks--;
6410 + error = gfs_internal_write(dip, ht, index * sizeof(uint64_t), size);
6411 + if (error != size) {
6414 + goto fail_end_trans;
6417 + error = gfs_get_inode_buffer(dip, &dibh);
6419 + goto fail_end_trans;
6421 + gfs_trans_add_bh(dip->i_gl, dibh);
6422 + gfs_dinode_out(&dip->i_di, dibh->b_data);
6425 + gfs_trans_end(sdp);
6427 + gfs_glock_dq_m(rlist.rl_rgrps, rlist.rl_ghs);
6428 + gfs_rlist_free(&rlist);
6429 + gfs_glock_dq_uninit(&ri_gh);
6430 + gfs_quota_unhold_m(dip);
6431 + gfs_alloc_put(dip);
6437 + gfs_trans_end(sdp);
6440 + gfs_glock_dq_m(rlist.rl_rgrps, rlist.rl_ghs);
6443 + gfs_rlist_free(&rlist);
6444 + gfs_glock_dq_uninit(&ri_gh);
6447 + gfs_quota_unhold_m(dip);
6450 + gfs_alloc_put(dip);
6457 + * gfs_dir_exhash_free - free all the leaf blocks in a directory
6458 + * @dip: the directory
6460 + * Returns: 0 on success, -EXXX on failure
6464 +gfs_dir_exhash_free(struct gfs_inode *dip)
6466 + struct gfs_sbd *sdp = dip->i_sbd;
6467 + struct buffer_head *bh;
6470 + GFS_ASSERT_INODE(dip->i_di.di_type == GFS_FILE_DIR, dip,);
6472 + error = foreach_leaf(dip, leaf_free, NULL);
6476 + /* Make this a regular file in case we crash.
6477 + (We don't want to free these blocks a second time.) */
6479 + error = gfs_trans_begin(sdp, 1, 0);
6483 + error = gfs_get_inode_buffer(dip, &bh);
6487 + gfs_trans_add_bh(dip->i_gl, bh);
6488 + ((struct gfs_dinode *)bh->b_data)->di_type = cpu_to_gfs16(GFS_FILE_REG);
6492 + gfs_trans_end(sdp);
6497 + gfs_trans_end(sdp);
6502 + * gfs_diradd_alloc_required - figure out if an entry addition is going to require an allocation
6503 + * @ip: the file being written to
6504 + * @filename: the filename that's going to be added
6505 + * @alloc_required: the int is set to TRUE if an alloc is required, FALSE otherwise
6507 + * Returns: 0 on success, -EXXX on error
6511 +gfs_diradd_alloc_required(struct gfs_inode *dip, struct qstr *filename,
6512 + int *alloc_required)
6514 + struct buffer_head *bh = NULL, *bh_next;
6515 + uint32_t hsize, hash, index;
6518 + *alloc_required = FALSE;
6520 + GFS_ASSERT_INODE(dip->i_di.di_type == GFS_FILE_DIR, dip,);
6522 + if (dip->i_di.di_flags & GFS_DIF_EXHASH) {
6523 + hsize = 1 << dip->i_di.di_depth;
6524 + GFS_ASSERT_INODE(hsize * sizeof(uint64_t) == dip->i_di.di_size,
6527 + hash = gfs_dir_hash(filename->name, filename->len);
6528 + index = hash >> (32 - dip->i_di.di_depth);
6530 + error = get_first_leaf(dip, index, &bh_next);
6540 + if (dirent_fits(dip, bh, filename->len))
6543 + error = get_next_leaf(dip, bh, &bh_next);
6544 + if (error == -ENOENT) {
6545 + *alloc_required = TRUE;
6554 + error = gfs_get_inode_buffer(dip, &bh);
6558 + if (!dirent_fits(dip, bh, filename->len))
6559 + *alloc_required = TRUE;
6568 + * do_gdm - copy out one leaf (or list of leaves)
6569 + * @dip: the directory
6570 + * @index: the hash table offset in the directory
6571 + * @len: the number of pointers to this leaf
6572 + * @leaf_no: the leaf number
6573 + * @data: a pointer to a struct gfs_user_buffer structure
6575 + * Returns: 0 on success, -EXXX on failure
6579 +do_gdm(struct gfs_inode *dip, uint32_t index, uint32_t len, uint64_t leaf_no,
6582 + struct gfs_user_buffer *ub = (struct gfs_user_buffer *)data;
6583 + struct gfs_leaf leaf;
6584 + struct buffer_head *bh;
6588 + for (blk = leaf_no; blk; blk = leaf.lf_next) {
6589 + error = get_leaf(dip, blk, &bh);
6593 + gfs_leaf_in(&leaf, bh->b_data);
6595 + error = gfs_add_bh_to_ub(ub, bh);
6607 + * gfs_get_dir_meta - return all the leaf blocks of a directory
6608 + * @dip: the directory
6609 + * @ub: the structure representing the meta
6611 + * Returns: 0 on success, -EXXX on failure
6615 +gfs_get_dir_meta(struct gfs_inode *dip, struct gfs_user_buffer *ub)
6617 + GFS_ASSERT_INODE(dip->i_di.di_type == GFS_FILE_DIR, dip,);
6618 + return foreach_leaf(dip, do_gdm, ub);
6620 diff -urN linux-orig/fs/gfs/dir.h linux-patched/fs/gfs/dir.h
6621 --- linux-orig/fs/gfs/dir.h 1969-12-31 18:00:00.000000000 -0600
6622 +++ linux-patched/fs/gfs/dir.h 2004-06-30 13:27:49.335712986 -0500
6624 +/******************************************************************************
6625 +*******************************************************************************
6627 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
6628 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
6630 +** This copyrighted material is made available to anyone wishing to use,
6631 +** modify, copy, or redistribute it subject to the terms and conditions
6632 +** of the GNU General Public License v.2.
6634 +*******************************************************************************
6635 +******************************************************************************/
6637 +#ifndef __DIR_DOT_H__
6638 +#define __DIR_DOT_H__
6641 + * gfs_filldir_t - Report a directory entry to the caller of gfs_dir_read()
6642 + * @opaque: opaque data used by the function
6643 + * @name: the name of the directory entry
6644 + * @length: the length of the name
6645 + * @offset: the entry's offset in the directory
6646 + * @inum: the inode number the entry points to
6647 + * @type: the type of inode the entry points to
6649 + * Returns: 0 on success, 1 if buffer full
6652 +typedef int (*gfs_filldir_t) (void *opaque,
6653 + const char *name, unsigned int length,
6655 + struct gfs_inum *inum, unsigned int type);
6657 +int gfs_filecmp(struct qstr *file1, char *file2, int len_of_file2);
6658 +int gfs_dirent_alloc(struct gfs_inode *dip, struct buffer_head *bh,
6659 + int name_len, struct gfs_dirent **dent_out);
6661 +int gfs_dir_search(struct gfs_inode *dip, struct qstr *filename,
6662 + struct gfs_inum *inum, unsigned int *type);
6663 +int gfs_dir_add(struct gfs_inode *dip, struct qstr *filename,
6664 + struct gfs_inum *inum, unsigned int type);
6665 +int gfs_dir_del(struct gfs_inode *dip, struct qstr *filename);
6666 +int gfs_dir_read(struct gfs_inode *dip, uint64_t * offset, void *opaque,
6667 + gfs_filldir_t filldir);
6668 +int gfs_dir_mvino(struct gfs_inode *dip, struct qstr *filename,
6669 + struct gfs_inum *new_inum, unsigned int new_type);
6671 +int gfs_dir_exhash_free(struct gfs_inode *dip);
6673 +int gfs_diradd_alloc_required(struct gfs_inode *dip, struct qstr *filename,
6674 + int *alloc_required);
6676 +int gfs_get_dir_meta(struct gfs_inode *ip, struct gfs_user_buffer *ub);
6678 +#endif /* __DIR_DOT_H__ */
6679 diff -urN linux-orig/fs/gfs/eattr.c linux-patched/fs/gfs/eattr.c
6680 --- linux-orig/fs/gfs/eattr.c 1969-12-31 18:00:00.000000000 -0600
6681 +++ linux-patched/fs/gfs/eattr.c 2004-06-30 13:27:49.337712522 -0500
6683 +/******************************************************************************
6684 +*******************************************************************************
6686 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
6687 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
6689 +** This copyrighted material is made available to anyone wishing to use,
6690 +** modify, copy, or redistribute it subject to the terms and conditions
6691 +** of the GNU General Public License v.2.
6693 +*******************************************************************************
6694 +******************************************************************************/
6696 +#include <linux/sched.h>
6697 +#include <linux/slab.h>
6698 +#include <linux/smp_lock.h>
6699 +#include <linux/spinlock.h>
6700 +#include <asm/semaphore.h>
6701 +#include <linux/completion.h>
6702 +#include <linux/buffer_head.h>
6703 +#include <asm/uaccess.h>
6704 +#include <linux/xattr_acl.h>
6717 +#define GFS_EA_REC_LEN(x) gfs32_to_cpu((x)->ea_rec_len)
6718 +#define GFS_EA_NAME(x) ((char *)(x) + sizeof(struct gfs_ea_header))
6719 +#define GFS_EA_DATA_PTRS(x) ((uint64_t *)((char *)(x) + sizeof(struct gfs_ea_header) + (((x)->ea_name_len + 7) & ~7)))
6721 +#define GFS_EA_NEXT(x) (struct gfs_ea_header *)((char *)(x) + GFS_EA_REC_LEN(x))
6722 +#define GFS_EA_FREESPACE(x) (struct gfs_ea_header *)((char *)(x) + GFS_EA_SIZE(x))
6724 +#define GFS_EAREQ_IS_STUFFED(x, y) (((sizeof(struct gfs_ea_header) + (x)->es_data_len + (x)->es_name_len + 7) & ~7) <= y)
6726 +#define GFS_EADATA_NUM_PTRS(x, y) (((x) + (y) - 1) / (y))
6728 +#define GFS_EA_SIZE(x) ((sizeof(struct gfs_ea_header) + (x)->ea_name_len + (GFS_EA_IS_UNSTUFFED(x)? (8 * (x)->ea_num_ptrs) : GFS_EA_DATA_LEN(x)) + 7) & ~ 7)
6730 +#define GFS_EACMD_VALID(x) ((x) <= GFS_EACMD_REMOVE)
6732 +#define GFS_EA_IS_LAST(x) ((x)->ea_flags & GFS_EAFLAG_LAST)
6734 +#define GFS_EA_STRLEN(x) ((x)->ea_name_len + 1 + (((x)->ea_type == GFS_EATYPE_USR)? 5 : 7))
6736 +#define GFS_FIRST_EA(x) ((struct gfs_ea_header *) ((x)->b_data + sizeof(struct gfs_meta_header)))
6739 +#define EA_DEALLOC 2
6741 +static struct buffer_head *alloc_eattr_blk(struct gfs_sbd *sdp,
6742 + struct gfs_inode *alloc_ip,
6743 + struct gfs_inode *ip,
6744 + uint64_t * block);
6747 + * can_replace - returns true if ea is large enough to hold the data in
6751 +static __inline__ int
6752 +can_replace(struct gfs_ea_header *ea, struct gfs_easet_io *req,
6753 + uint32_t avail_size)
6756 + GFS_EA_REC_LEN(ea) - sizeof (struct gfs_ea_header) -
6759 + if (GFS_EAREQ_IS_STUFFED(req, avail_size) && !GFS_EA_IS_UNSTUFFED(ea))
6760 + return (req->es_data_len <= data_space);
6762 + return (GFS_EADATA_NUM_PTRS(req->es_data_len, avail_size) <=
6767 + * get_req_size - returns the actual number of bytes the request will take up
6768 + * (not counting any unstuffed data blocks)
6771 +static __inline__ uint32_t
6772 +get_req_size(struct gfs_easet_io *req, uint32_t avail_size)
6775 + ((sizeof (struct gfs_ea_header) + req->es_data_len +
6776 + req->es_name_len + 7) & ~7);
6778 + if (size <= avail_size)
6781 + return ((sizeof (struct gfs_ea_header) + req->es_name_len + 7) & ~7) +
6782 + (8 * GFS_EADATA_NUM_PTRS(req->es_data_len, avail_size));
6786 + * gfs_ea_write_permission - decides if the user has permission to write to
6788 + * @req: the write request
6789 + * @ip: inode of file with the ea
6791 + * Returns: 0 on success, -EXXX on error
6795 +gfs_ea_write_permission(struct gfs_easet_io *req, struct gfs_inode *ip)
6797 + struct inode *inode = gfs_iget(ip, NO_CREATE);
6800 + GFS_ASSERT_INODE(inode, ip,);
6802 + if (req->es_type == GFS_EATYPE_USR) {
6803 + if (!S_ISREG(inode->i_mode) &&
6804 + (!S_ISDIR(inode->i_mode) || inode->i_mode & S_ISVTX))
6807 + error = permission(inode, MAY_WRITE, NULL);
6808 + if (error == -EACCES)
6811 + } else if (req->es_type == GFS_EATYPE_SYS) {
6812 + if (IS_ACCESS_ACL(req->es_name, req->es_name_len))
6813 + error = gfs_validate_acl(ip, req->es_data,
6814 + req->es_data_len, 1);
6815 + else if (IS_DEFAULT_ACL(req->es_name, req->es_name_len))
6816 + error = gfs_validate_acl(ip, req->es_data,
6817 + req->es_data_len, 0);
6819 + if (!capable(CAP_SYS_ADMIN))
6823 + error = -EOPNOTSUPP;
6831 + * gfs_ea_read_permission - decides if the user has permission to read from
6833 + * @req: the read request
6834 + * @ip: inode of file with the ea
6836 + * Returns: 0 on success, -EXXX on error
6840 +gfs_ea_read_permission(struct gfs_eaget_io *req, struct gfs_inode *ip)
6842 + struct inode *inode = gfs_iget(ip, NO_CREATE);
6845 + GFS_ASSERT_INODE(inode, ip,);
6847 + if (req->eg_type == GFS_EATYPE_USR){
6848 + error = permission(inode, MAY_READ, NULL);
6849 + if (error == -EACCES)
6852 + else if (req->eg_type == GFS_EATYPE_SYS) {
6853 + if (IS_ACCESS_ACL(req->eg_name, req->eg_name_len) ||
6854 + IS_DEFAULT_ACL(req->eg_name, req->eg_name_len))
6857 + if (!capable(CAP_SYS_ADMIN))
6861 + error = -EOPNOTSUPP;
6869 + * gfs_es_memcpy - gfs memcpy wrapper with a return value
6874 +gfs_ea_memcpy(void *dest, void *src, unsigned long size)
6876 + memcpy(dest, src, size);
6881 + * gfs_ea_copy_to_user - copy_to_user wrapper
6885 +gfs_ea_copy_to_user(void *dest, void *src, unsigned long size)
6888 + error = (copy_to_user(dest, src, size)) ? -EFAULT : 0;
6893 + * Returns: 1 if find_direct_eattr should stop checking (if the eattr was found
6894 + * location will be set)
6895 + * 0 if find_eattr should keep on checking
6899 +find_direct_eattr(struct gfs_inode *ip, uint64_t blkno, char *name,
6900 + int name_len, int type, struct gfs_ea_location *location)
6903 + struct buffer_head *bh;
6904 + struct gfs_sbd *sdp = ip->i_sbd;
6905 + struct gfs_ea_header *curr, *prev = NULL;
6907 + err = gfs_dread(sdp, blkno, ip->i_gl, DIO_START | DIO_WAIT, &bh);
6910 + gfs_metatype_check(sdp, bh, GFS_METATYPE_EA);
6912 + (struct gfs_ea_header *) ((bh)->b_data +
6913 + sizeof (struct gfs_meta_header));
6914 + if (curr->ea_type == GFS_EATYPE_UNUSED) {
6915 + if (GFS_EA_IS_LAST(curr))
6917 + GFS_ASSERT_INODE(GFS_EA_REC_LEN(curr), ip,);
6919 + curr = GFS_EA_NEXT(curr);
6921 + if (type != curr->ea_type && ip->i_di.di_flags & GFS_DIF_EA_INDIRECT) {
6922 + if (type == GFS_EATYPE_SYS)
6927 + GFS_ASSERT_INODE(GFS_EA_REC_LEN(curr), ip,);
6929 + if (type == curr->ea_type && name_len == curr->ea_name_len &&
6930 + !memcmp(name, GFS_EA_NAME(curr), name_len)) {
6931 + location->bh = bh;
6932 + location->ea = curr;
6933 + location->prev = prev;
6937 + if (GFS_EA_IS_LAST(curr))
6940 + curr = GFS_EA_NEXT(curr);
6951 + * find_eattr - find a matching eattr
6953 + * Returns: 1 if ea found, 0 if no ea found, -EXXX on error
6956 +find_eattr(struct gfs_inode *ip, char *name, int name_len, int type,
6957 + struct gfs_ea_location *location)
6960 + struct buffer_head *bh;
6961 + struct gfs_sbd *sdp = ip->i_sbd;
6962 + uint64_t *eablk, *end;
6964 + memset(location, 0, sizeof (struct gfs_ea_location));
6966 + if (ip->i_di.di_flags & GFS_DIF_EA_INDIRECT) {
6968 + gfs_dread(sdp, ip->i_di.di_eattr, ip->i_gl,
6969 + DIO_START | DIO_WAIT, &bh);
6972 + gfs_metatype_check(sdp, bh, GFS_METATYPE_IN);
6974 + (uint64_t *) ((bh)->b_data + sizeof (struct gfs_indirect));
6977 + ((sdp->sd_sb.sb_bsize - sizeof (struct gfs_indirect)) / 8);
6978 + while (eablk < end && *eablk) {
6980 + find_direct_eattr(ip, gfs64_to_cpu(*eablk), name,
6981 + name_len, type, location);
6982 + if (err || location->ea)
6991 + find_direct_eattr(ip, ip->i_di.di_eattr, name, name_len,
6997 + return (location->ea != NULL);
7004 +make_space(struct gfs_inode *ip, struct buffer_head *bh, uint32_t size,
7005 + uint64_t blkno, struct gfs_ea_location *avail)
7007 + struct gfs_sbd *sdp = ip->i_sbd;
7008 + uint32_t free_size, avail_size;
7009 + struct gfs_ea_header *ea, *new_ea;
7013 + avail_size = sdp->sd_sb.sb_bsize - sizeof (struct gfs_meta_header);
7014 + ea = GFS_FIRST_EA(bh);
7015 + GFS_ASSERT_INODE(GFS_EA_REC_LEN(ea), ip,);
7016 + if (ea->ea_type == GFS_EATYPE_UNUSED) {
7017 + free_size = GFS_EA_REC_LEN(ea);
7018 + ea = GFS_EA_NEXT(ea);
7020 + while (free_size < size) {
7021 + free_size += (GFS_EA_REC_LEN(ea) - GFS_EA_SIZE(ea));
7022 + if (GFS_EA_IS_LAST(ea))
7024 + ea = GFS_EA_NEXT(ea);
7026 + if (free_size < size)
7028 + buf = gmalloc(avail_size);
7030 + free_size = avail_size;
7031 + ea = GFS_FIRST_EA(bh);
7032 + if (ea->ea_type == GFS_EATYPE_UNUSED)
7033 + ea = GFS_EA_NEXT(ea);
7034 + new_ea = (struct gfs_ea_header *) buf;
7035 + new_ea->ea_flags = 0;
7036 + new_ea->ea_rec_len = cpu_to_gfs32(size);
7037 + new_ea->ea_num_ptrs = 0;
7038 + new_ea->ea_type = GFS_EATYPE_UNUSED;
7039 + free_size -= size;
7040 + new_ea = GFS_EA_NEXT(new_ea);
7042 + memcpy(new_ea, ea, GFS_EA_SIZE(ea));
7043 + if (GFS_EA_IS_LAST(ea))
7045 + new_ea->ea_rec_len = cpu_to_gfs32(GFS_EA_SIZE(ea));
7046 + free_size -= GFS_EA_SIZE(ea);
7047 + ea = GFS_EA_NEXT(ea);
7048 + new_ea = GFS_EA_NEXT(new_ea);
7050 + new_ea->ea_rec_len = cpu_to_gfs32(free_size);
7051 + memcpy(GFS_FIRST_EA(bh), buf, avail_size);
7053 + avail->ea = GFS_FIRST_EA(bh);
7054 + avail->prev = NULL;
7062 +expand_to_indirect(struct gfs_inode *alloc_ip, struct gfs_inode *ip,
7063 + struct buffer_head **bh)
7066 + struct gfs_sbd *sdp = ip->i_sbd;
7067 + struct buffer_head *bh1 = NULL, *bh2 = NULL, *indbh = NULL;
7068 + uint64_t blkno, *blkptr;
7069 + uint32_t free_size, avail_size;
7070 + struct gfs_ea_header *prev, *curr, *new_ea = NULL;
7072 + avail_size = sdp->sd_sb.sb_bsize - sizeof (struct gfs_meta_header);
7073 + free_size = avail_size;
7074 + ip->i_di.di_flags |= GFS_DIF_EA_INDIRECT;
7075 + blkno = ip->i_di.di_eattr;
7076 + err = gfs_metaalloc(alloc_ip, &ip->i_di.di_eattr);
7079 + ip->i_di.di_blocks++;
7080 + err = gfs_dread(sdp, ip->i_di.di_eattr, ip->i_gl, DIO_NEW | DIO_START |
7081 + DIO_WAIT, &indbh);
7086 + gfs_trans_add_bh(ip->i_gl, indbh);
7087 + gfs_metatype_set(sdp, indbh, GFS_METATYPE_IN, GFS_FORMAT_IN);
7088 + memset((indbh)->b_data + sizeof (struct gfs_meta_header), 0,
7089 + sdp->sd_sb.sb_bsize - sizeof (struct gfs_meta_header));
7090 + blkptr = (uint64_t *) ((indbh)->b_data + sizeof (struct gfs_indirect));
7091 + *blkptr++ = cpu_to_gfs64(blkno);
7093 + curr = GFS_FIRST_EA(bh1);
7094 + while (curr->ea_type != GFS_EATYPE_USR) {
7095 + if (GFS_EA_IS_LAST(curr))
7097 + free_size -= GFS_EA_REC_LEN(curr);
7099 + curr = GFS_EA_NEXT(curr);
7101 + if (!prev || prev->ea_type == GFS_EATYPE_UNUSED)
7103 + gfs_trans_add_bh(ip->i_gl, bh1);
7104 + prev->ea_rec_len = cpu_to_gfs32(GFS_EA_REC_LEN(prev) + free_size);
7105 + prev->ea_flags |= GFS_EAFLAG_LAST;
7106 + bh2 = alloc_eattr_blk(sdp, alloc_ip, ip, &blkno);
7111 + free_size = avail_size;
7112 + new_ea = GFS_FIRST_EA(bh2);
7114 + memcpy(new_ea, curr, GFS_EA_SIZE(curr));
7115 + if (GFS_EA_IS_LAST(curr))
7117 + new_ea->ea_rec_len = cpu_to_gfs32(GFS_EA_SIZE(curr));
7118 + free_size -= GFS_EA_SIZE(curr);
7119 + curr = GFS_EA_NEXT(curr);
7120 + new_ea = GFS_EA_NEXT(new_ea);
7122 + new_ea->ea_rec_len = cpu_to_gfs32(free_size);
7123 + *blkptr = cpu_to_gfs64(blkno);
7134 +find_direct_sys_space(struct gfs_inode *ip, int size, struct buffer_head *bh,
7135 + struct gfs_ea_location *avail)
7137 + struct gfs_ea_header *curr, *prev = NULL;
7139 + curr = GFS_FIRST_EA(bh);
7140 + GFS_ASSERT_INODE(GFS_EA_REC_LEN(curr), ip,);
7141 + if (curr->ea_type == GFS_EATYPE_UNUSED) {
7142 + if (GFS_EA_REC_LEN(curr) >= size) {
7144 + avail->prev = NULL;
7149 + curr = GFS_EA_NEXT(curr);
7151 + while (curr->ea_type == GFS_EATYPE_SYS) {
7152 + GFS_ASSERT_INODE(GFS_EA_REC_LEN(curr), ip,);
7153 + if (GFS_EA_REC_LEN(curr) >= GFS_EA_SIZE(curr) + size) {
7155 + avail->prev = prev;
7159 + if (GFS_EA_IS_LAST(curr))
7162 + curr = GFS_EA_NEXT(curr);
7164 + make_space(ip, bh, size, ip->i_di.di_eattr, avail);
7171 + * int find_indirect_space
7174 + * @blktype: returns the type of block GFS_EATYPE_...
7176 + * returns 0 on success, -EXXX on failure
7179 +find_indirect_space(struct gfs_inode *ip, uint64_t blkno, int type,
7180 + int size, struct gfs_ea_location *avail, int *blktype)
7183 + struct buffer_head *bh;
7184 + struct gfs_sbd *sdp = ip->i_sbd;
7185 + struct gfs_ea_header *curr, *prev = NULL;
7187 + err = gfs_dread(sdp, blkno, ip->i_gl, DIO_START | DIO_WAIT, &bh);
7190 + gfs_metatype_check(sdp, bh, GFS_METATYPE_EA);
7191 + curr = GFS_FIRST_EA(bh);
7192 + GFS_ASSERT_INODE(GFS_EA_REC_LEN(curr), ip,);
7193 + if (curr->ea_type == GFS_EATYPE_UNUSED) {
7194 + if (GFS_EA_IS_LAST(curr)) {
7196 + avail->prev = NULL;
7198 + *blktype = GFS_EATYPE_UNUSED;
7202 + curr = GFS_EA_NEXT(curr);
7204 + if (type != curr->ea_type) {
7205 + *blktype = curr->ea_type;
7209 + if (prev && GFS_EA_REC_LEN(prev) >= size) {
7211 + avail->prev = NULL;
7216 + GFS_ASSERT_INODE(GFS_EA_REC_LEN(curr), ip,);
7217 + if (GFS_EA_REC_LEN(curr) >= GFS_EA_SIZE(curr) + size) {
7219 + avail->prev = prev;
7223 + if (GFS_EA_IS_LAST(curr))
7226 + curr = GFS_EA_NEXT(curr);
7237 +find_indirect_sys_space(struct gfs_inode *alloc_ip, struct gfs_inode *ip,
7238 + int size, struct buffer_head *bh,
7239 + struct gfs_ea_location *avail)
7242 + struct gfs_sbd *sdp = ip->i_sbd;
7243 + uint64_t *eablk, *end, *first_usr_blk = NULL;
7247 + eablk = (uint64_t *) ((bh)->b_data + sizeof (struct gfs_indirect));
7249 + eablk + ((sdp->sd_sb.sb_bsize - sizeof (struct gfs_indirect)) / 8);
7251 + while (eablk < end && *eablk) {
7253 + find_indirect_space(ip, gfs64_to_cpu(*eablk),
7254 + GFS_EATYPE_SYS, size, avail, &blktype);
7257 + if (blktype == GFS_EATYPE_USR && !first_usr_blk)
7258 + first_usr_blk = eablk;
7260 + if (!first_usr_blk)
7262 + gfs_trans_add_bh(ip->i_gl, bh);
7264 + *eablk = *first_usr_blk;
7265 + *first_usr_blk = blkno;
7270 + if (eablk >= end) {
7274 + avail->bh = alloc_eattr_blk(sdp, alloc_ip, ip, &blkno);
7279 + avail->ea = GFS_FIRST_EA(avail->bh);
7280 + avail->prev = NULL;
7281 + gfs_trans_add_bh(ip->i_gl, bh);
7282 + if (first_usr_blk) {
7283 + *eablk = *first_usr_blk;
7284 + *first_usr_blk = cpu_to_gfs64(blkno);
7286 + *eablk = cpu_to_gfs64(blkno);
7293 +find_sys_space(struct gfs_inode *alloc_ip, struct gfs_inode *ip, int size,
7294 + struct gfs_ea_location *avail)
7297 + struct buffer_head *bh;
7298 + struct gfs_sbd *sdp = ip->i_sbd;
7301 + gfs_dread(sdp, ip->i_di.di_eattr, ip->i_gl, DIO_START | DIO_WAIT,
7306 + if (ip->i_di.di_flags & GFS_DIF_EA_INDIRECT) {
7307 + gfs_metatype_check(sdp, bh, GFS_METATYPE_IN);
7308 + err = find_indirect_sys_space(alloc_ip, ip, size, bh, avail);
7310 + gfs_metatype_check(sdp, bh, GFS_METATYPE_EA);
7311 + find_direct_sys_space(ip, size, bh, avail);
7313 + err = expand_to_indirect(alloc_ip, ip, &bh);
7317 + find_indirect_sys_space(alloc_ip, ip, size, bh,
7323 + if (avail->bh != bh)
7331 +get_blk_type(struct gfs_inode *ip, uint64_t blkno, int *blktype)
7334 + struct gfs_sbd *sdp = ip->i_sbd;
7335 + struct buffer_head *bh;
7336 + struct gfs_ea_header *ea;
7338 + err = gfs_dread(sdp, blkno, ip->i_gl, DIO_START | DIO_WAIT, &bh);
7341 + gfs_metatype_check(sdp, bh, GFS_METATYPE_EA);
7342 + ea = GFS_FIRST_EA(bh);
7343 + GFS_ASSERT_INODE(GFS_EA_REC_LEN(ea), ip,);
7344 + if (ea->ea_type == GFS_EATYPE_UNUSED) {
7345 + if (GFS_EA_IS_LAST(ea)) {
7346 + *blktype = GFS_EATYPE_UNUSED;
7349 + ea = GFS_EA_NEXT(ea);
7350 + GFS_ASSERT_INODE(GFS_EA_REC_LEN(ea), ip,);
7352 + *blktype = ea->ea_type;
7362 +find_direct_usr_space(struct gfs_inode *ip, int size, struct buffer_head *bh,
7363 + struct gfs_ea_location *avail)
7365 + struct gfs_ea_header *curr, *prev = NULL;
7367 + curr = GFS_FIRST_EA(bh);
7368 + GFS_ASSERT_INODE(GFS_EA_REC_LEN(curr), ip,);
7369 + if (curr->ea_type == GFS_EATYPE_UNUSED) {
7370 + if (GFS_EA_IS_LAST(curr)) {
7372 + avail->prev = NULL;
7377 + curr = GFS_EA_NEXT(curr);
7378 + if (curr->ea_type == GFS_EATYPE_USR
7379 + && GFS_EA_REC_LEN(prev) >= size) {
7381 + avail->prev = NULL;
7386 + while (curr->ea_type != GFS_EATYPE_USR) {
7387 + GFS_ASSERT_INODE(GFS_EA_REC_LEN(curr), ip,);
7388 + if (GFS_EA_IS_LAST(curr))
7391 + curr = GFS_EA_NEXT(curr);
7394 + GFS_ASSERT_INODE(GFS_EA_REC_LEN(curr), ip,);
7395 + if (GFS_EA_REC_LEN(curr) >= GFS_EA_SIZE(curr) + size) {
7397 + avail->prev = prev;
7401 + if (GFS_EA_IS_LAST(curr))
7404 + curr = GFS_EA_NEXT(curr);
7412 +find_indirect_usr_space(struct gfs_inode *ip, int size, struct buffer_head *bh,
7413 + struct gfs_ea_location *avail)
7416 + struct gfs_sbd *sdp = ip->i_sbd;
7417 + uint64_t *eablk, *end, *last_sys_blk = NULL, *first_usr_blk = NULL;
7421 + eablk = (uint64_t *) ((bh)->b_data + sizeof (struct gfs_indirect));
7423 + eablk + ((sdp->sd_sb.sb_bsize - sizeof (struct gfs_indirect)) / 8);
7425 + while (eablk < end && *eablk) {
7427 + find_indirect_space(ip, gfs64_to_cpu(*eablk),
7428 + GFS_EATYPE_USR, size, avail, &blktype);
7431 + if (blktype == GFS_EATYPE_SYS)
7432 + last_sys_blk = eablk;
7433 + if (blktype == GFS_EATYPE_USR && !first_usr_blk)
7434 + first_usr_blk = eablk;
7436 + if (first_usr_blk)
7438 + first_usr_blk = eablk + 1;
7439 + while (first_usr_blk < end && *first_usr_blk) {
7442 + gfs64_to_cpu(*first_usr_blk),
7444 + if (blktype == GFS_EATYPE_SYS)
7445 + last_sys_blk = first_usr_blk;
7446 + if (blktype == GFS_EATYPE_USR)
7450 + if (last_sys_blk > eablk) {
7451 + gfs_trans_add_bh(ip->i_gl, bh);
7453 + *eablk = *last_sys_blk;
7454 + *last_sys_blk = blkno;
7461 + if (eablk >= end) {
7465 + avail->bh = alloc_eattr_blk(sdp, ip, ip, &blkno);
7470 + avail->ea = GFS_FIRST_EA(avail->bh);
7471 + avail->prev = NULL;
7472 + gfs_trans_add_bh(ip->i_gl, bh);
7473 + *eablk = cpu_to_gfs64(blkno);
7480 +find_usr_space(struct gfs_inode *ip, int size, struct gfs_ea_location *avail)
7483 + struct buffer_head *bh;
7484 + struct gfs_sbd *sdp = ip->i_sbd;
7487 + gfs_dread(sdp, ip->i_di.di_eattr, ip->i_gl, DIO_START | DIO_WAIT,
7492 + if (ip->i_di.di_flags & GFS_DIF_EA_INDIRECT) {
7493 + gfs_metatype_check(sdp, bh, GFS_METATYPE_IN);
7494 + err = find_indirect_usr_space(ip, size, bh, avail);
7496 + gfs_metatype_check(sdp, bh, GFS_METATYPE_EA);
7497 + find_direct_usr_space(ip, size, bh, avail);
7499 + err = expand_to_indirect(ip, ip, &bh);
7502 + err = find_indirect_usr_space(ip, size, bh, avail);
7507 + if (avail->bh != bh)
7515 +find_space(struct gfs_inode *ip, int size, int type,
7516 + struct gfs_ea_location *avail)
7520 + memset(avail, 0, sizeof (struct gfs_ea_location));
7522 + if (type == GFS_EATYPE_SYS)
7523 + err = find_sys_space(ip, ip, size, avail);
7525 + err = find_usr_space(ip, size, avail);
7531 +can_replace_in_block(struct gfs_inode *ip, int size,
7532 + struct gfs_ea_location found, struct gfs_ea_header **space)
7534 + struct gfs_ea_header *curr, *prev = NULL;
7537 + curr = GFS_FIRST_EA(found.bh);
7538 + GFS_ASSERT_INODE(GFS_EA_REC_LEN(curr), ip,);
7539 + if (curr->ea_type == GFS_EATYPE_UNUSED) {
7540 + if (GFS_EA_REC_LEN(curr) >= size) {
7545 + curr = GFS_EA_NEXT(curr);
7548 + GFS_ASSERT_INODE(GFS_EA_REC_LEN(curr), ip,);
7549 + if (curr == found.ea) {
7551 + * See if there will be enough space after the old version of the eattr
7555 + if (prev->ea_type == GFS_EATYPE_UNUSED) {
7556 + if (GFS_EA_REC_LEN(prev) +
7557 + GFS_EA_REC_LEN(curr) >= size) {
7561 + } else if (GFS_EA_REC_LEN(prev) +
7562 + GFS_EA_REC_LEN(curr) >=
7563 + GFS_EA_SIZE(prev) + size) {
7567 + } else if (GFS_EA_REC_LEN(curr) >= size) {
7571 + } else if (GFS_EA_REC_LEN(curr) >= GFS_EA_SIZE(curr) + size) {
7575 + if (GFS_EA_IS_LAST(curr))
7578 + curr = GFS_EA_NEXT(curr);
7582 + return (*space != NULL);
7586 + * read_unstuffed - actually copies the unstuffed data into the
7591 +read_unstuffed(void *dest, struct gfs_inode *ip, struct gfs_sbd *sdp,
7592 + struct gfs_ea_header *ea, uint32_t avail_size,
7593 + gfs_ea_copy_fn_t copy_fn)
7595 + struct buffer_head *bh[66]; /* This is the maximum number of data ptrs possible */
7597 + int max = GFS_EADATA_NUM_PTRS(GFS_EA_DATA_LEN(ea), avail_size);
7598 + int i, j, left = GFS_EA_DATA_LEN(ea);
7599 + char *outptr, *buf;
7600 + uint64_t *indptr = GFS_EA_DATA_PTRS(ea);
7602 + for (i = 0; i < max; i++) {
7604 + gfs_dread(sdp, gfs64_to_cpu(*indptr), ip->i_gl, DIO_START,
7608 + for (j = 0; j < i; j++)
7616 + for (i = 0; i < max; i++) {
7617 + err = gfs_dreread(sdp, bh[i], DIO_WAIT);
7619 + for (j = i; j < max; j++)
7623 + gfs_metatype_check(sdp, bh[i], GFS_METATYPE_EA);
7624 + buf = (bh[i])->b_data + sizeof (struct gfs_meta_header);
7626 + copy_fn(outptr, buf,
7627 + (avail_size > left) ? left : avail_size);
7629 + for (j = i; j < max; j++)
7633 + left -= avail_size;
7634 + outptr += avail_size;
7644 + * functionname - summary
7645 + * @param1: description
7646 + * @param2: description
7647 + * @param3: description
7649 + * Function description
7651 + * Returns: what is returned
7654 +get_ea(struct gfs_sbd *sdp, struct gfs_inode *ip, struct gfs_eaget_io *req,
7655 + gfs_ea_copy_fn_t copy_fn)
7658 + struct gfs_ea_location location;
7659 + uint32_t avail_size;
7661 + avail_size = sdp->sd_sb.sb_bsize - sizeof (struct gfs_meta_header);
7663 + err = find_eattr(ip, req->eg_name, req->eg_name_len, req->eg_type,
7671 + if (req->eg_data_len) {
7672 + if (req->eg_data_len < GFS_EA_DATA_LEN(location.ea))
7674 + else if (GFS_EA_IS_UNSTUFFED(location.ea))
7676 + read_unstuffed(req->eg_data, ip, sdp, location.ea,
7677 + avail_size, copy_fn);
7679 + err = copy_fn(req->eg_data, GFS_EA_DATA(location.ea),
7680 + GFS_EA_DATA_LEN(location.ea));
7682 + err = GFS_EA_DATA_LEN(location.ea);
7684 + err = GFS_EA_DATA_LEN(location.ea);
7686 + brelse(location.bh);
7693 + * functionname - summary
7694 + * @param1: description
7695 + * @param2: description
7696 + * @param3: description
7698 + * Function description
7700 + * Returns: what is returned
7703 +struct gfs_ea_header *
7704 +prep_ea(struct gfs_ea_header *ea)
7706 + struct gfs_ea_header *new = ea;
7708 + if (ea->ea_type == GFS_EATYPE_UNUSED) {
7709 + if (GFS_EA_IS_LAST(ea))
7710 + ea->ea_flags = GFS_EAFLAG_LAST;
7714 + new = GFS_EA_FREESPACE(ea);
7716 + cpu_to_gfs32(GFS_EA_REC_LEN(ea) - GFS_EA_SIZE(ea));
7717 + ea->ea_rec_len = cpu_to_gfs32(GFS_EA_SIZE(ea));
7718 + if (GFS_EA_IS_LAST(ea)) {
7719 + ea->ea_flags &= ~GFS_EAFLAG_LAST;
7720 + new->ea_flags = GFS_EAFLAG_LAST;
7722 + new->ea_flags = 0;
7729 + * replace_ea - replaces the existing data with the request data
7732 +replace_ea(struct gfs_sbd *sdp, struct gfs_inode *ip, struct gfs_ea_header *ea,
7733 + struct gfs_easet_io *req)
7737 + uint32_t copy_size, data_left = req->es_data_len;
7738 + struct buffer_head *bh;
7739 + uint64_t *datablk = GFS_EA_DATA_PTRS(ea);
7740 + const char *dataptr = req->es_data;
7741 + uint32_t avail_size =
7742 + sdp->sd_sb.sb_bsize - sizeof (struct gfs_meta_header);
7744 + ea->ea_data_len = cpu_to_gfs32(req->es_data_len);
7745 + if (!GFS_EA_IS_UNSTUFFED(ea))
7746 + memcpy(GFS_EA_DATA(ea), req->es_data, req->es_data_len);
7748 + for (i = 0; i < ea->ea_num_ptrs && data_left > 0; i++) {
7749 + err = gfs_dread(sdp, gfs64_to_cpu(*datablk), ip->i_gl,
7750 + DIO_START | DIO_WAIT, &bh);
7753 + gfs_trans_add_bh(ip->i_gl, bh);
7754 + gfs_metatype_check(sdp, bh, GFS_METATYPE_EA);
7756 + (data_left > avail_size) ? avail_size : data_left;
7757 + memcpy((bh)->b_data + sizeof (struct gfs_meta_header),
7758 + dataptr, copy_size);
7759 + dataptr += copy_size;
7760 + data_left -= copy_size;
7764 + GFS_ASSERT_INODE(data_left == 0, ip,
7766 + ("req->es_data_len = %u, ea->ea_num_ptrs = %d\n",
7767 + req->es_data_len, ea->ea_num_ptrs);
7776 + * write_ea - writes the request info to an ea, creating new blocks if
7779 + * @sdp: superblock pointer
7780 + * @alloc_ip: inode that has the blocks reserved for allocation
7781 + * @ip: inode that is being modified
7782 + * @ea: the location of the new ea in a block
7783 + * @req: the write request
7785 + * Note: does not update ea_rec_len or the GFS_EAFLAG_LAST bin of ea_flags
7787 + * returns : 0 on success, -EXXX on error
7791 +write_ea(struct gfs_sbd *sdp, struct gfs_inode *alloc_ip, struct gfs_inode *ip,
7792 + struct gfs_ea_header *ea, struct gfs_easet_io *req)
7797 + const char *dataptr;
7798 + uint32_t data_left, copy;
7799 + uint32_t avail_size =
7800 + sdp->sd_sb.sb_bsize - sizeof (struct gfs_meta_header);
7802 + struct buffer_head *bh = NULL;
7804 + ea->ea_data_len = cpu_to_gfs32(req->es_data_len);
7805 + ea->ea_name_len = req->es_name_len;
7806 + ea->ea_type = req->es_type;
7809 + memcpy(GFS_EA_NAME(ea), req->es_name, req->es_name_len);
7811 + if (GFS_EAREQ_IS_STUFFED(req, avail_size)) {
7812 + ea->ea_num_ptrs = 0;
7813 + memcpy(GFS_EA_DATA(ea), req->es_data, req->es_data_len);
7815 + blkptr = GFS_EA_DATA_PTRS(ea);
7816 + dataptr = req->es_data;
7817 + data_left = req->es_data_len;
7819 + GFS_EADATA_NUM_PTRS(req->es_data_len, avail_size);
7821 + for (i = 0; i < ea->ea_num_ptrs; i++) {
7823 + alloc_eattr_blk(sdp, alloc_ip, ip,
7824 + &temp)) == NULL) {
7829 + (data_left > avail_size) ? avail_size : data_left;
7830 + memcpy((bh)->b_data + sizeof (struct gfs_meta_header),
7832 + *blkptr = cpu_to_gfs64(temp);
7834 + data_left -= copy;
7839 + GFS_ASSERT_INODE(!data_left, ip,);
7848 + * erase_ea_data_ptrs - deallocate all the unstuffed data blocks pointed to
7849 + * ea records in this block
7850 + * @sdp: the superblock
7852 + * @blk: the block to check for data pointers
7855 + * Returns: 0 on success, -EXXX on failure
7859 +erase_ea_data_ptrs(struct gfs_sbd *sdp, struct gfs_inode *ip,
7860 + struct buffer_head *dibh, uint64_t blk)
7862 + struct gfs_holder rgd_gh;
7864 + uint64_t *datablk;
7865 + struct buffer_head *eabh;
7867 + struct gfs_ea_header *ea;
7868 + struct gfs_rgrpd *rgd = NULL;
7870 + err = gfs_dread(sdp, blk, ip->i_gl, DIO_WAIT | DIO_START, &eabh);
7874 + gfs_metatype_check(sdp, eabh, GFS_METATYPE_EA);
7875 + buf = (eabh)->b_data + sizeof (struct gfs_meta_header);
7876 + ea = (struct gfs_ea_header *) buf;
7879 + GFS_ASSERT_INODE(GFS_EA_REC_LEN(ea), ip,);
7880 + if (GFS_EA_IS_UNSTUFFED(ea)) {
7881 + datablk = GFS_EA_DATA_PTRS(ea);
7882 + rgd = gfs_blk2rgrpd(sdp, gfs64_to_cpu(*datablk));
7883 + GFS_ASSERT_INODE(rgd, ip,
7884 + printk("block = %" PRIu64 "\n",
7885 + gfs64_to_cpu(*datablk)););
7887 + gfs_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0,
7891 + /* Trans may require:
7892 + One block for the RG header. One block for each ea data block. One
7893 + One block for the dinode. One block for the current ea block.
7894 + One block for a quote change.
7897 + gfs_trans_begin(sdp,
7898 + 3 + ea->ea_num_ptrs, 1);
7900 + goto fail_glock_rg;
7901 + gfs_trans_add_bh(ip->i_gl, dibh);
7902 + for (i = 0; i < ea->ea_num_ptrs; i++, datablk++) {
7903 + gfs_metafree(ip, gfs64_to_cpu(*datablk), 1);
7904 + ip->i_di.di_blocks--;
7906 + ea->ea_num_ptrs = 0;
7907 + gfs_trans_add_bh(ip->i_gl, eabh);
7908 + gfs_dinode_out(&ip->i_di, (dibh)->b_data);
7909 + gfs_trans_end(sdp);
7910 + gfs_glock_dq_uninit(&rgd_gh);
7912 + if (GFS_EA_IS_LAST(ea))
7914 + ea = GFS_EA_NEXT(ea);
7922 + gfs_glock_dq_uninit(&rgd_gh);
7932 + * gfs_ea_dealloc - deallocate the extended attribute fork
7935 + * Returns: 0 on success, -EXXX on failure
7939 +gfs_ea_dealloc(struct gfs_inode *ip)
7941 + struct gfs_holder ri_gh, rgd_gh;
7943 + struct gfs_sbd *sdp = ip->i_sbd;
7944 + struct buffer_head *dibh, *indbh = NULL;
7945 + uint64_t *startblk, *eablk, *end, *next;
7948 + struct gfs_rgrpd *rgd = NULL;
7950 + if (!ip->i_di.di_eattr)
7953 + gfs_alloc_get(ip);
7955 + err = gfs_quota_hold_m(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
7959 + err = gfs_rindex_hold(sdp, &ri_gh);
7961 + goto out_unhold_q;
7963 + err = gfs_get_inode_buffer(ip, &dibh);
7965 + goto out_rindex_release;
7967 + if (ip->i_di.di_flags & GFS_DIF_EA_INDIRECT) {
7969 + gfs_dread(sdp, ip->i_di.di_eattr, ip->i_gl,
7970 + DIO_WAIT | DIO_START, &indbh);
7974 + gfs_metatype_check(sdp, indbh, GFS_METATYPE_IN);
7977 + (uint64_t *) ((indbh)->b_data +
7978 + sizeof (struct gfs_indirect));
7981 + ((sdp->sd_sb.sb_bsize - sizeof (struct gfs_indirect)) / 8);
7983 + while (*eablk && eablk < end) {
7985 + erase_ea_data_ptrs(sdp, ip, dibh,
7986 + gfs64_to_cpu(*eablk));
7992 + startblk = eablk - 1;
7994 + (uint64_t *) ((indbh)->b_data +
7995 + sizeof (struct gfs_indirect));
7997 + while (startblk >= end) {
7998 + rgd = gfs_blk2rgrpd(sdp, gfs64_to_cpu(*startblk));
7999 + GFS_ASSERT_INODE(rgd, ip,);
8002 + next = eablk = startblk - 1;
8004 + while (eablk >= end) {
8006 + gfs_blk2rgrpd(sdp, gfs64_to_cpu(*eablk))) {
8007 + if (eablk != next) {
8019 + gfs_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0,
8022 + goto out_rindex_release;
8024 + /* Trans may require:
8025 + One block for the RG header. One block for each block from this
8026 + resource group. One block for the indirect ea block,
8027 + One block for the quote change */
8030 + gfs_trans_begin(sdp, 3 + num_blks,
8033 + goto out_gunlock_rg;
8035 + gfs_trans_add_bh(ip->i_gl, dibh);
8037 + while (startblk > next) {
8038 + gfs_metafree(ip, gfs64_to_cpu(*startblk), 1);
8039 + ip->i_di.di_blocks--;
8044 + gfs_trans_add_bh(ip->i_gl, indbh);
8045 + gfs_dinode_out(&ip->i_di, (dibh)->b_data);
8047 + gfs_trans_end(sdp);
8049 + gfs_glock_dq_uninit(&rgd_gh);
8055 + err = erase_ea_data_ptrs(sdp, ip, dibh, ip->i_di.di_eattr);
8057 + goto out_rindex_release;
8060 + rgd = gfs_blk2rgrpd(sdp, ip->i_di.di_eattr);
8061 + GFS_ASSERT_INODE(rgd, ip,
8062 + printk("block = %" PRIu64 "\n", ip->i_di.di_eattr);
8065 + err = gfs_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, &rgd_gh);
8067 + goto out_rindex_release;
8069 + err = gfs_trans_begin(sdp, 3, 1);
8071 + goto out_gunlock_rg;
8073 + gfs_metafree(ip, ip->i_di.di_eattr, 1);
8075 + ip->i_di.di_blocks--;
8076 + ip->i_di.di_eattr = 0;
8078 + gfs_trans_add_bh(ip->i_gl, dibh);
8079 + gfs_dinode_out(&ip->i_di, (dibh)->b_data);
8081 + gfs_trans_end(sdp);
8084 + gfs_glock_dq_uninit(&rgd_gh);
8093 + out_rindex_release:
8094 + gfs_glock_dq_uninit(&ri_gh);
8097 + gfs_quota_unhold_m(ip);
8100 + gfs_alloc_put(ip);
8108 + * functionname - summary
8109 + * @param1: description
8110 + * @param2: description
8111 + * @param3: description
8113 + * Function description
8115 + * Returns: what is returned
8119 +remove_ea(struct gfs_inode *ip, struct gfs_ea_header *ea,
8120 + struct gfs_ea_header *prev)
8122 + uint64_t *datablk;
8125 + if (GFS_EA_IS_UNSTUFFED(ea)) {
8126 + datablk = GFS_EA_DATA_PTRS(ea);
8127 + for (i = 0; i < ea->ea_num_ptrs; i++, datablk++) {
8128 + gfs_metafree(ip, gfs64_to_cpu(*datablk), 1);
8129 + ip->i_di.di_blocks--;
8133 + ea->ea_type = GFS_EATYPE_UNUSED;
8134 + ea->ea_num_ptrs = 0;
8136 + if (prev && prev != ea) {
8137 + prev->ea_rec_len =
8138 + cpu_to_gfs32(GFS_EA_REC_LEN(prev) + GFS_EA_REC_LEN(ea));
8139 + if (GFS_EA_IS_LAST(ea))
8140 + prev->ea_flags |= GFS_EAFLAG_LAST;
8145 +init_new_inode_eattr(struct gfs_inode *dip, struct gfs_inode *ip,
8146 + struct gfs_easet_io *req)
8149 + struct buffer_head *bh;
8150 + struct gfs_sbd *sdp = ip->i_sbd;
8151 + struct gfs_ea_header *ea;
8153 + err = gfs_metaalloc(dip, &ip->i_di.di_eattr);
8157 + err = gfs_dread(sdp, ip->i_di.di_eattr, ip->i_gl,
8158 + DIO_NEW | DIO_START | DIO_WAIT, &bh);
8162 + gfs_metatype_set(sdp, bh, GFS_METATYPE_EA, GFS_FORMAT_EA);
8164 + ip->i_di.di_blocks++;
8166 + ea = GFS_FIRST_EA(bh);
8167 + ea->ea_flags = GFS_EAFLAG_LAST;
8169 + cpu_to_gfs32(sdp->sd_sb.sb_bsize - sizeof (struct gfs_meta_header));
8170 + ea->ea_num_ptrs = 0;
8171 + ea->ea_type = GFS_EATYPE_UNUSED;
8172 + err = write_ea(sdp, dip, ip, ea, req);
8176 + gfs_trans_add_bh(ip->i_gl, bh);
8186 +do_init_eattr(struct gfs_sbd *sdp, struct gfs_inode *ip,
8187 + struct gfs_easet_io *req)
8190 + struct buffer_head *bh;
8191 + struct gfs_ea_header *ea;
8193 + bh = alloc_eattr_blk(sdp, ip, ip, &ip->i_di.di_eattr);
8195 + ea = GFS_FIRST_EA(bh);
8196 + err = write_ea(sdp, ip, ip, ea, req);
8205 + * init_eattr - initializes a new eattr block
8209 +init_eattr(struct gfs_sbd *sdp, struct gfs_inode *ip, struct gfs_easet_io *req)
8212 + struct gfs_alloc *al;
8213 + uint32_t ea_metablks;
8214 + struct buffer_head *dibh;
8215 + struct posix_acl *acl = NULL;
8216 + uint32_t avail_size =
8217 + sdp->sd_sb.sb_bsize - sizeof (struct gfs_meta_header);
8220 + GFS_EAREQ_IS_STUFFED(req,
8221 + avail_size) ? 1 : (1 +
8222 + GFS_EADATA_NUM_PTRS(req->
8226 + if (IS_ACCESS_ACL(req->es_name, req->es_name_len)){
8227 + acl = posix_acl_from_xattr(req->es_data, req->es_data_len);
8228 + if (IS_ERR(acl)) {
8229 + err = PTR_ERR(acl);
8234 + al = gfs_alloc_get(ip);
8236 + err = gfs_quota_lock_m(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
8240 + al->al_requested_meta = ea_metablks;
8242 + err = gfs_inplace_reserve(ip);
8244 + goto out_gunlock_q;
8246 + err = gfs_quota_check(ip, ip->i_di.di_uid, ip->i_di.di_gid);
8250 + err = gfs_get_inode_buffer(ip, &dibh);
8254 + /* Trans may require:
8255 + A modified dinode, multiple EA metadata blocks, and all blocks for a RG
8259 + gfs_trans_begin(sdp,
8260 + 1 + ea_metablks + al->al_rgd->rd_ri.ri_length, 1);
8264 + err = do_init_eattr(sdp, ip, req);
8266 + goto out_end_trans;
8269 + gfs_acl_set_mode(ip, acl);
8271 + gfs_trans_add_bh(ip->i_gl, dibh);
8272 + gfs_dinode_out(&ip->i_di, (dibh)->b_data);
8275 + gfs_trans_end(sdp);
8281 + gfs_inplace_release(ip);
8284 + gfs_quota_unlock_m(ip);
8287 + gfs_alloc_put(ip);
8288 + posix_acl_release(acl);
8295 + * alloc_eattr_blk - allocates a new block for extended attributes.
8296 + * @sdp: A pointer to the superblock
8297 + * @alloc_ip: A pointer to the inode that has reserved the blocks for
8299 + * @ip: A pointer to the inode that's getting extended attributes
8300 + * @block: the block allocated
8302 + * Returns: the buffer head on success, NULL on failure
8305 +static struct buffer_head *
8306 +alloc_eattr_blk(struct gfs_sbd *sdp, struct gfs_inode *alloc_ip,
8307 + struct gfs_inode *ip, uint64_t * block)
8310 + struct buffer_head *bh = NULL;
8311 + struct gfs_ea_header *ea;
8313 + err = gfs_metaalloc(alloc_ip, block);
8318 + gfs_dread(sdp, *block, ip->i_gl, DIO_NEW | DIO_START | DIO_WAIT, &bh);
8322 + gfs_metatype_set(sdp, bh, GFS_METATYPE_EA, GFS_FORMAT_EA);
8324 + ip->i_di.di_blocks++;
8326 + ea = GFS_FIRST_EA(bh);
8327 + ea->ea_flags = GFS_EAFLAG_LAST;
8329 + cpu_to_gfs32(sdp->sd_sb.sb_bsize - sizeof (struct gfs_meta_header));
8330 + ea->ea_num_ptrs = 0;
8331 + ea->ea_type = GFS_EATYPE_UNUSED;
8333 + gfs_trans_add_bh(ip->i_gl, bh);
8341 + * functionname - summary
8342 + * @param1: description
8343 + * @param2: description
8344 + * @param3: description
8346 + * Function description
8348 + * Returns: what is returned
8352 +list_direct_ea(struct gfs_sbd *sdp, struct gfs_inode *ip,
8353 + struct buffer_head *bh, struct gfs_eaget_io *req,
8354 + gfs_ea_copy_fn_t copy_fn, uint32_t * size)
8357 + struct gfs_ea_header *ea;
8361 + gfs_metatype_check(sdp, bh, GFS_METATYPE_EA);
8363 + ea = (struct gfs_ea_header *) ((bh)->b_data +
8364 + sizeof (struct gfs_meta_header));
8365 + if (ea->ea_type == GFS_EATYPE_UNUSED) {
8366 + if (GFS_EA_IS_LAST(ea))
8369 + ea = GFS_EA_NEXT(ea);
8373 + GFS_ASSERT_INODE(GFS_EA_REC_LEN(ea), ip,);
8375 + if (req->eg_data_len) {
8376 + if (*size > req->eg_data_len) {
8382 + GFS_ASSERT_INODE(GFS_EATYPE_VALID(ea->ea_type), ip,);
8383 + if (ea->ea_type == GFS_EATYPE_USR) {
8384 + memcpy(ptr, "user.", 5);
8387 + memcpy(ptr, "system.", 7);
8390 + memcpy(ptr, GFS_EA_NAME(ea), ea->ea_name_len);
8391 + ptr += ea->ea_name_len;
8394 + copy_fn(req->eg_data + *size, buf,
8395 + GFS_EA_STRLEN(ea));
8400 + *size = *size + GFS_EA_STRLEN(ea);
8402 + if (GFS_EA_IS_LAST(ea))
8404 + ea = GFS_EA_NEXT(ea);
8413 + * functionname - summary
8414 + * @param1: description
8415 + * @param2: description
8416 + * @param3: description
8418 + * Function description
8420 + * Returns: what is returned
8424 +list_ea(struct gfs_sbd *sdp, struct gfs_inode *ip, struct gfs_eaget_io *req,
8425 + gfs_ea_copy_fn_t copy_fn)
8428 + struct buffer_head *bh, *eabh;
8429 + uint64_t *eablk, *end;
8430 + uint32_t size = 0;
8433 + gfs_dread(sdp, ip->i_di.di_eattr, ip->i_gl, DIO_START | DIO_WAIT,
8438 + if (ip->i_di.di_flags & GFS_DIF_EA_INDIRECT) {
8439 + gfs_metatype_check(sdp, bh, GFS_METATYPE_IN);
8441 + (uint64_t *) ((bh)->b_data + sizeof (struct gfs_indirect));
8444 + ((sdp->sd_sb.sb_bsize - sizeof (struct gfs_indirect)) / 8);
8446 + while (*eablk && eablk < end) {
8448 + gfs_dread(sdp, gfs64_to_cpu(*eablk), ip->i_gl,
8449 + DIO_START | DIO_WAIT, &eabh);
8452 + err = list_direct_ea(sdp, ip, eabh, req, copy_fn, &size);
8459 + err = list_direct_ea(sdp, ip, bh, req, copy_fn, &size);
8476 + * gfs_get_eattr - read an extended attribute, or a list of ea names
8477 + * @sdp: pointer to the superblock
8478 + * @ip: pointer to the inode for the target file
8479 + * @req: the request information
8480 + * @copy_fn: the function to use to do the actual copying
8482 + * Returns: actual size of data on success, -EXXX on error
8485 +gfs_get_eattr(struct gfs_sbd *sdp, struct gfs_inode *ip,
8486 + struct gfs_eaget_io *req, gfs_ea_copy_fn_t copy_fn)
8488 + struct gfs_holder i_gh;
8491 + if (req->eg_name) {
8492 + err = gfs_ea_read_permission(req, ip);
8497 + /* This seems to be a read. Are we sure we don't want to acquire the lock in LM_ST_SHARED? */
8499 + err = gfs_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh);
8503 + if (ip->i_di.di_eattr == 0) {
8504 + if (!req->eg_name) {
8505 + if (!req->eg_data_len && req->eg_len) {
8506 + uint32_t no_data = 0;
8509 + copy_fn(req->eg_len, &no_data,
8510 + sizeof (uint32_t));
8519 + err = get_ea(sdp, ip, req, copy_fn);
8521 + err = list_ea(sdp, ip, req, copy_fn);
8524 + gfs_glock_dq_uninit(&i_gh);
8532 +do_set_ea(struct gfs_sbd *sdp, struct gfs_inode *ip, struct gfs_easet_io *req,
8533 + struct gfs_ea_location location)
8537 + uint32_t avail_size =
8538 + sdp->sd_sb.sb_bsize - sizeof (struct gfs_meta_header);
8539 + struct gfs_ea_location space;
8541 + req_size = get_req_size(req, avail_size);
8543 + if (location.ea) {
8544 + struct gfs_ea_header *new_space;
8545 + if (req->es_cmd == GFS_EACMD_REMOVE) {
8546 + remove_ea(ip, location.ea, location.prev);
8547 + gfs_trans_add_bh(ip->i_gl, location.bh);
8550 + if (can_replace(location.ea, req, avail_size)) {
8551 + err = replace_ea(sdp, ip, location.ea, req);
8553 + gfs_trans_add_bh(ip->i_gl, location.bh);
8557 + * This part is kind of confusing. If the inode has direct EAs
8558 + * Then adding another EA can't run it out of space, so it is safe to
8559 + * delete the EA before looking for space. If the inode has indirect
8560 + * EAs, there may not be enough space left, so first you check for space
8561 + * and they you delete the EA.
8563 + if ((ip->i_di.di_flags & GFS_DIF_EA_INDIRECT) == 0) {
8564 + remove_ea(ip, location.ea, location.prev);
8565 + err = find_space(ip, req_size, req->es_type, &space);
8568 + new_space = prep_ea(space.ea);
8569 + err = write_ea(sdp, ip, ip, new_space, req);
8571 + gfs_trans_add_bh(ip->i_gl, location.bh);
8572 + gfs_trans_add_bh(ip->i_gl, space.bh);
8577 + if (can_replace_in_block(ip, req_size, location, &new_space)) {
8578 + remove_ea(ip, location.ea, location.prev);
8579 + new_space = prep_ea(new_space);
8580 + err = write_ea(sdp, ip, ip, new_space, req);
8582 + gfs_trans_add_bh(ip->i_gl, location.bh);
8585 + err = find_space(ip, req_size, req->es_type, &space);
8587 + /* You can return a non IO error here. If there is no space left,
8588 + * you can return -ENOSPC. So you must not have added a buffer to
8589 + * the transaction yet.
8592 + remove_ea(ip, location.ea, location.prev);
8593 + new_space = prep_ea(space.ea);
8594 + err = write_ea(sdp, ip, ip, new_space, req);
8596 + gfs_trans_add_bh(ip->i_gl, location.bh);
8597 + gfs_trans_add_bh(ip->i_gl, space.bh);
8602 + err = find_space(ip, req_size, req->es_type, &space);
8604 + /* you can also get -ENOSPC here */
8606 + space.ea = prep_ea(space.ea);
8607 + err = write_ea(sdp, ip, ip, space.ea, req);
8609 + gfs_trans_add_bh(ip->i_gl, space.bh);
8617 +set_ea(struct gfs_sbd *sdp, struct gfs_inode *ip, struct gfs_easet_io *req,
8618 + struct gfs_ea_location location)
8621 + struct gfs_alloc *al;
8622 + struct gfs_rgrpd *rgd = NULL;
8623 + struct buffer_head *dibh;
8624 + uint32_t avail_size =
8625 + sdp->sd_sb.sb_bsize - sizeof (struct gfs_meta_header);
8626 + int unstuffed_ea_blks = 0;
8627 + struct gfs_holder ri_gh, rgd_gh;
8628 + struct posix_acl *acl = NULL;
8630 + if (IS_ACCESS_ACL(req->es_name, req->es_name_len) && req->es_data){
8631 + acl = posix_acl_from_xattr(req->es_data, req->es_data_len);
8632 + if (IS_ERR(acl)) {
8633 + err = PTR_ERR(acl);
8638 + err = gfs_get_inode_buffer(ip, &dibh);
8641 + al = gfs_alloc_get(ip);
8643 + err = gfs_quota_lock_m(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
8648 + * worst case, you need to switch from direct to indirect, which can
8649 + * take up to 3 new blocks, and you need to create enough unstuffed data
8650 + * blocks to hold all the data
8652 + al->al_requested_meta = 3 + GFS_EADATA_NUM_PTRS(req->es_data_len, avail_size);
8654 + err = gfs_inplace_reserve(ip);
8656 + goto out_lock_quota;
8658 + err = gfs_quota_check(ip, ip->i_di.di_uid, ip->i_di.di_gid);
8662 + if (location.ea && GFS_EA_IS_UNSTUFFED(location.ea)) {
8664 + * If there is an EA, we might need to delete it.
8665 + * Since all unstuffed data blocks are added at the same time,
8666 + * they are all from the same resource group.
8668 + err = gfs_rindex_hold(sdp, &ri_gh);
8672 + gfs_blk2rgrpd(sdp,
8673 + gfs64_to_cpu(*GFS_EA_DATA_PTRS(location.ea)));
8674 + GFS_ASSERT_INODE(rgd, ip,
8675 + printk("block = %" PRIu64 "\n",
8676 + gfs64_to_cpu(*GFS_EA_DATA_PTRS
8680 + gfs_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, &rgd_gh);
8683 + unstuffed_ea_blks = location.ea->ea_num_ptrs;
8687 + * The transaction may require:
8688 + * Modifying the dinode block, Modifying the indirect ea block,
8689 + * modifying an ea block, all the allocation blocks, all the blocks for
8690 + * a RG bitmap, the RG header block, a RG block for each unstuffed data
8691 + * block you might be deleting.
8693 + err = gfs_trans_begin(sdp, 4 + al->al_requested_meta +
8694 + al->al_rgd->rd_ri.ri_length + unstuffed_ea_blks,
8699 + err = do_set_ea(sdp, ip, req, location);
8703 + gfs_acl_set_mode(ip, acl);
8704 + gfs_trans_add_bh(ip->i_gl, dibh);
8705 + gfs_dinode_out(&ip->i_di, (dibh)->b_data);
8708 + gfs_trans_end(sdp);
8712 + gfs_glock_dq_uninit(&rgd_gh);
8716 + gfs_glock_dq_uninit(&ri_gh);
8719 + gfs_inplace_release(ip);
8722 + gfs_quota_unlock_m(ip);
8725 + gfs_alloc_put(ip);
8729 + posix_acl_release(acl);
8736 + * gfs_set_eattr - sets (or creates or replaces) an extended attribute
8737 + * @sdp: pointer to the superblock
8738 + * @ip: pointer to the inode of the target file
8739 + * @req: request information
8741 + * Returns: 0 on success -EXXX on error
8744 +gfs_set_eattr(struct gfs_sbd *sdp, struct gfs_inode *ip,
8745 + struct gfs_easet_io *req)
8747 + struct gfs_holder i_gh;
8749 + uint32_t req_size;
8750 + uint32_t avail_size =
8751 + sdp->sd_sb.sb_bsize - sizeof (struct gfs_meta_header);
8752 + struct gfs_ea_location location;
8754 + if (!GFS_EACMD_VALID(req->es_cmd)) {
8755 + err = -EOPNOTSUPP;
8759 + if (strlen(req->es_name) == 0) {
8764 + err = gfs_ea_write_permission(req, ip);
8768 + if ((req_size = get_req_size(req, avail_size)) > avail_size) {
8769 + /* This can only happen with 512 byte blocks */
8773 + err = gfs_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &i_gh);
8777 + if (ip->i_di.di_eattr == 0) {
8778 + if (req->es_cmd == GFS_EACMD_REPLACE
8779 + || req->es_cmd == GFS_EACMD_REMOVE) {
8783 + err = init_eattr(sdp, ip, req);
8787 + err = find_eattr(ip, req->es_name, req->es_name_len, req->es_type,
8791 + if (err == 0 && (req->es_cmd == GFS_EACMD_REPLACE ||
8792 + req->es_cmd == GFS_EACMD_REMOVE)) {
8796 + err = set_ea(sdp, ip, req, location);
8800 + brelse(location.bh);
8803 + gfs_glock_dq_uninit(&i_gh);
8810 + * gfs_set_eattr_ioctl - creates, modifies, or removes an extended attribute.
8811 + * @sdp: pointer to the superblock
8812 + * @ip: a pointer to the gfs inode for the file
8813 + * @arg: a pointer to gfs_set_eattr_io_t struct with the request
8815 + * Notes: ioctl wrapper for gfs_set_eattr
8816 + * Returns: 0 on success, -EXXX on error
8820 +gfs_set_eattr_ioctl(struct gfs_sbd *sdp, struct gfs_inode *ip, void *arg)
8822 + struct gfs_easet_io req;
8824 + char *name = NULL;
8825 + char *data = NULL;
8827 + if (copy_from_user(&req, arg, sizeof (struct gfs_easet_io))) {
8832 + name = gmalloc(req.es_name_len);
8834 + if (req.es_data) {
8835 + data = gmalloc(req.es_data_len);
8837 + if (copy_from_user(data, req.es_data, req.es_data_len)) {
8842 + if (copy_from_user(name, req.es_name, req.es_name_len)) {
8846 + req.es_data = data;
8847 + req.es_name = name;
8848 + err = gfs_set_eattr(sdp, ip, &req);
8860 + * gfs_get_eattr_ioctl - gets the value for the requested attribute name,
8861 + * or a list of all the extended attribute names.
8862 + * @sdp: pointer to the superblock
8863 + * @ip: a pointer to the inode for the file
8864 + * @arg: a pointer to the struct gfs_eaget_io struct holding the request
8866 + * Notes: ioctl wrapper for the gfs_get_eattr function
8867 + * Returns: 0 on success, -EXXX on error.
8871 +gfs_get_eattr_ioctl(struct gfs_sbd *sdp, struct gfs_inode *ip, void *arg)
8873 + struct gfs_eaget_io req;
8875 + char *name = NULL;
8878 + if (copy_from_user(&req, arg, sizeof (struct gfs_eaget_io))) {
8883 + if (req.eg_name) {
8884 + name = gmalloc(req.eg_name_len);
8886 + if (copy_from_user(name, req.eg_name, req.eg_name_len)) {
8890 + req.eg_name = name;
8892 + result = gfs_get_eattr(sdp, ip, &req, gfs_ea_copy_to_user);
8898 + if (result >= 0) {
8901 + gfs_ea_copy_to_user(req.eg_len, &size, sizeof(uint32_t));
8910 + * gfs_get_direct_eattr_meta - add a direct eattr block and its data blocks to a user buffer
8911 + * @ip: the inode whose extended attribute metadata is being collected
8912 + * @ub: the user buffer structure the buffers are copied into
8913 + * @blk: the disk block number of the direct eattr block to read
8915 + * Reads the block at @blk, adds it to @ub, then walks each eattr header and adds any unstuffed data blocks it references
8917 + * Returns: 0 on success, -EXXX on error
8921 +gfs_get_direct_eattr_meta(struct gfs_inode *ip, struct gfs_user_buffer *ub,
8924 + struct gfs_sbd *sdp = ip->i_sbd;
8925 + struct buffer_head *databh, *bh;
8926 + struct gfs_ea_header *ea;
8927 + uint64_t *datablk;
8931 + error = gfs_dread(sdp, blk, ip->i_gl, DIO_START | DIO_WAIT, &bh);
8935 + error = gfs_add_bh_to_ub(ub, bh);
8937 + ea = (struct gfs_ea_header *) ((bh)->b_data +
8938 + sizeof (struct gfs_meta_header));
8940 + GFS_ASSERT_INODE(GFS_EA_REC_LEN(ea), ip,);
8942 + datablk = GFS_EA_DATA_PTRS(ea);
8944 + for (i = 0; i < ea->ea_num_ptrs; i++) {
8946 + gfs_dread(sdp, gfs64_to_cpu(*datablk), ip->i_gl,
8947 + DIO_START | DIO_WAIT, &databh);
8951 + error = gfs_add_bh_to_ub(ub, databh);
8961 + if (GFS_EA_IS_LAST(ea))
8963 + ea = GFS_EA_NEXT(ea);
8975 + * gfs_get_eattr_meta - return all the eattr blocks of a file
8976 + * @ip: the inode of the file
8977 + * @ub: the structure representing the user buffer to copy to
8979 + * Returns: 0 on success, -EXXX on failure
8983 +gfs_get_eattr_meta(struct gfs_inode *ip, struct gfs_user_buffer *ub)
8985 + struct gfs_sbd *sdp = ip->i_sbd;
8986 + struct buffer_head *bh;
8988 + uint64_t *eablk, *end;
8990 + if (ip->i_di.di_flags & GFS_DIF_EA_INDIRECT) {
8992 + gfs_dread(sdp, ip->i_di.di_eattr, ip->i_gl,
8993 + DIO_WAIT | DIO_START, &bh);
8997 + error = gfs_add_bh_to_ub(ub, bh);
9000 + (uint64_t *) ((bh)->b_data + sizeof (struct gfs_indirect));
9003 + ((sdp->sd_sb.sb_bsize - sizeof (struct gfs_indirect)) / 8);
9005 + while (*eablk && eablk < end) {
9007 + gfs_get_direct_eattr_meta(ip, ub,
9008 + gfs64_to_cpu(*eablk));
9017 + error = gfs_get_direct_eattr_meta(ip, ub, ip->i_di.di_eattr);
9023 diff -urN linux-orig/fs/gfs/eattr.h linux-patched/fs/gfs/eattr.h
9024 --- linux-orig/fs/gfs/eattr.h 1969-12-31 18:00:00.000000000 -0600
9025 +++ linux-patched/fs/gfs/eattr.h 2004-06-30 13:27:49.338712290 -0500
9027 +/******************************************************************************
9028 +*******************************************************************************
9030 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
9031 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
9033 +** This copyrighted material is made available to anyone wishing to use,
9034 +** modify, copy, or redistribute it subject to the terms and conditions
9035 +** of the GNU General Public License v.2.
9037 +*******************************************************************************
9038 +******************************************************************************/
9040 +#ifndef __EATTR_DOT_H__
9041 +#define __EATTR_DOT_H__
9043 +#define GFS_EA_MAY_WRITE 1
9044 +#define GFS_EA_MAY_READ 2
9046 +#define GFS_EA_DATA_LEN(x) gfs32_to_cpu((x)->ea_data_len)
9047 +#define GFS_EA_IS_UNSTUFFED(x) ((x)->ea_num_ptrs)
9048 +#define GFS_EA_DATA(x) ((char *)(x) + sizeof(struct gfs_ea_header) + (x)->ea_name_len)
9050 +struct gfs_ea_location {
9051 + struct buffer_head *bh;
9052 + struct gfs_ea_header *ea;
9053 + struct gfs_ea_header *prev;
9056 +#define GFS_POSIX_ACL_ACCESS "posix_acl_access"
9057 +#define GFS_POSIX_ACL_ACCESS_LEN 16
9058 +#define GFS_POSIX_ACL_DEFAULT "posix_acl_default"
9059 +#define GFS_POSIX_ACL_DEFAULT_LEN 17
9061 +#define IS_ACCESS_ACL(name, len) \
9062 + ((len) == GFS_POSIX_ACL_ACCESS_LEN && \
9063 + !memcmp(GFS_POSIX_ACL_ACCESS, (name), (len)))
9065 +#define IS_DEFAULT_ACL(name, len) \
9066 + ((len) == GFS_POSIX_ACL_DEFAULT_LEN && \
9067 + !memcmp(GFS_POSIX_ACL_DEFAULT, (name), (len)))
9069 +#define GFS_MAX_EA_ACL_BLKS 66 /* 65 for unstuffed data blocks, 1 for the ea
9072 +typedef int (*gfs_ea_copy_fn_t) (void *dest, void *src, unsigned long size);
9074 +int gfs_ea_memcpy(void *dest, void *src, unsigned long size);
9075 +int gfs_ea_copy_to_user(void *dest, void *src, unsigned long size);
9077 +int find_sys_space(struct gfs_inode *alloc_ip, struct gfs_inode *ip, int size,
9078 + struct gfs_ea_location *avail);
9080 +struct gfs_ea_header *prep_ea(struct gfs_ea_header *ea);
9082 +int write_ea(struct gfs_sbd *sdp, struct gfs_inode *alloc_ip,
9083 + struct gfs_inode *ip, struct gfs_ea_header *ea,
9084 + struct gfs_easet_io *req);
9086 +int gfs_get_eattr(struct gfs_sbd *sdp, struct gfs_inode *ip,
9087 + struct gfs_eaget_io *req, gfs_ea_copy_fn_t copy_fn);
9088 +int gfs_set_eattr(struct gfs_sbd *sdp, struct gfs_inode *ip,
9089 + struct gfs_easet_io *req);
9091 +int gfs_set_eattr_ioctl(struct gfs_sbd *sdp, struct gfs_inode *ip, void *arg);
9092 +int gfs_get_eattr_ioctl(struct gfs_sbd *sdp, struct gfs_inode *ip, void *arg);
9094 +int gfs_ea_dealloc(struct gfs_inode *ip);
9096 +int gfs_get_eattr_meta(struct gfs_inode *ip, struct gfs_user_buffer *ub);
9098 +int replace_ea(struct gfs_sbd *sdp, struct gfs_inode *ip,
9099 + struct gfs_ea_header *ea, struct gfs_easet_io *req);
9101 +int find_eattr(struct gfs_inode *ip, char *name, int name_len, int type,
9102 + struct gfs_ea_location *location);
9104 +int read_unstuffed(void *dest, struct gfs_inode *ip, struct gfs_sbd *sdp,
9105 + struct gfs_ea_header *ea, uint32_t avail_size,
9106 + gfs_ea_copy_fn_t copy_fn);
9108 +int get_ea(struct gfs_sbd *sdp, struct gfs_inode *ip, struct gfs_eaget_io *req,
9109 + gfs_ea_copy_fn_t copy_fn);
9111 +int init_new_inode_eattr(struct gfs_inode *dip, struct gfs_inode *ip,
9112 + struct gfs_easet_io *req);
9114 +int gfs_ea_read_permission(struct gfs_eaget_io *req, struct gfs_inode *ip);
9116 +#endif /* __EATTR_DOT_H__ */
9117 diff -urN linux-orig/fs/gfs/file.c linux-patched/fs/gfs/file.c
9118 --- linux-orig/fs/gfs/file.c 1969-12-31 18:00:00.000000000 -0600
9119 +++ linux-patched/fs/gfs/file.c 2004-06-30 13:27:49.339712058 -0500
9121 +/******************************************************************************
9122 +*******************************************************************************
9124 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
9125 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
9127 +** This copyrighted material is made available to anyone wishing to use,
9128 +** modify, copy, or redistribute it subject to the terms and conditions
9129 +** of the GNU General Public License v.2.
9131 +*******************************************************************************
9132 +******************************************************************************/
9134 +#include <linux/sched.h>
9135 +#include <linux/slab.h>
9136 +#include <linux/smp_lock.h>
9137 +#include <linux/spinlock.h>
9138 +#include <asm/semaphore.h>
9139 +#include <linux/completion.h>
9140 +#include <linux/buffer_head.h>
9141 +#include <asm/uaccess.h>
9151 + * gfs_copy2mem - Trivial copy function for gfs_readi()
9152 + * @bh: The buffer to copy from, or NULL meaning zero the buffer
9153 + * @buf: The buffer to copy/zero
9154 + * @offset: The offset in the buffer to copy from
9155 + * @size: The amount of data to copy/zero
9157 + * Returns: 0 on success, -EXXX on failure
9161 +gfs_copy2mem(struct buffer_head *bh, void **buf, unsigned int offset,
9162 + unsigned int size)
9164 + char **p = (char **)buf;
9167 + memcpy(*p, bh->b_data + offset, size);
9169 + memset(*p, 0, size);
9177 + * gfs_copy2user - Copy data to user space
9179 + * @buf: The destination of the data
9180 + * @offset: The offset into the buffer
9181 + * @size: The amount of data to copy
9183 + * Returns: 0 on success, -EXXX on failure
9187 +gfs_copy2user(struct buffer_head *bh, void **buf,
9188 + unsigned int offset, unsigned int size)
9190 + char **p = (char **)buf;
9194 + error = copy_to_user(*p, bh->b_data + offset, size);
9196 + error = clear_user(*p, size);
9207 + * gfs_readi - Read a file
9208 + * @ip: The GFS Inode
9209 + * @buf: The buffer to place result into
9210 + * @offset: File offset to begin reading from
9211 + * @size: Amount of data to transfer
9212 + * @copy_fn: Function to actually perform the copy
9214 + * The @copy_fn only copies a maximum of a single block at once so
9215 + * we are safe calling it with int arguments. It is done so that
9216 + * we don't needlessly put 64bit arguments on the stack and it
9217 + * also makes the code in the @copy_fn nicer too.
9219 + * Returns: The amount of data actually copied or the error
9223 +gfs_readi(struct gfs_inode *ip, void *buf,
9224 + uint64_t offset, unsigned int size,
9225 + read_copy_fn_t copy_fn)
9227 + struct gfs_sbd *sdp = ip->i_sbd;
9228 + struct buffer_head *bh;
9229 + uint64_t lblock, dblock;
9231 + uint32_t extlen = 0;
9232 + unsigned int amount;
9234 + int journaled = gfs_is_jdata(ip);
9238 + if (offset >= ip->i_di.di_size)
9241 + if ((offset + size) > ip->i_di.di_size)
9242 + size = ip->i_di.di_size - offset;
9249 + o = do_div(lblock, sdp->sd_jbsize);
9251 + lblock = offset >> sdp->sd_sb.sb_bsize_shift;
9252 + o = offset & (sdp->sd_sb.sb_bsize - 1);
9255 + if (gfs_is_stuffed(ip))
9256 + o += sizeof(struct gfs_dinode);
9257 + else if (journaled)
9258 + o += sizeof(struct gfs_meta_header);
9260 + while (copied < size) {
9261 + amount = size - copied;
9262 + if (amount > sdp->sd_sb.sb_bsize - o)
9263 + amount = sdp->sd_sb.sb_bsize - o;
9266 + error = gfs_block_map(ip, lblock, ¬_new,
9267 + &dblock, &extlen);
9273 + gfs_start_ra(ip->i_gl, dblock, extlen);
9276 + error = gfs_get_data_buffer(ip, dblock, not_new, &bh);
9285 + error = copy_fn(bh, &buf, o, amount);
9294 + o = (journaled) ? sizeof(struct gfs_meta_header) : 0;
9300 + return (copied) ? copied : error;
9304 + * gfs_copy_from_mem - Trivial copy function for gfs_writei()
9305 + * @ip: The file to write to
9306 + * @bh: The buffer to copy to or clear
9307 + * @buf: The buffer to copy from
9308 + * @offset: The offset in the buffer to write to
9309 + * @size: The amount of data to write
9310 + * @new: Flag indicating that remaining space in the buffer should be zeroed
9312 + * Returns: 0 on success, -EXXX on failure
9316 +gfs_copy_from_mem(struct gfs_inode *ip, struct buffer_head *bh, void **buf,
9317 + unsigned int offset, unsigned int size, int new)
9319 + char **p = (char **)buf;
9322 + if (bh->b_blocknr == ip->i_num.no_addr) {
9323 + GFS_ASSERT_INODE(!new, ip,);
9324 + gfs_trans_add_bh(ip->i_gl, bh);
9325 + memcpy(bh->b_data + offset, *p, size);
9326 + } else if (gfs_is_jdata(ip)) {
9327 + gfs_trans_add_bh(ip->i_gl, bh);
9328 + memcpy(bh->b_data + offset, *p, size);
9330 + gfs_buffer_clear_ends(bh, offset, size, TRUE);
9332 + memcpy(bh->b_data + offset, *p, size);
9334 + gfs_buffer_clear_ends(bh, offset, size, FALSE);
9335 + error = gfs_dwrite(ip->i_sbd, bh, DIO_DIRTY);
9345 + * gfs_copy_from_user - Copy bytes from user space for gfs_writei()
9346 + * @ip: The file to write to
9347 + * @bh: The buffer to copy to or clear
9348 + * @buf: The buffer to copy from
9349 + * @offset: The offset in the buffer to write to
9350 + * @size: The amount of data to write
9351 + * @new: Flag indicating that remaining space in the buffer should be zeroed
9353 + * Returns: 0 on success, -EXXX on failure
9357 +gfs_copy_from_user(struct gfs_inode *ip, struct buffer_head *bh, void **buf,
9358 + unsigned int offset, unsigned int size, int new)
9360 + char **p = (char **)buf;
9363 + if (bh->b_blocknr == ip->i_num.no_addr) {
9364 + GFS_ASSERT_INODE(!new, ip,);
9365 + gfs_trans_add_bh(ip->i_gl, bh);
9366 + if (copy_from_user(bh->b_data + offset, *p, size))
9368 + } else if (gfs_is_jdata(ip)) {
9369 + gfs_trans_add_bh(ip->i_gl, bh);
9370 + if (copy_from_user(bh->b_data + offset, *p, size))
9373 + gfs_buffer_clear_ends(bh, offset, size, TRUE);
9375 + memset(bh->b_data + offset, 0, size);
9378 + if (copy_from_user(bh->b_data + offset, *p, size))
9382 + gfs_buffer_clear(bh);
9383 + gfs_dwrite(ip->i_sbd, bh, DIO_DIRTY);
9386 + gfs_buffer_clear_ends(bh, offset, size, FALSE);
9387 + error = gfs_dwrite(ip->i_sbd, bh, DIO_DIRTY);
9398 + * gfs_writei - Write bytes to a file
9399 + * @ip: The GFS inode
9400 + * @buf: The buffer containing information to be written
9401 + * @offset: The file offset to start writing at
9402 + * @size: The amount of data to write
9403 + * @copy_fn: Function to do the actual copying
9405 + * Returns: The number of bytes correctly written or error code
9409 +gfs_writei(struct gfs_inode *ip, void *buf,
9410 + uint64_t offset, unsigned int size,
9411 + write_copy_fn_t copy_fn)
9413 + struct gfs_sbd *sdp = ip->i_sbd;
9414 + struct buffer_head *dibh, *bh;
9415 + uint64_t lblock, dblock;
9417 + uint32_t extlen = 0;
9418 + unsigned int amount;
9420 + int journaled = gfs_is_jdata(ip);
9421 + const uint64_t start = offset;
9428 + if (gfs_is_stuffed(ip) &&
9429 + ((start + size) > (sdp->sd_sb.sb_bsize - sizeof(struct gfs_dinode)))) {
9430 + error = gfs_unstuff_dinode(ip, gfs_unstuffer_async, NULL);
9437 + o = do_div(lblock, sdp->sd_jbsize);
9439 + lblock = offset >> sdp->sd_sb.sb_bsize_shift;
9440 + o = offset & (sdp->sd_sb.sb_bsize - 1);
9443 + if (gfs_is_stuffed(ip))
9444 + o += sizeof(struct gfs_dinode);
9445 + else if (journaled)
9446 + o += sizeof(struct gfs_meta_header);
9448 + while (copied < size) {
9449 + amount = size - copied;
9450 + if (amount > sdp->sd_sb.sb_bsize - o)
9451 + amount = sdp->sd_sb.sb_bsize - o;
9455 + error = gfs_block_map(ip, lblock, &new, &dblock, &extlen);
9458 + GFS_ASSERT_INODE(dblock, ip,);
9461 + if (journaled && extlen > 1)
9462 + gfs_start_ra(ip->i_gl, dblock, extlen);
9464 + error = gfs_get_data_buffer(ip, dblock,
9465 + (amount == sdp->sd_sb.sb_bsize) ? TRUE : new,
9470 + error = copy_fn(ip, bh, &buf, o, amount, new);
9480 + o = (journaled) ? sizeof(struct gfs_meta_header) : 0;
9484 + error = gfs_get_inode_buffer(ip, &dibh);
9488 + if (ip->i_di.di_size < start + copied)
9489 + ip->i_di.di_size = start + copied;
9490 + ip->i_di.di_mtime = ip->i_di.di_ctime = get_seconds();
9492 + gfs_trans_add_bh(ip->i_gl, dibh);
9493 + gfs_dinode_out(&ip->i_di, dibh->b_data);
9503 diff -urN linux-orig/fs/gfs/file.h linux-patched/fs/gfs/file.h
9504 --- linux-orig/fs/gfs/file.h 1969-12-31 18:00:00.000000000 -0600
9505 +++ linux-patched/fs/gfs/file.h 2004-06-30 13:27:49.339712058 -0500
9507 +/******************************************************************************
9508 +*******************************************************************************
9510 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
9511 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
9513 +** This copyrighted material is made available to anyone wishing to use,
9514 +** modify, copy, or redistribute it subject to the terms and conditions
9515 +** of the GNU General Public License v.2.
9517 +*******************************************************************************
9518 +******************************************************************************/
9520 +#ifndef __FILE_DOT_H__
9521 +#define __FILE_DOT_H__
9523 +typedef int (*read_copy_fn_t) (struct buffer_head * bh, void **buf,
9524 + unsigned int offset, unsigned int size);
9525 +typedef int (*write_copy_fn_t) (struct gfs_inode * ip, struct buffer_head * bh,
9526 + void **buf, unsigned int offset,
9527 + unsigned int size, int new);
9529 +int gfs_copy2mem(struct buffer_head *bh, void **buf,
9530 + unsigned int offset, unsigned int size);
9531 +int gfs_copy2user(struct buffer_head *bh, void **buf,
9532 + unsigned int offset, unsigned int size);
9533 +int gfs_readi(struct gfs_inode *ip, void *buf, uint64_t offset,
9534 + unsigned int size, read_copy_fn_t copy_fn);
9536 +int gfs_copy_from_mem(struct gfs_inode *ip, struct buffer_head *bh, void **buf,
9537 + unsigned int offset, unsigned int size, int new);
9538 +int gfs_copy_from_user(struct gfs_inode *ip, struct buffer_head *bh, void **buf,
9539 + unsigned int offset, unsigned int size, int new);
9540 +int gfs_writei(struct gfs_inode *ip, void *buf, uint64_t offset,
9541 + unsigned int size, write_copy_fn_t copy_fn);
9543 +static __inline__ int
9544 +gfs_internal_read(struct gfs_inode *ip, char *buf, uint64_t offset,
9545 + unsigned int size)
9547 + return gfs_readi(ip, buf, offset, size, gfs_copy2mem);
9550 +static __inline__ int
9551 +gfs_internal_write(struct gfs_inode *ip, char *buf, uint64_t offset,
9552 + unsigned int size)
9554 + return gfs_writei(ip, buf, offset, size, gfs_copy_from_mem);
9557 +#endif /* __FILE_DOT_H__ */
9558 diff -urN linux-orig/fs/gfs/fixed_div64.h linux-patched/fs/gfs/fixed_div64.h
9559 --- linux-orig/fs/gfs/fixed_div64.h 1969-12-31 18:00:00.000000000 -0600
9560 +++ linux-patched/fs/gfs/fixed_div64.h 2004-06-30 13:27:49.339712058 -0500
9563 + * Copyright (c) 2000-2003 Silicon Graphics, Inc. All Rights Reserved.
9565 + * This program is free software; you can redistribute it and/or modify it
9566 + * under the terms of version 2 of the GNU General Public License as
9567 + * published by the Free Software Foundation.
9569 + * This program is distributed in the hope that it would be useful, but
9570 + * WITHOUT ANY WARRANTY; without even the implied warranty of
9571 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
9573 + * Further, this software is distributed without any warranty that it is
9574 + * free of the rightful claim of any third person regarding infringement
9575 + * or the like. Any license provided herein, whether implied or
9576 + * otherwise, applies only to this software file. Patent licenses, if
9577 + * any, provided herein do not apply to combinations of this program with
9578 + * other software, or any other product whatsoever.
9580 + * You should have received a copy of the GNU General Public License along
9581 + * with this program; if not, write the Free Software Foundation, Inc., 59
9582 + * Temple Place - Suite 330, Boston MA 02111-1307, USA.
9584 + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
9585 + * Mountain View, CA 94043, or:
9587 + * http://www.sgi.com
9589 + * For further information regarding this notice, see:
9591 + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
9593 + * Additional munging:
9594 + * Copyright (C) 2004 Red Hat, Inc. All rights reserved.
9597 +#ifndef __FIXED_DIV64_DOT_H__
9598 +#define __FIXED_DIV64_DOT_H__
9600 +#include <asm/div64.h>
9602 +#if defined __i386__
9603 +/* For ia32 we need to pull some tricks to get past various versions
9604 + * of the compiler which do not like us using do_div in the middle
9605 + * of large functions.
9607 +static inline __u32 fixed_div64_do_div(void *a, __u32 b, int n)
9613 + mod = *(__u32 *)a % b;
9614 + *(__u32 *)a = *(__u32 *)a / b;
9618 + unsigned long __upper, __low, __high, __mod;
9619 + __u64 c = *(__u64 *)a;
9620 + __upper = __high = c >> 32;
9623 + __upper = __high % (b);
9624 + __high = __high / (b);
9626 + asm("divl %2":"=a" (__low), "=d" (__mod):"rm" (b), "0" (__low), "1" (__upper));
9627 + asm("":"=A" (c):"a" (__low),"d" (__high));
9637 +/* Side effect free 64 bit mod operation */
9638 +static inline __u32 fixed_div64_do_mod(void *a, __u32 b, int n)
9642 + return *(__u32 *)a % b;
9645 + unsigned long __upper, __low, __high, __mod;
9646 + __u64 c = *(__u64 *)a;
9647 + __upper = __high = c >> 32;
9650 + __upper = __high % (b);
9651 + __high = __high / (b);
9653 + asm("divl %2":"=a" (__low), "=d" (__mod):"rm" (b), "0" (__low), "1" (__upper));
9654 + asm("":"=A" (c):"a" (__low),"d" (__high));
9663 +static inline __u32 fixed_div64_do_div(void *a, __u32 b, int n)
9669 + mod = *(__u32 *)a % b;
9670 + *(__u32 *)a = *(__u32 *)a / b;
9673 + mod = do_div(*(__u64 *)a, b);
9681 +/* Side effect free 64 bit mod operation */
9682 +static inline __u32 fixed_div64_do_mod(void *a, __u32 b, int n)
9686 + return *(__u32 *)a % b;
9689 + __u64 c = *(__u64 *)a;
9690 + return do_div(c, b);
9700 +#define do_div(a, b) fixed_div64_do_div(&(a), (b), sizeof(a))
9701 +#define do_mod(a, b) fixed_div64_do_mod(&(a), (b), sizeof(a))
9703 +#endif /* __FIXED_DIV64_DOT_H__ */
9704 diff -urN linux-orig/fs/gfs/flock.c linux-patched/fs/gfs/flock.c
9705 --- linux-orig/fs/gfs/flock.c 1969-12-31 18:00:00.000000000 -0600
9706 +++ linux-patched/fs/gfs/flock.c 2004-06-30 13:27:49.339712058 -0500
9708 +/******************************************************************************
9709 +*******************************************************************************
9711 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
9712 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
9714 +** This copyrighted material is made available to anyone wishing to use,
9715 +** modify, copy, or redistribute it subject to the terms and conditions
9716 +** of the GNU General Public License v.2.
9718 +*******************************************************************************
9719 +******************************************************************************/
9721 +#include <linux/sched.h>
9722 +#include <linux/slab.h>
9723 +#include <linux/smp_lock.h>
9724 +#include <linux/spinlock.h>
9725 +#include <asm/semaphore.h>
9726 +#include <linux/completion.h>
9727 +#include <linux/buffer_head.h>
9735 + * gfs_flock - Acquire a flock on a file
9737 + * @ex: exclusive lock
9738 + * @wait: wait for lock
9740 + * Returns: 0 on success, -EXXX on failure
9744 +gfs_flock(struct gfs_file *fp, int ex, int wait)
9746 + struct gfs_holder *fl_gh = &fp->f_fl_gh;
9747 + struct gfs_inode *ip = fp->f_inode;
9748 + struct gfs_sbd *sdp = ip->i_sbd;
9749 + struct gfs_glock *gl;
9752 + down(&fp->f_fl_lock);
9754 + if (fl_gh->gh_gl) {
9755 + gfs_glock_dq_uninit(fl_gh);
9760 + error = gfs_glock_get(sdp,
9761 + ip->i_num.no_formal_ino, &gfs_flock_glops,
9766 + gfs_holder_init(gl, (ex) ? LM_ST_EXCLUSIVE : LM_ST_SHARED,
9767 + ((wait) ? 0 : LM_FLAG_TRY) | GL_EXACT | GL_NOCACHE,
9769 + fl_gh->gh_owner = NULL;
9771 + gfs_glock_put(gl);
9773 + error = gfs_glock_nq(fl_gh);
9775 + gfs_holder_uninit(fl_gh);
9776 + if (error == GLR_TRYFAILED) {
9777 + GFS_ASSERT_INODE(!wait, ip,);
9783 + up(&fp->f_fl_lock);
9789 + * gfs_funlock - Release a flock on a file
9795 +gfs_funlock(struct gfs_file *fp)
9797 + struct gfs_holder *fl_gh = &fp->f_fl_gh;
9799 + down(&fp->f_fl_lock);
9801 + gfs_glock_dq_uninit(fl_gh);
9802 + up(&fp->f_fl_lock);
9806 diff -urN linux-orig/fs/gfs/flock.h linux-patched/fs/gfs/flock.h
9807 --- linux-orig/fs/gfs/flock.h 1969-12-31 18:00:00.000000000 -0600
9808 +++ linux-patched/fs/gfs/flock.h 2004-06-30 13:27:49.339712058 -0500
9810 +/******************************************************************************
9811 +*******************************************************************************
9813 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
9814 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
9816 +** This copyrighted material is made available to anyone wishing to use,
9817 +** modify, copy, or redistribute it subject to the terms and conditions
9818 +** of the GNU General Public License v.2.
9820 +*******************************************************************************
9821 +******************************************************************************/
9823 +#ifndef __FLOCK_DOT_H__
9824 +#define __FLOCK_DOT_H__
9826 +int gfs_flock(struct gfs_file *fp, int ex, int wait);
9827 +int gfs_funlock(struct gfs_file *fp);
9829 +#endif /* __FLOCK_DOT_H__ */
9830 diff -urN linux-orig/fs/gfs/format.h linux-patched/fs/gfs/format.h
9831 --- linux-orig/fs/gfs/format.h 1969-12-31 18:00:00.000000000 -0600
9832 +++ linux-patched/fs/gfs/format.h 2004-06-30 13:27:49.340711826 -0500
9834 +/******************************************************************************
9835 +*******************************************************************************
9837 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
9838 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
9840 +** This copyrighted material is made available to anyone wishing to use,
9841 +** modify, copy, or redistribute it subject to the terms and conditions
9842 +** of the GNU General Public License v.2.
9844 +*******************************************************************************
9845 +******************************************************************************/
9847 +#ifndef __FORMAT_DOT_H__
9848 +#define __FORMAT_DOT_H__
9850 +static const uint32_t gfs_old_fs_formats[] = {
9858 +static const uint32_t gfs_old_multihost_formats[] = {
9863 +#endif /* __FORMAT_DOT_H__ */
9864 diff -urN linux-orig/fs/gfs/gfs.h linux-patched/fs/gfs/gfs.h
9865 --- linux-orig/fs/gfs/gfs.h 1969-12-31 18:00:00.000000000 -0600
9866 +++ linux-patched/fs/gfs/gfs.h 2004-06-30 13:27:49.340711826 -0500
9868 +/******************************************************************************
9869 +*******************************************************************************
9871 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
9872 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
9874 +** This copyrighted material is made available to anyone wishing to use,
9875 +** modify, copy, or redistribute it subject to the terms and conditions
9876 +** of the GNU General Public License v.2.
9878 +*******************************************************************************
9879 +******************************************************************************/
9881 +#ifndef __GFS_DOT_H__
9882 +#define __GFS_DOT_H__
9884 +#define GFS_RELEASE_NAME "<CVS>"
9886 +#include <linux/lm_interface.h>
9887 +#include <linux/gfs_ondisk.h>
9888 +#include <linux/gfs_ioctl.h>
9890 +#include "fixed_div64.h"
9892 +#include "incore.h"
9903 +#define NO_CREATE (0)
9906 +#if (BITS_PER_LONG == 64)
9907 +#define PRIu64 "lu"
9908 +#define PRId64 "ld"
9909 +#define PRIo64 "lo"
9910 +#define PRIx64 "lx"
9911 +#define PRIX64 "lX"
9912 +#define SCNu64 "lu"
9913 +#define SCNd64 "ld"
9914 +#define SCNo64 "lo"
9915 +#define SCNx64 "lx"
9916 +#define SCNX64 "lX"
9918 +#define PRIu64 "Lu"
9919 +#define PRId64 "Ld"
9920 +#define PRIo64 "Lo"
9921 +#define PRIx64 "Lx"
9922 +#define PRIX64 "LX"
9923 +#define SCNu64 "Lu"
9924 +#define SCNd64 "Ld"
9925 +#define SCNo64 "Lo"
9926 +#define SCNx64 "Lx"
9927 +#define SCNX64 "LX"
9930 +/* Divide x by y. Round up if there is a remainder. */
9931 +#define DIV_RU(x, y) (((x) + (y) - 1) / (y))
9933 +#define GFS_FAST_NAME_SIZE (8)
9935 +#define vfs2sdp(sb) ((struct gfs_sbd *)(sb)->s_fs_info)
9936 +#define vn2ip(inode) ((struct gfs_inode *)(inode)->u.generic_ip)
9937 +#define vf2fp(file) ((struct gfs_file *)(file)->private_data)
9938 +#define bh2bd(bh) ((struct gfs_bufdata *)(bh)->b_private)
9939 +#define current_transaction ((struct gfs_trans *)(current->journal_info))
9941 +#define gl2ip(gl) ((struct gfs_inode *)(gl)->gl_object)
9942 +#define gl2rgd(gl) ((struct gfs_rgrpd *)(gl)->gl_object)
9943 +#define gl2gl(gl) ((struct gfs_glock *)(gl)->gl_object)
9945 +#define gfs_meta_check(sdp, bh) \
9948 + uint32_t meta_check_magic = ((struct gfs_meta_header *)(bh)->b_data)->mh_magic; \
9949 + meta_check_magic = gfs32_to_cpu(meta_check_magic); \
9950 + GFS_ASSERT_SBD(meta_check_magic == GFS_MAGIC, (sdp), \
9951 + struct gfs_meta_header meta_check_mh; \
9952 + printk("Bad metadata at %"PRIu64"\n", (uint64_t)(bh)->b_blocknr); \
9953 + gfs_meta_header_in(&meta_check_mh, (bh)->b_data); \
9954 + gfs_meta_header_print(&meta_check_mh);); \
9958 +#define gfs_metatype_check(sdp, bh, type) \
9961 + uint32_t metatype_check_magic = ((struct gfs_meta_header *)(bh)->b_data)->mh_magic; \
9962 + uint32_t metatype_check_type = ((struct gfs_meta_header *)(bh)->b_data)->mh_type; \
9963 + metatype_check_magic = gfs32_to_cpu(metatype_check_magic); \
9964 + metatype_check_type = gfs32_to_cpu(metatype_check_type); \
9965 + GFS_ASSERT_SBD(metatype_check_magic == GFS_MAGIC && \
9966 + metatype_check_type == (type), (sdp), \
9967 + struct gfs_meta_header metatype_check_mh; \
9968 + printk("Bad metadata at %"PRIu64", should be %u\n", (uint64_t)(bh)->b_blocknr, (type)); \
9969 + gfs_meta_header_in(&metatype_check_mh, (bh)->b_data); \
9970 + gfs_meta_header_print(&metatype_check_mh);); \
9974 +#define gfs_metatype_set(sdp, bh, type, format) \
9977 + gfs_meta_check((sdp), (bh)); \
9978 + ((struct gfs_meta_header *)(bh)->b_data)->mh_type = cpu_to_gfs32((type)); \
9979 + ((struct gfs_meta_header *)(bh)->b_data)->mh_format = cpu_to_gfs32((format)); \
9983 +#define gfs_sprintf(fmt, args...) \
9986 + if (*count + 256 > size) { \
9987 + error = -ENOMEM; \
9990 + *count += snprintf(buf + *count, 256, fmt, ##args); \
9993 + printk(fmt, ##args); \
9997 +#endif /* __GFS_DOT_H__ */
9998 diff -urN linux-orig/fs/gfs/glock.c linux-patched/fs/gfs/glock.c
9999 --- linux-orig/fs/gfs/glock.c 1969-12-31 18:00:00.000000000 -0600
10000 +++ linux-patched/fs/gfs/glock.c 2004-06-30 13:27:49.341711594 -0500
10002 +/******************************************************************************
10003 +*******************************************************************************
10005 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
10006 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
10008 +** This copyrighted material is made available to anyone wishing to use,
10009 +** modify, copy, or redistribute it subject to the terms and conditions
10010 +** of the GNU General Public License v.2.
10012 +*******************************************************************************
10013 +******************************************************************************/
10015 +#include <linux/sched.h>
10016 +#include <linux/slab.h>
10017 +#include <linux/smp_lock.h>
10018 +#include <linux/spinlock.h>
10019 +#include <asm/semaphore.h>
10020 +#include <linux/completion.h>
10021 +#include <linux/buffer_head.h>
10022 +#include <asm/uaccess.h>
10026 +#include "glock.h"
10027 +#include "glops.h"
10028 +#include "inode.h"
10030 +#include "quota.h"
10031 +#include "recovery.h"
10033 +/* Must be kept in sync with the beginning of struct gfs_glock */
10034 +struct glock_plug {
10035 + struct list_head gl_list;
10036 + unsigned long gl_flags;
10039 +typedef void (*glock_examiner) (struct gfs_glock * gl);
10042 + * relaxed_state_ok - is a requested lock compatible with the current lock mode?
10043 + * @actual: the current state of the lock
10044 + * @requested: the lock state that was requested by the caller
10045 + * @flags: the modifier flags passed in by the caller
10047 + * Returns: TRUE if the locks are compatible, FALSE otherwise
10050 +static __inline__ int
10051 +relaxed_state_ok(unsigned int actual, unsigned requested, int flags)
10053 + if (actual == requested)
10056 + if (flags & GL_EXACT)
10059 + if (actual == LM_ST_EXCLUSIVE && requested == LM_ST_SHARED)
10062 + if (actual != LM_ST_UNLOCKED && (flags & LM_FLAG_ANY))
10069 + * gl_hash() - Turn glock number into hash bucket number
10070 + * @lock: The glock number
10072 + * Returns: The number of the corresponding hash bucket
10075 +static unsigned int
10076 +gl_hash(struct lm_lockname *name)
10080 + h = gfs_hash(&name->ln_number, sizeof(uint64_t));
10081 + h = gfs_hash_more(&name->ln_type, sizeof(unsigned int), h);
10082 + h &= GFS_GL_HASH_MASK;
10088 + * glock_hold() - increment reference count on glock
10089 + * @gl: The glock to hold
10093 +static __inline__ void
10094 +glock_hold(struct gfs_glock *gl)
10096 + atomic_inc(&gl->gl_count);
10100 + * glock_put() - Decrement reference count on glock
10101 + * @gl: The glock to put
10105 +static __inline__ void
10106 +glock_put(struct gfs_glock *gl)
10108 + if (atomic_read(&gl->gl_count) == 1)
10109 + gfs_glock_schedule_for_reclaim(gl);
10110 + atomic_dec(&gl->gl_count);
10111 + GFS_ASSERT_GLOCK(atomic_read(&gl->gl_count) >= 0, gl,);
10115 + * queue_empty - check to see if a glock's queue is empty
10117 + * @head: the head of the queue to check
10119 + * Returns: TRUE if the queue is empty
10122 +static __inline__ int
10123 +queue_empty(struct gfs_glock *gl, struct list_head *head)
10126 + spin_lock(&gl->gl_spin);
10127 + empty = list_empty(head);
10128 + spin_unlock(&gl->gl_spin);
10133 + * search_bucket() - Find struct gfs_glock by lock number
10134 + * @bucket: the bucket to search
10135 + * @name: The lock name
10137 + * Returns: NULL, or the struct gfs_glock with the requested number
10140 +static struct gfs_glock *
10141 +search_bucket(struct gfs_gl_hash_bucket *bucket, struct lm_lockname *name)
10143 + struct list_head *tmp, *head;
10144 + struct gfs_glock *gl;
10146 + for (head = &bucket->hb_list, tmp = head->next;
10148 + tmp = tmp->next) {
10149 + gl = list_entry(tmp, struct gfs_glock, gl_list);
10151 + if (test_bit(GLF_PLUG, &gl->gl_flags))
10153 + if (!lm_name_equal(&gl->gl_name, name))
10165 + * gfs_glock_find() - Find glock by lock number
10166 + * @sdp: The GFS superblock
10167 + * @name: The lock name
10169 + * Figure out what bucket the lock is in, acquire the read lock on
10170 + * it and call search_bucket().
10172 + * Returns: NULL, or the struct gfs_glock with the requested number
10175 +struct gfs_glock *
10176 +gfs_glock_find(struct gfs_sbd *sdp, struct lm_lockname *name)
10178 + struct gfs_gl_hash_bucket *bucket = &sdp->sd_gl_hash[gl_hash(name)];
10179 + struct gfs_glock *gl;
10181 + read_lock(&bucket->hb_lock);
10182 + gl = search_bucket(bucket, name);
10183 + read_unlock(&bucket->hb_lock);
10189 + * glock_free() - Perform a few checks and then release struct gfs_glock
10190 + * @gl: The glock to release
10195 +glock_free(struct gfs_glock *gl)
10197 + struct gfs_sbd *sdp = gl->gl_sbd;
10198 + struct inode *aspace = gl->gl_aspace;
10200 + GFS_ASSERT_GLOCK(list_empty(&gl->gl_list), gl,);
10201 + GFS_ASSERT_GLOCK(atomic_read(&gl->gl_count) == 1, gl,);
10202 + GFS_ASSERT_GLOCK(list_empty(&gl->gl_holders), gl,);
10203 + GFS_ASSERT_GLOCK(list_empty(&gl->gl_waiters1), gl,);
10204 + GFS_ASSERT_GLOCK(list_empty(&gl->gl_waiters2), gl,);
10205 + GFS_ASSERT_GLOCK(gl->gl_state == LM_ST_UNLOCKED, gl,);
10206 + GFS_ASSERT_GLOCK(!gl->gl_object, gl,);
10207 + GFS_ASSERT_GLOCK(!gl->gl_lvb, gl,);
10208 + GFS_ASSERT_GLOCK(list_empty(&gl->gl_reclaim), gl,);
10210 + sdp->sd_lockstruct.ls_ops->lm_put_lock(gl->gl_lock);
10213 + gfs_aspace_put(aspace);
10215 + kmem_cache_free(gfs_glock_cachep, gl);
10217 + atomic_dec(&sdp->sd_glock_count);
10221 + * gfs_glock_get() - Get a glock, or create one if one doesn't exist
10222 + * @sdp: The GFS superblock
10223 + * @number: the lock number
10224 + * @glops: The glock_operations to use
10225 + * @create: If FALSE, don't create the glock if it doesn't exist
10226 + * @glp: the glock is returned here
10228 + * Returns: 0 on success, -EXXX on failure
10232 +gfs_glock_get(struct gfs_sbd *sdp,
10233 + uint64_t number, struct gfs_glock_operations *glops,
10234 + int create, struct gfs_glock **glp)
10236 + struct lm_lockname name;
10237 + struct gfs_glock *gl, *tmp;
10238 + struct gfs_gl_hash_bucket *bucket;
10241 + name.ln_number = number;
10242 + name.ln_type = glops->go_type;
10243 + bucket = &sdp->sd_gl_hash[gl_hash(&name)];
10245 + read_lock(&bucket->hb_lock);
10246 + gl = search_bucket(bucket, &name);
10247 + read_unlock(&bucket->hb_lock);
10249 + if (gl || !create) {
10254 + gl = kmem_cache_alloc(gfs_glock_cachep, GFP_KERNEL);
10258 + memset(gl, 0, sizeof(struct gfs_glock));
10260 + INIT_LIST_HEAD(&gl->gl_list);
10261 + gl->gl_name = name;
10262 + atomic_set(&gl->gl_count, 1);
10264 + spin_lock_init(&gl->gl_spin);
10266 + gl->gl_state = LM_ST_UNLOCKED;
10267 + INIT_LIST_HEAD(&gl->gl_holders);
10268 + INIT_LIST_HEAD(&gl->gl_waiters1);
10269 + INIT_LIST_HEAD(&gl->gl_waiters2);
10271 + gl->gl_ops = glops;
10273 + INIT_LE(&gl->gl_new_le, &gfs_glock_lops);
10274 + INIT_LE(&gl->gl_incore_le, &gfs_glock_lops);
10276 + gl->gl_bucket = bucket;
10277 + INIT_LIST_HEAD(&gl->gl_reclaim);
10279 + gl->gl_sbd = sdp;
10281 + INIT_LIST_HEAD(&gl->gl_dirty_buffers);
10282 + INIT_LIST_HEAD(&gl->gl_ail_bufs);
10284 + if (glops == &gfs_inode_glops ||
10285 + glops == &gfs_rgrp_glops ||
10286 + glops == &gfs_meta_glops) {
10287 + gl->gl_aspace = gfs_aspace_get(sdp);
10288 + if (!gl->gl_aspace) {
10294 + error = sdp->sd_lockstruct.ls_ops->lm_get_lock(sdp->sd_lockstruct.ls_lockspace,
10298 + goto fail_aspace;
10300 + atomic_inc(&sdp->sd_glock_count);
10302 + write_lock(&bucket->hb_lock);
10303 + tmp = search_bucket(bucket, &name);
10305 + write_unlock(&bucket->hb_lock);
10309 + list_add_tail(&gl->gl_list, &bucket->hb_list);
10310 + write_unlock(&bucket->hb_lock);
10318 + if (gl->gl_aspace)
10319 + gfs_aspace_put(gl->gl_aspace);
10322 + kmem_cache_free(gfs_glock_cachep, gl);
10328 + * gfs_glock_hold() - As glock_hold(), but suitable for exporting
10329 + * @gl: The glock to hold
10334 +gfs_glock_hold(struct gfs_glock *gl)
10336 + GFS_ASSERT_GLOCK(atomic_read(&gl->gl_count) > 0, gl,);
10341 + * gfs_glock_put() - As glock_put(), but suitable for exporting
10342 + * @gl: The glock to put
10347 +gfs_glock_put(struct gfs_glock *gl)
10353 + * gfs_holder_init - initialize a struct gfs_holder in the default way
10355 + * @state: the state we're requesting
10356 + * @flags: the modifier flags
10357 + * @gh: the holder structure
10362 +gfs_holder_init(struct gfs_glock *gl, unsigned int state, int flags,
10363 + struct gfs_holder *gh)
10365 + memset(gh, 0, sizeof(struct gfs_holder));
10367 + INIT_LIST_HEAD(&gh->gh_list);
10369 + gh->gh_owner = current;
10370 + gh->gh_state = state;
10371 + gh->gh_flags = flags;
10373 + if (gh->gh_state == LM_ST_EXCLUSIVE)
10374 + gh->gh_flags |= GL_LOCAL_EXCL;
10376 + init_completion(&gh->gh_wait);
10382 + * gfs_holder_reinit - reinitialize a struct gfs_holder so we can requeue it
10383 + * @state: the state we're requesting
10384 + * @flags: the modifier flags
10385 + * @gh: the holder structure
10387 + * Don't mess with the glock.
10392 +gfs_holder_reinit(unsigned int state, int flags, struct gfs_holder *gh)
10396 + GFS_ASSERT_GLOCK(list_empty(&gh->gh_list), gh->gh_gl,);
10398 + gh->gh_state = state;
10399 + gh->gh_flags = flags;
10401 + if (gh->gh_state == LM_ST_EXCLUSIVE)
10402 + gh->gh_flags |= GL_LOCAL_EXCL;
10404 + alloced = test_bit(HIF_ALLOCED, &gh->gh_iflags);
10405 + memset(&gh->gh_iflags, 0, sizeof(unsigned long));
10407 + set_bit(HIF_ALLOCED, &gh->gh_iflags);
10411 + * gfs_holder_uninit - uninitialize a holder structure (drop reference on glock)
10412 + * @gh: the holder structure
10417 +gfs_holder_uninit(struct gfs_holder *gh)
10419 + struct gfs_glock *gl = gh->gh_gl;
10421 + GFS_ASSERT_GLOCK(list_empty(&gh->gh_list), gl,);
10422 + gh->gh_gl = NULL;
10428 + * gfs_holder_get - get a struct gfs_holder structure
10430 + * @state: the state we're requesting
10431 + * @flags: the modifier flags
10433 + * Figure out how big an impact this function has. Either:
10434 + * 1) Replace it with a cache of structures hanging off the struct gfs_sbd
10435 + * 2) Get rid of it and call gmalloc() directly
10436 + * 3) Leave it like it is
10438 + * Returns: the holder structure
10441 +struct gfs_holder *
10442 +gfs_holder_get(struct gfs_glock *gl, unsigned int state, int flags)
10444 + struct gfs_holder *gh;
10446 + gh = gmalloc(sizeof(struct gfs_holder));
10447 + gfs_holder_init(gl, state, flags, gh);
10448 + set_bit(HIF_ALLOCED, &gh->gh_iflags);
10454 + * gfs_holder_put - get rid of a struct gfs_holder structure
10455 + * @gh: the holder structure
10460 +gfs_holder_put(struct gfs_holder *gh)
10462 + GFS_ASSERT_GLOCK(test_bit(HIF_ALLOCED, &gh->gh_iflags), gh->gh_gl,);
10463 + gfs_holder_uninit(gh);
10468 + * handle_recurse - put other holder structures (marked recursive) into the holders list
10469 + * @gh: the holder structure
10474 +handle_recurse(struct gfs_holder *gh)
10476 + struct gfs_glock *gl = gh->gh_gl;
10477 + struct list_head *tmp, *head, *next;
10478 + struct gfs_holder *tmp_gh;
10479 + int found = FALSE;
10481 + GFS_ASSERT_GLOCK(gh->gh_owner, gl,);
10483 + for (head = &gl->gl_waiters2, tmp = head->next, next = tmp->next;
10485 + tmp = next, next = tmp->next) {
10486 + tmp_gh = list_entry(tmp, struct gfs_holder, gh_list);
10487 + if (tmp_gh->gh_owner != gh->gh_owner)
10490 + GFS_ASSERT_GLOCK(test_bit(HIF_RECURSE, &tmp_gh->gh_iflags),
10493 + list_move_tail(&tmp_gh->gh_list, &gl->gl_holders);
10494 + tmp_gh->gh_error = 0;
10495 + set_bit(HIF_HOLDER, &tmp_gh->gh_iflags);
10497 + complete(&tmp_gh->gh_wait);
10502 + GFS_ASSERT_GLOCK(found, gl,);
10506 + * do_unrecurse - a recursive holder was just dropped off the waiters2 list
10507 + * @gh: the holder
10509 + * If there is only one other recursive holder, clear its HIF_RECURSE bit.
10510 + * If there is more than one, leave them alone.
10515 +do_unrecurse(struct gfs_holder *gh)
10517 + struct gfs_glock *gl = gh->gh_gl;
10518 + struct list_head *tmp, *head;
10519 + struct gfs_holder *tmp_gh, *last_gh = NULL;
10520 + int found = FALSE;
10522 + GFS_ASSERT_GLOCK(gh->gh_owner, gl,);
10524 + for (head = &gl->gl_waiters2, tmp = head->next;
10526 + tmp = tmp->next) {
10527 + tmp_gh = list_entry(tmp, struct gfs_holder, gh_list);
10528 + if (tmp_gh->gh_owner != gh->gh_owner)
10531 + GFS_ASSERT_GLOCK(test_bit(HIF_RECURSE, &tmp_gh->gh_iflags),
10538 + last_gh = tmp_gh;
10541 + GFS_ASSERT_GLOCK(found, gl,);
10542 + clear_bit(HIF_RECURSE, &last_gh->gh_iflags);
10546 + * rq_mutex - process a mutex request in the queue
10547 + * @gh: the glock holder
10549 + * Returns: TRUE if the queue is blocked,
10553 +rq_mutex(struct gfs_holder *gh)
10555 + struct gfs_glock *gl = gh->gh_gl;
10557 + list_del_init(&gh->gh_list);
10558 + /* gh->gh_error never examined. */
10559 + set_bit(GLF_LOCK, &gl->gl_flags);
10560 + complete(&gh->gh_wait);
10566 + * rq_promote - process a promote request in the queue
10567 + * @gh: the glock holder
10568 + * @promote_ok: It's ok to ask the LM to do promotes on a sync lock module
10570 + * Returns: TRUE if the queue is blocked,
10574 +rq_promote(struct gfs_holder *gh, int promote_ok)
10576 + struct gfs_glock *gl = gh->gh_gl;
10577 + struct gfs_sbd *sdp = gl->gl_sbd;
10578 + struct gfs_glock_operations *glops = gl->gl_ops;
10581 + if (!relaxed_state_ok(gl->gl_state, gh->gh_state, gh->gh_flags)) {
10582 + if (list_empty(&gl->gl_holders)) {
10583 + if (promote_ok || GFS_ASYNC_LM(sdp)) {
10584 + gl->gl_req_gh = gh;
10585 + set_bit(GLF_LOCK, &gl->gl_flags);
10586 + spin_unlock(&gl->gl_spin);
10588 + if (atomic_read(&sdp->sd_reclaim_count) >
10589 + sdp->sd_tune.gt_reclaim_limit &&
10590 + !(gh->gh_flags & LM_FLAG_PRIORITY)) {
10591 + gfs_reclaim_glock(sdp);
10592 + gfs_reclaim_glock(sdp);
10595 + glops->go_xmote_th(gl, gh->gh_state,
10598 + spin_lock(&gl->gl_spin);
10600 + if (!test_and_set_bit(HIF_WAKEUP, &gh->gh_iflags))
10601 + complete(&gh->gh_wait);
10606 + if (list_empty(&gl->gl_holders)) {
10607 + set_bit(HIF_FIRST, &gh->gh_iflags);
10608 + set_bit(GLF_LOCK, &gl->gl_flags);
10611 + struct gfs_holder *next_gh;
10612 + if (gh->gh_flags & GL_LOCAL_EXCL)
10614 + next_gh = list_entry(gl->gl_holders.next, struct gfs_holder, gh_list);
10615 + if (next_gh->gh_flags & GL_LOCAL_EXCL)
10617 + recurse = test_bit(HIF_RECURSE, &gh->gh_iflags);
10620 + list_move_tail(&gh->gh_list, &gl->gl_holders);
10621 + gh->gh_error = 0;
10622 + set_bit(HIF_HOLDER, &gh->gh_iflags);
10625 + handle_recurse(gh);
10627 + complete(&gh->gh_wait);
10633 + * rq_demote - process a demote request in the queue
10634 + * @gh: the glock holder
10636 + * Returns: TRUE if the queue is blocked,
10640 +rq_demote(struct gfs_holder *gh)
10642 + struct gfs_glock *gl = gh->gh_gl;
10643 + struct gfs_glock_operations *glops = gl->gl_ops;
10645 + if (!list_empty(&gl->gl_holders))
10648 + if (gl->gl_state == gh->gh_state || gl->gl_state == LM_ST_UNLOCKED) {
10649 + list_del_init(&gh->gh_list);
10650 + gh->gh_error = 0;
10651 + spin_unlock(&gl->gl_spin);
10652 + if (test_bit(HIF_DEALLOC, &gh->gh_iflags))
10653 + gfs_holder_put(gh);
10655 + complete(&gh->gh_wait);
10656 + spin_lock(&gl->gl_spin);
10658 + gl->gl_req_gh = gh;
10659 + set_bit(GLF_LOCK, &gl->gl_flags);
10660 + spin_unlock(&gl->gl_spin);
10662 + if (gh->gh_state == LM_ST_UNLOCKED ||
10663 + gl->gl_state != LM_ST_EXCLUSIVE)
10664 + glops->go_drop_th(gl);
10666 + glops->go_xmote_th(gl, gh->gh_state, gh->gh_flags);
10668 + spin_lock(&gl->gl_spin);
10675 + * run_queue - process holder structures on a glock
10677 + * @promote_ok: It's ok to ask the LM to do promotes on a sync lock module
10682 +run_queue(struct gfs_glock *gl, int promote_ok)
10684 + struct gfs_holder *gh;
10688 + if (test_bit(GLF_LOCK, &gl->gl_flags))
10691 + if (!list_empty(&gl->gl_waiters1)) {
10692 + gh = list_entry(gl->gl_waiters1.next,
10693 + struct gfs_holder, gh_list);
10695 + if (test_bit(HIF_MUTEX, &gh->gh_iflags))
10696 + blocked = rq_mutex(gh);
10698 + GFS_ASSERT_GLOCK(FALSE, gl,);
10700 + } else if (!list_empty(&gl->gl_waiters2)) {
10701 + gh = list_entry(gl->gl_waiters2.next,
10702 + struct gfs_holder, gh_list);
10704 + if (test_bit(HIF_PROMOTE, &gh->gh_iflags))
10705 + blocked = rq_promote(gh, promote_ok);
10706 + else if (test_bit(HIF_DEMOTE, &gh->gh_iflags))
10707 + blocked = rq_demote(gh);
10709 + GFS_ASSERT_GLOCK(FALSE, gl,);
10720 + * lock_on_glock - acquire a local lock on a glock
10726 +lock_on_glock(struct gfs_glock *gl)
10728 + struct gfs_holder gh;
10730 + gfs_holder_init(gl, 0, 0, &gh);
10731 + set_bit(HIF_MUTEX, &gh.gh_iflags);
10733 + spin_lock(&gl->gl_spin);
10734 + if (test_and_set_bit(GLF_LOCK, &gl->gl_flags))
10735 + list_add_tail(&gh.gh_list, &gl->gl_waiters1);
10737 + complete(&gh.gh_wait);
10738 + spin_unlock(&gl->gl_spin);
10740 + wait_for_completion(&gh.gh_wait);
10741 + gfs_holder_uninit(&gh);
10745 + * trylock_on_glock - try to acquire a local lock on a glock
10748 + * Returns: TRUE if the glock is acquired
10752 +trylock_on_glock(struct gfs_glock *gl)
10754 + int acquired = TRUE;
10756 + spin_lock(&gl->gl_spin);
10757 + if (test_and_set_bit(GLF_LOCK, &gl->gl_flags))
10758 + acquired = FALSE;
10759 + spin_unlock(&gl->gl_spin);
10765 + * unlock_on_glock - release a local lock on a glock
10771 +unlock_on_glock(struct gfs_glock *gl)
10773 + spin_lock(&gl->gl_spin);
10774 + clear_bit(GLF_LOCK, &gl->gl_flags);
10775 + run_queue(gl, FALSE);
10776 + spin_unlock(&gl->gl_spin);
10780 + * handle_callback - add a demote request to a lock's queue
10782 + * @state: the state the callback is asking us to change to
10787 +handle_callback(struct gfs_glock *gl, unsigned int state)
10789 + struct list_head *tmp, *head;
10790 + struct gfs_holder *gh, *new_gh = NULL;
10792 + GFS_ASSERT_GLOCK(state != LM_ST_EXCLUSIVE, gl,);
10795 + spin_lock(&gl->gl_spin);
10797 + for (head = &gl->gl_waiters2, tmp = head->next;
10799 + tmp = tmp->next) {
10800 + gh = list_entry(tmp, struct gfs_holder, gh_list);
10801 + if (test_bit(HIF_DEMOTE, &gh->gh_iflags) &&
10802 + gl->gl_req_gh != gh) {
10803 + if (gh->gh_state != state)
10804 + gh->gh_state = LM_ST_UNLOCKED;
10810 + list_add(&new_gh->gh_list, &gl->gl_waiters2);
10813 + spin_unlock(&gl->gl_spin);
10815 + new_gh = gfs_holder_get(gl, state, LM_FLAG_TRY);
10816 + set_bit(HIF_DEMOTE, &new_gh->gh_iflags);
10817 + set_bit(HIF_DEALLOC, &new_gh->gh_iflags);
10818 + new_gh->gh_owner = NULL;
10824 + spin_unlock(&gl->gl_spin);
10827 + gfs_holder_put(new_gh);
10831 + * state_change - record that the glock is now in a different state
10833 + * @new_state: the new state
10838 +state_change(struct gfs_glock *gl, unsigned int new_state)
10840 + struct gfs_sbd *sdp = gl->gl_sbd;
10841 + int held1, held2;
10843 + held1 = (gl->gl_state != LM_ST_UNLOCKED);
10844 + held2 = (new_state != LM_ST_UNLOCKED);
10846 + if (held1 != held2) {
10848 + atomic_inc(&sdp->sd_glock_held_count);
10851 + atomic_dec(&sdp->sd_glock_held_count);
10856 + gl->gl_state = new_state;
10860 + * xmote_bh - Called after the lock module is done acquiring a lock
10861 + * @gl: The glock in question
10862 + * @ret: the int returned from the lock module
10867 +xmote_bh(struct gfs_glock *gl, unsigned int ret)
10869 + struct gfs_glock_operations *glops = gl->gl_ops;
10870 + struct gfs_holder *gh = gl->gl_req_gh;
10871 + int prev_state = gl->gl_state;
10872 + int op_done = TRUE;
10874 + GFS_ASSERT_GLOCK(test_bit(GLF_LOCK, &gl->gl_flags), gl,);
10875 + GFS_ASSERT_GLOCK(queue_empty(gl, &gl->gl_holders), gl,);
10876 + GFS_ASSERT_GLOCK(!(ret & LM_OUT_ASYNC), gl,);
10878 + state_change(gl, ret & LM_OUT_ST_MASK);
10880 + if (ret & LM_OUT_NEED_E)
10881 + handle_callback(gl, LM_ST_UNLOCKED);
10882 + else if (ret & LM_OUT_NEED_D)
10883 + handle_callback(gl, LM_ST_DEFERRED);
10884 + else if (ret & LM_OUT_NEED_S)
10885 + handle_callback(gl, LM_ST_SHARED);
10887 + if (ret & LM_OUT_LVB_INVALID)
10888 + set_bit(GLF_LVB_INVALID, &gl->gl_flags);
10890 + if (prev_state != LM_ST_UNLOCKED && !(ret & LM_OUT_CACHEABLE)) {
10891 + if (glops->go_inval)
10892 + glops->go_inval(gl, DIO_METADATA | DIO_DATA);
10893 + } else if (gl->gl_state == LM_ST_DEFERRED) {
10894 + /* We might not want to do this here.
10895 + Look at moving to the inode glops. */
10896 + if (glops->go_inval)
10897 + glops->go_inval(gl, DIO_DATA);
10900 + /* Deal with each possible exit condition */
10903 + gl->gl_stamp = jiffies;
10905 + else if (test_bit(HIF_DEMOTE, &gh->gh_iflags)) {
10906 + spin_lock(&gl->gl_spin);
10907 + list_del_init(&gh->gh_list);
10908 + if (gl->gl_state == gh->gh_state ||
10909 + gl->gl_state == LM_ST_UNLOCKED)
10910 + gh->gh_error = 0;
10912 + gh->gh_error = GLR_TRYFAILED;
10913 + spin_unlock(&gl->gl_spin);
10915 + if (ret & LM_OUT_CANCELED)
10916 + handle_callback(gl, LM_ST_UNLOCKED); /* Lame */
10918 + } else if (ret & LM_OUT_CANCELED) {
10919 + spin_lock(&gl->gl_spin);
10920 + list_del_init(&gh->gh_list);
10921 + gh->gh_error = GLR_CANCELED;
10922 + if (test_bit(HIF_RECURSE, &gh->gh_iflags))
10923 + do_unrecurse(gh);
10924 + spin_unlock(&gl->gl_spin);
10926 + } else if (relaxed_state_ok(gl->gl_state, gh->gh_state, gh->gh_flags)) {
10927 + spin_lock(&gl->gl_spin);
10928 + list_move_tail(&gh->gh_list, &gl->gl_holders);
10929 + gh->gh_error = 0;
10930 + set_bit(HIF_HOLDER, &gh->gh_iflags);
10931 + spin_unlock(&gl->gl_spin);
10933 + set_bit(HIF_FIRST, &gh->gh_iflags);
10937 + } else if (gh->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB)) {
10938 + spin_lock(&gl->gl_spin);
10939 + list_del_init(&gh->gh_list);
10940 + gh->gh_error = GLR_TRYFAILED;
10941 + if (test_bit(HIF_RECURSE, &gh->gh_iflags))
10942 + do_unrecurse(gh);
10943 + spin_unlock(&gl->gl_spin);
10946 + GFS_ASSERT_GLOCK(FALSE, gl,);
10948 + if (glops->go_xmote_bh)
10949 + glops->go_xmote_bh(gl);
10952 + spin_lock(&gl->gl_spin);
10953 + gl->gl_req_gh = NULL;
10954 + gl->gl_req_bh = NULL;
10955 + clear_bit(GLF_LOCK, &gl->gl_flags);
10956 + run_queue(gl, FALSE);
10957 + spin_unlock(&gl->gl_spin);
10963 + if (test_bit(HIF_DEALLOC, &gh->gh_iflags))
10964 + gfs_holder_put(gh);
10966 + complete(&gh->gh_wait);
10971 + * gfs_glock_xmote_th - Call into the lock module to acquire a glock
10972 + * @gl: The glock in question
10973 + * @state: the requested state
10974 + * @flags: modifier flags to the lock call
10979 +gfs_glock_xmote_th(struct gfs_glock *gl, unsigned int state, int flags)
10981 + struct gfs_sbd *sdp = gl->gl_sbd;
10982 + struct gfs_glock_operations *glops = gl->gl_ops;
10983 + int lck_flags = flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB |
10984 + LM_FLAG_NOEXP | LM_FLAG_ANY |
10985 + LM_FLAG_PRIORITY);
10986 + unsigned int lck_ret;
10988 + GFS_ASSERT_GLOCK(test_bit(GLF_LOCK, &gl->gl_flags), gl,);
10989 + GFS_ASSERT_GLOCK(queue_empty(gl, &gl->gl_holders), gl,);
10990 + GFS_ASSERT_GLOCK(state != LM_ST_UNLOCKED, gl,);
10991 + GFS_ASSERT_GLOCK(state != gl->gl_state, gl,);
10993 + if (gl->gl_state == LM_ST_EXCLUSIVE) {
10994 + if (glops->go_sync)
10995 + glops->go_sync(gl, DIO_METADATA | DIO_DATA);
10999 + gl->gl_req_bh = xmote_bh;
11001 + atomic_inc(&sdp->sd_lm_lock_calls);
11003 + lck_ret = sdp->sd_lockstruct.ls_ops->lm_lock(gl->gl_lock,
11005 + state, lck_flags);
11007 + if (lck_ret & LM_OUT_ASYNC)
11008 + GFS_ASSERT_GLOCK(lck_ret == LM_OUT_ASYNC, gl,);
11010 + xmote_bh(gl, lck_ret);
11014 + * drop_bh - Called after a lock module unlock completes
11016 + * @ret: the return status
11018 + * Doesn't wake up the process waiting on the struct gfs_holder (if any)
11019 + * Doesn't drop the reference on the glock the top half took out
11024 +drop_bh(struct gfs_glock *gl, unsigned int ret)
11026 + struct gfs_glock_operations *glops = gl->gl_ops;
11027 + struct gfs_holder *gh = gl->gl_req_gh;
11029 + clear_bit(GLF_PREFETCH, &gl->gl_flags);
11031 + GFS_ASSERT_GLOCK(test_bit(GLF_LOCK, &gl->gl_flags), gl,);
11032 + GFS_ASSERT_GLOCK(queue_empty(gl, &gl->gl_holders), gl,);
11033 + GFS_ASSERT_GLOCK(!ret, gl,);
11035 + state_change(gl, LM_ST_UNLOCKED);
11037 + if (glops->go_inval)
11038 + glops->go_inval(gl, DIO_METADATA | DIO_DATA);
11041 + spin_lock(&gl->gl_spin);
11042 + list_del_init(&gh->gh_list);
11043 + gh->gh_error = 0;
11044 + spin_unlock(&gl->gl_spin);
11047 + if (glops->go_drop_bh)
11048 + glops->go_drop_bh(gl);
11050 + spin_lock(&gl->gl_spin);
11051 + gl->gl_req_gh = NULL;
11052 + gl->gl_req_bh = NULL;
11053 + clear_bit(GLF_LOCK, &gl->gl_flags);
11054 + run_queue(gl, FALSE);
11055 + spin_unlock(&gl->gl_spin);
11060 + if (test_bit(HIF_DEALLOC, &gh->gh_iflags))
11061 + gfs_holder_put(gh);
11063 + complete(&gh->gh_wait);
11068 + * gfs_glock_drop_th - call into the lock module to unlock a lock
11074 +gfs_glock_drop_th(struct gfs_glock *gl)
11076 + struct gfs_sbd *sdp = gl->gl_sbd;
11077 + struct gfs_glock_operations *glops = gl->gl_ops;
11078 + unsigned int ret;
11080 + GFS_ASSERT_GLOCK(test_bit(GLF_LOCK, &gl->gl_flags), gl,);
11081 + GFS_ASSERT_GLOCK(queue_empty(gl, &gl->gl_holders), gl,);
11082 + GFS_ASSERT_GLOCK(gl->gl_state != LM_ST_UNLOCKED, gl,);
11084 + if (gl->gl_state == LM_ST_EXCLUSIVE) {
11085 + if (glops->go_sync)
11086 + glops->go_sync(gl, DIO_METADATA | DIO_DATA);
11090 + gl->gl_req_bh = drop_bh;
11092 + atomic_inc(&sdp->sd_lm_unlock_calls);
11094 + ret = sdp->sd_lockstruct.ls_ops->lm_unlock(gl->gl_lock, gl->gl_state);
11097 + drop_bh(gl, ret);
11099 + GFS_ASSERT_GLOCK(ret == LM_OUT_ASYNC, gl,);
11103 + * handle_cancels - cancel requests for locks stuck waiting on an expire flag
11104 + * @gh: the LM_FLAG_NOEXP holder waiting to acquire the lock
11109 +handle_cancels(struct gfs_holder *gh)
11111 + struct gfs_glock *gl = gh->gh_gl;
11113 + spin_lock(&gl->gl_spin);
11115 + while (gl->gl_req_gh != gh &&
11116 + !test_bit(HIF_HOLDER, &gh->gh_iflags) &&
11117 + !test_bit(HIF_WAKEUP, &gh->gh_iflags) &&
11118 + !list_empty(&gh->gh_list)) {
11119 + if (gl->gl_req_bh) {
11120 + spin_unlock(&gl->gl_spin);
11121 + gl->gl_sbd->sd_lockstruct.ls_ops->lm_cancel(gl->gl_lock);
11123 + spin_lock(&gl->gl_spin);
11125 + spin_unlock(&gl->gl_spin);
11127 + spin_lock(&gl->gl_spin);
11131 + spin_unlock(&gl->gl_spin);
11135 + * glock_wait_internal - wait on a glock acquisition
11136 + * @gh: the glock holder
11138 + * Returns: 0 on success
11142 +glock_wait_internal(struct gfs_holder *gh)
11144 + struct gfs_glock *gl = gh->gh_gl;
11145 + struct gfs_glock_operations *glops = gl->gl_ops;
11148 + if (gh->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB)) {
11149 + spin_lock(&gl->gl_spin);
11150 + if (gl->gl_req_gh != gh &&
11151 + !test_bit(HIF_HOLDER, &gh->gh_iflags) &&
11152 + !test_bit(HIF_WAKEUP, &gh->gh_iflags) &&
11153 + !list_empty(&gh->gh_list)) {
11154 + list_del_init(&gh->gh_list);
11155 + gh->gh_error = GLR_TRYFAILED;
11156 + if (test_bit(HIF_RECURSE, &gh->gh_iflags))
11157 + do_unrecurse(gh);
11158 + run_queue(gl, FALSE);
11159 + spin_unlock(&gl->gl_spin);
11160 + return GLR_TRYFAILED;
11162 + spin_unlock(&gl->gl_spin);
11165 + if (gh->gh_flags & LM_FLAG_NOEXP)
11166 + handle_cancels(gh);
11169 + wait_for_completion(&gh->gh_wait);
11171 + spin_lock(&gl->gl_spin);
11172 + if (test_and_clear_bit(HIF_WAKEUP, &gh->gh_iflags)) {
11173 + run_queue(gl, TRUE);
11174 + spin_unlock(&gl->gl_spin);
11176 + spin_unlock(&gl->gl_spin);
11181 + if (gh->gh_error)
11182 + return gh->gh_error;
11184 + GFS_ASSERT_GLOCK(test_bit(HIF_HOLDER, &gh->gh_iflags), gl,);
11185 + GFS_ASSERT_GLOCK(relaxed_state_ok(gl->gl_state, gh->gh_state,
11186 + gh->gh_flags), gl,);
11188 + if (test_bit(HIF_FIRST, &gh->gh_iflags)) {
11189 + GFS_ASSERT_GLOCK(test_bit(GLF_LOCK, &gl->gl_flags), gl,);
11191 + if (glops->go_lock) {
11192 + error = glops->go_lock(gl, gh->gh_flags);
11194 + spin_lock(&gl->gl_spin);
11195 + list_del_init(&gh->gh_list);
11196 + gh->gh_error = error;
11197 + if (test_and_clear_bit(HIF_RECURSE, &gh->gh_iflags))
11198 + do_unrecurse(gh);
11199 + spin_unlock(&gl->gl_spin);
11203 + spin_lock(&gl->gl_spin);
11204 + gl->gl_req_gh = NULL;
11205 + gl->gl_req_bh = NULL;
11206 + clear_bit(GLF_LOCK, &gl->gl_flags);
11207 + if (test_bit(HIF_RECURSE, &gh->gh_iflags))
11208 + handle_recurse(gh);
11209 + run_queue(gl, FALSE);
11210 + spin_unlock(&gl->gl_spin);
11217 + * add_to_queue - Add a holder to the wait queue (but look for recursion)
11218 + * @gh: the holder structure
11223 +add_to_queue(struct gfs_holder *gh)
11225 + struct gfs_glock *gl = gh->gh_gl;
11226 + struct list_head *tmp, *head;
11227 + struct gfs_holder *tmp_gh;
11229 + if (gh->gh_owner) {
11230 + for (head = &gl->gl_holders, tmp = head->next;
11232 + tmp = tmp->next) {
11233 + tmp_gh = list_entry(tmp, struct gfs_holder, gh_list);
11234 + if (tmp_gh->gh_owner == gh->gh_owner) {
11235 + GFS_ASSERT_GLOCK((gh->gh_flags & LM_FLAG_ANY) ||
11236 + !(tmp_gh->gh_flags & LM_FLAG_ANY),
11238 + GFS_ASSERT_GLOCK((tmp_gh->gh_flags & GL_LOCAL_EXCL) ||
11239 + !(gh->gh_flags & GL_LOCAL_EXCL),
11241 + GFS_ASSERT_GLOCK(relaxed_state_ok(gl->gl_state,
11246 + list_add_tail(&gh->gh_list, &gl->gl_holders);
11247 + set_bit(HIF_HOLDER, &gh->gh_iflags);
11249 + gh->gh_error = 0;
11250 + complete(&gh->gh_wait);
11256 + for (head = &gl->gl_waiters2, tmp = head->next;
11258 + tmp = tmp->next) {
11259 + tmp_gh = list_entry(tmp, struct gfs_holder, gh_list);
11260 + if (tmp_gh->gh_owner == gh->gh_owner) {
11261 + GFS_ASSERT_GLOCK(test_bit(HIF_PROMOTE,
11262 + &tmp_gh->gh_iflags),
11264 + GFS_ASSERT_GLOCK((gh->gh_flags & LM_FLAG_ANY) ||
11265 + !(tmp_gh->gh_flags & LM_FLAG_ANY),
11267 + GFS_ASSERT_GLOCK((tmp_gh->gh_flags & GL_LOCAL_EXCL) ||
11268 + !(gh->gh_flags & GL_LOCAL_EXCL),
11270 + GFS_ASSERT_GLOCK(relaxed_state_ok(tmp_gh->gh_state,
11275 + set_bit(HIF_RECURSE, &gh->gh_iflags);
11276 + set_bit(HIF_RECURSE, &tmp_gh->gh_iflags);
11278 + list_add_tail(&gh->gh_list, &gl->gl_waiters2);
11285 + if (gh->gh_flags & LM_FLAG_PRIORITY)
11286 + list_add(&gh->gh_list, &gl->gl_waiters2);
11288 + list_add_tail(&gh->gh_list, &gl->gl_waiters2);
11292 + * gfs_glock_nq - enqueue a struct gfs_holder onto a glock (acquire a glock)
11293 + * @gh: the holder structure
11295 + * if (gh->gh_flags & GL_ASYNC), this never returns an error
11297 + * Returns: 0, GLR_TRYFAILED, or -EXXX on failure
11301 +gfs_glock_nq(struct gfs_holder *gh)
11303 + struct gfs_glock *gl = gh->gh_gl;
11304 + struct gfs_sbd *sdp = gl->gl_sbd;
11307 + GFS_ASSERT_GLOCK(list_empty(&gh->gh_list), gl,);
11308 + GFS_ASSERT_GLOCK(gh->gh_state != LM_ST_UNLOCKED, gl,);
11309 + GFS_ASSERT_GLOCK((gh->gh_flags & (LM_FLAG_ANY | GL_EXACT)) !=
11310 + (LM_FLAG_ANY | GL_EXACT), gl,);
11311 + GFS_ASSERT_GLOCK(GFS_ASYNC_LM(sdp) ||
11312 + !(gh->gh_flags & GL_ASYNC), gl,);
11314 + atomic_inc(&sdp->sd_glock_nq_calls);
11317 + set_bit(HIF_PROMOTE, &gh->gh_iflags);
11319 + spin_lock(&gl->gl_spin);
11320 + add_to_queue(gh);
11321 + run_queue(gl, TRUE);
11322 + spin_unlock(&gl->gl_spin);
11324 + if (!(gh->gh_flags & GL_ASYNC)) {
11325 + error = glock_wait_internal(gh);
11326 + if (error == GLR_CANCELED) {
11327 + current->state = TASK_UNINTERRUPTIBLE;
11328 + schedule_timeout(HZ);
11333 + clear_bit(GLF_PREFETCH, &gl->gl_flags);
11339 + * gfs_glock_poll - poll to see if an async request has been completed
11340 + * @gh: the holder
11342 + * Returns: TRUE if the request is ready to be gfs_glock_wait()ed on
11346 +gfs_glock_poll(struct gfs_holder *gh)
11348 + struct gfs_glock *gl = gh->gh_gl;
11349 + int ready = FALSE;
11351 + GFS_ASSERT_GLOCK(gh->gh_flags & GL_ASYNC, gl,);
11352 + GFS_ASSERT_GLOCK(!test_bit(HIF_WAKEUP, &gh->gh_iflags), gl,);
11354 + spin_lock(&gl->gl_spin);
11356 + if (test_bit(HIF_HOLDER, &gh->gh_iflags))
11358 + else if (list_empty(&gh->gh_list)) {
11359 + if (gh->gh_error == GLR_CANCELED) {
11360 + spin_unlock(&gl->gl_spin);
11361 + current->state = TASK_UNINTERRUPTIBLE;
11362 + schedule_timeout(HZ);
11363 + gfs_glock_nq(gh);
11369 + spin_unlock(&gl->gl_spin);
11375 + * gfs_glock_wait - wait for a lock acquisition that ended in a GLR_ASYNC
11376 + * @gh: the holder structure
11378 + * Returns: 0, GLR_TRYFAILED, or -EXXX on failure
11382 +gfs_glock_wait(struct gfs_holder *gh)
11384 + struct gfs_glock *gl = gh->gh_gl;
11387 + GFS_ASSERT_GLOCK(gh->gh_flags & GL_ASYNC, gl,);
11388 + GFS_ASSERT_GLOCK(!test_bit(HIF_WAKEUP, &gh->gh_iflags), gl,);
11390 + error = glock_wait_internal(gh);
11391 + if (error == GLR_CANCELED) {
11392 + current->state = TASK_UNINTERRUPTIBLE;
11393 + schedule_timeout(HZ);
11394 + gh->gh_flags &= ~GL_ASYNC;
11395 + error = gfs_glock_nq(gh);
11402 + * gfs_glock_dq - dequeue a struct gfs_holder from a glock (release a glock)
11403 + * @gh: the glock holder
11408 +gfs_glock_dq(struct gfs_holder *gh)
11410 + struct gfs_glock *gl = gh->gh_gl;
11411 + struct gfs_glock_operations *glops = gl->gl_ops;
11413 + GFS_ASSERT_GLOCK(!queue_empty(gl, &gh->gh_list), gl,);
11414 + GFS_ASSERT_GLOCK(test_bit(HIF_HOLDER, &gh->gh_iflags), gl,);
11416 + atomic_inc(&gl->gl_sbd->sd_glock_dq_calls);
11418 + if (gh->gh_flags & GL_SYNC)
11419 + set_bit(GLF_SYNC, &gl->gl_flags);
11420 + if (gh->gh_flags & GL_NOCACHE)
11421 + handle_callback(gl, LM_ST_UNLOCKED);
11423 + lock_on_glock(gl);
11425 + spin_lock(&gl->gl_spin);
11426 + list_del_init(&gh->gh_list);
11427 + if (list_empty(&gl->gl_holders)) {
11428 + spin_unlock(&gl->gl_spin);
11430 + if (glops->go_unlock)
11431 + glops->go_unlock(gl, gh->gh_flags);
11433 + if (test_bit(GLF_SYNC, &gl->gl_flags)) {
11434 + if (glops->go_sync)
11435 + glops->go_sync(gl,
11441 + gl->gl_stamp = jiffies;
11443 + spin_lock(&gl->gl_spin);
11446 + clear_bit(GLF_LOCK, &gl->gl_flags);
11447 + run_queue(gl, FALSE);
11448 + spin_unlock(&gl->gl_spin);
11452 + * gfs_glock_prefetch - Try to prefetch a glock
11454 + * @state: the state to prefetch in
11455 + * @flags: flags passed to go_xmote_th()
11460 +gfs_glock_prefetch(struct gfs_glock *gl, unsigned int state, int flags)
11462 + struct gfs_glock_operations *glops = gl->gl_ops;
11464 + GFS_ASSERT_GLOCK(atomic_read(&gl->gl_count) > 0, gl,);
11465 + GFS_ASSERT_GLOCK(state != LM_ST_UNLOCKED, gl,);
11466 + GFS_ASSERT_GLOCK((flags & (LM_FLAG_ANY | GL_EXACT)) !=
11467 + (LM_FLAG_ANY | GL_EXACT), gl,);
11469 + spin_lock(&gl->gl_spin);
11471 + if (test_bit(GLF_LOCK, &gl->gl_flags) ||
11472 + !list_empty(&gl->gl_holders) ||
11473 + !list_empty(&gl->gl_waiters1) ||
11474 + !list_empty(&gl->gl_waiters2) ||
11475 + relaxed_state_ok(gl->gl_state, state, flags)) {
11476 + spin_unlock(&gl->gl_spin);
11480 + set_bit(GLF_PREFETCH, &gl->gl_flags);
11482 + GFS_ASSERT_GLOCK(!gl->gl_req_gh, gl,);
11483 + set_bit(GLF_LOCK, &gl->gl_flags);
11484 + spin_unlock(&gl->gl_spin);
11486 + glops->go_xmote_th(gl, state, flags);
11488 + atomic_inc(&gl->gl_sbd->sd_glock_prefetch_calls);
11492 + * gfs_glock_force_drop - Force a glock to be uncached
11498 +gfs_glock_force_drop(struct gfs_glock *gl)
11500 + struct gfs_holder gh;
11502 + gfs_holder_init(gl, LM_ST_UNLOCKED, 0, &gh);
11503 + set_bit(HIF_DEMOTE, &gh.gh_iflags);
11504 + gh.gh_owner = NULL;
11506 + spin_lock(&gl->gl_spin);
11507 + list_add(&gh.gh_list, &gl->gl_waiters2);
11508 + run_queue(gl, FALSE);
11509 + spin_unlock(&gl->gl_spin);
11511 + wait_for_completion(&gh.gh_wait);
11512 + gfs_holder_uninit(&gh);
11516 + * gfs_glock_nq_init - initialize a holder and enqueue it on a glock
11518 + * @state: the state we're requesting
11519 + * @flags: the modifier flags
11520 + * @gh: the holder structure
11522 + * Returns: 0, GLR_*, or -EXXX
11526 +gfs_glock_nq_init(struct gfs_glock *gl, unsigned int state, int flags,
11527 + struct gfs_holder *gh)
11531 + gfs_holder_init(gl, state, flags, gh);
11533 + error = gfs_glock_nq(gh);
11535 + gfs_holder_uninit(gh);
11541 + * gfs_glock_dq_uninit - dequeue a holder from a glock and initialize it
11542 + * @gh: the holder structure
11547 +gfs_glock_dq_uninit(struct gfs_holder *gh)
11549 + gfs_glock_dq(gh);
11550 + gfs_holder_uninit(gh);
11554 + * gfs_glock_nq_num - acquire a glock based on lock number
11555 + * @sdp: the filesystem
11556 + * @number: the lock number
11557 + * @glops: the glock operations for the type of glock
11558 + * @state: the state to acquire the glock in
11559 + * @flags: modifier flags for the acquisition
11560 + * @gh: the struct gfs_holder
11562 + * Returns: 0 on success, -EXXX on failure
11566 +gfs_glock_nq_num(struct gfs_sbd *sdp,
11567 + uint64_t number, struct gfs_glock_operations *glops,
11568 + unsigned int state, int flags, struct gfs_holder *gh)
11570 + struct gfs_glock *gl;
11573 + error = gfs_glock_get(sdp, number, glops, CREATE, &gl);
11575 + error = gfs_glock_nq_init(gl, state, flags, gh);
11583 + * glock_compare - Compare two struct gfs_holder structures for sorting
11584 + * @arg_a: the first structure
11585 + * @arg_b: the second structure
11590 +glock_compare(const void *arg_a, const void *arg_b)
11592 + struct gfs_holder *gh_a = *(struct gfs_holder **)arg_a;
11593 + struct gfs_holder *gh_b = *(struct gfs_holder **)arg_b;
11594 + struct lm_lockname *a = &gh_a->gh_gl->gl_name;
11595 + struct lm_lockname *b = &gh_b->gh_gl->gl_name;
11598 + if (a->ln_number > b->ln_number)
11600 + else if (a->ln_number < b->ln_number)
11603 + if (gh_a->gh_state == LM_ST_SHARED &&
11604 + gh_b->gh_state == LM_ST_EXCLUSIVE)
11606 + else if (!(gh_a->gh_flags & GL_LOCAL_EXCL) &&
11607 + (gh_b->gh_flags & GL_LOCAL_EXCL))
11615 + * nq_m_sync - synchronously acquire more than one glock in deadlock free order
11616 + * @num_gh: the number of structures
11617 + * @ghs: an array of struct gfs_holder structures
11619 + * Returns: 0 on success (all glocks acquired), -EXXX on failure (no glocks acquired)
11623 +nq_m_sync(unsigned int num_gh, struct gfs_holder *ghs)
11625 + struct gfs_holder *p[num_gh];
11629 + for (x = 0; x < num_gh; x++)
11632 + gfs_sort(p, num_gh, sizeof(struct gfs_holder *), glock_compare);
11634 + for (x = 0; x < num_gh; x++) {
11635 + p[x]->gh_flags &= ~(LM_FLAG_TRY | GL_ASYNC);
11637 + error = gfs_glock_nq(p[x]);
11640 + gfs_glock_dq(p[x]);
11649 + * gfs_glock_nq_m - acquire multiple glocks
11650 + * @num_gh: the number of structures
11651 + * @ghs: an array of struct gfs_holder structures
11653 + * Figure out how big an impact this function has. Either:
11654 + * 1) Replace this code with code that calls gfs_glock_prefetch()
11655 + * 2) Forget async stuff and just call nq_m_sync()
11656 + * 3) Leave it like it is
11658 + * Returns: 0 on success (all glocks acquired), -EXXX on failure (no glocks acquired)
11662 +gfs_glock_nq_m(unsigned int num_gh, struct gfs_holder *ghs)
11666 + int borked = FALSE, serious = 0;
11669 + GFS_ASSERT(num_gh,);
11671 + if (num_gh == 1) {
11672 + ghs->gh_flags &= ~(LM_FLAG_TRY | GL_ASYNC);
11673 + error = gfs_glock_nq(ghs);
11677 + if (!GFS_ASYNC_LM(ghs->gh_gl->gl_sbd)) {
11678 + error = nq_m_sync(num_gh, ghs);
11682 + for (x = 0; x < num_gh; x++) {
11683 + ghs[x].gh_flags |= LM_FLAG_TRY | GL_ASYNC;
11684 + gfs_glock_nq(&ghs[x]);
11687 + for (x = 0; x < num_gh; x++) {
11688 + error = e[x] = glock_wait_internal(&ghs[x]);
11691 + if (error != GLR_TRYFAILED && error != GLR_CANCELED)
11699 + for (x = 0; x < num_gh; x++)
11701 + gfs_glock_dq(&ghs[x]);
11706 + for (x = 0; x < num_gh; x++)
11707 + gfs_holder_reinit(ghs[x].gh_state, ghs[x].gh_flags,
11709 + error = nq_m_sync(num_gh, ghs);
11716 + * gfs_glock_dq_m - release multiple glocks
11717 + * @num_gh: the number of structures
11718 + * @ghs: an array of struct gfs_holder structures
11723 +gfs_glock_dq_m(unsigned int num_gh, struct gfs_holder *ghs)
11727 + for (x = 0; x < num_gh; x++)
11728 + gfs_glock_dq(&ghs[x]);
11732 + * gfs_glock_prefetch_num - prefetch a glock based on lock number
11733 + * @sdp: the filesystem
11734 + * @number: the lock number
11735 + * @glops: the glock operations for the type of glock
11736 + * @state: the state to acquire the glock in
11737 + * @flags: modifier flags for the acquisition
11739 + * Returns: 0 on success, -EXXX on failure
11743 +gfs_glock_prefetch_num(struct gfs_sbd *sdp,
11744 + uint64_t number, struct gfs_glock_operations *glops,
11745 + unsigned int state, int flags)
11747 + struct gfs_glock *gl;
11750 + if (atomic_read(&sdp->sd_reclaim_count) < sdp->sd_tune.gt_reclaim_limit) {
11751 + error = gfs_glock_get(sdp, number, glops, CREATE, &gl);
11753 + gfs_glock_prefetch(gl, state, flags);
11760 + * gfs_lvb_hold - attach a LVB to a glock
11761 + * @gl: The glock in question
11766 +gfs_lvb_hold(struct gfs_glock *gl)
11770 + GFS_ASSERT_GLOCK(atomic_read(&gl->gl_count) > 0, gl,);
11772 + lock_on_glock(gl);
11774 + atomic_inc(&gl->gl_lvb_count);
11775 + if (atomic_read(&gl->gl_lvb_count) == 1) {
11777 + GFS_ASSERT_GLOCK(!gl->gl_lvb, gl,);
11778 + error = gl->gl_sbd->sd_lockstruct.ls_ops->lm_hold_lvb(gl->gl_lock,
11782 + atomic_dec(&gl->gl_lvb_count);
11786 + unlock_on_glock(gl);
11792 + * gfs_lvb_unhold - detach a LVB from a glock
11793 + * @gl: The glock in question
11798 +gfs_lvb_unhold(struct gfs_glock *gl)
11802 + lock_on_glock(gl);
11804 + GFS_ASSERT_GLOCK(atomic_read(&gl->gl_lvb_count), gl,);
11805 + if (atomic_dec_and_test(&gl->gl_lvb_count)) {
11806 + GFS_ASSERT_GLOCK(gl->gl_lvb, gl,);
11807 + gl->gl_sbd->sd_lockstruct.ls_ops->lm_unhold_lvb(gl->gl_lock,
11809 + gl->gl_lvb = NULL;
11813 + unlock_on_glock(gl);
11819 + * gfs_lvb_sync - sync a LVB
11820 + * @gl: The glock in question
11825 +gfs_lvb_sync(struct gfs_glock *gl)
11827 + GFS_ASSERT_GLOCK(atomic_read(&gl->gl_lvb_count), gl,);
11829 + lock_on_glock(gl);
11831 + GFS_ASSERT_GLOCK(gfs_glock_is_held_excl(gl), gl,);
11832 + gl->gl_sbd->sd_lockstruct.ls_ops->lm_sync_lvb(gl->gl_lock, gl->gl_lvb);
11834 + unlock_on_glock(gl);
11838 + * gfs_glock_cb - Callback used by locking module
11839 + * @fsdata: Pointer to the superblock
11840 + * @type: Type of callback
11841 + * @data: Type dependent data pointer
11843 + * Called by the locking module when it wants to tell us something.
11844 + * Either we need to drop a lock or another client expired.
11848 +gfs_glock_cb(lm_fsdata_t * fsdata, unsigned int type, void *data)
11850 + struct gfs_sbd *sdp = (struct gfs_sbd *)fsdata;
11851 + struct gfs_glock *gl;
11852 + struct lm_lockname *name = NULL;
11853 + unsigned int state = 0;
11854 + struct lm_async_cb *async;
11855 + unsigned int journal;
11857 + atomic_inc(&sdp->sd_lm_callbacks);
11860 + case LM_CB_NEED_E:
11861 + name = (struct lm_lockname *)data;
11862 + state = LM_ST_UNLOCKED;
11865 + case LM_CB_NEED_D:
11866 + name = (struct lm_lockname *)data;
11867 + state = LM_ST_DEFERRED;
11870 + case LM_CB_NEED_S:
11871 + name = (struct lm_lockname *)data;
11872 + state = LM_ST_SHARED;
11875 + case LM_CB_ASYNC:
11876 + async = (struct lm_async_cb *)data;
11878 + gl = gfs_glock_find(sdp, &async->lc_name);
11879 + GFS_ASSERT_SBD(gl, sdp,);
11880 + GFS_ASSERT_GLOCK(gl->gl_req_bh, gl,);
11881 + gl->gl_req_bh(gl, async->lc_ret);
11886 + case LM_CB_NEED_RECOVERY:
11887 + journal = *(unsigned int *)data;
11889 + gfs_add_dirty_j(sdp, journal);
11891 + if (test_bit(SDF_RECOVERD_RUN, &sdp->sd_flags))
11892 + wake_up_process(sdp->sd_recoverd_process);
11896 + case LM_CB_DROPLOCKS:
11897 + gfs_gl_hash_clear(sdp, FALSE);
11898 + gfs_quota_scan(sdp);
11902 + GFS_ASSERT_SBD(FALSE, sdp,
11903 + printk("type = %u\n", type););
11908 + gl = gfs_glock_find(sdp, name);
11910 + if (gl->gl_ops->go_callback)
11911 + gl->gl_ops->go_callback(gl, state);
11912 + handle_callback(gl, state);
11913 + spin_lock(&gl->gl_spin);
11914 + run_queue(gl, FALSE);
11915 + spin_unlock(&gl->gl_spin);
11922 + * gfs_try_toss_inode - try to remove a particular inode from GFS' cache
11923 + * @sdp: the filesystem
11924 + * @inum: the inode number
11929 +gfs_try_toss_inode(struct gfs_sbd *sdp, struct gfs_inum *inum)
11931 + struct gfs_glock *gl;
11932 + struct gfs_inode *ip;
11935 + error = gfs_glock_get(sdp,
11936 + inum->no_formal_ino, &gfs_inode_glops,
11938 + if (error || !gl)
11941 + if (!trylock_on_glock(gl))
11944 + if (!queue_empty(gl, &gl->gl_holders))
11951 + if (atomic_read(&ip->i_count))
11954 + gfs_inode_destroy(ip);
11957 + unlock_on_glock(gl);
11964 + * gfs_iopen_go_callback - Try to kick the inode/vnode associated with an iopen glock from memory
11965 + * @io_gl: the iopen glock
11966 + * @state: the state into which the glock should be put
11971 +gfs_iopen_go_callback(struct gfs_glock *io_gl, unsigned int state)
11973 + struct gfs_glock *i_gl;
11974 + struct gfs_inode *ip;
11976 + if (state != LM_ST_UNLOCKED)
11979 + spin_lock(&io_gl->gl_spin);
11980 + i_gl = gl2gl(io_gl);
11982 + glock_hold(i_gl);
11983 + spin_unlock(&io_gl->gl_spin);
11985 + spin_unlock(&io_gl->gl_spin);
11989 + if (trylock_on_glock(i_gl)) {
11990 + if (queue_empty(i_gl, &i_gl->gl_holders)) {
11991 + ip = gl2ip(i_gl);
11993 + gfs_try_toss_vnode(ip);
11994 + unlock_on_glock(i_gl);
11995 + gfs_glock_schedule_for_reclaim(i_gl);
11999 + unlock_on_glock(i_gl);
12007 + * demote_ok - check to see if it's ok to unlock a glock
12010 + * Returns: TRUE if it's ok
12014 +demote_ok(struct gfs_glock *gl)
12016 + struct gfs_sbd *sdp = gl->gl_sbd;
12017 + struct gfs_glock_operations *glops = gl->gl_ops;
12018 + int demote = TRUE;
12020 + if (test_bit(GLF_STICKY, &gl->gl_flags))
12022 + else if (test_bit(GLF_PREFETCH, &gl->gl_flags))
12023 + demote = time_after_eq(jiffies,
12025 + sdp->sd_tune.gt_prefetch_secs * HZ);
12026 + else if (glops->go_demote_ok)
12027 + demote = glops->go_demote_ok(gl);
12033 + * gfs_glock_schedule_for_reclaim - Add a glock to the reclaim list
12039 +gfs_glock_schedule_for_reclaim(struct gfs_glock *gl)
12041 + struct gfs_sbd *sdp = gl->gl_sbd;
12043 + spin_lock(&sdp->sd_reclaim_lock);
12044 + if (list_empty(&gl->gl_reclaim)) {
12046 + list_add(&gl->gl_reclaim, &sdp->sd_reclaim_list);
12047 + atomic_inc(&sdp->sd_reclaim_count);
12049 + spin_unlock(&sdp->sd_reclaim_lock);
12051 + wake_up(&sdp->sd_reclaim_wchan);
12055 + * gfs_reclaim_glock - process a glock on the reclaim list
12056 + * @sdp: the filesystem
12061 +gfs_reclaim_glock(struct gfs_sbd *sdp)
12063 + struct gfs_glock *gl;
12064 + struct gfs_gl_hash_bucket *bucket;
12066 + spin_lock(&sdp->sd_reclaim_lock);
12068 + if (list_empty(&sdp->sd_reclaim_list)) {
12069 + spin_unlock(&sdp->sd_reclaim_lock);
12073 + gl = list_entry(sdp->sd_reclaim_list.next,
12074 + struct gfs_glock, gl_reclaim);
12075 + list_del_init(&gl->gl_reclaim);
12077 + spin_unlock(&sdp->sd_reclaim_lock);
12079 + atomic_dec(&sdp->sd_reclaim_count);
12080 + atomic_inc(&sdp->sd_reclaimed);
12082 + if (trylock_on_glock(gl)) {
12083 + if (queue_empty(gl, &gl->gl_holders)) {
12084 + if (gl->gl_ops == &gfs_inode_glops) {
12085 + struct gfs_inode *ip = gl2ip(gl);
12086 + if (ip && !atomic_read(&ip->i_count))
12087 + gfs_inode_destroy(ip);
12089 + if (gl->gl_state != LM_ST_UNLOCKED &&
12091 + handle_callback(gl, LM_ST_UNLOCKED);
12093 + unlock_on_glock(gl);
12096 + bucket = gl->gl_bucket;
12098 + write_lock(&bucket->hb_lock);
12099 + if (atomic_read(&gl->gl_count) == 1) {
12100 + list_del_init(&gl->gl_list);
12101 + write_unlock(&bucket->hb_lock);
12104 + write_unlock(&bucket->hb_lock);
12110 + * examine_bucket - Call a function for each glock in a hash bucket
12111 + * @examiner: the function
12112 + * @sdp: the filesystem
12113 + * @bucket: the bucket
12115 + * Returns: TRUE if the bucket has entries
12119 +examine_bucket(glock_examiner examiner,
12120 + struct gfs_sbd *sdp, struct gfs_gl_hash_bucket *bucket)
12122 + struct glock_plug plug;
12123 + struct list_head *tmp;
12124 + struct gfs_glock *gl;
12127 + memset(&plug.gl_flags, 0, sizeof(unsigned long));
12128 + set_bit(GLF_PLUG, &plug.gl_flags);
12130 + write_lock(&bucket->hb_lock);
12131 + list_add(&plug.gl_list, &bucket->hb_list);
12132 + write_unlock(&bucket->hb_lock);
12135 + write_lock(&bucket->hb_lock);
12138 + tmp = plug.gl_list.next;
12139 + if (tmp == &bucket->hb_list) {
12140 + list_del(&plug.gl_list);
12141 + entries = !list_empty(&bucket->hb_list);
12142 + write_unlock(&bucket->hb_lock);
12145 + gl = list_entry(tmp, struct gfs_glock, gl_list);
12147 + list_move(&plug.gl_list, &gl->gl_list);
12149 + if (test_bit(GLF_PLUG, &gl->gl_flags))
12157 + write_unlock(&bucket->hb_lock);
12164 + * scan_glock - look at a glock and see if we can do stuff to it
12165 + * @gl: the glock to look at
12170 +scan_glock(struct gfs_glock *gl)
12172 + if (trylock_on_glock(gl)) {
12173 + if (queue_empty(gl, &gl->gl_holders)) {
12174 + if (gl->gl_ops == &gfs_inode_glops) {
12175 + struct gfs_inode *ip = gl2ip(gl);
12176 + if (ip && !atomic_read(&ip->i_count)) {
12177 + unlock_on_glock(gl);
12178 + gfs_glock_schedule_for_reclaim(gl);
12182 + if (gl->gl_state != LM_ST_UNLOCKED &&
12184 + unlock_on_glock(gl);
12185 + gfs_glock_schedule_for_reclaim(gl);
12190 + unlock_on_glock(gl);
12198 + * gfs_scand_internal - Look for glocks and inodes to toss from memory
12199 + * @sdp: the filesystem
12204 +gfs_scand_internal(struct gfs_sbd *sdp)
12208 + for (x = 0; x < GFS_GL_HASH_SIZE; x++) {
12209 + examine_bucket(scan_glock, sdp, &sdp->sd_gl_hash[x]);
12215 + * clear_glock - look at a glock and see if we can do stuff to it
12216 + * @gl: the glock to look at
12217 + * @timeout: demote locks left unused for longer than this many seconds
12222 +clear_glock(struct gfs_glock *gl)
12224 + struct gfs_sbd *sdp = gl->gl_sbd;
12225 + struct gfs_gl_hash_bucket *bucket = gl->gl_bucket;
12227 + spin_lock(&sdp->sd_reclaim_lock);
12228 + if (!list_empty(&gl->gl_reclaim)) {
12229 + list_del_init(&gl->gl_reclaim);
12230 + atomic_dec(&sdp->sd_reclaim_count);
12233 + spin_unlock(&sdp->sd_reclaim_lock);
12235 + if (trylock_on_glock(gl)) {
12236 + if (queue_empty(gl, &gl->gl_holders)) {
12237 + if (gl->gl_ops == &gfs_inode_glops) {
12238 + struct gfs_inode *ip = gl2ip(gl);
12239 + if (ip && !atomic_read(&ip->i_count))
12240 + gfs_inode_destroy(ip);
12242 + if (gl->gl_state != LM_ST_UNLOCKED)
12243 + handle_callback(gl, LM_ST_UNLOCKED);
12246 + unlock_on_glock(gl);
12249 + write_lock(&bucket->hb_lock);
12250 + if (atomic_read(&gl->gl_count) == 1) {
12251 + list_del_init(&gl->gl_list);
12252 + write_unlock(&bucket->hb_lock);
12255 + write_unlock(&bucket->hb_lock);
12261 + * gfs_gl_hash_clear - Empty out the glock hash table
12262 + * @sdp: the filesystem
12263 + * @wait: wait until it's all gone
12268 +gfs_gl_hash_clear(struct gfs_sbd *sdp, int wait)
12279 + for (x = 0; x < GFS_GL_HASH_SIZE; x++)
12280 + if (examine_bucket(clear_glock, sdp, &sdp->sd_gl_hash[x]))
12283 + if (!wait || !cont)
12286 + if (time_after_eq(jiffies, t + sdp->sd_tune.gt_stall_secs * HZ)) {
12287 + printk("GFS: fsid=%s: Unmount seems to be stalled. Dumping lock state...\n",
12289 + gfs_dump_lockstate(sdp, NULL);
12293 + invalidate_inodes(sdp->sd_vfs);
12299 + * Diagnostic routines to help debug distributed deadlock
12303 + * dump_holder - print information about a glock holder
12304 + * @str: a string naming the type of holder
12305 + * @gh: the glock holder
12306 + * @buf: the buffer
12307 + * @size: the size of the buffer
12308 + * @count: where we are in the buffer
12310 + * Returns: 0 on success, -ENOBUFS when we run out of space
12314 +dump_holder(char *str, struct gfs_holder *gh,
12315 + char *buf, unsigned int size, unsigned int *count)
12320 + gfs_sprintf(" %s\n", str);
12321 + gfs_sprintf(" owner = %ld\n",
12322 + (gh->gh_owner) ? (long)gh->gh_owner->pid : -1);
12323 + gfs_sprintf(" gh_state = %u\n", gh->gh_state);
12324 + gfs_sprintf(" gh_flags =");
12325 + for (x = 0; x < 32; x++)
12326 + if (gh->gh_flags & (1 << x))
12327 + gfs_sprintf(" %u", x);
12328 + gfs_sprintf(" \n");
12329 + gfs_sprintf(" error = %d\n", gh->gh_error);
12330 + gfs_sprintf(" gh_iflags =");
12331 + for (x = 0; x < 32; x++)
12332 + if (test_bit(x, &gh->gh_iflags))
12333 + gfs_sprintf(" %u", x);
12334 + gfs_sprintf(" \n");
12341 + * dump_inode - print information about an inode
12343 + * @buf: the buffer
12344 + * @size: the size of the buffer
12345 + * @count: where we are in the buffer
12347 + * Returns: 0 on success, -ENOBUFS when we run out of space
12351 +dump_inode(struct gfs_inode *ip,
12352 + char *buf, unsigned int size, unsigned int *count)
12357 + gfs_sprintf(" Inode:\n");
12358 + gfs_sprintf(" num = %" PRIu64 "/%" PRIu64 "\n",
12359 + ip->i_num.no_formal_ino, ip->i_num.no_addr);
12360 + gfs_sprintf(" type = %u\n", ip->i_di.di_type);
12361 + gfs_sprintf(" i_count = %d\n", atomic_read(&ip->i_count));
12362 + gfs_sprintf(" i_flags =");
12363 + for (x = 0; x < 32; x++)
12364 + if (test_bit(x, &ip->i_flags))
12365 + gfs_sprintf(" %u", x);
12366 + gfs_sprintf(" \n");
12367 + gfs_sprintf(" vnode = %s\n", (ip->i_vnode) ? "yes" : "no");
12374 + * dump_glock - print information about a glock
12376 + * @buf: the buffer
12377 + * @size: the size of the buffer
12378 + * @count: where we are in the buffer
12380 + * Returns: 0 on success, -ENOBUFS when we run out of space
12384 +dump_glock(struct gfs_glock *gl,
12385 + char *buf, unsigned int size, unsigned int *count)
12387 + struct list_head *head, *tmp;
12388 + struct gfs_holder *gh;
12392 + spin_lock(&gl->gl_spin);
12394 + gfs_sprintf("Glock (%u, %" PRIu64 ")\n",
12395 + gl->gl_name.ln_type,
12396 + gl->gl_name.ln_number);
12397 + gfs_sprintf(" gl_flags =");
12398 + for (x = 0; x < 32; x++)
12399 + if (test_bit(x, &gl->gl_flags))
12400 + gfs_sprintf(" %u", x);
12401 + gfs_sprintf(" \n");
12402 + gfs_sprintf(" gl_count = %d\n", atomic_read(&gl->gl_count));
12403 + gfs_sprintf(" gl_state = %u\n", gl->gl_state);
12404 + gfs_sprintf(" lvb_count = %d\n", atomic_read(&gl->gl_lvb_count));
12405 + gfs_sprintf(" object = %s\n", (gl->gl_object) ? "yes" : "no");
12406 + if (gl->gl_aspace)
12407 + gfs_sprintf(" aspace = %lu\n",
12408 + gl->gl_aspace->i_mapping->nrpages);
12410 + gfs_sprintf(" aspace = no\n");
12411 + gfs_sprintf(" reclaim = %s\n",
12412 + (list_empty(&gl->gl_reclaim)) ? "no" : "yes");
12413 + if (gl->gl_req_gh) {
12414 + error = dump_holder("Request", gl->gl_req_gh, buf, size, count);
12418 + for (head = &gl->gl_holders, tmp = head->next;
12420 + tmp = tmp->next) {
12421 + gh = list_entry(tmp, struct gfs_holder, gh_list);
12422 + error = dump_holder("Holder", gh, buf, size, count);
12426 + for (head = &gl->gl_waiters1, tmp = head->next;
12428 + tmp = tmp->next) {
12429 + gh = list_entry(tmp, struct gfs_holder, gh_list);
12430 + error = dump_holder("Waiter1", gh, buf, size, count);
12434 + for (head = &gl->gl_waiters2, tmp = head->next;
12436 + tmp = tmp->next) {
12437 + gh = list_entry(tmp, struct gfs_holder, gh_list);
12438 + error = dump_holder("Waiter2", gh, buf, size, count);
12442 + if (gl->gl_ops == &gfs_inode_glops && gl2ip(gl)) {
12443 + if (!test_bit(GLF_LOCK, &gl->gl_flags) &&
12444 + list_empty(&gl->gl_holders)) {
12445 + error = dump_inode(gl2ip(gl), buf, size, count);
12449 + gfs_sprintf(" Inode: busy\n");
12453 + spin_unlock(&gl->gl_spin);
12459 + * gfs_dump_lockstate - print out the current lockstate
12460 + * @sdp: the filesystem
12461 + * @ub: the buffer to copy the information into
12463 + * If @ub is NULL, dump the lockstate to the console.
12468 +gfs_dump_lockstate(struct gfs_sbd *sdp, struct gfs_user_buffer *ub)
12470 + struct gfs_gl_hash_bucket *bucket;
12471 + struct list_head *tmp, *head;
12472 + struct gfs_glock *gl;
12473 + char *buf = NULL;
12474 + unsigned int size = sdp->sd_tune.gt_lockdump_size;
12475 + unsigned int x, count;
12479 + buf = kmalloc(size, GFP_KERNEL);
12484 + for (x = 0; x < GFS_GL_HASH_SIZE; x++) {
12485 + bucket = &sdp->sd_gl_hash[x];
12488 + read_lock(&bucket->hb_lock);
12490 + for (head = &bucket->hb_list, tmp = head->next;
12492 + tmp = tmp->next) {
12493 + gl = list_entry(tmp, struct gfs_glock, gl_list);
12495 + if (test_bit(GLF_PLUG, &gl->gl_flags))
12498 + error = dump_glock(gl, buf, size, &count);
12503 + read_unlock(&bucket->hb_lock);
12509 + if (ub->ub_count + count > ub->ub_size) {
12513 + if (copy_to_user(ub->ub_data + ub->ub_count, buf, count)) {
12517 + ub->ub_count += count;
12526 diff -urN linux-orig/fs/gfs/glock.h linux-patched/fs/gfs/glock.h
12527 --- linux-orig/fs/gfs/glock.h 1969-12-31 18:00:00.000000000 -0600
12528 +++ linux-patched/fs/gfs/glock.h 2004-06-30 13:27:49.342711362 -0500
12530 +/******************************************************************************
12531 +*******************************************************************************
12533 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
12534 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
12536 +** This copyrighted material is made available to anyone wishing to use,
12537 +** modify, copy, or redistribute it subject to the terms and conditions
12538 +** of the GNU General Public License v.2.
12540 +*******************************************************************************
12541 +******************************************************************************/
12543 +#ifndef __GFS_GLOCK_DOT_H__
12544 +#define __GFS_GLOCK_DOT_H__
12547 +#define LM_FLAG_TRY (0x00000001)
12548 +#define LM_FLAG_TRY_1CB (0x00000002)
12549 +#define LM_FLAG_NOEXP (0x00000004)
12550 +#define LM_FLAG_ANY (0x00000008)
12551 +#define LM_FLAG_PRIORITY (0x00000010)
12553 +#define GL_LOCAL_EXCL (0x00000020)
12554 +#define GL_ASYNC (0x00000040)
12555 +#define GL_EXACT (0x00000080)
12556 +#define GL_SKIP (0x00000100)
12557 +#define GL_ATIME (0x00000200)
12558 +#define GL_NOCACHE (0x00000400)
12559 +#define GL_SYNC (0x00000800)
12561 +#define GLR_TRYFAILED (13)
12562 +#define GLR_CANCELED (14)
12564 +static __inline__ int
12565 +gfs_glock_is_locked_by_me(struct gfs_glock *gl)
12567 + struct list_head *tmp, *head;
12568 + struct gfs_holder *gh;
12569 + int locked = FALSE;
12571 + spin_lock(&gl->gl_spin);
12572 + for (head = &gl->gl_holders, tmp = head->next;
12574 + tmp = tmp->next) {
12575 + gh = list_entry(tmp, struct gfs_holder, gh_list);
12576 + if (gh->gh_owner == current) {
12581 + spin_unlock(&gl->gl_spin);
12585 +static __inline__ int
12586 +gfs_glock_is_held_excl(struct gfs_glock *gl)
12588 + return (gl->gl_state == LM_ST_EXCLUSIVE);
12590 +static __inline__ int
12591 +gfs_glock_is_held_dfrd(struct gfs_glock *gl)
12593 + return (gl->gl_state == LM_ST_DEFERRED);
12595 +static __inline__ int
12596 +gfs_glock_is_held_shrd(struct gfs_glock *gl)
12598 + return (gl->gl_state == LM_ST_SHARED);
12601 +#define GFS_ASYNC_LM(sdp) ((sdp)->sd_lockstruct.ls_flags & LM_LSFLAG_ASYNC)
12603 +struct gfs_glock *gfs_glock_find(struct gfs_sbd *sdp,
12604 + struct lm_lockname *name);
12605 +int gfs_glock_get(struct gfs_sbd *sdp,
12606 + uint64_t number, struct gfs_glock_operations *glops,
12607 + int create, struct gfs_glock **glp);
12608 +void gfs_glock_hold(struct gfs_glock *gl);
12609 +void gfs_glock_put(struct gfs_glock *gl);
12611 +void gfs_holder_init(struct gfs_glock *gl, unsigned int state, int flags,
12612 + struct gfs_holder *gh);
12613 +void gfs_holder_reinit(unsigned int state, int flags, struct gfs_holder *gh);
12614 +void gfs_holder_uninit(struct gfs_holder *gh);
12615 +struct gfs_holder *gfs_holder_get(struct gfs_glock *gl, unsigned int state,
12617 +void gfs_holder_put(struct gfs_holder *gh);
12619 +void gfs_glock_xmote_th(struct gfs_glock *gl, unsigned int state, int flags);
12620 +void gfs_glock_drop_th(struct gfs_glock *gl);
12622 +int gfs_glock_nq(struct gfs_holder *gh);
12623 +int gfs_glock_poll(struct gfs_holder *gh);
12624 +int gfs_glock_wait(struct gfs_holder *gh);
12625 +void gfs_glock_dq(struct gfs_holder *gh);
12627 +void gfs_glock_prefetch(struct gfs_glock *gl, unsigned int state, int flags);
12628 +void gfs_glock_force_drop(struct gfs_glock *gl);
12630 +int gfs_glock_nq_init(struct gfs_glock *gl, unsigned int state, int flags,
12631 + struct gfs_holder *gh);
12632 +void gfs_glock_dq_uninit(struct gfs_holder *gh);
12633 +int gfs_glock_nq_num(struct gfs_sbd *sdp,
12634 + uint64_t number, struct gfs_glock_operations *glops,
12635 + unsigned int state, int flags, struct gfs_holder *gh);
12637 +int gfs_glock_nq_m(unsigned int num_gh, struct gfs_holder *ghs);
12638 +void gfs_glock_dq_m(unsigned int num_gh, struct gfs_holder *ghs);
12640 +void gfs_glock_prefetch_num(struct gfs_sbd *sdp,
12641 + uint64_t number, struct gfs_glock_operations *glops,
12642 + unsigned int state, int flags);
12644 +/* Lock Value Block functions */
12646 +int gfs_lvb_hold(struct gfs_glock *gl);
12647 +void gfs_lvb_unhold(struct gfs_glock *gl);
12648 +void gfs_lvb_sync(struct gfs_glock *gl);
12650 +void gfs_glock_cb(lm_fsdata_t * fsdata, unsigned int type, void *data);
12652 +void gfs_try_toss_inode(struct gfs_sbd *sdp, struct gfs_inum *inum);
12653 +void gfs_iopen_go_callback(struct gfs_glock *gl, unsigned int state);
12655 +void gfs_glock_schedule_for_reclaim(struct gfs_glock *gl);
12656 +void gfs_reclaim_glock(struct gfs_sbd *sdp);
12658 +void gfs_scand_internal(struct gfs_sbd *sdp);
12659 +void gfs_gl_hash_clear(struct gfs_sbd *sdp, int wait);
12661 +int gfs_dump_lockstate(struct gfs_sbd *sdp, struct gfs_user_buffer *ub);
12663 +#endif /* __GFS_GLOCK_DOT_H__ */
12664 diff -urN linux-orig/fs/gfs/glops.c linux-patched/fs/gfs/glops.c
12665 --- linux-orig/fs/gfs/glops.c 1969-12-31 18:00:00.000000000 -0600
12666 +++ linux-patched/fs/gfs/glops.c 2004-06-30 13:27:49.342711362 -0500
12668 +/******************************************************************************
12669 +*******************************************************************************
12671 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
12672 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
12674 +** This copyrighted material is made available to anyone wishing to use,
12675 +** modify, copy, or redistribute it subject to the terms and conditions
12676 +** of the GNU General Public License v.2.
12678 +*******************************************************************************
12679 +******************************************************************************/
12681 +#include <linux/sched.h>
12682 +#include <linux/slab.h>
12683 +#include <linux/smp_lock.h>
12684 +#include <linux/spinlock.h>
12685 +#include <asm/semaphore.h>
12686 +#include <linux/completion.h>
12687 +#include <linux/buffer_head.h>
12691 +#include "glock.h"
12692 +#include "glops.h"
12693 +#include "inode.h"
12696 +#include "recovery.h"
12700 + * meta_go_sync - sync out the metadata for this glock
12707 +meta_go_sync(struct gfs_glock *gl, int flags)
12709 + if (!(flags & DIO_METADATA))
12712 + if (test_bit(GLF_DIRTY, &gl->gl_flags)) {
12713 + gfs_log_flush_glock(gl);
12714 + gfs_sync_buf(gl, flags | DIO_START | DIO_WAIT | DIO_CHECK);
12717 + clear_bit(GLF_DIRTY, &gl->gl_flags);
12718 + clear_bit(GLF_SYNC, &gl->gl_flags);
12722 + * meta_go_inval - invalidate the metadata for this glock
12729 +meta_go_inval(struct gfs_glock *gl, int flags)
12731 + if (!(flags & DIO_METADATA))
12734 + gfs_inval_buf(gl);
12739 + * meta_go_demote_ok - check to see if it's ok to unlock a glock
12742 + * Returns: TRUE if it's ok
12746 +meta_go_demote_ok(struct gfs_glock *gl)
12748 + return (gl->gl_aspace->i_mapping->nrpages) ? FALSE : TRUE;
12752 + * inode_go_xmote_th - promote/demote a glock
12754 + * @state: the requested state
12755 + * @flags: the flags passed into gfs_glock()
12760 +inode_go_xmote_th(struct gfs_glock *gl, unsigned int state, int flags)
12762 + if (gl->gl_state != LM_ST_UNLOCKED)
12763 + gfs_inval_pte(gl);
12764 + gfs_glock_xmote_th(gl, state, flags);
12768 + * inode_go_xmote_bh - promote/demote a glock
12771 + * This will be really broken when (no_formal_ino != no_addr)
12776 +inode_go_xmote_bh(struct gfs_glock *gl)
12778 + struct gfs_sbd *sdp = gl->gl_sbd;
12779 + struct gfs_holder *gh = gl->gl_req_gh;
12780 + struct buffer_head *bh;
12783 + if (gl->gl_state != LM_ST_UNLOCKED &&
12784 + (!gh || !(gh->gh_flags & GL_SKIP))) {
12785 + error = gfs_dread(sdp, gl->gl_name.ln_number, gl, DIO_START, &bh);
12792 + * inode_go_drop_th - unlock a glock
12798 +inode_go_drop_th(struct gfs_glock *gl)
12800 + gfs_inval_pte(gl);
12801 + gfs_glock_drop_th(gl);
12805 + * inode_go_sync - Sync the dirty data for an inode glock
12812 +inode_go_sync(struct gfs_glock *gl, int flags)
12814 + int meta = (flags & DIO_METADATA);
12815 + int data = (flags & DIO_DATA);
12817 + if (test_bit(GLF_DIRTY, &gl->gl_flags)) {
12818 + if (meta && data) {
12819 + gfs_sync_page(gl, flags | DIO_START);
12820 + gfs_log_flush_glock(gl);
12821 + gfs_sync_buf(gl, flags | DIO_START | DIO_WAIT | DIO_CHECK);
12822 + gfs_sync_page(gl, flags | DIO_WAIT | DIO_CHECK);
12823 + } else if (meta) {
12824 + gfs_log_flush_glock(gl);
12825 + gfs_sync_buf(gl, flags | DIO_START | DIO_WAIT | DIO_CHECK);
12827 + gfs_sync_page(gl, flags | DIO_START | DIO_WAIT | DIO_CHECK);
12830 + if (meta && data) {
12831 + if (!(flags & DIO_INVISIBLE))
12832 + clear_bit(GLF_DIRTY, &gl->gl_flags);
12833 + clear_bit(GLF_SYNC, &gl->gl_flags);
12838 + * inode_go_inval - prepare an inode glock to be released
12845 +inode_go_inval(struct gfs_glock *gl, int flags)
12847 + int meta = (flags & DIO_METADATA);
12848 + int data = (flags & DIO_DATA);
12851 + gfs_inval_buf(gl);
12855 + gfs_inval_page(gl);
12859 + * inode_go_demote_ok - check to see if it's ok to unlock a glock
12862 + * Returns: TRUE if it's ok
12866 +inode_go_demote_ok(struct gfs_glock *gl)
12868 + struct gfs_sbd *sdp = gl->gl_sbd;
12869 + int demote = FALSE;
12871 + if (!gl2ip(gl) && !gl->gl_aspace->i_mapping->nrpages)
12873 + else if (!sdp->sd_args.ar_localcaching &&
12874 + time_after_eq(jiffies, gl->gl_stamp + sdp->sd_tune.gt_demote_secs * HZ))
12881 + * inode_go_lock - operation done after an inode lock is locked by a process
12883 + * @flags: the flags passed into gfs_glock()
12885 + * Returns: 0 on success, -EXXX on failure
12889 +inode_go_lock(struct gfs_glock *gl, int flags)
12891 + struct gfs_inode *ip = gl2ip(gl);
12894 + if (ip && ip->i_vn != gl->gl_vn) {
12895 + error = gfs_copyin_dinode(ip);
12897 + gfs_inode_attr_in(ip);
12904 + * inode_go_unlock - operation done before an inode lock is unlocked by a process
12906 + * @flags: the flags passed into gfs_gunlock()
12911 +inode_go_unlock(struct gfs_glock *gl, int flags)
12913 + struct gfs_inode *ip = gl2ip(gl);
12915 + if (ip && test_bit(GLF_DIRTY, &gl->gl_flags))
12916 + gfs_inode_attr_in(ip);
12919 + gfs_flush_meta_cache(ip);
12923 + * rgrp_go_xmote_th - promote/demote a glock
12925 + * @state: the requested state
12926 + * @flags: the flags passed into gfs_glock()
12931 +rgrp_go_xmote_th(struct gfs_glock *gl, unsigned int state, int flags)
12933 + struct gfs_rgrpd *rgd = gl2rgd(gl);
12935 + GFS_ASSERT_GLOCK(rgd && gl->gl_lvb, gl,);
12937 + gfs_mhc_zap(rgd);
12938 + gfs_depend_sync(rgd);
12939 + gfs_glock_xmote_th(gl, state, flags);
12943 + * rgrp_go_drop_th - unlock a glock
12949 +rgrp_go_drop_th(struct gfs_glock *gl)
12951 + struct gfs_rgrpd *rgd = gl2rgd(gl);
12953 + GFS_ASSERT_GLOCK(rgd && gl->gl_lvb, gl,);
12955 + gfs_mhc_zap(rgd);
12956 + gfs_depend_sync(rgd);
12957 + gfs_glock_drop_th(gl);
12961 + * rgrp_go_demote_ok - check to see if it's ok to unlock a glock
12964 + * Returns: TRUE if it's ok
12968 +rgrp_go_demote_ok(struct gfs_glock *gl)
12970 + struct gfs_rgrpd *rgd = gl2rgd(gl);
12971 + int demote = TRUE;
12973 + if (gl->gl_aspace->i_mapping->nrpages)
12975 + else if (rgd && !list_empty(&rgd->rd_mhc)) /* Don't bother with lock here */
12982 + * rgrp_go_lock - operation done after an rgrp lock is locked by a process
12984 + * @flags: the flags passed into gfs_glock()
12986 + * Returns: 0 on success, -EXXX on failure
12990 +rgrp_go_lock(struct gfs_glock *gl, int flags)
12992 + struct gfs_rgrpd *rgd = gl2rgd(gl);
12995 + GFS_ASSERT_GLOCK(rgd && gl->gl_lvb, gl,);
12997 + if (!(flags & GL_SKIP))
12998 + error = gfs_rgrp_read(rgd);
13004 + * rgrp_go_unlock - operation done before an rgrp lock is unlocked by a process
13006 + * @flags: the flags passed into gfs_gunlock()
13011 +rgrp_go_unlock(struct gfs_glock *gl, int flags)
13013 + struct gfs_rgrpd *rgd = gl2rgd(gl);
13015 + GFS_ASSERT_GLOCK(rgd && gl->gl_lvb, gl,);
13017 + if (!(flags & GL_SKIP)) {
13018 + gfs_rgrp_relse(rgd);
13019 + if (test_bit(GLF_DIRTY, &gl->gl_flags))
13020 + gfs_rgrp_lvb_fill(rgd);
13025 + * trans_go_xmote_th - promote/demote a metadata glock
13027 + * @state: the requested state
13028 + * @flags: the flags passed into gfs_glock()
13033 +trans_go_xmote_th(struct gfs_glock *gl, unsigned int state, int flags)
13035 + struct gfs_sbd *sdp = gl->gl_sbd;
13038 + if (gl->gl_state != LM_ST_UNLOCKED &&
13039 + test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) {
13040 + gfs_sync_meta(sdp);
13042 + error = gfs_log_shutdown(sdp);
13044 + gfs_io_error(sdp);
13047 + gfs_glock_xmote_th(gl, state, flags);
13051 + * trans_go_xmote_bh - promote/demote a metadata glock
13057 +trans_go_xmote_bh(struct gfs_glock *gl)
13059 + struct gfs_sbd *sdp = gl->gl_sbd;
13060 + struct gfs_glock *j_gl = sdp->sd_journal_gh.gh_gl;
13061 + struct gfs_log_header head;
13064 + if (gl->gl_state != LM_ST_UNLOCKED &&
13065 + test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) {
13066 + j_gl->gl_ops->go_inval(j_gl, DIO_METADATA | DIO_DATA);
13068 + error = gfs_find_jhead(sdp, &sdp->sd_jdesc, j_gl, &head);
13069 + GFS_ASSERT_SBD(!error, sdp,); /* FixMe!!! */
13070 + GFS_ASSERT_SBD(head.lh_flags & GFS_LOG_HEAD_UNMOUNT, sdp,);
13072 + /* Initialize some head of the log stuff */
13073 + sdp->sd_sequence = head.lh_sequence;
13074 + sdp->sd_log_head = head.lh_first + 1;
13079 + * trans_go_drop_th - prepare the transaction glock to be released
13082 + * We want to sync the device even with localcaching. Remember
13083 + * that localcaching journal replay only marks buffers dirty.
13087 +trans_go_drop_th(struct gfs_glock *gl)
13089 + struct gfs_sbd *sdp = gl->gl_sbd;
13092 + if (test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) {
13093 + gfs_sync_meta(sdp);
13095 + error = gfs_log_shutdown(sdp);
13097 + gfs_io_error(sdp);
13100 + gfs_glock_drop_th(gl);
13104 + * nondisk_go_demote_ok - check to see if it's ok to unlock a glock
13107 + * Returns: TRUE if it's ok
13111 +nondisk_go_demote_ok(struct gfs_glock *gl)
13117 + * quota_go_demote_ok - check to see if it's ok to unlock a glock
13120 + * Returns: TRUE if it's ok
13124 +quota_go_demote_ok(struct gfs_glock *gl)
13126 + return !atomic_read(&gl->gl_lvb_count);
13129 +struct gfs_glock_operations gfs_meta_glops = {
13130 + .go_xmote_th = gfs_glock_xmote_th,
13131 + .go_drop_th = gfs_glock_drop_th,
13132 + .go_sync = meta_go_sync,
13133 + .go_inval = meta_go_inval,
13134 + .go_demote_ok = meta_go_demote_ok,
13135 + .go_type = LM_TYPE_META
13138 +struct gfs_glock_operations gfs_inode_glops = {
13139 + .go_xmote_th = inode_go_xmote_th,
13140 + .go_xmote_bh = inode_go_xmote_bh,
13141 + .go_drop_th = inode_go_drop_th,
13142 + .go_sync = inode_go_sync,
13143 + .go_inval = inode_go_inval,
13144 + .go_demote_ok = inode_go_demote_ok,
13145 + .go_lock = inode_go_lock,
13146 + .go_unlock = inode_go_unlock,
13147 + .go_type = LM_TYPE_INODE
13150 +struct gfs_glock_operations gfs_rgrp_glops = {
13151 + .go_xmote_th = rgrp_go_xmote_th,
13152 + .go_drop_th = rgrp_go_drop_th,
13153 + .go_sync = meta_go_sync,
13154 + .go_inval = meta_go_inval,
13155 + .go_demote_ok = rgrp_go_demote_ok,
13156 + .go_lock = rgrp_go_lock,
13157 + .go_unlock = rgrp_go_unlock,
13158 + .go_type = LM_TYPE_RGRP
13161 +struct gfs_glock_operations gfs_trans_glops = {
13162 + .go_xmote_th = trans_go_xmote_th,
13163 + .go_xmote_bh = trans_go_xmote_bh,
13164 + .go_drop_th = trans_go_drop_th,
13165 + .go_type = LM_TYPE_NONDISK
13168 +struct gfs_glock_operations gfs_iopen_glops = {
13169 + .go_xmote_th = gfs_glock_xmote_th,
13170 + .go_drop_th = gfs_glock_drop_th,
13171 + .go_callback = gfs_iopen_go_callback,
13172 + .go_type = LM_TYPE_IOPEN
13175 +struct gfs_glock_operations gfs_flock_glops = {
13176 + .go_xmote_th = gfs_glock_xmote_th,
13177 + .go_drop_th = gfs_glock_drop_th,
13178 + .go_type = LM_TYPE_FLOCK
13181 +struct gfs_glock_operations gfs_nondisk_glops = {
13182 + .go_xmote_th = gfs_glock_xmote_th,
13183 + .go_drop_th = gfs_glock_drop_th,
13184 + .go_demote_ok = nondisk_go_demote_ok,
13185 + .go_type = LM_TYPE_NONDISK
13188 +struct gfs_glock_operations gfs_quota_glops = {
13189 + .go_xmote_th = gfs_glock_xmote_th,
13190 + .go_drop_th = gfs_glock_drop_th,
13191 + .go_demote_ok = quota_go_demote_ok,
13192 + .go_type = LM_TYPE_QUOTA
13194 diff -urN linux-orig/fs/gfs/glops.h linux-patched/fs/gfs/glops.h
13195 --- linux-orig/fs/gfs/glops.h 1969-12-31 18:00:00.000000000 -0600
13196 +++ linux-patched/fs/gfs/glops.h 2004-06-30 13:27:49.343711130 -0500
13198 +/******************************************************************************
13199 +*******************************************************************************
13201 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
13202 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
13204 +** This copyrighted material is made available to anyone wishing to use,
13205 +** modify, copy, or redistribute it subject to the terms and conditions
13206 +** of the GNU General Public License v.2.
13208 +*******************************************************************************
13209 +******************************************************************************/
13211 +#ifndef __GLOPS_DOT_H__
13212 +#define __GLOPS_DOT_H__
13214 +extern struct gfs_glock_operations gfs_meta_glops;
13215 +extern struct gfs_glock_operations gfs_inode_glops;
13216 +extern struct gfs_glock_operations gfs_rgrp_glops;
13217 +extern struct gfs_glock_operations gfs_trans_glops;
13218 +extern struct gfs_glock_operations gfs_iopen_glops;
13219 +extern struct gfs_glock_operations gfs_flock_glops;
13220 +extern struct gfs_glock_operations gfs_nondisk_glops;
13221 +extern struct gfs_glock_operations gfs_quota_glops;
13223 +#endif /* __GLOPS_DOT_H__ */
13224 diff -urN linux-orig/fs/gfs/incore.h linux-patched/fs/gfs/incore.h
13225 --- linux-orig/fs/gfs/incore.h 1969-12-31 18:00:00.000000000 -0600
13226 +++ linux-patched/fs/gfs/incore.h 2004-06-30 13:27:49.343711130 -0500
13228 +/******************************************************************************
13229 +*******************************************************************************
13231 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
13232 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
13234 +** This copyrighted material is made available to anyone wishing to use,
13235 +** modify, copy, or redistribute it subject to the terms and conditions
13236 +** of the GNU General Public License v.2.
13238 +*******************************************************************************
13239 +******************************************************************************/
13241 +#ifndef __INCORE_DOT_H__
13242 +#define __INCORE_DOT_H__
13244 +#define DIO_NEW (0x00000001)
13245 +#define DIO_FORCE (0x00000002)
13246 +#define DIO_CLEAN (0x00000004)
13247 +#define DIO_DIRTY (0x00000008)
13248 +#define DIO_START (0x00000010)
13249 +#define DIO_WAIT (0x00000020)
13250 +#define DIO_METADATA (0x00000040)
13251 +#define DIO_DATA (0x00000080)
13252 +#define DIO_INVISIBLE (0x00000100)
13253 +#define DIO_CHECK (0x00000200)
13254 +#define DIO_ALL (0x00000400)
13256 +/* Structure prototypes */
13258 +struct gfs_log_operations;
13259 +struct gfs_log_element;
13260 +struct gfs_meta_header_cache;
13261 +struct gfs_depend;
13262 +struct gfs_bitmap;
13264 +struct gfs_bufdata;
13265 +struct gfs_glock_operations;
13266 +struct gfs_holder;
13271 +struct gfs_unlinked;
13272 +struct gfs_quota_le;
13273 +struct gfs_quota_data;
13274 +struct gfs_log_buf;
13276 +struct gfs_gl_hash_bucket;
13279 +typedef void (*gfs_glop_bh_t) (struct gfs_glock * gl, unsigned int ret);
13282 + * Structure of operations that are associated with each
13283 + * type of element in the log.
13286 +struct gfs_log_operations {
13287 + /* Operations specific to a given log element */
13289 + void (*lo_add) (struct gfs_sbd * sdp, struct gfs_log_element * le);
13290 + void (*lo_trans_end) (struct gfs_sbd * sdp,
13291 + struct gfs_log_element * le);
13292 + void (*lo_print) (struct gfs_sbd * sdp, struct gfs_log_element * le,
13293 + unsigned int where);
13294 + struct gfs_trans *(*lo_overlap_trans) (struct gfs_sbd * sdp,
13295 + struct gfs_log_element * le);
13296 + void (*lo_incore_commit) (struct gfs_sbd * sdp, struct gfs_trans * tr,
13297 + struct gfs_log_element * le);
13298 + void (*lo_add_to_ail) (struct gfs_sbd * sdp,
13299 + struct gfs_log_element * le);
13300 + void (*lo_clean_dump) (struct gfs_sbd * sdp,
13301 + struct gfs_log_element * le);
13303 + /* Operations specific to a class of log elements */
13305 + void (*lo_trans_size) (struct gfs_sbd * sdp, struct gfs_trans * tr,
13306 + unsigned int *mblks, unsigned int *eblks,
13307 + unsigned int *blocks, unsigned int *bmem);
13308 + void (*lo_trans_combine) (struct gfs_sbd * sdp, struct gfs_trans * tr,
13309 + struct gfs_trans * new_tr);
13310 + void (*lo_build_bhlist) (struct gfs_sbd * sdp, struct gfs_trans * tr);
13311 + void (*lo_dump_size) (struct gfs_sbd * sdp, unsigned int *elements,
13312 + unsigned int *blocks, unsigned int *bmem);
13313 + void (*lo_build_dump) (struct gfs_sbd * sdp, struct gfs_trans * tr);
13315 + /* Operations that happen at recovery time */
13317 + void (*lo_before_scan) (struct gfs_sbd * sdp, unsigned int jid,
13318 + struct gfs_log_header * head,
13319 + unsigned int pass);
13320 + int (*lo_scan_elements) (struct gfs_sbd * sdp,
13321 + struct gfs_jindex * jdesc,
13322 + struct gfs_glock * gl, uint64_t start,
13323 + struct gfs_log_descriptor * desc,
13324 + unsigned int pass);
13325 + void (*lo_after_scan) (struct gfs_sbd * sdp, unsigned int jid,
13326 + unsigned int pass);
13332 + * Structure that gets added to struct gfs_trans->tr_elements. They
13333 + * make up the "stuff" in each transaction.
13336 +struct gfs_log_element {
13337 + struct gfs_log_operations *le_ops;
13339 + struct gfs_trans *le_trans;
13340 + struct list_head le_list;
13343 +struct gfs_meta_header_cache {
13344 + struct list_head mc_list_hash;
13345 + struct list_head mc_list_single;
13346 + struct list_head mc_list_rgd;
13348 + uint64_t mc_block;
13349 + struct gfs_meta_header mc_mh;
13352 +struct gfs_depend {
13353 + struct list_head gd_list_hash;
13354 + struct list_head gd_list_rgd;
13356 + struct gfs_rgrpd *gd_rgd;
13357 + uint64_t gd_formal_ino;
13358 + unsigned long gd_time;
13362 + * Structure containing information about the allocation bitmaps.
13363 + * There are one of these for each fs block that the bitmap for
13364 + * the resource group header covers.
13367 +struct gfs_bitmap {
13368 + uint32_t bi_offset; /* The offset in the buffer of the first byte */
13369 + uint32_t bi_start; /* The position of the first byte in this block */
13370 + uint32_t bi_len; /* The number of bytes in this block */
13374 + * Structure containing information about Resource Groups
13377 +struct gfs_rgrpd {
13378 + struct list_head rd_list; /* Link with superblock */
13379 + struct list_head rd_list_mru;
13380 + struct list_head rd_recent; /* Recently used rgrps */
13382 + struct gfs_glock *rd_gl; /* Glock for rgrp */
13384 + unsigned long rd_flags;
13386 + struct gfs_rindex rd_ri; /* Resource Index structure */
13387 + struct gfs_rgrp rd_rg; /* Resource Group structure */
13388 + uint64_t rd_rg_vn;
13390 + struct gfs_bitmap *rd_bits;
13391 + struct buffer_head **rd_bh;
13393 + uint32_t rd_last_alloc_data;
13394 + uint32_t rd_last_alloc_meta;
13396 + struct list_head rd_mhc;
13397 + struct list_head rd_depend;
13399 + struct gfs_sbd *rd_sbd;
13403 + * Per-buffer data
13406 +struct gfs_bufdata {
13407 + struct buffer_head *bd_bh; /* struct buffer_head which this struct belongs to */
13408 + struct gfs_glock *bd_gl; /* Pointer to Glock struct for this bh */
13410 + struct gfs_log_element bd_new_le;
13411 + struct gfs_log_element bd_incore_le;
13414 + struct semaphore bd_lock;
13416 + unsigned int bd_pinned; /* Pin count */
13417 + struct list_head bd_ail_tr_list; /* List of buffers hanging off tr_ail_bufs */
13418 + struct list_head bd_ail_gl_list; /* List of buffers hanging off gl_ail_bufs */
13422 + * Glock operations
13425 +struct gfs_glock_operations {
13426 + void (*go_xmote_th) (struct gfs_glock * gl, unsigned int state,
13428 + void (*go_xmote_bh) (struct gfs_glock * gl);
13429 + void (*go_drop_th) (struct gfs_glock * gl);
13430 + void (*go_drop_bh) (struct gfs_glock * gl);
13431 + void (*go_sync) (struct gfs_glock * gl, int flags);
13432 + void (*go_inval) (struct gfs_glock * gl, int flags);
13433 + int (*go_demote_ok) (struct gfs_glock * gl);
13434 + int (*go_lock) (struct gfs_glock * gl, int flags);
13435 + void (*go_unlock) (struct gfs_glock * gl, int flags);
13436 + void (*go_callback) (struct gfs_glock * gl, unsigned int state);
13441 +#define HIF_MUTEX (0)
13442 +#define HIF_PROMOTE (1)
13443 +#define HIF_DEMOTE (2)
13446 +#define HIF_ALLOCED (3)
13447 +#define HIF_DEALLOC (4)
13448 +#define HIF_HOLDER (5)
13449 +#define HIF_FIRST (6)
13450 +#define HIF_WAKEUP (7)
13451 +#define HIF_RECURSE (8)
13453 +struct gfs_holder {
13454 + struct list_head gh_list;
13456 + struct gfs_glock *gh_gl;
13457 + struct task_struct *gh_owner;
13458 + unsigned int gh_state;
13462 + unsigned long gh_iflags;
13463 + struct completion gh_wait;
13467 + * Glock Structure
13470 +#define GLF_PLUG (0)
13471 +#define GLF_LOCK (1)
13472 +#define GLF_STICKY (2)
13473 +#define GLF_PREFETCH (3)
13474 +#define GLF_SYNC (4)
13475 +#define GLF_DIRTY (5)
13476 +#define GLF_LVB_INVALID (6)
13478 +struct gfs_glock {
13479 + struct list_head gl_list;
13480 + unsigned long gl_flags;
13481 + struct lm_lockname gl_name;
13482 + atomic_t gl_count;
13484 + spinlock_t gl_spin;
13486 + unsigned int gl_state;
13487 + struct list_head gl_holders;
13488 + struct list_head gl_waiters1; /* HIF_MUTEX */
13489 + struct list_head gl_waiters2; /* HIF_DEMOTE, HIF_PROMOTE */
13491 + struct gfs_glock_operations *gl_ops;
13493 + struct gfs_holder *gl_req_gh;
13494 + gfs_glop_bh_t gl_req_bh;
13496 + lm_lock_t *gl_lock;
13498 + atomic_t gl_lvb_count;
13501 + unsigned long gl_stamp;
13504 + struct gfs_log_element gl_new_le;
13505 + struct gfs_log_element gl_incore_le;
13507 + struct gfs_gl_hash_bucket *gl_bucket;
13508 + struct list_head gl_reclaim;
13510 + struct gfs_sbd *gl_sbd;
13512 + struct inode *gl_aspace;
13513 + struct list_head gl_dirty_buffers;
13514 + struct list_head gl_ail_bufs;
13518 + * In-Place Reservation structure
13521 +struct gfs_alloc {
13522 + /* Quota stuff */
13524 + unsigned int al_qd_num;
13525 + struct gfs_quota_data *al_qd[4];
13526 + struct gfs_holder al_qd_ghs[4];
13528 + /* Filled in by the caller to gfs_inplace_reserve() */
13530 + uint32_t al_requested_di;
13531 + uint32_t al_requested_meta;
13532 + uint32_t al_requested_data;
13534 + /* Filled in by gfs_inplace_reserve() */
13537 + unsigned int al_line;
13538 + struct gfs_holder al_ri_gh;
13539 + struct gfs_holder al_rgd_gh;
13540 + struct gfs_rgrpd *al_rgd;
13541 + uint32_t al_reserved_meta;
13542 + uint32_t al_reserved_data;
13544 + /* Filled in by gfs_blkalloc() */
13546 + uint32_t al_alloced_di;
13547 + uint32_t al_alloced_meta;
13548 + uint32_t al_alloced_data;
13550 + /* Dinode allocation crap */
13552 + struct gfs_unlinked *al_ul;
13556 + * Incore inode structure
13559 +#define GIF_QD_LOCKED (0)
13560 +#define GIF_PAGED (1)
13561 +#define GIF_SW_PAGED (2)
13563 +struct gfs_inode {
13564 + struct gfs_inum i_num;
13566 + atomic_t i_count;
13567 + unsigned long i_flags;
13570 + struct gfs_dinode i_di;
13572 + struct gfs_glock *i_gl;
13573 + struct gfs_sbd *i_sbd;
13574 + struct inode *i_vnode;
13576 + struct gfs_holder i_iopen_gh;
13578 + struct gfs_alloc *i_alloc;
13579 + uint64_t i_last_rg_alloc;
13581 + struct task_struct *i_creat_task;
13582 + pid_t i_creat_pid;
13584 + spinlock_t i_lock;
13585 + struct buffer_head *i_cache[GFS_MAX_META_HEIGHT];
13589 + * GFS per-fd structure
13592 +#define GFF_DID_DIRECT_ALLOC (0)
13595 + unsigned long f_flags;
13597 + struct semaphore f_fl_lock;
13598 + struct gfs_holder f_fl_gh;
13600 + struct gfs_inode *f_inode;
13601 + struct file *f_vfile;
13605 + * Unlinked inode log entry
13608 +#define ULF_NEW_UL (0)
13609 +#define ULF_INCORE_UL (1)
13610 +#define ULF_IC_LIST (2)
13611 +#define ULF_OD_LIST (3)
13612 +#define ULF_LOCK (4)
13614 +struct gfs_unlinked {
13615 + struct list_head ul_list;
13616 + unsigned int ul_count;
13618 + struct gfs_inum ul_inum;
13619 + unsigned long ul_flags;
13621 + struct gfs_log_element ul_new_le;
13622 + struct gfs_log_element ul_incore_le;
13623 + struct gfs_log_element ul_ondisk_le;
13627 + * Quota log element
13630 +struct gfs_quota_le {
13631 + struct gfs_log_element ql_le;
13633 + struct gfs_quota_data *ql_data;
13634 + struct list_head ql_data_list;
13636 + int64_t ql_change;
13639 +#define QDF_USER (0)
13640 +#define QDF_OD_LIST (1)
13641 +#define QDF_LOCK (2)
13643 +struct gfs_quota_data {
13644 + struct list_head qd_list;
13645 + unsigned int qd_count;
13648 + unsigned long qd_flags;
13650 + struct list_head qd_le_list;
13652 + int64_t qd_change_new;
13653 + int64_t qd_change_ic;
13654 + int64_t qd_change_od;
13655 + int64_t qd_change_sync;
13657 + struct gfs_quota_le qd_ondisk_ql;
13658 + uint64_t qd_sync_gen;
13660 + struct gfs_glock *qd_gl;
13661 + struct gfs_quota_lvb qd_qb;
13663 + unsigned long qd_last_warn;
13666 +struct gfs_log_buf {
13667 + struct list_head lb_list;
13669 + struct buffer_head lb_bh;
13670 + struct buffer_head *lb_unlock;
13674 + * Transaction structures
13677 +#define TRF_LOG_DUMP (0x00000001)
13679 +struct gfs_trans {
13680 + struct list_head tr_list;
13682 + /* Initial creation stuff */
13685 + unsigned int tr_line;
13687 + unsigned int tr_mblks_asked; /* Number of log blocks asked to be reserved */
13688 + unsigned int tr_eblks_asked;
13689 + unsigned int tr_seg_reserved; /* Number of segments reserved */
13691 + struct gfs_holder *tr_t_gh;
13693 + /* Stuff filled in during creation */
13695 + unsigned int tr_flags;
13696 + struct list_head tr_elements;
13698 + /* Stuff modified during the commit */
13700 + unsigned int tr_num_free_bufs;
13701 + struct list_head tr_free_bufs;
13702 + unsigned int tr_num_free_bmem;
13703 + struct list_head tr_free_bmem;
13705 + uint64_t tr_log_head; /* The current log head */
13706 + uint64_t tr_first_head; /* First header block */
13708 + struct list_head tr_bufs; /* List of buffers going to the log */
13710 + /* Stuff that's part of the AIL */
13712 + struct list_head tr_ail_bufs;
13714 + /* Private data for different log element types */
13716 + unsigned int tr_num_gl;
13717 + unsigned int tr_num_buf;
13718 + unsigned int tr_num_iul;
13719 + unsigned int tr_num_ida;
13720 + unsigned int tr_num_q;
13724 + * One bucket of the glock hash table.
13727 +struct gfs_gl_hash_bucket {
13728 + rwlock_t hb_lock;
13729 + struct list_head hb_list;
13730 +} __attribute__ ((__aligned__(SMP_CACHE_BYTES)));
13733 + * Super Block Data Structure (One per filesystem)
13736 +#define SDF_JOURNAL_LIVE (0)
13737 +#define SDF_SCAND_RUN (1)
13738 +#define SDF_GLOCKD_RUN (2)
13739 +#define SDF_RECOVERD_RUN (3)
13740 +#define SDF_LOGD_RUN (4)
13741 +#define SDF_QUOTAD_RUN (5)
13742 +#define SDF_INODED_RUN (6)
13743 +#define SDF_NOATIME (7)
13744 +#define SDF_ROFS (8)
13745 +#define SDF_NEED_LOG_DUMP (9)
13746 +#define SDF_FOUND_UL_DUMP (10)
13747 +#define SDF_FOUND_Q_DUMP (11)
13748 +#define SDF_IN_LOG_DUMP (12)
13750 +#define GFS_GL_HASH_SHIFT (13)
13751 +#define GFS_GL_HASH_SIZE (1 << GFS_GL_HASH_SHIFT)
13752 +#define GFS_GL_HASH_MASK (GFS_GL_HASH_SIZE - 1)
13754 +#define GFS_MHC_HASH_SHIFT (10)
13755 +#define GFS_MHC_HASH_SIZE (1 << GFS_MHC_HASH_SHIFT)
13756 +#define GFS_MHC_HASH_MASK (GFS_MHC_HASH_SIZE - 1)
13758 +#define GFS_DEPEND_HASH_SHIFT (10)
13759 +#define GFS_DEPEND_HASH_SIZE (1 << GFS_DEPEND_HASH_SHIFT)
13760 +#define GFS_DEPEND_HASH_MASK (GFS_DEPEND_HASH_SIZE - 1)
13763 + struct gfs_sb sd_sb; /* Super Block */
13765 + struct super_block *sd_vfs; /* FS's device independent sb */
13767 + struct gfs_args sd_args;
13768 + unsigned long sd_flags;
13770 + struct gfs_tune sd_tune; /* FS tuning structure */
13772 + /* Resource group stuff */
13774 + struct gfs_inode *sd_riinode; /* rindex inode */
13775 + uint64_t sd_riinode_vn; /* Version number of the resource index inode */
13777 + struct list_head sd_rglist; /* List of resource groups */
13778 + struct semaphore sd_rindex_lock;
13780 + struct list_head sd_rg_mru_list; /* List of resource groups in MRU order */
13781 + spinlock_t sd_rg_mru_lock; /* Lock for MRU list */
13782 + struct list_head sd_rg_recent; /* Recently used rgrps */
13783 + spinlock_t sd_rg_recent_lock;
13784 + struct gfs_rgrpd *sd_rg_forward; /* Next new rgrp to try for allocation */
13785 + spinlock_t sd_rg_forward_lock;
13787 + unsigned int sd_rgcount; /* Count of resource groups */
13789 + /* Constants computed on mount */
13791 + uint32_t sd_fsb2bb;
13792 + uint32_t sd_fsb2bb_shift; /* Shift FS Block numbers to the left by
13793 + this to get buffer cache blocks */
13794 + uint32_t sd_diptrs; /* Number of pointers in a dinode */
13795 + uint32_t sd_inptrs; /* Number of pointers in an indirect block */
13796 + uint32_t sd_jbsize; /* Size of a journaled data block */
13797 + uint32_t sd_hash_bsize; /* sizeof(exhash block) */
13798 + uint32_t sd_hash_bsize_shift;
13799 + uint32_t sd_hash_ptrs; /* Number of pointers in a hash block */
13800 + uint32_t sd_max_dirres; /* Maximum space needed to add a directory entry */
13801 + uint32_t sd_max_height; /* Maximum height of a file's metadata tree */
13802 + uint64_t sd_heightsize[GFS_MAX_META_HEIGHT];
13803 + uint32_t sd_max_jheight; /* Maximum height of a journaled file's metadata tree */
13804 + uint64_t sd_jheightsize[GFS_MAX_META_HEIGHT];
13808 + struct gfs_gl_hash_bucket sd_gl_hash[GFS_GL_HASH_SIZE];
13810 + struct list_head sd_reclaim_list;
13811 + spinlock_t sd_reclaim_lock;
13812 + wait_queue_head_t sd_reclaim_wchan;
13813 + atomic_t sd_reclaim_count;
13815 + struct lm_lockstruct sd_lockstruct;
13817 + struct list_head sd_mhc[GFS_MHC_HASH_SIZE];
13818 + struct list_head sd_mhc_single;
13819 + spinlock_t sd_mhc_lock;
13820 + atomic_t sd_mhc_count;
13822 + struct list_head sd_depend[GFS_DEPEND_HASH_SIZE];
13823 + spinlock_t sd_depend_lock;
13824 + atomic_t sd_depend_count;
13826 + struct gfs_holder sd_live_gh;
13828 + struct gfs_holder sd_freeze_gh;
13829 + struct semaphore sd_freeze_lock;
13830 + unsigned int sd_freeze_count;
13832 + /* Inode Stuff */
13834 + struct gfs_inode *sd_rooti; /* FS's root inode */
13836 + struct gfs_glock *sd_rename_gl; /* rename glock */
13838 + /* Daemon stuff */
13840 + struct task_struct *sd_scand_process;
13841 + unsigned int sd_glockd_num;
13842 + struct task_struct *sd_recoverd_process;
13843 + struct task_struct *sd_logd_process;
13844 + struct task_struct *sd_quotad_process;
13845 + struct task_struct *sd_inoded_process;
13847 + struct semaphore sd_thread_lock;
13848 + struct completion sd_thread_completion;
13852 + struct gfs_glock *sd_trans_gl; /* transaction glock */
13854 + struct gfs_inode *sd_jiinode; /* jindex inode */
13855 + uint64_t sd_jiinode_vn; /* Version number of the journal index inode */
13857 + unsigned int sd_journals; /* Number of journals in the FS */
13858 + struct gfs_jindex *sd_jindex; /* Array of Jindex structures describing this FS's journals */
13859 + struct semaphore sd_jindex_lock;
13860 + unsigned long sd_jindex_refresh_time;
13862 + struct gfs_jindex sd_jdesc; /* Jindex structure describing this machine's journal */
13863 + struct gfs_holder sd_journal_gh; /* the glock for this machine's journal */
13865 + uint64_t sd_sequence; /* Assigned to xactions in order they commit */
13866 + uint64_t sd_log_head; /* Block number of next journal write */
13867 + uint64_t sd_log_wrap;
13869 + spinlock_t sd_log_seg_lock;
13870 + unsigned int sd_log_seg_free; /* Free segments in the log */
13871 + struct list_head sd_log_seg_list;
13872 + wait_queue_head_t sd_log_seg_wait;
13874 + struct list_head sd_log_ail; /* struct gfs_trans structures that form the Active Items List
13875 + "next" is the head, "prev" is the tail */
13877 + struct list_head sd_log_incore; /* transactions that have been committed incore (but not ondisk)
13878 + "next" is the newest, "prev" is the oldest */
13879 + unsigned int sd_log_buffers; /* Number of buffers in the incore log */
13881 + struct semaphore sd_log_lock; /* Lock for access to log values */
13883 + uint64_t sd_log_dump_last;
13884 + uint64_t sd_log_dump_last_wrap;
13886 + /* unlinked crap */
13888 + struct list_head sd_unlinked_list;
13889 + spinlock_t sd_unlinked_lock;
13891 + atomic_t sd_unlinked_ic_count;
13892 + atomic_t sd_unlinked_od_count;
13896 + struct list_head sd_quota_list;
13897 + spinlock_t sd_quota_lock;
13899 + atomic_t sd_quota_count;
13900 + atomic_t sd_quota_od_count;
13902 + struct gfs_inode *sd_qinode;
13904 + uint64_t sd_quota_sync_gen;
13905 + unsigned long sd_quota_sync_time;
13907 + /* license crap */
13909 + struct gfs_inode *sd_linode;
13911 + /* Recovery stuff */
13913 + struct list_head sd_dirty_j;
13914 + spinlock_t sd_dirty_j_lock;
13916 + unsigned int sd_recovery_replays;
13917 + unsigned int sd_recovery_skips;
13918 + unsigned int sd_recovery_sames;
13922 + atomic_t sd_glock_count;
13923 + atomic_t sd_glock_held_count;
13924 + atomic_t sd_inode_count;
13925 + atomic_t sd_bufdata_count;
13926 + atomic_t sd_fh2dentry_misses;
13927 + atomic_t sd_reclaimed;
13928 + atomic_t sd_glock_nq_calls;
13929 + atomic_t sd_glock_dq_calls;
13930 + atomic_t sd_glock_prefetch_calls;
13931 + atomic_t sd_lm_lock_calls;
13932 + atomic_t sd_lm_unlock_calls;
13933 + atomic_t sd_lm_callbacks;
13934 + atomic_t sd_ops_address;
13935 + atomic_t sd_ops_dentry;
13936 + atomic_t sd_ops_export;
13937 + atomic_t sd_ops_file;
13938 + atomic_t sd_ops_inode;
13939 + atomic_t sd_ops_super;
13940 + atomic_t sd_ops_vm;
13942 + char sd_fsname[256];
13944 + /* Debugging crud */
13946 + unsigned long sd_last_readdirplus;
13947 + unsigned long sd_last_unlocked_aop;
13949 + spinlock_t sd_ail_lock;
13950 + struct list_head sd_recovery_bufs;
13953 +#endif /* __INCORE_DOT_H__ */
13954 diff -urN linux-orig/fs/gfs/inode.c linux-patched/fs/gfs/inode.c
13955 --- linux-orig/fs/gfs/inode.c 1969-12-31 18:00:00.000000000 -0600
13956 +++ linux-patched/fs/gfs/inode.c 2004-06-30 13:27:49.343711130 -0500
13958 +/******************************************************************************
13959 +*******************************************************************************
13961 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
13962 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
13964 +** This copyrighted material is made available to anyone wishing to use,
13965 +** modify, copy, or redistribute it subject to the terms and conditions
13966 +** of the GNU General Public License v.2.
13968 +*******************************************************************************
13969 +******************************************************************************/
13971 +#include <linux/sched.h>
13972 +#include <linux/slab.h>
13973 +#include <linux/smp_lock.h>
13974 +#include <linux/spinlock.h>
13975 +#include <asm/semaphore.h>
13976 +#include <linux/completion.h>
13977 +#include <linux/buffer_head.h>
13978 +#include <linux/xattr_acl.h>
13985 +#include "eattr.h"
13986 +#include "glock.h"
13987 +#include "glops.h"
13988 +#include "inode.h"
13990 +#include "ops_address.h"
13991 +#include "ops_file.h"
13992 +#include "ops_inode.h"
13993 +#include "quota.h"
13995 +#include "trans.h"
13996 +#include "unlinked.h"
13999 + * inode_attr_in - Copy attributes from the dinode into the VFS inode
14000 + * @ip: The GFS inode
14005 +inode_attr_in(struct gfs_inode *ip, struct inode *ino)
14007 + unsigned int mode;
14009 + ino->i_ino = ip->i_num.no_formal_ino;
14011 + switch (ip->i_di.di_type) {
14012 + case GFS_FILE_REG:
14016 + case GFS_FILE_DIR:
14020 + case GFS_FILE_LNK:
14024 + case GFS_FILE_BLK:
14026 + ino->i_rdev = MKDEV(ip->i_di.di_major, ip->i_di.di_minor);
14028 + case GFS_FILE_CHR:
14030 + ino->i_rdev = MKDEV(ip->i_di.di_major, ip->i_di.di_minor);
14032 + case GFS_FILE_FIFO:
14036 + case GFS_FILE_SOCK:
14041 + GFS_ASSERT_INODE(FALSE, ip,
14042 + printk("type = %u\n", ip->i_di.di_type););
14046 + ino->i_mode = mode | (ip->i_di.di_mode & S_IALLUGO);
14047 + ino->i_nlink = ip->i_di.di_nlink;
14048 + ino->i_uid = ip->i_di.di_uid;
14049 + ino->i_gid = ip->i_di.di_gid;
14050 + i_size_write(ino, ip->i_di.di_size);
14051 + ino->i_atime.tv_sec = ip->i_di.di_atime;
14052 + ino->i_mtime.tv_sec = ip->i_di.di_mtime;
14053 + ino->i_ctime.tv_sec = ip->i_di.di_ctime;
14054 + ino->i_atime.tv_nsec = ino->i_mtime.tv_nsec = ino->i_ctime.tv_nsec = 0;
14055 + ino->i_blksize = PAGE_SIZE;
14056 + ino->i_blocks = ip->i_di.di_blocks <<
14057 + (ip->i_sbd->sd_sb.sb_bsize_shift - GFS_BASIC_BLOCK_SHIFT);
14058 + ino->i_generation = ip->i_di.di_header.mh_incarn;
14062 + * gfs_inode_attr_in - Copy attributes from the dinode into the VFS inode
14063 + * @ip: The GFS inode
14068 +gfs_inode_attr_in(struct gfs_inode *ip)
14070 + struct inode *inode;
14072 + inode = gfs_iget(ip, NO_CREATE);
14074 + inode_attr_in(ip, inode);
14081 + * gfs_inode_attr_out - Copy attributes from VFS inode into the dinode
14082 + * @ip: The GFS inode
14084 + * Only copy out the attributes that we want the VFS layer
14085 + * to be able to modify.
14089 +gfs_inode_attr_out(struct gfs_inode *ip)
14091 + struct inode *inode;
14093 + inode = gfs_iget(ip, NO_CREATE);
14095 + ip->i_di.di_mode = inode->i_mode & S_IALLUGO;
14096 + ip->i_di.di_uid = inode->i_uid;
14097 + ip->i_di.di_gid = inode->i_gid;
14098 + ip->i_di.di_atime = inode->i_atime.tv_sec;
14099 + ip->i_di.di_mtime = inode->i_mtime.tv_sec;
14100 + ip->i_di.di_ctime = inode->i_ctime.tv_sec;
14106 + * gfs_iget - Get/Create a struct inode for a struct gfs_inode
14107 + * @ip: the struct gfs_inode to get the struct inode for
14109 + * Returns: An inode
14113 +gfs_iget(struct gfs_inode *ip, int create)
14115 + struct inode *inode = NULL, *tmp;
14117 + spin_lock(&ip->i_lock);
14119 + inode = igrab(ip->i_vnode);
14120 + spin_unlock(&ip->i_lock);
14122 + if (inode || !create)
14125 + tmp = new_inode(ip->i_sbd->sd_vfs);
14129 + inode_attr_in(ip, tmp);
14131 + if (ip->i_di.di_type == GFS_FILE_REG) {
14132 + tmp->i_op = &gfs_file_iops;
14133 + tmp->i_fop = &gfs_file_fops;
14134 + tmp->i_mapping->a_ops = &gfs_file_aops;
14135 + } else if (ip->i_di.di_type == GFS_FILE_DIR) {
14136 + tmp->i_op = &gfs_dir_iops;
14137 + tmp->i_fop = &gfs_dir_fops;
14138 + } else if (ip->i_di.di_type == GFS_FILE_LNK) {
14139 + tmp->i_op = &gfs_symlink_iops;
14141 + tmp->i_op = &gfs_dev_iops;
14142 + init_special_inode(tmp, tmp->i_mode, tmp->i_rdev);
14145 + vn2ip(tmp) = NULL;
14148 + spin_lock(&ip->i_lock);
14149 + if (!ip->i_vnode)
14151 + inode = igrab(ip->i_vnode);
14152 + spin_unlock(&ip->i_lock);
14163 + gfs_inode_hold(ip);
14164 + ip->i_vnode = inode;
14165 + vn2ip(inode) = ip;
14167 + spin_unlock(&ip->i_lock);
14169 + insert_inode_hash(inode);
14175 + * gfs_copyin_dinode - Refresh the incore copy of the dinode
14176 + * @ip: The GFS inode
14178 + * Returns: 0 on success, -EXXX on failure
14182 +gfs_copyin_dinode(struct gfs_inode *ip)
14184 + struct buffer_head *dibh;
14187 + error = gfs_get_inode_buffer(ip, &dibh);
14191 + gfs_metatype_check(ip->i_sbd, dibh, GFS_METATYPE_DI);
14192 + gfs_dinode_in(&ip->i_di, dibh->b_data);
14196 + GFS_ASSERT_INODE(ip->i_num.no_formal_ino ==
14197 + ip->i_di.di_num.no_formal_ino, ip,
14198 + gfs_dinode_print(&ip->i_di););
14200 + /* Handle a moved inode */
14202 + if (ip->i_num.no_addr != ip->i_di.di_num.no_addr) {
14203 + /* Not implemented yet */
14204 + GFS_ASSERT_INODE(FALSE, ip,);
14207 + ip->i_vn = ip->i_gl->gl_vn;
14213 + * inode_create - create a struct gfs_inode
14214 + * @i_gl: The glock covering the inode
14215 + * @inum: The inode number
14216 + * @io_gl: the iopen glock, or NULL
14217 + * @io_state: the state the iopen glock should be acquire in
14218 + * @ipp: pointer to put the returned inode in
14220 + * Returns: 0 on success, -EXXX on failure
14224 +inode_create(struct gfs_glock *i_gl, struct gfs_inum *inum,
14225 + struct gfs_glock *io_gl, unsigned int io_state,
14226 + struct gfs_inode **ipp)
14228 + struct gfs_sbd *sdp = i_gl->gl_sbd;
14229 + struct gfs_inode *ip;
14232 + RETRY_MALLOC(ip = kmem_cache_alloc(gfs_inode_cachep, GFP_KERNEL), ip);
14233 + memset(ip, 0, sizeof(struct gfs_inode));
14235 + ip->i_num = *inum;
14237 + atomic_set(&ip->i_count, 1);
14242 + spin_lock_init(&ip->i_lock);
14244 + error = gfs_glock_nq_init(io_gl,
14245 + io_state, GL_LOCAL_EXCL | GL_EXACT,
14246 + &ip->i_iopen_gh);
14250 + ip->i_iopen_gh.gh_owner = NULL;
14252 + spin_lock(&io_gl->gl_spin);
14253 + gfs_glock_hold(i_gl);
14254 + gl2gl(io_gl) = i_gl;
14255 + spin_unlock(&io_gl->gl_spin);
14257 + error = gfs_copyin_dinode(ip);
14261 + gfs_glock_hold(i_gl);
14262 + gl2ip(i_gl) = ip;
14264 + atomic_inc(&sdp->sd_inode_count);
14271 + spin_lock(&io_gl->gl_spin);
14272 + gl2gl(io_gl) = NULL;
14273 + gfs_glock_put(i_gl);
14274 + spin_unlock(&io_gl->gl_spin);
14276 + gfs_glock_dq_uninit(&ip->i_iopen_gh);
14279 + gfs_flush_meta_cache(ip);
14280 + kmem_cache_free(gfs_inode_cachep, ip);
14287 + * gfs_inode_get - Get an inode given its number
14288 + * @i_gl: The glock covering the inode
14289 + * @inum: The inode number
14290 + * @create: Flag to say if we are allowed to create a new struct gfs_inode
14291 + * @ipp: pointer to put the returned inode in
14293 + * Returns: 0 on success, -EXXX on failure
14297 +gfs_inode_get(struct gfs_glock *i_gl, struct gfs_inum *inum, int create,
14298 + struct gfs_inode **ipp)
14300 + struct gfs_glock *io_gl;
14303 + *ipp = gl2ip(i_gl);
14305 + atomic_inc(&(*ipp)->i_count);
14306 + GFS_ASSERT_INODE((*ipp)->i_num.no_formal_ino ==
14307 + inum->no_formal_ino,
14309 + } else if (create) {
14310 + error = gfs_glock_get(i_gl->gl_sbd,
14311 + inum->no_addr, &gfs_iopen_glops,
14314 + error = inode_create(i_gl, inum, io_gl,
14315 + LM_ST_SHARED, ipp);
14316 + gfs_glock_put(io_gl);
14324 + * gfs_inode_hold - hold a struct gfs_inode structure
14325 + * @ip: The GFS inode
14330 +gfs_inode_hold(struct gfs_inode *ip)
14332 + GFS_ASSERT_INODE(atomic_read(&ip->i_count), ip,);
14333 + atomic_inc(&ip->i_count);
14337 + * gfs_inode_put - put a struct gfs_inode structure
14338 + * @ip: The GFS inode
14343 +gfs_inode_put(struct gfs_inode *ip)
14345 + atomic_dec(&ip->i_count);
14346 + GFS_ASSERT_INODE(atomic_read(&ip->i_count) >= 0, ip,);
14350 + * gfs_inode_destroy - Destroy an inode structure with no references on it
14351 + * @ip: The GFS inode
14353 + * This function must be called with a glock held on the inode.
14358 +gfs_inode_destroy(struct gfs_inode *ip)
14360 + struct gfs_sbd *sdp = ip->i_sbd;
14361 + struct gfs_glock *io_gl = ip->i_iopen_gh.gh_gl;
14362 + struct gfs_glock *i_gl = ip->i_gl;
14364 + GFS_ASSERT_INODE(!atomic_read(&ip->i_count), ip,);
14365 + GFS_ASSERT_INODE(gl2gl(io_gl) == i_gl, ip,);
14367 + spin_lock(&io_gl->gl_spin);
14368 + gl2gl(io_gl) = NULL;
14369 + gfs_glock_put(i_gl);
14370 + spin_unlock(&io_gl->gl_spin);
14372 + gfs_glock_dq_uninit(&ip->i_iopen_gh);
14374 + gfs_flush_meta_cache(ip);
14375 + kmem_cache_free(gfs_inode_cachep, ip);
14377 + gl2ip(i_gl) = NULL;
14378 + gfs_glock_put(i_gl);
14380 + atomic_dec(&sdp->sd_inode_count);
14384 + * dinode_mark_unused -
14391 +dinode_mark_unused(struct gfs_inode *ip)
14393 + struct buffer_head *dibh;
14394 + struct gfs_dinode *di;
14400 + error = gfs_get_inode_buffer(ip, &dibh);
14404 + di = (struct gfs_dinode *)dibh->b_data;
14406 + gfs_trans_add_bh(ip->i_gl, dibh);
14408 + incarn = gfs32_to_cpu(di->di_header.mh_incarn) + 1;
14409 + di->di_header.mh_incarn = cpu_to_gfs32(incarn);
14411 + ctime = get_seconds();
14412 + di->di_ctime = cpu_to_gfs64(ctime);
14414 + flags = (gfs32_to_cpu(di->di_flags)) | GFS_DIF_UNUSED;
14415 + di->di_flags = cpu_to_gfs32(flags);
14423 + * dinode_dealloc - Deallocate a dinode
14424 + * @ip: The GFS inode
14426 + * Returns: 0 on success, -EXXX on failure
14430 +dinode_dealloc(struct gfs_inode *ip)
14432 + struct gfs_sbd *sdp = ip->i_sbd;
14433 + struct gfs_rgrpd *rgd;
14434 + struct gfs_holder ri_gh, rgd_gh;
14437 + gfs_alloc_get(ip);
14439 + error = gfs_quota_hold_m(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
14443 + error = gfs_rindex_hold(sdp, &ri_gh);
14447 + rgd = gfs_blk2rgrpd(sdp, ip->i_num.no_addr);
14448 + GFS_ASSERT_INODE(rgd, ip,);
14450 + error = gfs_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, &rgd_gh);
14452 + goto fail_rindex_relse;
14454 + GFS_ASSERT_INODE(ip->i_di.di_blocks == 1, ip,
14455 + gfs_dinode_print(&ip->i_di););
14457 + /* Trans may require:
14458 + One block for the RG header.
14459 + One block for the dinode bit.
14460 + One block for the dinode.
14461 + We also need a block for the unlinked change.
14462 + One block for the quota change. */
14464 + error = gfs_trans_begin(sdp, 3, 2);
14466 + goto fail_rg_gunlock;
14468 + error = dinode_mark_unused(ip);
14470 + goto fail_end_trans;
14472 + gfs_difree(rgd, ip);
14474 + gfs_trans_add_unlinked(sdp, GFS_LOG_DESC_IDA, &ip->i_num);
14475 + clear_bit(GLF_STICKY, &ip->i_gl->gl_flags);
14477 + gfs_trans_end(sdp);
14479 + gfs_glock_dq_uninit(&rgd_gh);
14480 + gfs_glock_dq_uninit(&ri_gh);
14482 + gfs_quota_unhold_m(ip);
14483 + gfs_alloc_put(ip);
14488 + gfs_trans_end(sdp);
14491 + gfs_glock_dq_uninit(&rgd_gh);
14493 + fail_rindex_relse:
14494 + gfs_glock_dq_uninit(&ri_gh);
14497 + gfs_quota_unhold_m(ip);
14500 + gfs_alloc_put(ip);
14506 + * inode_dealloc - Deallocate an inode
14507 + * @sdp: the filesystem
14508 + * @inum: the inode number to deallocate
14509 + * @io_gh: a holder for the iopen glock for this inode
14511 + * Returns: 0 on success, -EXXX on failure
14515 +inode_dealloc(struct gfs_sbd *sdp, struct gfs_inum *inum,
14516 + struct gfs_holder *io_gh)
14518 + struct gfs_inode *ip;
14519 + struct gfs_holder i_gh;
14522 + error = gfs_glock_nq_num(sdp,
14523 + inum->no_formal_ino, &gfs_inode_glops,
14524 + LM_ST_EXCLUSIVE, 0, &i_gh);
14528 + /* We reacquire the iopen lock here to avoid a race with the NFS server
14529 + calling gfs_read_inode() with the inode number of an inode we're in the
14530 + process of deallocating. And we can't keep our hold on the lock
14531 + from try_dealloc_inode() for deadlock reasons. */
14533 + gfs_holder_reinit(LM_ST_EXCLUSIVE, LM_FLAG_TRY, io_gh);
14534 + error = gfs_glock_nq(io_gh);
14538 + case GLR_TRYFAILED:
14542 + GFS_ASSERT_SBD(error < 0, sdp,);
14546 + GFS_ASSERT_GLOCK(!gl2ip(i_gh.gh_gl), i_gh.gh_gl,);
14547 + error = inode_create(i_gh.gh_gl, inum, io_gh->gh_gl, LM_ST_EXCLUSIVE,
14550 + gfs_glock_dq(io_gh);
14555 + GFS_ASSERT_INODE(!ip->i_di.di_nlink, ip,
14556 + gfs_dinode_print(&ip->i_di););
14557 + GFS_ASSERT_INODE(atomic_read(&ip->i_count) == 1, ip,);
14558 + GFS_ASSERT_INODE(!ip->i_vnode, ip,);
14560 + if (ip->i_di.di_type == GFS_FILE_DIR &&
14561 + (ip->i_di.di_flags & GFS_DIF_EXHASH)) {
14562 + error = gfs_dir_exhash_free(ip);
14567 + if (ip->i_di.di_eattr) {
14568 + error = gfs_ea_dealloc(ip);
14573 + error = gfs_shrink(ip, 0, NULL);
14577 + error = dinode_dealloc(ip);
14581 + gfs_inode_put(ip);
14582 + gfs_inode_destroy(ip);
14584 + gfs_glock_dq_uninit(&i_gh);
14589 + gfs_inode_put(ip);
14590 + gfs_inode_destroy(ip);
14593 + gfs_glock_dq_uninit(&i_gh);
14599 + * inode_dealloc_init - Try to deallocate an inode and all its blocks
14600 + * @sdp: the filesystem
14602 + * Returns: 0 on success, -errno on error, 1 on busy
14606 +inode_dealloc_init(struct gfs_sbd *sdp, struct gfs_inum *inum)
14608 + struct gfs_holder io_gh;
14611 + gfs_try_toss_inode(sdp, inum);
14613 + error = gfs_glock_nq_num(sdp,
14614 + inum->no_addr, &gfs_iopen_glops,
14615 + LM_ST_EXCLUSIVE, LM_FLAG_TRY_1CB, &io_gh);
14619 + case GLR_TRYFAILED:
14622 + GFS_ASSERT_SBD(error < 0, sdp,);
14626 + gfs_glock_dq(&io_gh);
14627 + error = inode_dealloc(sdp, inum, &io_gh);
14628 + gfs_holder_uninit(&io_gh);
14634 + * inode_dealloc_uninit - dealloc an uninitialized inode
14635 + * @sdp: the filesystem
14637 + * Returns: 0 on success, -errno on error, 1 on busy
14641 +inode_dealloc_uninit(struct gfs_sbd *sdp, struct gfs_inum *inum)
14643 + struct gfs_rgrpd *rgd;
14644 + struct gfs_holder ri_gh, rgd_gh;
14647 + error = gfs_rindex_hold(sdp, &ri_gh);
14651 + rgd = gfs_blk2rgrpd(sdp, inum->no_addr);
14652 + GFS_ASSERT_SBD(rgd, sdp,);
14654 + error = gfs_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, &rgd_gh);
14658 + /* Trans may require:
14659 + One block for the RG header.
14660 + One block for the dinode bit.
14661 + We also need a block for the unlinked change. */
14663 + error = gfs_trans_begin(sdp, 2, 1);
14665 + goto fail_gunlock;
14667 + gfs_difree_uninit(rgd, inum->no_addr);
14668 + gfs_trans_add_unlinked(sdp, GFS_LOG_DESC_IDA, inum);
14670 + gfs_trans_end(sdp);
14672 + gfs_glock_dq_uninit(&rgd_gh);
14673 + gfs_glock_dq_uninit(&ri_gh);
14678 + gfs_glock_dq_uninit(&rgd_gh);
14681 + gfs_glock_dq_uninit(&ri_gh);
14687 + * gfs_inode_dealloc - Grab an unlinked inode off the list and try to free it.
14688 + * @sdp: the filesystem
14690 + * Returns: 0 on success, -errno on error, 1 on busy
14694 +gfs_inode_dealloc(struct gfs_sbd *sdp, struct gfs_inum *inum)
14696 + if (inum->no_formal_ino)
14697 + return inode_dealloc_init(sdp, inum);
14699 + return inode_dealloc_uninit(sdp, inum);
14703 + * gfs_change_nlink - Change nlink count on inode
14704 + * @ip: The GFS inode
14705 + * @diff: The change in the nlink count required
14707 + * Returns: 0 on success, -EXXXX on failure.
14711 +gfs_change_nlink(struct gfs_inode *ip, int diff)
14713 + struct buffer_head *dibh;
14717 + nlink = ip->i_di.di_nlink + diff;
14720 + GFS_ASSERT_INODE(nlink < ip->i_di.di_nlink, ip,
14721 + gfs_dinode_print(&ip->i_di););
14723 + error = gfs_get_inode_buffer(ip, &dibh);
14727 + ip->i_di.di_nlink = nlink;
14728 + ip->i_di.di_ctime = get_seconds();
14730 + gfs_trans_add_bh(ip->i_gl, dibh);
14731 + gfs_dinode_out(&ip->i_di, dibh->b_data);
14738 + * gfs_lookupi - Look up a filename in a directory and return its inode
14739 + * @d_gh: An initialized holder for the directory glock
14740 + * @name: The name of the inode to look for
14741 + * @is_root: If TRUE, ignore the caller's permissions
14742 + * @i_gh: An uninitialized holder for the new inode glock
14744 + * Returns: 0 on success, -EXXXX on failure
14748 +gfs_lookupi(struct gfs_holder *d_gh, struct qstr *name,
14749 + int is_root, struct gfs_holder *i_gh)
14751 + struct gfs_inode *dip = gl2ip(d_gh->gh_gl);
14752 + struct gfs_sbd *sdp = dip->i_sbd;
14753 + struct gfs_glock *gl;
14754 + struct gfs_inode *ip;
14755 + struct gfs_inum inum, inum2;
14756 + unsigned int type;
14759 + i_gh->gh_gl = NULL;
14761 + if (!name->len || name->len > GFS_FNAMESIZE)
14762 + return -ENAMETOOLONG;
14764 + if (gfs_filecmp(name, ".", 1)) {
14765 + gfs_holder_reinit(LM_ST_SHARED, 0, d_gh);
14766 + error = gfs_glock_nq(d_gh);
14768 + error = gfs_glock_nq_init(dip->i_gl,
14771 + GFS_ASSERT_INODE(!error, ip,);
14772 + gfs_inode_hold(dip);
14778 + if (gfs_glock_is_locked_by_me(d_gh->gh_gl))
14779 + bitch_about(sdp, &sdp->sd_last_readdirplus,
14780 + "readdirplus-type behavior");
14782 + gfs_holder_reinit(LM_ST_SHARED, 0, d_gh);
14783 + error = gfs_glock_nq(d_gh);
14788 + struct inode *dir = gfs_iget(dip, NO_CREATE);
14790 + error = permission(dir, MAY_EXEC, NULL);
14793 + gfs_glock_dq(d_gh);
14799 + error = gfs_dir_search(dip, name, &inum, &type);
14801 + gfs_glock_dq(d_gh);
14802 + if (error == -ENOENT)
14808 + error = gfs_glock_get(sdp, inum.no_formal_ino, &gfs_inode_glops,
14811 + gfs_glock_dq(d_gh);
14815 + /* Acquire the second lock */
14817 + if (gl->gl_name.ln_number < dip->i_gl->gl_name.ln_number) {
14818 + gfs_glock_dq(d_gh);
14820 + error = gfs_glock_nq_init(gl, LM_ST_SHARED,
14821 + LM_FLAG_ANY | GL_LOCAL_EXCL,
14826 + gfs_holder_reinit(LM_ST_SHARED, 0, d_gh);
14827 + error = gfs_glock_nq(d_gh);
14829 + gfs_glock_dq_uninit(i_gh);
14834 + struct inode *dir = gfs_iget(dip, NO_CREATE);
14836 + error = permission(dir, MAY_EXEC, NULL);
14839 + gfs_glock_dq(d_gh);
14840 + gfs_glock_dq_uninit(i_gh);
14846 + error = gfs_dir_search(dip, name, &inum2, &type);
14848 + gfs_glock_dq(d_gh);
14849 + gfs_glock_dq_uninit(i_gh);
14850 + if (error == -ENOENT)
14855 + if (!gfs_inum_equal(&inum, &inum2)) {
14856 + gfs_glock_dq_uninit(i_gh);
14857 + gfs_glock_put(gl);
14862 + error = gfs_glock_nq_init(gl, LM_ST_SHARED,
14863 + LM_FLAG_ANY | GL_LOCAL_EXCL,
14866 + gfs_glock_dq(d_gh);
14871 + error = gfs_inode_get(gl, &inum, CREATE, &ip);
14873 + gfs_glock_dq(d_gh);
14874 + gfs_glock_dq_uninit(i_gh);
14876 + GFS_ASSERT_INODE(ip->i_di.di_type == type, ip,);
14879 + gfs_glock_put(gl);
14894 +create_ok(struct gfs_inode *dip, struct qstr *name, unsigned int type)
14899 + struct inode *dir = gfs_iget(dip, NO_CREATE);
14901 + error = permission(dir, MAY_WRITE | MAY_EXEC, NULL);
14908 + /* Don't create entries in an unlinked directory */
14910 + if (!dip->i_di.di_nlink)
14913 + error = gfs_dir_search(dip, name, NULL, NULL);
14924 + if (dip->i_di.di_entries == (uint32_t)-1)
14926 + if (type == GFS_FILE_DIR && dip->i_di.di_nlink == (uint32_t)-1)
14941 +dinode_alloc(struct gfs_inode *dip, struct gfs_unlinked **ul)
14943 + struct gfs_sbd *sdp = dip->i_sbd;
14944 + struct gfs_alloc *al;
14945 + struct gfs_inum inum;
14948 + al = gfs_alloc_get(dip);
14950 + al->al_requested_di = 1;
14952 + error = gfs_inplace_reserve(dip);
14956 + error = gfs_trans_begin(sdp, al->al_rgd->rd_ri.ri_length, 1);
14958 + goto out_inplace;
14960 + inum.no_formal_ino = 0;
14961 + error = gfs_dialloc(dip, &inum.no_addr);
14963 + goto out_end_trans;
14965 + *ul = gfs_trans_add_unlinked(sdp, GFS_LOG_DESC_IUL, &inum);
14966 + gfs_unlinked_lock(sdp, *ul);
14968 + gfs_trans_add_gl(dip->i_gl);
14971 + gfs_trans_end(sdp);
14974 + gfs_inplace_release(dip);
14977 + gfs_alloc_put(dip);
14983 + * pick_formal_ino - Pick a formal inode number for a given inode
14984 + * @sdp: the filesystem
14985 + * @inum: the inode number structure
14990 +pick_formal_ino(struct gfs_sbd *sdp, struct gfs_inum *inum)
14992 + /* This won't always be true */
14993 + inum->no_formal_ino = inum->no_addr;
14997 + * make_dinode - Fill in a new dinode structure
14998 + * @dip: the directory this inode is being created in
14999 + * @gl: The glock covering the new inode
15000 + * @inum: the inode number
15001 + * @type: the file type
15002 + * @mode: the file permissions
15009 +make_dinode(struct gfs_inode *dip,
15010 + struct gfs_glock *gl, struct gfs_inum *inum,
15011 + unsigned int type, unsigned int mode,
15012 + unsigned int uid, unsigned int gid)
15014 + struct gfs_sbd *sdp = dip->i_sbd;
15015 + struct gfs_dinode di;
15016 + struct buffer_head *dibh;
15017 + struct gfs_rgrpd *rgd;
15020 + error = gfs_dread(sdp, inum->no_addr, gl,
15021 + DIO_NEW | DIO_START | DIO_WAIT,
15026 + gfs_trans_add_bh(gl, dibh);
15027 + gfs_metatype_set(sdp, dibh, GFS_METATYPE_DI, GFS_FORMAT_DI);
15028 + gfs_buffer_clear_tail(dibh, sizeof(struct gfs_dinode));
15030 + memset(&di, 0, sizeof(struct gfs_dinode));
15032 + gfs_meta_header_in(&di.di_header, dibh->b_data);
15034 + di.di_num = *inum;
15036 + di.di_mode = mode & S_IALLUGO;
15040 + di.di_blocks = 1;
15041 + di.di_atime = di.di_mtime = di.di_ctime = get_seconds();
15043 + rgd = gfs_blk2rgrpd(sdp, inum->no_addr);
15044 + GFS_ASSERT_SBD(rgd, sdp,
15045 + printk("block = %"PRIu64"\n", inum->no_addr););
15047 + di.di_rgrp = rgd->rd_ri.ri_addr;
15048 + di.di_goal_rgrp = di.di_rgrp;
15049 + di.di_goal_dblk = di.di_goal_mblk = inum->no_addr - rgd->rd_ri.ri_data1;
15051 + if (type == GFS_FILE_REG) {
15052 + if ((dip->i_di.di_flags & GFS_DIF_INHERIT_JDATA) ||
15053 + sdp->sd_tune.gt_new_files_jdata)
15054 + di.di_flags |= GFS_DIF_JDATA;
15055 + if ((dip->i_di.di_flags & GFS_DIF_INHERIT_DIRECTIO) ||
15056 + sdp->sd_tune.gt_new_files_directio)
15057 + di.di_flags |= GFS_DIF_DIRECTIO;
15058 + } else if (type == GFS_FILE_DIR) {
15059 + di.di_flags |= (dip->i_di.di_flags & GFS_DIF_INHERIT_DIRECTIO);
15060 + di.di_flags |= (dip->i_di.di_flags & GFS_DIF_INHERIT_JDATA);
15063 + di.di_type = type;
15065 + gfs_dinode_out(&di, dibh->b_data);
15072 + * inode_init_and_link -
15084 +inode_init_and_link(struct gfs_inode *dip, struct qstr *name,
15085 + struct gfs_inum *inum, struct gfs_glock *gl,
15086 + unsigned int type, unsigned int mode)
15088 + struct gfs_sbd *sdp = dip->i_sbd;
15089 + struct posix_acl *acl = NULL;
15090 + struct gfs_alloc *al;
15091 + struct gfs_inode *ip;
15092 + unsigned int gid;
15093 + int alloc_required;
15096 + error = gfs_setup_new_acl(dip, type, &mode, &acl);
15100 + if (dip->i_di.di_mode & S_ISGID) {
15101 + if (type == GFS_FILE_DIR)
15103 + gid = dip->i_di.di_gid;
15106 + gid = current->fsgid;
15108 + al = gfs_alloc_get(dip);
15110 + error = gfs_quota_lock_m(dip,
15116 + error = gfs_quota_check(dip, current->fsuid, gid);
15118 + goto fail_gunlock_q;
15121 + alloc_required = TRUE;
15123 + error = gfs_diradd_alloc_required(dip, name, &alloc_required);
15125 + goto fail_gunlock_q;
15128 + if (alloc_required) {
15129 + error = gfs_quota_check(dip, dip->i_di.di_uid, dip->i_di.di_gid);
15131 + goto fail_gunlock_q;
15133 + al->al_requested_meta = sdp->sd_max_dirres + GFS_MAX_EA_ACL_BLKS;
15135 + error = gfs_inplace_reserve(dip);
15137 + goto fail_gunlock_q;
15139 + /* Trans may require:
15140 + blocks for two dinodes, the directory blocks necessary for
15141 + a new entry, RG bitmap blocks for an allocation,
15142 + and one block for a quota change and
15143 + one block for an unlinked tag. */
15145 + error = gfs_trans_begin(sdp,
15146 + 2 + sdp->sd_max_dirres +
15147 + al->al_rgd->rd_ri.ri_length +
15148 + GFS_MAX_EA_ACL_BLKS, 2);
15150 + goto fail_inplace;
15152 + /* Trans may require:
15153 + blocks for two dinodes, a leaf block,
15154 + and one block for a quota change and
15155 + one block for an unlinked tag. */
15157 + error = gfs_trans_begin(sdp, 3, 2);
15159 + goto fail_gunlock_q;
15162 + error = gfs_dir_add(dip, name, inum, type);
15164 + goto fail_end_trans;
15166 + error = make_dinode(dip, gl, inum, type, mode, current->fsuid, gid);
15168 + goto fail_end_trans;
15170 + al->al_ul = gfs_trans_add_unlinked(sdp, GFS_LOG_DESC_IDA,
15171 + &(struct gfs_inum){0, inum->no_addr});
15172 + gfs_trans_add_quota(sdp, +1, current->fsuid, gid);
15174 + /* Gfs_inode_get() can't fail here. But then again, it shouldn't be
15175 + here (it should be in gfs_createi()). Gfs_init_acl() has no
15176 + business needing a memory-resident inode. */
15178 + gfs_inode_get(gl, inum, CREATE, &ip);
15181 + error = gfs_init_acl(dip, ip, type, acl);
15182 + GFS_ASSERT(!error, ); /* Sigh. */
15188 + gfs_trans_end(sdp);
15191 + if (alloc_required)
15192 + gfs_inplace_release(dip);
15195 + gfs_quota_unlock_m(dip);
15198 + gfs_alloc_put(dip);
15200 + posix_acl_release(acl);
15206 + * gfs_createi - Create a new inode
15207 + * @d_gh: An initialized holder for the directory glock
15208 + * @name: The name of the new file
15209 + * @type: The type of dinode (GFS_FILE_REG, GFS_FILE_DIR, GFS_FILE_LNK, ...)
15210 + * @mode: the permissions on the new inode
15211 + * @i_gh: An uninitialized holder for the new inode glock
15213 + * If the return value is 0, the glocks on both the directory and the new
15214 + * file are held. A transaction has been started and an inplace reservation
15215 + * is held, as well.
15217 + * Returns: 0 on success, -EXXXX on failure
15221 +gfs_createi(struct gfs_holder *d_gh, struct qstr *name,
15222 + unsigned int type, unsigned int mode,
15223 + struct gfs_holder *i_gh)
15225 + struct gfs_inode *dip = gl2ip(d_gh->gh_gl);
15226 + struct gfs_sbd *sdp = dip->i_sbd;
15227 + struct gfs_unlinked *ul;
15228 + struct gfs_inum inum;
15229 + struct gfs_holder io_gh;
15232 + if (!name->len || name->len > GFS_FNAMESIZE)
15233 + return -ENAMETOOLONG;
15235 + gfs_holder_reinit(LM_ST_EXCLUSIVE, 0, d_gh);
15236 + error = gfs_glock_nq(d_gh);
15240 + error = create_ok(dip, name, type);
15244 + error = dinode_alloc(dip, &ul);
15248 + inum.no_addr = ul->ul_inum.no_addr;
15249 + pick_formal_ino(sdp, &inum);
15251 + if (inum.no_formal_ino < dip->i_num.no_formal_ino) {
15252 + gfs_glock_dq(d_gh);
15254 + error = gfs_glock_nq_num(sdp,
15255 + inum.no_formal_ino, &gfs_inode_glops,
15256 + LM_ST_EXCLUSIVE, GL_SKIP, i_gh);
15258 + gfs_unlinked_unlock(sdp, ul);
15262 + gfs_holder_reinit(LM_ST_EXCLUSIVE, 0, d_gh);
15263 + error = gfs_glock_nq(d_gh);
15265 + gfs_glock_dq_uninit(i_gh);
15266 + gfs_unlinked_unlock(sdp, ul);
15270 + error = create_ok(dip, name, type);
15272 + goto fail_gunlock_i;
15274 + error = gfs_glock_nq_num(sdp,
15275 + inum.no_formal_ino, &gfs_inode_glops,
15276 + LM_ST_EXCLUSIVE, GL_SKIP, i_gh);
15281 + error = gfs_glock_nq_num(sdp,
15282 + inum.no_addr, &gfs_iopen_glops,
15283 + LM_ST_SHARED, GL_LOCAL_EXCL | GL_EXACT,
15286 + goto fail_gunlock_i;
15288 + error = inode_init_and_link(dip, name, &inum, i_gh->gh_gl, type, mode);
15290 + goto fail_gunlock_io;
15292 + gfs_glock_dq_uninit(&io_gh);
15297 + gfs_glock_dq_uninit(&io_gh);
15300 + gfs_glock_dq_uninit(i_gh);
15303 + gfs_unlinked_unlock(sdp, ul);
15306 + gfs_glock_dq(d_gh);
15312 + * gfs_unlinki - Unlink a file
15313 + * @dip: The inode of the directory
15314 + * @name: The name of the file to be unlinked
15315 + * @ip: The inode of the file to be removed
15317 + * Assumes Glocks on both dip and ip are held.
15319 + * Returns: 0 on success, -EXXXX on failure
15323 +gfs_unlinki(struct gfs_inode *dip, struct qstr *name, struct gfs_inode *ip)
15325 + struct gfs_sbd *sdp = dip->i_sbd;
15328 + error = gfs_dir_del(dip, name);
15332 + error = gfs_change_nlink(ip, -1);
15336 + /* If this inode is being unlinked from the directory structure,
15337 + we need to mark that in the log so that it isn't lost during
15340 + if (!ip->i_di.di_nlink) {
15341 + gfs_trans_add_unlinked(sdp, GFS_LOG_DESC_IUL, &ip->i_num);
15342 + set_bit(GLF_STICKY, &ip->i_gl->gl_flags);
15349 + * gfs_rmdiri - Remove a directory
15350 + * @dip: The parent directory of the directory to be removed
15351 + * @name: The name of the directory to be removed
15352 + * @ip: The GFS inode of the directory to be removed
15354 + * Assumes Glocks on dip and ip are held
15356 + * Returns: 0 on success, -EXXXX on failure
15360 +gfs_rmdiri(struct gfs_inode *dip, struct qstr *name, struct gfs_inode *ip)
15362 + struct gfs_sbd *sdp = dip->i_sbd;
15363 + struct qstr dotname;
15366 + GFS_ASSERT_INODE(ip->i_di.di_entries == 2, ip,
15367 + gfs_dinode_print(&ip->i_di););
15369 + error = gfs_dir_del(dip, name);
15373 + error = gfs_change_nlink(dip, -1);
15378 + dotname.name = ".";
15379 + error = gfs_dir_del(ip, &dotname);
15384 + dotname.name = "..";
15385 + error = gfs_dir_del(ip, &dotname);
15389 + error = gfs_change_nlink(ip, -2);
15393 + /* This inode is being unlinked from the directory structure and
15394 + we need to mark that in the log so that it isn't lost during
15397 + gfs_trans_add_unlinked(sdp, GFS_LOG_DESC_IUL, &ip->i_num);
15398 + set_bit(GLF_STICKY, &ip->i_gl->gl_flags);
15404 + * gfs_revalidate - check to see that an inode is still in a directory
15405 + * @dip: the directory
15406 + * @name: the name of the file
15409 + * Assumes that the lock on (at least) @dip is held.
15411 + * Returns: 0 if the parent/child relationship is correct, -ENOENT if it isn't
15415 +gfs_revalidate(struct gfs_inode *dip, struct qstr *name, struct gfs_inode *ip)
15417 + struct gfs_inum inum;
15418 + unsigned int type;
15421 + error = gfs_dir_search(dip, name, &inum, &type);
15423 + if (inum.no_formal_ino == ip->i_num.no_formal_ino)
15424 + GFS_ASSERT_INODE(ip->i_di.di_type == type, ip,);
15433 + * gfs_ok_to_move - check if it's ok to move a directory to another directory
15434 + * @this: move this
15437 + * Follow @to back to the root and make sure we don't encounter @this
15438 + * Assumes we already hold the rename lock.
15440 + * Returns: 0 if it's ok to move, -EXXX if it isn't
15444 +gfs_ok_to_move(struct gfs_inode *this, struct gfs_inode *to)
15446 + struct gfs_sbd *sdp = this->i_sbd;
15447 + struct gfs_inode *tmp;
15448 + struct gfs_holder to_gh, tmp_gh;
15449 + struct qstr dotdot;
15452 + memset(&dotdot, 0, sizeof (struct qstr));
15453 + dotdot.name = "..";
15456 + gfs_inode_hold(to);
15459 + if (to == this) {
15463 + if (to == sdp->sd_rooti) {
15468 + gfs_holder_init(to->i_gl, 0, 0, &to_gh);
15470 + error = gfs_lookupi(&to_gh, &dotdot, TRUE, &tmp_gh);
15472 + gfs_holder_uninit(&to_gh);
15475 + if (!tmp_gh.gh_gl) {
15476 + gfs_holder_uninit(&to_gh);
15481 + tmp = gl2ip(tmp_gh.gh_gl);
15483 + gfs_glock_dq_uninit(&to_gh);
15484 + gfs_glock_dq_uninit(&tmp_gh);
15486 + gfs_inode_put(to);
15490 + gfs_inode_put(to);
15496 + * gfs_readlinki - return the contents of a symlink
15497 + * @ip: the symlink's inode
15498 + * @buf: a pointer to the buffer to be filled
15499 + * @len: a pointer to the length of @buf
15501 + * If @buf is too small, a piece of memory is gmalloc()ed and needs
15502 + * to be freed by the caller.
15504 + * Returns: 0 on success, -EXXX on failure
15508 +gfs_readlinki(struct gfs_inode *ip, char **buf, unsigned int *len)
15510 + struct gfs_holder i_gh;
15511 + struct buffer_head *dibh;
15515 + gfs_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME, &i_gh);
15516 + error = gfs_glock_nq_atime(&i_gh);
15518 + gfs_holder_uninit(&i_gh);
15522 + GFS_ASSERT_INODE(ip->i_di.di_size, ip,);
15524 + error = gfs_get_inode_buffer(ip, &dibh);
15528 + x = ip->i_di.di_size + 1;
15530 + *buf = gmalloc(x);
15532 + memcpy(*buf, dibh->b_data + sizeof(struct gfs_dinode), x);
15538 + gfs_glock_dq_uninit(&i_gh);
15544 + * gfs_glock_nq_atime - Acquire the glock and conditionally update the atime on an inode
15545 + * @gh: the holder to acquire
15547 + * Tests atime for gfs_read, gfs_readdir and gfs_test_mmap
15548 + * Update if the difference between the current time and the current atime
15549 + * is greater than an interval specified at mount.
15551 + * Returns: 0 on success, -EXXX on error
15555 +gfs_glock_nq_atime(struct gfs_holder *gh)
15557 + struct gfs_glock *gl = gh->gh_gl;
15558 + struct gfs_sbd *sdp = gl->gl_sbd;
15559 + struct gfs_inode *ip;
15560 + int64_t curtime, quantum = sdp->sd_tune.gt_atime_quantum;
15561 + unsigned int state;
15565 + GFS_ASSERT_GLOCK(gh->gh_flags & GL_ATIME, gl,);
15566 + GFS_ASSERT_GLOCK(!(gh->gh_flags & GL_ASYNC), gl,);
15567 + GFS_ASSERT_GLOCK(gl->gl_ops == &gfs_inode_glops, gl,);
15570 + GFS_ASSERT_GLOCK(ip, gl,);
15572 + state = gh->gh_state;
15573 + flags = gh->gh_flags;
15575 + error = gfs_glock_nq(gh);
15579 + if (test_bit(SDF_NOATIME, &sdp->sd_flags) ||
15580 + test_bit(SDF_ROFS, &sdp->sd_flags))
15583 + curtime = get_seconds();
15584 + if (curtime - ip->i_di.di_atime >= quantum) {
15585 + int was_exclusive = (gl->gl_state == LM_ST_EXCLUSIVE);
15587 + gfs_glock_dq(gh);
15588 + gfs_holder_reinit(LM_ST_EXCLUSIVE,
15589 + gh->gh_flags & ~LM_FLAG_ANY,
15591 + error = gfs_glock_nq(gh);
15595 + /* Verify this hasn't been updated while we were
15596 + trying to get exclusive lock. */
15598 + curtime = get_seconds();
15599 + if (curtime - ip->i_di.di_atime >= quantum) {
15600 + struct buffer_head *dibh;
15602 + error = gfs_trans_begin(sdp, 1, 0);
15603 + if (error == -EROFS)
15608 + error = gfs_get_inode_buffer(ip, &dibh);
15610 + goto fail_end_trans;
15612 + ip->i_di.di_atime = curtime;
15614 + gfs_trans_add_bh(ip->i_gl, dibh);
15615 + gfs_dinode_out(&ip->i_di, dibh->b_data);
15618 + gfs_trans_end(sdp);
15621 + if (!was_exclusive) {
15622 + gfs_glock_dq(gh);
15623 + flags &= ~LM_FLAG_ANY;
15624 + flags |= GL_EXACT;
15625 + gfs_holder_reinit(state, flags, gh);
15626 + error = gfs_glock_nq(gh);
15634 + gfs_trans_end(sdp);
15637 + gfs_glock_dq(gh);
15643 + * glock_compare_atime - Compare two struct gfs_glock structures for sorting
15644 + * @arg_a: the first structure
15645 + * @arg_b: the second structure
15650 +glock_compare_atime(const void *arg_a, const void *arg_b)
15652 + struct gfs_holder *gh_a = *(struct gfs_holder **)arg_a;
15653 + struct gfs_holder *gh_b = *(struct gfs_holder **)arg_b;
15654 + struct lm_lockname *a = &gh_a->gh_gl->gl_name;
15655 + struct lm_lockname *b = &gh_b->gh_gl->gl_name;
15658 + if (a->ln_number > b->ln_number)
15660 + else if (a->ln_number < b->ln_number)
15663 + if (gh_a->gh_state == LM_ST_SHARED &&
15664 + gh_b->gh_state == LM_ST_EXCLUSIVE)
15666 + else if (gh_a->gh_state == LM_ST_SHARED &&
15667 + (gh_b->gh_flags & GL_ATIME))
15675 + * gfs_glock_nq_m_atime - acquire multiple glocks where one may need an atime update
15676 + * @num_gh: the number of structures
15677 + * @ghs: an array of struct gfs_holder structures
15679 + * Returns: 0 on success (all glocks acquired), -EXXX on failure (no glocks acquired)
15683 +gfs_glock_nq_m_atime(unsigned int num_gh, struct gfs_holder *ghs)
15685 + struct gfs_holder *p[num_gh];
15689 + GFS_ASSERT(num_gh,);
15691 + if (num_gh == 1) {
15692 + ghs->gh_flags &= ~(LM_FLAG_TRY | GL_ASYNC);
15693 + if (ghs->gh_flags & GL_ATIME)
15694 + error = gfs_glock_nq_atime(ghs);
15696 + error = gfs_glock_nq(ghs);
15700 + for (x = 0; x < num_gh; x++)
15703 + gfs_sort(p, num_gh, sizeof(struct gfs_holder *), glock_compare_atime);
15705 + for (x = 0; x < num_gh; x++) {
15706 + p[x]->gh_flags &= ~(LM_FLAG_TRY | GL_ASYNC);
15708 + if (p[x]->gh_flags & GL_ATIME)
15709 + error = gfs_glock_nq_atime(p[x]);
15711 + error = gfs_glock_nq(p[x]);
15715 + gfs_glock_dq(p[x]);
15724 + * gfs_try_toss_vnode - See if we can toss a vnode from memory
15727 + * Returns: TRUE if the vnode was tossed
15731 +gfs_try_toss_vnode(struct gfs_inode *ip)
15733 + struct inode *inode;
15735 + inode = gfs_iget(ip, NO_CREATE);
15739 + d_prune_aliases(inode);
15741 + if (ip->i_di.di_type == GFS_FILE_DIR) {
15742 + struct list_head *head = &inode->i_dentry;
15743 + struct dentry *d = NULL;
15745 + spin_lock(&dcache_lock);
15746 + if (list_empty(head))
15747 + spin_unlock(&dcache_lock);
15749 + d = list_entry(head->next, struct dentry, d_alias);
15751 + spin_unlock(&dcache_lock);
15753 + if (have_submounts(d))
15756 + shrink_dcache_parent(d);
15758 + d_prune_aliases(inode);
15763 + inode->i_nlink = 0;
15768 + * iah_make_jdata - set the GFS_DIF_JDATA flag in an on-disk dinode
15775 +iah_make_jdata(struct gfs_glock *gl, struct gfs_inum *inum)
15777 + struct buffer_head *bh;
15778 + struct gfs_dinode *di;
15782 + error = gfs_dread(gl->gl_sbd, inum->no_addr, gl, DIO_START | DIO_WAIT, &bh);
15783 + GFS_ASSERT_GLOCK(!error, gl,); /* Already pinned */
15785 + di = (struct gfs_dinode *)bh->b_data;
15787 + flags = di->di_flags;
15788 + flags = gfs32_to_cpu(flags) | GFS_DIF_JDATA;
15789 + di->di_flags = cpu_to_gfs32(flags);
15795 + * iah_super_update - write the in-core superblock back to the on-disk SB block
15802 +iah_super_update(struct gfs_sbd *sdp)
15804 + struct gfs_glock *gl;
15805 + struct buffer_head *bh;
15808 + error = gfs_glock_get(sdp,
15809 + GFS_SB_LOCK, &gfs_meta_glops,
15811 + GFS_ASSERT_SBD(!error && gl, sdp,); /* This should already be held. */
15813 + error = gfs_dread(sdp,
15814 + GFS_SB_ADDR >> sdp->sd_fsb2bb_shift, gl,
15815 + DIO_START | DIO_WAIT, &bh);
15817 + gfs_trans_add_bh(gl, bh);
15818 + gfs_sb_out(&sdp->sd_sb, bh->b_data);
15822 + gfs_glock_put(gl);
15828 + * inode_alloc_hidden - allocate a hidden (system) jdata inode, e.g. quota/license
15836 +inode_alloc_hidden(struct gfs_sbd *sdp, struct gfs_inum *inum)
15838 + struct gfs_inode *dip = sdp->sd_rooti;
15839 + struct gfs_holder d_gh, i_gh;
15840 + struct gfs_unlinked *ul;
15843 + error = gfs_glock_nq_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, &d_gh);
15847 + error = dinode_alloc(dip, &ul);
15851 + inum->no_addr = ul->ul_inum.no_addr;
15852 + pick_formal_ino(sdp, inum);
15854 + /* Don't worry about deadlock ordering here. We're the first
15855 + mounter and still under the mount lock (i.e. there is no
15858 + error = gfs_glock_nq_num(sdp,
15859 + inum->no_formal_ino, &gfs_inode_glops,
15860 + LM_ST_EXCLUSIVE, GL_SKIP, &i_gh);
15864 + gfs_alloc_get(dip);
15866 + error = gfs_quota_hold_m(dip, 0, 0);
15870 + /* Trans may require:
15871 + The new inode, the superblock,
15872 + and one block for a quota change and
15873 + one block for an unlinked tag. */
15875 + error = gfs_trans_begin(sdp, 2, 2);
15877 + goto fail_unhold;
15879 + error = make_dinode(dip, i_gh.gh_gl, inum, GFS_FILE_REG, 0600, 0, 0);
15881 + goto fail_end_trans;
15883 + iah_make_jdata(i_gh.gh_gl, inum);
15885 + error = iah_super_update(sdp);
15887 + goto fail_end_trans;
15889 + gfs_trans_add_unlinked(sdp, GFS_LOG_DESC_IDA,
15890 + &(struct gfs_inum){0, inum->no_addr});
15891 + gfs_trans_add_quota(sdp, +1, 0, 0);
15892 + gfs_trans_add_gl(dip->i_gl);
15894 + gfs_trans_end(sdp);
15895 + gfs_quota_unhold_m(dip);
15896 + gfs_alloc_put(dip);
15898 + gfs_glock_dq_uninit(&i_gh);
15899 + gfs_glock_dq_uninit(&d_gh);
15901 + gfs_unlinked_unlock(sdp, ul);
15903 + gfs_log_flush(sdp);
15908 + gfs_trans_end(sdp);
15911 + gfs_quota_unhold_m(dip);
15914 + gfs_alloc_put(dip);
15915 + gfs_glock_dq_uninit(&i_gh);
15918 + gfs_unlinked_unlock(sdp, ul);
15921 + gfs_glock_dq_uninit(&d_gh);
15927 + * gfs_alloc_qinode - allocate a quota inode
15928 + * @sdp: The GFS superblock
15930 + * Returns: 0 on success, error code otherwise
15934 +gfs_alloc_qinode(struct gfs_sbd *sdp)
15936 + return inode_alloc_hidden(sdp, &sdp->sd_sb.sb_quota_di);
15940 + * gfs_alloc_linode - allocate a license inode
15941 + * @sdp: The GFS superblock
15943 + * Returns: 0 on success, error code otherwise
15947 +gfs_alloc_linode(struct gfs_sbd *sdp)
15949 + return inode_alloc_hidden(sdp, &sdp->sd_sb.sb_license_di);
15951 diff -urN linux-orig/fs/gfs/inode.h linux-patched/fs/gfs/inode.h
15952 --- linux-orig/fs/gfs/inode.h 1969-12-31 18:00:00.000000000 -0600
15953 +++ linux-patched/fs/gfs/inode.h 2004-06-30 13:27:49.344710898 -0500
15955 +/******************************************************************************
15956 +*******************************************************************************
15958 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
15959 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
15961 +** This copyrighted material is made available to anyone wishing to use,
15962 +** modify, copy, or redistribute it subject to the terms and conditions
15963 +** of the GNU General Public License v.2.
15965 +*******************************************************************************
15966 +******************************************************************************/
15968 +#ifndef __INODE_DOT_H__
15969 +#define __INODE_DOT_H__
15971 +void gfs_inode_attr_in(struct gfs_inode *ip);
15972 +void gfs_inode_attr_out(struct gfs_inode *ip);
15973 +struct inode *gfs_iget(struct gfs_inode *ip, int create);
15975 +int gfs_copyin_dinode(struct gfs_inode *ip);
15977 +int gfs_inode_get(struct gfs_glock *i_gl, struct gfs_inum *inum, int create,
15978 + struct gfs_inode **ipp);
15979 +void gfs_inode_hold(struct gfs_inode *ip);
15980 +void gfs_inode_put(struct gfs_inode *ip);
15981 +void gfs_inode_destroy(struct gfs_inode *ip);
15983 +int gfs_inode_dealloc(struct gfs_sbd *sdp, struct gfs_inum *inum);
15985 +int gfs_change_nlink(struct gfs_inode *ip, int diff);
15986 +int gfs_lookupi(struct gfs_holder *d_gh, struct qstr *name,
15987 + int is_root, struct gfs_holder *i_gh);
15988 +int gfs_createi(struct gfs_holder *d_gh, struct qstr *name,
15989 + unsigned int type, unsigned int mode,
15990 + struct gfs_holder *i_gh);
15991 +int gfs_unlinki(struct gfs_inode *dip, struct qstr *name, struct gfs_inode *ip);
15992 +int gfs_rmdiri(struct gfs_inode *dip, struct qstr *name, struct gfs_inode *ip);
15993 +int gfs_revalidate(struct gfs_inode *dip, struct qstr *name,
15994 + struct gfs_inode *ip);
15995 +int gfs_ok_to_move(struct gfs_inode *this, struct gfs_inode *to);
15996 +int gfs_readlinki(struct gfs_inode *ip, char **buf, unsigned int *len);
15998 +int gfs_glock_nq_atime(struct gfs_holder *gh);
15999 +int gfs_glock_nq_m_atime(unsigned int num_gh, struct gfs_holder *ghs);
16001 +void gfs_try_toss_vnode(struct gfs_inode *ip);
16003 +/* Backwards compatibility functions */
16005 +int gfs_alloc_qinode(struct gfs_sbd *sdp);
16006 +int gfs_alloc_linode(struct gfs_sbd *sdp);
16010 +static __inline__ int
16011 +gfs_is_stuffed(struct gfs_inode *ip)
16013 + return !ip->i_di.di_height;
16016 +static __inline__ int
16017 +gfs_is_jdata(struct gfs_inode *ip)
16019 + return ip->i_di.di_flags & GFS_DIF_JDATA;
16022 +#endif /* __INODE_DOT_H__ */
16023 diff -urN linux-orig/fs/gfs/ioctl.c linux-patched/fs/gfs/ioctl.c
16024 --- linux-orig/fs/gfs/ioctl.c 1969-12-31 18:00:00.000000000 -0600
16025 +++ linux-patched/fs/gfs/ioctl.c 2004-06-30 13:27:49.345710666 -0500
16027 +/******************************************************************************
16028 +*******************************************************************************
16030 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
16031 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
16033 +** This copyrighted material is made available to anyone wishing to use,
16034 +** modify, copy, or redistribute it subject to the terms and conditions
16035 +** of the GNU General Public License v.2.
16037 +*******************************************************************************
16038 +******************************************************************************/
16040 +#include <linux/sched.h>
16041 +#include <linux/slab.h>
16042 +#include <linux/smp_lock.h>
16043 +#include <linux/spinlock.h>
16044 +#include <asm/semaphore.h>
16045 +#include <linux/completion.h>
16046 +#include <linux/buffer_head.h>
16047 +#include <asm/uaccess.h>
16053 +#include "eattr.h"
16055 +#include "glock.h"
16056 +#include "glops.h"
16057 +#include "inode.h"
16058 +#include "ioctl.h"
16059 +#include "quota.h"
16061 +#include "super.h"
16062 +#include "trans.h"
16065 + * gfs_add_bh_to_ub - copy a buffer up to user space
16066 + * @ub: the structure representing where to copy
16067 + * @bh: the buffer
16069 + * Returns: 0 on success, -EXXX on failure
16073 +gfs_add_bh_to_ub(struct gfs_user_buffer *ub, struct buffer_head *bh)
16075 + uint64_t blkno = bh->b_blocknr;
16077 + if (ub->ub_count + sizeof(uint64_t) + bh->b_size > ub->ub_size)
16080 + if (copy_to_user(ub->ub_data + ub->ub_count,
16082 + sizeof(uint64_t)))
16084 + ub->ub_count += sizeof(uint64_t);
16086 + if (copy_to_user(ub->ub_data + ub->ub_count,
16090 + ub->ub_count += bh->b_size;
16096 + * get_meta - Read out all the metadata for a file
16099 + * Returns: 0 on success, -EXXX on failure
16103 +get_meta(struct gfs_inode *ip, void *arg)
16105 + struct gfs_holder i_gh;
16106 + struct gfs_user_buffer ub;
16109 + if (copy_from_user(&ub, arg, sizeof(struct gfs_user_buffer)))
16113 + error = gfs_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh);
16117 + error = gfs_get_file_meta(ip, &ub);
16121 + if (ip->i_di.di_type == GFS_FILE_DIR &&
16122 + (ip->i_di.di_flags & GFS_DIF_EXHASH)) {
16123 + error = gfs_get_dir_meta(ip, &ub);
16128 + if (ip->i_di.di_eattr) {
16129 + error = gfs_get_eattr_meta(ip, &ub);
16134 + if (copy_to_user(arg, &ub, sizeof(struct gfs_user_buffer)))
16138 + gfs_glock_dq_uninit(&i_gh);
16144 + * file_stat - return the struct gfs_dinode of a file to user space
16146 + * @arg: where to copy to
16148 + * Returns: 0 on success, -EXXX on failure
16152 +file_stat(struct gfs_inode *ip, void *arg)
16154 + struct gfs_holder i_gh;
16155 + struct gfs_dinode di;
16158 + error = gfs_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh);
16162 + memcpy(&di, &ip->i_di, sizeof(struct gfs_dinode));
16164 + gfs_glock_dq_uninit(&i_gh);
16166 + if (copy_to_user(arg, &di, sizeof(struct gfs_dinode)))
16173 + * do_get_super - Dump the superblock into a buffer
16174 + * @sb: The superblock
16175 + * @ptr: The buffer pointer
16177 + * Returns: 0 or error code
16181 +do_get_super(struct gfs_sbd *sdp, void *arg)
16183 + struct gfs_sb *sb;
16184 + struct gfs_holder sb_gh;
16185 + struct buffer_head *bh;
16188 + sb = gmalloc(sizeof(struct gfs_sb));
16190 + error = gfs_glock_nq_num(sdp,
16191 + GFS_SB_LOCK, &gfs_meta_glops,
16192 + LM_ST_SHARED, 0, &sb_gh);
16196 + error = gfs_dread(sdp, GFS_SB_ADDR >> sdp->sd_fsb2bb_shift, sb_gh.gh_gl,
16197 + DIO_START | DIO_WAIT, &bh);
16199 + gfs_glock_dq_uninit(&sb_gh);
16203 + gfs_sb_in(sb, bh->b_data);
16206 + gfs_glock_dq_uninit(&sb_gh);
16208 + if (copy_to_user(arg, sb, sizeof(struct gfs_sb)))
16218 + * jt2ip - convert the file type in a jio struct to the right hidden ip
16219 + * @sdp: the filesystem
16220 + * @jt: the gfs_jio_structure
16222 + * Returns: The inode structure for the correct hidden file
16225 +static struct gfs_inode *
16226 +jt2ip(struct gfs_sbd *sdp, struct gfs_jio *jt)
16228 + struct gfs_inode *ip = NULL;
16230 + switch (jt->jio_file) {
16231 + case GFS_HIDDEN_JINDEX:
16232 + ip = sdp->sd_jiinode;
16235 + case GFS_HIDDEN_RINDEX:
16236 + ip = sdp->sd_riinode;
16239 + case GFS_HIDDEN_QUOTA:
16240 + ip = sdp->sd_qinode;
16243 + case GFS_HIDDEN_LICENSE:
16244 + ip = sdp->sd_linode;
16252 + * jread_ioctl - Read from a journaled data file via ioctl
16253 + * @sdp: the filesystem
16254 + * @arg: The argument from ioctl
16256 + * Returns: Amount of data copied or error
16260 +jread_ioctl(struct gfs_sbd *sdp, void *arg)
16262 + struct gfs_jio jt;
16263 + struct gfs_inode *ip;
16264 + struct gfs_holder i_gh;
16267 + if (copy_from_user(&jt, arg, sizeof(struct gfs_jio)))
16270 + ip = jt2ip(sdp, &jt);
16274 + GFS_ASSERT_INODE(gfs_is_jdata(ip), ip,);
16276 + if (!access_ok(VERIFY_WRITE, jt.jio_data, jt.jio_size))
16279 + error = gfs_glock_nq_init(ip->i_gl, LM_ST_SHARED, 0, &i_gh);
16283 + error = gfs_readi(ip, jt.jio_data, jt.jio_offset, jt.jio_size,
16286 + gfs_glock_dq_uninit(&i_gh);
16290 + jt.jio_count = error;
16292 + if (copy_to_user(arg, &jt, sizeof(struct gfs_jio)))
16299 + * jwrite_ioctl - Write to a journaled file via ioctl
16300 + * @sdp: the filesystem
16301 + * @arg: The argument from ioctl
16303 + * Returns: Amount of data copied or error
16307 +jwrite_ioctl(struct gfs_sbd *sdp, void *arg)
16309 + struct gfs_jio jt;
16310 + struct gfs_inode *ip;
16311 + struct gfs_alloc *al = NULL;
16312 + struct gfs_holder i_gh;
16313 + unsigned int data_blocks, ind_blocks;
16314 + int alloc_required;
16317 + if (copy_from_user(&jt, arg, sizeof(struct gfs_jio)))
16320 + ip = jt2ip(sdp, &jt);
16324 + GFS_ASSERT_INODE(gfs_is_jdata(ip), ip,);
16326 + if (!access_ok(VERIFY_READ, jt.jio_data, jt.jio_size))
16329 + gfs_write_calc_reserv(ip, jt.jio_size, &data_blocks, &ind_blocks);
16331 + error = gfs_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE,
16332 + LM_FLAG_PRIORITY | GL_SYNC, &i_gh);
16336 + error = gfs_write_alloc_required(ip, jt.jio_offset, jt.jio_size,
16337 + &alloc_required);
16341 + if (alloc_required) {
16342 + al = gfs_alloc_get(ip);
16344 + error = gfs_quota_hold_m(ip, NO_QUOTA_CHANGE,
16345 + NO_QUOTA_CHANGE);
16349 + al->al_requested_meta = ind_blocks + data_blocks;
16351 + error = gfs_inplace_reserve(ip);
16355 + /* Trans may require:
16356 + All blocks for a RG bitmap, all the "data" blocks, whatever
16357 + indirect blocks we need, a modified dinode, and a quota change */
16359 + error = gfs_trans_begin(sdp,
16360 + 1 + al->al_rgd->rd_ri.ri_length +
16361 + ind_blocks + data_blocks, 1);
16365 + /* Trans may require:
16366 + All the "data" blocks and a modified dinode. */
16368 + error = gfs_trans_begin(sdp, 1 + data_blocks, 0);
16373 + error = gfs_writei(ip, jt.jio_data, jt.jio_offset, jt.jio_size,
16374 + gfs_copy_from_user);
16375 + if (error >= 0) {
16376 + jt.jio_count = error;
16380 + gfs_trans_end(sdp);
16383 + if (alloc_required) {
16384 + GFS_ASSERT_INODE(error || al->al_alloced_meta, ip,);
16385 + gfs_inplace_release(ip);
16389 + if (alloc_required)
16390 + gfs_quota_unhold_m(ip);
16393 + if (alloc_required)
16394 + gfs_alloc_put(ip);
16397 + ip->i_gl->gl_vn++;
16398 + gfs_glock_dq_uninit(&i_gh);
16400 + if (!error && copy_to_user(arg, &jt, sizeof(struct gfs_jio)))
16407 + * jstat_ioctl - Stat a journaled file via ioctl
16408 + * @sdp: the filesystem
16409 + * @arg: The argument from ioctl
16411 + * Returns: 0 on success, -EXXX on failure
16415 +jstat_ioctl(struct gfs_sbd *sdp, void *arg)
16417 + struct gfs_jio jt;
16418 + struct gfs_inode *ip;
16419 + struct gfs_holder i_gh;
16422 + if (copy_from_user(&jt, arg, sizeof(struct gfs_jio)))
16425 + ip = jt2ip(sdp, &jt);
16429 + if (jt.jio_size < sizeof(struct gfs_dinode))
16432 + error = gfs_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh);
16436 + error = copy_to_user(jt.jio_data, &ip->i_di, sizeof(struct gfs_dinode));
16438 + gfs_glock_dq_uninit(&i_gh);
16447 + * jtrunc_ioctl - Truncate a journaled file via ioctl
16448 + * @sdp: the filesystem
16449 + * @arg: The argument from ioctl
16451 + * Returns: 0 on success, -EXXX on failure
16455 +jtrunc_ioctl(struct gfs_sbd *sdp, void *arg)
16457 + struct gfs_jio jt;
16458 + struct gfs_inode *ip;
16459 + struct gfs_holder i_gh;
16462 + if (copy_from_user(&jt, arg, sizeof(struct gfs_jio)))
16465 + ip = jt2ip(sdp, &jt);
16469 + error = gfs_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_SYNC, &i_gh);
16473 + error = gfs_truncatei(ip, jt.jio_offset, NULL);
16475 + ip->i_gl->gl_vn++;
16476 + gfs_glock_dq_uninit(&i_gh);
16482 + * lock_dump - copy out info about the GFS' lock space
16483 + * @sdp: the filesystem
16484 + * @arg: a pointer to a struct gfs_user_buffer in user space
16486 + * Returns: 0 on success, -EXXX on failure
16490 +lock_dump(struct gfs_sbd *sdp, void *arg)
16492 + struct gfs_user_buffer ub;
16495 + if (copy_from_user(&ub, arg, sizeof(struct gfs_user_buffer)))
16499 + error = gfs_dump_lockstate(sdp, &ub);
16503 + if (copy_to_user(arg, &ub, sizeof(struct gfs_user_buffer)))
16510 + * stat_gfs_ioctl - Do a GFS specific statfs
16511 + * @sdp: the filesystem
16512 + * @arg: the struct gfs_usage structure
16514 + * Returns: 0 on success, -EXXX on failure
16518 +stat_gfs_ioctl(struct gfs_sbd *sdp, void *arg)
16520 + struct gfs_usage *u;
16523 + u = gmalloc(sizeof(struct gfs_usage));
16525 + error = gfs_stat_gfs(sdp, u, TRUE);
16526 + if (!error && copy_to_user(arg, u, sizeof(struct gfs_usage)))
16535 + * reclaim_ioctl - ioctl called to perform metadata reclamation
16536 + * @sdp: the filesystem
16537 + * @arg: a pointer to a struct gfs_reclaim_stats in user space
16539 + * Returns: 0 on success, -EXXX on failure
16543 +reclaim_ioctl(struct gfs_sbd *sdp, void *arg)
16545 + struct gfs_reclaim_stats stats;
16548 + memset(&stats, 0, sizeof(struct gfs_reclaim_stats));
16550 + error = gfs_reclaim_metadata(sdp, &stats);
16554 + if (copy_to_user(arg, &stats, sizeof(struct gfs_reclaim_stats)))
16561 + * get_tune - pass the current tuneable parameters up to user space
16562 + * @sdp: the filesystem
16563 + * @arg: a pointer to a struct gfs_tune in user space
16565 + * Returns: 0 on success, -EXXX on failure
16569 +get_tune(struct gfs_sbd *sdp, void *arg)
16571 + if (copy_to_user(arg, &sdp->sd_tune, sizeof(struct gfs_tune)))
16578 + * set_tune - replace the current tuneable parameters with a set from user space
16579 + * @sdp: the filesystem
16580 + * @arg: a pointer to a struct gfs_tune in user space
16582 + * Returns: 0 on success, -EXXX on failure
16586 +set_tune(struct gfs_sbd *sdp, void *arg)
16588 + struct gfs_tune *gt;
16591 + gt = gmalloc(sizeof(struct gfs_tune));
16593 + if (copy_from_user(gt, arg, sizeof(struct gfs_tune)))
16596 + if (gt->gt_tune_version != GFS_TUNE_VERSION) {
16597 + printk("GFS: fsid=%s: invalid version of tuneable parameters\n",
16601 + memcpy(&sdp->sd_tune, gt, sizeof(struct gfs_tune));
16610 + * gfs_set_flag - set/clear a flag on an inode
16612 + * @cmd: GFS_SET_FLAG or GFS_CLEAR_FLAG
16613 + * @arg: the flag to change (in user space)
16615 + * Returns: 0 on success, -EXXX on failure
16619 +gfs_set_flag(struct gfs_inode *ip, unsigned int cmd, void *arg)
16621 + struct gfs_sbd *sdp = ip->i_sbd;
16622 + struct gfs_holder i_gh;
16623 + struct buffer_head *dibh;
16627 + if (copy_from_user(&flag, arg, sizeof(uint32_t)))
16630 + error = gfs_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &i_gh);
16635 + if (ip->i_di.di_uid != current->fsuid && !capable(CAP_FOWNER))
16641 + case GFS_DIF_EXHASH:
16642 + case GFS_DIF_UNUSED:
16643 + case GFS_DIF_EA_INDIRECT:
16646 + case GFS_DIF_JDATA:
16647 + if (ip->i_di.di_type != GFS_FILE_REG || ip->i_di.di_size)
16651 + case GFS_DIF_DIRECTIO:
16652 + if (ip->i_di.di_type != GFS_FILE_REG)
16656 + case GFS_DIF_IMMUTABLE:
16657 + case GFS_DIF_APPENDONLY:
16658 + case GFS_DIF_NOATIME:
16659 + case GFS_DIF_SYNC:
16664 + case GFS_DIF_INHERIT_DIRECTIO:
16665 + case GFS_DIF_INHERIT_JDATA:
16666 + if (ip->i_di.di_type != GFS_FILE_DIR)
16674 + error = gfs_trans_begin(sdp, 1, 0);
16678 + error = gfs_get_inode_buffer(ip, &dibh);
16680 + goto out_trans_end;
16682 + if (cmd == GFS_SET_FLAG)
16683 + ip->i_di.di_flags |= flag;
16685 + ip->i_di.di_flags &= ~flag;
16687 + gfs_trans_add_bh(ip->i_gl, dibh);
16688 + gfs_dinode_out(&ip->i_di, dibh->b_data);
16693 + gfs_trans_end(sdp);
16696 + gfs_glock_dq_uninit(&i_gh);
16702 + * handle_roll - Read an atomic_t as an unsigned int
16705 + * if @a is negative, reset it to zero
16707 + * Returns: the value of the counter
16710 +static unsigned int
16711 +handle_roll(atomic_t *a)
16713 + int x = atomic_read(a);
16715 + atomic_set(a, 0);
16718 + return (unsigned int)x;
16722 + * fill_counters - Write a FS' counters into a buffer
16723 + * @sdp: the filesystem
16724 + * @buf: the buffer
16725 + * @size: the size of the buffer
16726 + * @count: where we are in the buffer
16732 +fill_counters(struct gfs_sbd *sdp,
16733 + char *buf, unsigned int size, unsigned int *count)
16737 + gfs_sprintf("sd_glock_count:locks::%d\n",
16738 + atomic_read(&sdp->sd_glock_count));
16739 + gfs_sprintf("sd_glock_held_count:locks held::%d\n",
16740 + atomic_read(&sdp->sd_glock_held_count));
16741 + gfs_sprintf("sd_inode_count:incore inodes::%d\n",
16742 + atomic_read(&sdp->sd_inode_count));
16743 + gfs_sprintf("sd_bufdata_count:metadata buffers::%d\n",
16744 + atomic_read(&sdp->sd_bufdata_count));
16745 + gfs_sprintf("sd_unlinked_ic_count:unlinked inodes::%d\n",
16746 + atomic_read(&sdp->sd_unlinked_ic_count));
16747 + gfs_sprintf("sd_quota_count:quota IDs::%d\n",
16748 + atomic_read(&sdp->sd_quota_count));
16749 + gfs_sprintf("sd_log_buffers:incore log buffers::%u\n",
16750 + sdp->sd_log_buffers);
16751 + gfs_sprintf("sd_log_seg_free:log segments free::%u\n",
16752 + sdp->sd_log_seg_free);
16753 + gfs_sprintf("ji_nsegment:log segments total::%u\n",
16754 + sdp->sd_jdesc.ji_nsegment);
16755 + gfs_sprintf("sd_mhc_count:meta header cache entries::%d\n",
16756 + atomic_read(&sdp->sd_mhc_count));
16757 + gfs_sprintf("sd_depend_count:glock dependencies::%d\n",
16758 + atomic_read(&sdp->sd_depend_count));
16759 + gfs_sprintf("sd_reclaim_count:glocks on reclaim list::%d\n",
16760 + atomic_read(&sdp->sd_reclaim_count));
16761 + gfs_sprintf("sd_log_wrap:log wraps::%"PRIu64"\n",
16762 + sdp->sd_log_wrap);
16763 + gfs_sprintf("sd_fh2dentry_misses:fh2dentry misses:diff:%u\n",
16764 + handle_roll(&sdp->sd_fh2dentry_misses));
16765 + gfs_sprintf("sd_reclaimed:glocks reclaimed:diff:%u\n",
16766 + handle_roll(&sdp->sd_reclaimed));
16767 + gfs_sprintf("sd_glock_nq_calls:glock nq calls:diff:%u\n",
16768 + handle_roll(&sdp->sd_glock_nq_calls));
16769 + gfs_sprintf("sd_glock_dq_calls:glock dq calls:diff:%u\n",
16770 + handle_roll(&sdp->sd_glock_dq_calls));
16771 + gfs_sprintf("sd_glock_prefetch_calls:glock prefetch calls:diff:%u\n",
16772 + handle_roll(&sdp->sd_glock_prefetch_calls));
16773 + gfs_sprintf("sd_lm_lock_calls:lm_lock calls:diff:%u\n",
16774 + handle_roll(&sdp->sd_lm_lock_calls));
16775 + gfs_sprintf("sd_lm_unlock_calls:lm_unlock calls:diff:%u\n",
16776 + handle_roll(&sdp->sd_lm_unlock_calls));
16777 + gfs_sprintf("sd_lm_callbacks:lm callbacks:diff:%u\n",
16778 + handle_roll(&sdp->sd_lm_callbacks));
16779 + gfs_sprintf("sd_ops_address:address operations:diff:%u\n",
16780 + handle_roll(&sdp->sd_ops_address));
16781 + gfs_sprintf("sd_ops_dentry:dentry operations:diff:%u\n",
16782 + handle_roll(&sdp->sd_ops_dentry));
16783 + gfs_sprintf("sd_ops_export:export operations:diff:%u\n",
16784 + handle_roll(&sdp->sd_ops_export));
16785 + gfs_sprintf("sd_ops_file:file operations:diff:%u\n",
16786 + handle_roll(&sdp->sd_ops_file));
16787 + gfs_sprintf("sd_ops_inode:inode operations:diff:%u\n",
16788 + handle_roll(&sdp->sd_ops_inode));
16789 + gfs_sprintf("sd_ops_super:super operations:diff:%u\n",
16790 + handle_roll(&sdp->sd_ops_super));
16791 + gfs_sprintf("sd_ops_vm:vm operations:diff:%u\n",
16792 + handle_roll(&sdp->sd_ops_vm));
16799 + * get_counters - return usage counters to user space
16800 + * @sdp: the filesystem
16801 + * @arg: the counter structure to fill
16803 + * Returns: 0 on success, -EXXX on failure
16807 +get_counters(struct gfs_sbd *sdp, void *arg)
16809 + struct gfs_user_buffer ub;
16810 + unsigned int size = sdp->sd_tune.gt_lockdump_size;
16814 + if (copy_from_user(&ub, arg, sizeof(struct gfs_user_buffer)))
16818 + if (size > ub.ub_size)
16819 + size = ub.ub_size;
16821 + buf = kmalloc(size, GFP_KERNEL);
16825 + error = fill_counters(sdp, buf, size, &ub.ub_count);
16827 + if (copy_to_user(ub.ub_data, buf, ub.ub_count) ||
16828 + copy_to_user(arg, &ub, sizeof(struct gfs_user_buffer)))
16838 + * gfs_ioctli - filesystem independent ioctl function
16839 + * @ip: the inode the ioctl was on
16840 + * @cmd: the ioctl number
16841 + * @arg: the argument (still in user space)
16843 + * Returns: 0 on success, -EXXX on failure
16847 +gfs_ioctli(struct gfs_inode *ip, unsigned int cmd, void *arg)
16849 + struct gfs_sbd *sdp = ip->i_sbd;
16853 + case GFS_GET_META:
16854 + error = get_meta(ip, arg);
16857 + case GFS_FILE_STAT:
16858 + error = file_stat(ip, arg);
16862 + if (capable(CAP_SYS_ADMIN))
16863 + gfs_gl_hash_clear(sdp, FALSE);
16868 + case GFS_GET_ARGS:
16869 + if (copy_to_user(arg, &sdp->sd_args,
16870 + sizeof(struct gfs_args)))
16874 + case GFS_GET_LOCKSTRUCT:
16875 + if (copy_to_user(arg, &sdp->sd_lockstruct,
16876 + sizeof(struct lm_lockstruct)))
16880 + case GFS_GET_SUPER:
16881 + error = do_get_super(sdp, arg);
16885 + if (capable(CAP_SYS_ADMIN))
16886 + error = jread_ioctl(sdp, arg);
16892 + if (capable(CAP_SYS_ADMIN))
16893 + error = jwrite_ioctl(sdp, arg);
16899 + error = jstat_ioctl(sdp, arg);
16903 + if (capable(CAP_SYS_ADMIN))
16904 + error = jtrunc_ioctl(sdp, arg);
16909 + case GFS_LOCK_DUMP:
16910 + if (capable(CAP_SYS_ADMIN))
16911 + error = lock_dump(sdp, arg);
16916 + case GFS_STATGFS:
16917 + error = stat_gfs_ioctl(sdp, arg);
16921 + if (capable(CAP_SYS_ADMIN))
16922 + error = gfs_freeze_fs(sdp);
16927 + case GFS_UNFREEZE:
16928 + if (capable(CAP_SYS_ADMIN))
16929 + gfs_unfreeze_fs(sdp);
16934 + case GFS_RECLAIM_METADATA:
16935 + if (capable(CAP_SYS_ADMIN))
16936 + error = reclaim_ioctl(sdp, arg);
16941 + case GFS_QUOTA_SYNC:
16942 + if (capable(CAP_SYS_ADMIN))
16943 + error = gfs_quota_sync(sdp);
16948 + case GFS_QUOTA_REFRESH:
16949 + if (capable(CAP_SYS_ADMIN))
16950 + error = gfs_quota_refresh(sdp, arg);
16955 + case GFS_QUOTA_READ:
16956 + /* Permissions handled later */
16957 + error = gfs_quota_read(sdp, arg);
16960 + case GFS_GET_TUNE:
16961 + error = get_tune(sdp, arg);
16964 + case GFS_SET_TUNE:
16965 + if (capable(CAP_SYS_ADMIN))
16966 + error = set_tune(sdp, arg);
16971 + case GFS_EATTR_GET:
16972 + /* Permissions handled later */
16973 + error = gfs_get_eattr_ioctl(sdp, ip, arg);
16976 + case GFS_EATTR_SET:
16977 + /* Permissions handled later */
16978 + error = gfs_set_eattr_ioctl(sdp, ip, arg);
16981 + case GFS_WHERE_ARE_YOU:
16983 + unsigned int x = GFS_MAGIC;
16984 + if (copy_to_user(arg, &x, sizeof(unsigned int)))
16989 + case GFS_SET_FLAG:
16990 + case GFS_CLEAR_FLAG:
16991 + /* Permissions handled later */
16992 + error = gfs_set_flag(ip, cmd, arg);
16995 + case GFS_GET_COUNTERS:
16996 + error = get_counters(sdp, arg);
16999 + case GFS_FILE_FLUSH:
17000 + gfs_glock_force_drop(ip->i_gl);
17010 diff -urN linux-orig/fs/gfs/ioctl.h linux-patched/fs/gfs/ioctl.h
17011 --- linux-orig/fs/gfs/ioctl.h 1969-12-31 18:00:00.000000000 -0600
17012 +++ linux-patched/fs/gfs/ioctl.h 2004-06-30 13:27:49.345710666 -0500
17014 +/******************************************************************************
17015 +*******************************************************************************
17017 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
17018 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
17020 +** This copyrighted material is made available to anyone wishing to use,
17021 +** modify, copy, or redistribute it subject to the terms and conditions
17022 +** of the GNU General Public License v.2.
17024 +*******************************************************************************
17025 +******************************************************************************/
17027 +#ifndef __IOCTL_DOT_H__
17028 +#define __IOCTL_DOT_H__
17030 +int gfs_add_bh_to_ub(struct gfs_user_buffer *ub, struct buffer_head *bh);
17032 +int gfs_ioctli(struct gfs_inode *ip, unsigned int cmd, void *arg);
17034 +#endif /* __IOCTL_DOT_H__ */
17035 diff -urN linux-orig/fs/gfs/locking.c linux-patched/fs/gfs/locking.c
17036 --- linux-orig/fs/gfs/locking.c 1969-12-31 18:00:00.000000000 -0600
17037 +++ linux-patched/fs/gfs/locking.c 2004-06-30 13:27:49.345710666 -0500
17039 +/******************************************************************************
17040 +*******************************************************************************
17042 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
17043 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
17045 +** This copyrighted material is made available to anyone wishing to use,
17046 +** modify, copy, or redistribute it subject to the terms and conditions
17047 +** of the GNU General Public License v.2.
17049 +*******************************************************************************
17050 +******************************************************************************/
17052 +#include <linux/sched.h>
17053 +#include <linux/slab.h>
17054 +#include <linux/smp_lock.h>
17055 +#include <linux/spinlock.h>
17056 +#include <asm/semaphore.h>
17057 +#include <linux/completion.h>
17058 +#include <linux/buffer_head.h>
17062 +#include "glock.h"
17063 +#include "locking.h"
17064 +#include "super.h"
17067 + * gfs_mount_lockproto - mount a locking protocol
17068 + * @sdp: the filesystem
17069 + * @args: mount arguments
17070 + * @silent: if TRUE, don't complain if the FS isn't a GFS fs
17072 + * Returns: 0 on success, -EXXX on failure
17076 +gfs_mount_lockproto(struct gfs_sbd *sdp, int silent)
17078 + struct gfs_sb *sb = NULL;
17079 + struct buffer_head *bh;
17080 + char *proto, *table, *p = NULL;
17083 + proto = sdp->sd_args.ar_lockproto;
17084 + table = sdp->sd_args.ar_locktable;
17086 + /* Try to autodetect */
17088 + if (!proto[0] || !table[0]) {
17089 + error = gfs_dread(sdp, GFS_SB_ADDR >> sdp->sd_fsb2bb_shift, NULL,
17090 + DIO_FORCE | DIO_START | DIO_WAIT, &bh);
17094 + sb = gmalloc(sizeof(struct gfs_sb));
17095 + gfs_sb_in(sb, bh->b_data);
17098 + error = gfs_check_sb(sdp, sb, silent);
17103 + proto = sb->sb_lockproto;
17106 + table = sb->sb_locktable;
17109 + error = lm_mount(proto, table, sdp->sd_args.ar_hostdata,
17110 + gfs_glock_cb, sdp,
17111 + GFS_MIN_LVB_SIZE, &sdp->sd_lockstruct);
17113 + printk("GFS: can't mount proto = %s, table = %s, hostdata = %s\n",
17114 + proto, table, sdp->sd_args.ar_hostdata);
17118 + GFS_ASSERT_SBD(sdp->sd_lockstruct.ls_lockspace, sdp,);
17119 + GFS_ASSERT_SBD(sdp->sd_lockstruct.ls_ops, sdp,);
17120 + GFS_ASSERT_SBD(sdp->sd_lockstruct.ls_lvb_size >= GFS_MIN_LVB_SIZE,
17124 + table = p = gmalloc(sizeof(sdp->sd_vfs->s_id) + 1);
17125 + strncpy(table, sdp->sd_vfs->s_id, sizeof(sdp->sd_vfs->s_id));
17126 + table[sizeof(sdp->sd_vfs->s_id)] = 0;
17129 + snprintf(sdp->sd_fsname, 256, "%s.%u", table,
17130 + sdp->sd_lockstruct.ls_jid);
17143 + * gfs_unmount_lockproto - Unmount lock protocol
17144 + * @sdp: The GFS superblock
17149 +gfs_unmount_lockproto(struct gfs_sbd *sdp)
17151 + lm_unmount(&sdp->sd_lockstruct);
17153 diff -urN linux-orig/fs/gfs/locking.h linux-patched/fs/gfs/locking.h
17154 --- linux-orig/fs/gfs/locking.h 1969-12-31 18:00:00.000000000 -0600
17155 +++ linux-patched/fs/gfs/locking.h 2004-06-30 13:27:49.345710666 -0500
17157 +/******************************************************************************
17158 +*******************************************************************************
17160 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
17161 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
17163 +** This copyrighted material is made available to anyone wishing to use,
17164 +** modify, copy, or redistribute it subject to the terms and conditions
17165 +** of the GNU General Public License v.2.
17167 +*******************************************************************************
17168 +******************************************************************************/
17170 +#ifndef __LOCKING_DOT_H__
17171 +#define __LOCKING_DOT_H__
17173 +int gfs_mount_lockproto(struct gfs_sbd *sdp, int silent);
17174 +void gfs_unmount_lockproto(struct gfs_sbd *sdp);
17176 +#endif /* __LOCKING_DOT_H__ */
17177 diff -urN linux-orig/fs/gfs/log.c linux-patched/fs/gfs/log.c
17178 --- linux-orig/fs/gfs/log.c 1969-12-31 18:00:00.000000000 -0600
17179 +++ linux-patched/fs/gfs/log.c 2004-06-30 13:27:49.346710434 -0500
17181 +/******************************************************************************
17182 +*******************************************************************************
17184 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
17185 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
17187 +** This copyrighted material is made available to anyone wishing to use,
17188 +** modify, copy, or redistribute it subject to the terms and conditions
17189 +** of the GNU General Public License v.2.
17191 +*******************************************************************************
17192 +******************************************************************************/
17195 + What rolls down stairs
17196 + Alone or in pairs
17197 + Rolls over your neighbor's dog.
17198 + What's great for a snack
17199 + And fits on your back
17200 + It's log, log, log!
17201 + It's lo-og, lo-og,
17202 + It's big, it's heavy, it's wood.
17203 + It's lo-og, lo-og,
17204 + It's better than bad, it's good.
17205 + Everyone wants a log,
17206 + You're gonna love it, log
17207 + Come on and get your log,
17208 + Everyone needs a log...
17209 + LOG... FROM BLAMMO!
17211 + -- The Ren and Stimpy Show
17214 +#include <linux/sched.h>
17215 +#include <linux/slab.h>
17216 +#include <linux/smp_lock.h>
17217 +#include <linux/spinlock.h>
17218 +#include <asm/semaphore.h>
17219 +#include <linux/completion.h>
17220 +#include <linux/buffer_head.h>
17228 + * gfs_struct2blk - compute the number of log descriptor blocks needed
17229 + * @sdp: the filesystem
17230 + * @nstruct: the number of structures
17231 + * @ssize: the size of the structures
17233 + * Compute the number of log descriptor blocks needed to hold a certain number
17234 + * of structures of a certain size.
17236 + * Returns: the number of blocks needed
17240 +gfs_struct2blk(struct gfs_sbd *sdp, unsigned int nstruct, unsigned int ssize)
17242 + unsigned int blks;
17243 + unsigned int first, second;
17246 + first = (sdp->sd_sb.sb_bsize - sizeof(struct gfs_log_descriptor)) / ssize;
17248 + if (nstruct > first) {
17249 + second = sdp->sd_sb.sb_bsize / ssize;
17250 + blks += DIV_RU(nstruct - first, second);
17257 + * gfs_blk2seg - Convert number of blocks into number of segments
17258 + * @sdp: The GFS superblock
17259 + * @blocks: The number of blocks
17261 + * Returns: The number of journal segments
17265 +gfs_blk2seg(struct gfs_sbd *sdp, unsigned int blocks)
17267 + return DIV_RU(blocks, sdp->sd_sb.sb_seg_size - 1);
17271 + * log_distance - Compute distance between two journal blocks
17272 + * @sdp: The GFS superblock
17273 + * @newer: The most recent journal block of the pair
17274 + * @older: The older journal block of the pair
17276 + * Compute the distance (in the journal direction) between two
17277 + * blocks in the journal
17279 + * Returns: the distance in blocks
17282 +static __inline__ unsigned int
17283 +log_distance(struct gfs_sbd *sdp, uint64_t newer, uint64_t older)
17287 + dist = newer - older;
17289 + dist += sdp->sd_jdesc.ji_nsegment * sdp->sd_sb.sb_seg_size;
17295 + * log_incr_head - Increment journal head
17296 + * @sdp: The GFS superblock
17297 + * @head: the variable holding the head of the journal
17299 + * Increment journal head by one.
17300 + * At the end of the journal, wrap head back to the start.
17304 +static __inline__ void
17305 +log_incr_head(struct gfs_sbd *sdp, uint64_t * head)
17307 + struct gfs_jindex *jdesc = &sdp->sd_jdesc;
17310 + jdesc->ji_addr + jdesc->ji_nsegment * sdp->sd_sb.sb_seg_size)
17311 + *head = jdesc->ji_addr;
17315 + * gfs_ail_start - Start I/O on the AIL
17316 + * @sdp: the filesystem
17322 +gfs_ail_start(struct gfs_sbd *sdp, int flags)
17324 + struct list_head *head = &sdp->sd_log_ail;
17325 + struct list_head *first, *tmp;
17326 + struct gfs_trans *first_tr, *tr;
17328 + gfs_log_lock(sdp);
17330 + if (list_empty(head)) {
17331 + gfs_log_unlock(sdp);
17335 + first = head->prev;
17336 + first_tr = list_entry(first, struct gfs_trans, tr_list);
17337 + gfs_ail_start_trans(sdp, first_tr);
17339 + if (flags & DIO_ALL)
17342 + for (tmp = first->prev; tmp != head; tmp = tmp->prev) {
17343 + if (first_tr && gfs_ail_empty_trans(sdp, first_tr))
17346 + tr = list_entry(tmp, struct gfs_trans, tr_list);
17347 + gfs_ail_start_trans(sdp, tr);
17350 + gfs_log_unlock(sdp);
17354 + * current_tail - Find block number of current log tail
17355 + * @sdp: The GFS superblock
17357 + * Find the block number of the current tail of the log.
17358 + * Assumes that the log lock is held.
17360 + * Returns: The tail's block number
17364 +current_tail(struct gfs_sbd *sdp)
17366 + struct gfs_trans *tr;
17369 + if (list_empty(&sdp->sd_log_ail)) {
17370 + tail = sdp->sd_log_head;
17372 + if (!gfs_log_is_header(sdp, tail)) {
17374 + GFS_ASSERT_SBD(gfs_log_is_header(sdp, tail), sdp,);
17377 + tr = list_entry(sdp->sd_log_ail.prev,
17378 + struct gfs_trans, tr_list);
17379 + tail = tr->tr_first_head;
17386 + * gfs_ail_empty - move the tail of the log forward (if possible)
17387 + * @sdp: the filesystem
17389 + * Returns: TRUE if the AIL is empty
17393 +gfs_ail_empty(struct gfs_sbd *sdp)
17395 + struct list_head *head, *tmp, *prev;
17396 + struct gfs_trans *tr;
17397 + uint64_t oldtail, newtail;
17398 + unsigned int dist;
17399 + unsigned int segments;
17402 + gfs_log_lock(sdp);
17404 + oldtail = current_tail(sdp);
17406 + for (head = &sdp->sd_log_ail, tmp = head->prev, prev = tmp->prev;
17408 + tmp = prev, prev = tmp->prev) {
17409 + tr = list_entry(tmp, struct gfs_trans, tr_list);
17411 + if (gfs_ail_empty_trans(sdp, tr)) {
17412 + list_del(&tr->tr_list);
17417 + newtail = current_tail(sdp);
17419 + if (oldtail != newtail) {
17420 + dist = log_distance(sdp, newtail, oldtail);
17422 + segments = dist / sdp->sd_sb.sb_seg_size;
17423 + GFS_ASSERT_SBD(segments * sdp->sd_sb.sb_seg_size == dist, sdp,);
17425 + spin_lock(&sdp->sd_log_seg_lock);
17426 + sdp->sd_log_seg_free += segments;
17427 + GFS_ASSERT_SBD(sdp->sd_log_seg_free < sdp->sd_jdesc.ji_nsegment,
17429 + spin_unlock(&sdp->sd_log_seg_lock);
17432 + ret = list_empty(head);
17434 + gfs_log_unlock(sdp);
17440 + * gfs_log_reserve - Make a log reservation
17441 + * @sdp: The GFS superblock
17442 + * @segments: The number of segments to reserve
17443 + * @jump_queue: if TRUE, don't care about fairness ordering
17445 + * Returns: 0 on success, -EXXX on failure
17449 +gfs_log_reserve(struct gfs_sbd *sdp, unsigned int segments, int jump_queue)
17451 + unsigned long start;
17452 + struct list_head list;
17453 + unsigned int try = 0;
17455 + GFS_ASSERT_SBD(segments, sdp,);
17457 + if (segments >= sdp->sd_jdesc.ji_nsegment) {
17458 + printk("GFS: fsid=%s: error reserving log space (%u, %u)\n",
17459 + sdp->sd_fsname, segments, sdp->sd_jdesc.ji_nsegment);
17463 + INIT_LIST_HEAD(&list);
17467 + spin_lock(&sdp->sd_log_seg_lock);
17469 + if (list_empty(&list)) {
17471 + list_add(&list, &sdp->sd_log_seg_list);
17473 + list_add_tail(&list, &sdp->sd_log_seg_list);
17474 + while (sdp->sd_log_seg_list.next != &list) {
17475 + DECLARE_WAITQUEUE(__wait_chan, current);
17476 + current->state = TASK_UNINTERRUPTIBLE;
17477 + add_wait_queue(&sdp->sd_log_seg_wait,
17479 + spin_unlock(&sdp->sd_log_seg_lock);
17481 + spin_lock(&sdp->sd_log_seg_lock);
17482 + remove_wait_queue(&sdp->sd_log_seg_wait,
17484 + current->state = TASK_RUNNING;
17489 + if (sdp->sd_log_seg_free >= segments) {
17490 + sdp->sd_log_seg_free -= segments;
17492 + spin_unlock(&sdp->sd_log_seg_lock);
17493 + wake_up(&sdp->sd_log_seg_wait);
17497 + spin_unlock(&sdp->sd_log_seg_lock);
17500 + gfs_log_flush(sdp);
17501 + gfs_ail_start(sdp, 0);
17504 + gfs_ail_empty(sdp);
17507 + if (time_after_eq(jiffies, start + 60 * HZ))
17508 + printk("GFS: fsid=%s: pid %d can't make log reservation (asking for %u segments)\n",
17509 + sdp->sd_fsname, current->pid, segments);
17517 + * gfs_log_release - Release a given number of log segments
17518 + * @sdp: The GFS superblock
17519 + * @segments: The number of segments
17524 +gfs_log_release(struct gfs_sbd *sdp, unsigned int segments)
17526 + spin_lock(&sdp->sd_log_seg_lock);
17527 + sdp->sd_log_seg_free += segments;
17528 + GFS_ASSERT_SBD(sdp->sd_log_seg_free < sdp->sd_jdesc.ji_nsegment, sdp,);
17529 + spin_unlock(&sdp->sd_log_seg_lock);
17533 + * log_get_header - Get the journal header buffer
17534 + * @sdp: The GFS superblock
17535 + * @tr: The transaction
17536 + * @next: TRUE if this is not a continuation of an existing transaction
17538 + * Returns: the log buffer
17541 +static struct gfs_log_buf *
17542 +log_get_header(struct gfs_sbd *sdp, struct gfs_trans *tr, int next)
17544 + struct gfs_log_buf *lb;
17545 + struct list_head *bmem;
17546 + struct gfs_log_header header;
17548 + GFS_ASSERT_SBD(gfs_log_is_header(sdp, tr->tr_log_head), sdp,);
17550 + GFS_ASSERT_SBD(tr->tr_num_free_bufs &&
17551 + !list_empty(&tr->tr_free_bufs), sdp,);
17552 + lb = list_entry(tr->tr_free_bufs.next, struct gfs_log_buf, lb_list);
17553 + list_del(&lb->lb_list);
17554 + tr->tr_num_free_bufs--;
17556 + GFS_ASSERT_SBD(tr->tr_num_free_bmem &&
17557 + !list_empty(&tr->tr_free_bmem), sdp,);
17558 + bmem = tr->tr_free_bmem.next;
17560 + tr->tr_num_free_bmem--;
17562 + gfs_logbh_init(sdp, &lb->lb_bh, tr->tr_log_head, (char *)bmem);
17563 + memset(bmem, 0, sdp->sd_sb.sb_bsize);
17565 + memset(&header, 0, sizeof (header));
17568 + header.lh_header.mh_magic = GFS_MAGIC;
17569 + header.lh_header.mh_type = GFS_METATYPE_LH;
17570 + header.lh_header.mh_format = GFS_FORMAT_LH;
17571 + header.lh_first = tr->tr_log_head;
17572 + header.lh_sequence = sdp->sd_sequence + 1;
17573 + header.lh_tail = current_tail(sdp);
17574 + header.lh_last_dump = sdp->sd_log_dump_last;
17576 + header.lh_header.mh_magic = GFS_MAGIC;
17577 + header.lh_header.mh_type = GFS_METATYPE_LH;
17578 + header.lh_header.mh_format = GFS_FORMAT_LH;
17579 + header.lh_first = tr->tr_first_head;
17580 + header.lh_sequence = sdp->sd_sequence;
17581 + header.lh_tail = current_tail(sdp);
17582 + header.lh_last_dump = sdp->sd_log_dump_last;
17584 + list_add(&lb->lb_list, &tr->tr_bufs);
17587 + gfs_log_header_out(&header, lb->lb_bh.b_data);
17588 + gfs_log_header_out(&header,
17589 + lb->lb_bh.b_data + GFS_BASIC_BLOCK -
17590 + sizeof(struct gfs_log_header));
17592 + log_incr_head(sdp, &tr->tr_log_head);
17598 + * gfs_log_get_buf - Get a buffer to use for control data
17599 + * @sdp: The GFS superblock
17600 + * @tr: The GFS transaction
17602 + * Generate a regular buffer for use in the journal as control data.
17604 + * Returns: the buffer
17607 +struct gfs_log_buf *
17608 +gfs_log_get_buf(struct gfs_sbd *sdp, struct gfs_trans *tr)
17610 + struct gfs_log_buf *lb;
17611 + struct list_head *bmem;
17613 + if (gfs_log_is_header(sdp, tr->tr_log_head))
17614 + log_get_header(sdp, tr, FALSE);
17616 + GFS_ASSERT_SBD(tr->tr_num_free_bufs &&
17617 + !list_empty(&tr->tr_free_bufs), sdp,);
17618 + lb = list_entry(tr->tr_free_bufs.next, struct gfs_log_buf, lb_list);
17619 + list_del(&lb->lb_list);
17620 + tr->tr_num_free_bufs--;
17622 + GFS_ASSERT_SBD(tr->tr_num_free_bmem
17623 + && !list_empty(&tr->tr_free_bmem), sdp,);
17624 + bmem = tr->tr_free_bmem.next;
17626 + tr->tr_num_free_bmem--;
17628 + gfs_logbh_init(sdp, &lb->lb_bh, tr->tr_log_head, (char *)bmem);
17629 + memset(bmem, 0, sdp->sd_sb.sb_bsize);
17631 + list_add(&lb->lb_list, &tr->tr_bufs);
17633 + log_incr_head(sdp, &tr->tr_log_head);
17639 + * gfs_log_fake_buf - Build a fake buffer head
17640 + * @sdp: the filesystem
17641 + * @tr: the transaction this is part of
17642 + * @data: the data the buffer should point to
17643 + * @unlock: a buffer that is unlocked as this struct gfs_log_buf is torn down
17648 +gfs_log_fake_buf(struct gfs_sbd *sdp, struct gfs_trans *tr, char *data,
17649 + struct buffer_head *unlock)
17651 + struct gfs_log_buf *lb;
17653 + if (gfs_log_is_header(sdp, tr->tr_log_head))
17654 + log_get_header(sdp, tr, FALSE);
17656 + GFS_ASSERT_SBD(tr->tr_num_free_bufs &&
17657 + !list_empty(&tr->tr_free_bufs), sdp,);
17658 + lb = list_entry(tr->tr_free_bufs.next, struct gfs_log_buf, lb_list);
17659 + list_del(&lb->lb_list);
17660 + tr->tr_num_free_bufs--;
17662 + gfs_logbh_init(sdp, &lb->lb_bh, tr->tr_log_head, data);
17663 + lb->lb_unlock = unlock;
17665 + list_add(&lb->lb_list, &tr->tr_bufs);
17667 + log_incr_head(sdp, &tr->tr_log_head);
17671 + * check_seg_usage - Check that we didn't use too many segments
17672 + * @sdp: The GFS superblock
17673 + * @tr: The transaction
17675 + * Also, make sure we don't ever get to a point where there are
17676 + * no dumps in the log (corrupting the log). Panic before we let
17682 +check_seg_usage(struct gfs_sbd *sdp, struct gfs_trans *tr)
17684 + struct gfs_jindex *jdesc = &sdp->sd_jdesc;
17685 + unsigned int dist;
17686 + unsigned int segments;
17687 + uint64_t head_off, head_wrap;
17688 + uint64_t dump_off, dump_wrap;
17690 + dist = log_distance(sdp, tr->tr_log_head, tr->tr_first_head);
17692 + segments = dist / sdp->sd_sb.sb_seg_size;
17693 + GFS_ASSERT_SBD(segments * sdp->sd_sb.sb_seg_size == dist, sdp,);
17694 + GFS_ASSERT_SBD(segments == tr->tr_seg_reserved, sdp,);
17696 + if (sdp->sd_log_dump_last) {
17697 + head_off = tr->tr_first_head +
17698 + tr->tr_seg_reserved * sdp->sd_sb.sb_seg_size;
17699 + head_wrap = sdp->sd_log_wrap;
17700 + if (head_off >= jdesc->ji_addr +
17701 + jdesc->ji_nsegment * sdp->sd_sb.sb_seg_size) {
17702 + head_off -= jdesc->ji_nsegment * sdp->sd_sb.sb_seg_size;
17706 + dump_off = sdp->sd_log_dump_last;
17707 + dump_wrap = sdp->sd_log_dump_last_wrap;
17709 + switch (head_wrap - dump_wrap) {
17714 + if (head_off < dump_off)
17716 + else if (head_off == dump_off &&
17717 + (tr->tr_flags & TRF_LOG_DUMP))
17721 + GFS_ASSERT_SBD(FALSE, sdp,
17722 + printk("head_off = %"PRIu64", head_wrap = %"PRIu64"\n",
17723 + head_off, head_wrap);
17724 + printk("dump_off = %"PRIu64", dump_wrap = %"PRIu64"\n",
17725 + dump_off, dump_wrap););
17732 + * log_free_buf - Free a struct gfs_log_buf (and possibly the data it points to)
17733 + * @sdp: the filesystem
17734 + * @lb: the log buffer
17739 +log_free_buf(struct gfs_sbd *sdp, struct gfs_log_buf *lb)
17743 + bmem = lb->lb_bh.b_data;
17744 + gfs_logbh_uninit(sdp, &lb->lb_bh);
17746 + if (lb->lb_unlock)
17747 + gfs_unlock_buffer(lb->lb_unlock);
17755 + * sync_trans - Add "last" descriptor to transaction and sync to disk
17756 + * @sdp: The GFS superblock
17757 + * @tr: The transaction
17759 + * Add the "last" descriptor on to the end of the current transaction
17760 + * and sync it out to disk. Don't commit it yet, though.
17762 + * Returns: 0 on success, -EXXX on failure
17766 +sync_trans(struct gfs_sbd *sdp, struct gfs_trans *tr)
17768 + struct list_head *tmp, *head, *prev;
17769 + struct gfs_log_descriptor desc;
17770 + struct gfs_log_buf *lb;
17772 + int error = 0, e;
17774 + /* Build LAST descriptor */
17776 + lb = gfs_log_get_buf(sdp, tr);
17778 + memset(&desc, 0, sizeof(struct gfs_log_descriptor));
17779 + desc.ld_header.mh_magic = GFS_MAGIC;
17780 + desc.ld_header.mh_type = GFS_METATYPE_LD;
17781 + desc.ld_header.mh_format = GFS_FORMAT_LD;
17782 + desc.ld_type = GFS_LOG_DESC_LAST;
17783 + desc.ld_length = 1;
17784 + for (blk = tr->tr_log_head; !gfs_log_is_header(sdp, blk); blk++)
17785 + desc.ld_length++;
17786 + gfs_desc_out(&desc, lb->lb_bh.b_data);
17788 + while (!gfs_log_is_header(sdp, tr->tr_log_head))
17789 + log_incr_head(sdp, &tr->tr_log_head);
17791 + check_seg_usage(sdp, tr);
17794 + Go in "prev" direction to start the I/O in order. */
17796 + for (head = &tr->tr_bufs, tmp = head->prev, prev = tmp->prev;
17798 + tmp = prev, prev = tmp->prev) {
17799 + lb = list_entry(tmp, struct gfs_log_buf, lb_list);
17802 + list_del(&lb->lb_list);
17803 + log_free_buf(sdp, lb);
17805 + e = gfs_logbh_start(sdp, &lb->lb_bh);
17807 + list_del(&lb->lb_list);
17808 + log_free_buf(sdp, lb);
17815 + Go in "next" direction to minimize sleeps/wakeups. */
17817 + while (!list_empty(&tr->tr_bufs)) {
17818 + lb = list_entry(tr->tr_bufs.next, struct gfs_log_buf, lb_list);
17820 + e = gfs_logbh_wait(sdp, &lb->lb_bh);
17824 + list_del(&lb->lb_list);
17825 + log_free_buf(sdp, lb);
17832 + * commit_trans - Commit the current transaction
17833 + * @sdp: The GFS superblock
17834 + * @tr: The transaction
17836 + * Write next header to commit
17838 + * Returns: 0 on success, -EXXX on failure
17842 +commit_trans(struct gfs_sbd *sdp, struct gfs_trans *tr)
17844 + struct gfs_log_buf *lb;
17847 + lb = log_get_header(sdp, tr, TRUE);
17849 + error = gfs_logbh_start(sdp, &lb->lb_bh);
17851 + error = gfs_logbh_wait(sdp, &lb->lb_bh);
17853 + log_free_buf(sdp, lb);
17859 + * disk_commit - Write a transaction to the on-disk journal
17860 + * @sdp: The GFS superblock
17861 + * @tr: The transaction
17863 + * Returns: 0 on success, -EXXX on failure
17867 +disk_commit(struct gfs_sbd *sdp, struct gfs_trans *tr)
17869 + uint64_t last_dump, last_dump_wrap;
17872 + GFS_ASSERT_SBD(!test_bit(SDF_ROFS, &sdp->sd_flags), sdp,);
17873 + tr->tr_log_head = sdp->sd_log_head;
17874 + tr->tr_first_head = tr->tr_log_head - 1;
17875 + GFS_ASSERT_SBD(gfs_log_is_header(sdp, tr->tr_first_head), sdp,);
17877 + LO_BUILD_BHLIST(sdp, tr);
17879 + GFS_ASSERT_SBD(!list_empty(&tr->tr_bufs), sdp,);
17881 + error = sync_trans(sdp, tr);
17883 + /* Eat unusable commit buffer */
17884 + log_free_buf(sdp, log_get_header(sdp, tr, TRUE));
17888 + if (tr->tr_flags & TRF_LOG_DUMP) {
17889 + /* This commit header should point to the log dump we're
17890 + committing as the current one. But save the copy of the
17891 + old one in case we have problems committing the dump. */
17893 + last_dump = sdp->sd_log_dump_last;
17894 + last_dump_wrap = sdp->sd_log_dump_last_wrap;
17896 + sdp->sd_log_dump_last = tr->tr_first_head;
17897 + sdp->sd_log_dump_last_wrap = sdp->sd_log_wrap;
17899 + error = commit_trans(sdp, tr);
17901 + sdp->sd_log_dump_last = last_dump;
17902 + sdp->sd_log_dump_last_wrap = last_dump_wrap;
17906 + error = commit_trans(sdp, tr);
17911 + if (sdp->sd_log_head > tr->tr_log_head)
17912 + sdp->sd_log_wrap++;
17913 + sdp->sd_log_head = tr->tr_log_head;
17914 + sdp->sd_sequence++;
17917 + GFS_ASSERT_SBD(!tr->tr_num_free_bufs &&
17918 + list_empty(&tr->tr_free_bufs), sdp,);
17919 + GFS_ASSERT_SBD(!tr->tr_num_free_bmem &&
17920 + list_empty(&tr->tr_free_bmem), sdp,);
17927 + * add_trans_to_ail - Add an on-disk committed transaction to the AIL
17927 + * @sdp: the filesystem
17928 + * @tr: the transaction
17933 +add_trans_to_ail(struct gfs_sbd *sdp, struct gfs_trans *tr)
17935 + struct gfs_log_element *le;
17937 + while (!list_empty(&tr->tr_elements)) {
17938 + le = list_entry(tr->tr_elements.next,
17939 + struct gfs_log_element, le_list);
17940 + LO_ADD_TO_AIL(sdp, le);
17943 + list_add(&tr->tr_list, &sdp->sd_log_ail);
17947 + * log_refund - Refund log segments to the free pool
17948 + * @sdp: The GFS superblock
17949 + * @tr: The transaction to examine
17951 + * Look at the number of segments reserved for this transaction and the
17952 + * number of segments actually needed for it. If they aren't the
17953 + * same, refund the difference to the free segment pool.
17955 + * Called with the log lock held
17959 +log_refund(struct gfs_sbd *sdp, struct gfs_trans *tr)
17961 + struct gfs_log_buf *lb;
17962 + struct list_head *bmem;
17963 + unsigned int num_bufs = 0, num_bmem = 0;
17964 + unsigned int segments;
17966 + LO_TRANS_SIZE(sdp, tr, NULL, NULL, &num_bufs, &num_bmem);
17968 + segments = gfs_blk2seg(sdp, num_bufs + 1);
17969 + num_bufs += segments + 1;
17970 + num_bmem += segments + 1;
17972 + if (tr->tr_seg_reserved > segments) {
17973 + spin_lock(&sdp->sd_log_seg_lock);
17974 + sdp->sd_log_seg_free += tr->tr_seg_reserved - segments;
17975 + GFS_ASSERT_SBD(sdp->sd_log_seg_free < sdp->sd_jdesc.ji_nsegment,
17977 + spin_unlock(&sdp->sd_log_seg_lock);
17979 + tr->tr_seg_reserved = segments;
17981 + GFS_ASSERT_SBD(tr->tr_seg_reserved == segments, sdp,);
17983 + GFS_ASSERT_SBD(tr->tr_num_free_bufs >= num_bufs, sdp,);
17984 + while (tr->tr_num_free_bufs > num_bufs) {
17985 + lb = list_entry(tr->tr_free_bufs.next,
17986 + struct gfs_log_buf, lb_list);
17987 + list_del(&lb->lb_list);
17989 + tr->tr_num_free_bufs--;
17992 + GFS_ASSERT_SBD(tr->tr_num_free_bmem >= num_bmem, sdp,);
17993 + while (tr->tr_num_free_bmem > num_bmem) {
17994 + bmem = tr->tr_free_bmem.next;
17997 + tr->tr_num_free_bmem--;
18002 + * trans_combine - combine two transactions
18003 + * @sdp: the filesystem
18004 + * @tr: the surviving transaction
18005 + * @new_tr: the transaction that gets freed
18007 + * Assumes that the two transactions are independent.
18011 +trans_combine(struct gfs_sbd *sdp, struct gfs_trans *tr,
18012 + struct gfs_trans *new_tr)
18014 + struct gfs_log_element *le;
18015 + struct gfs_log_buf *lb;
18016 + struct list_head *bmem;
18018 + tr->tr_file = __FILE__;
18019 + tr->tr_line = __LINE__;
18020 + tr->tr_seg_reserved += new_tr->tr_seg_reserved;
18021 + tr->tr_flags |= new_tr->tr_flags;
18022 + tr->tr_num_free_bufs += new_tr->tr_num_free_bufs;
18023 + tr->tr_num_free_bmem += new_tr->tr_num_free_bmem;
18025 + /* Combine the elements of the two transactions */
18027 + while (!list_empty(&new_tr->tr_elements)) {
18028 + le = list_entry(new_tr->tr_elements.next,
18029 + struct gfs_log_element, le_list);
18030 + GFS_ASSERT_SBD(le->le_trans == new_tr, sdp,);
18031 + le->le_trans = tr;
18032 + list_move(&le->le_list, &tr->tr_elements);
18035 + LO_TRANS_COMBINE(sdp, tr, new_tr);
18037 + while (!list_empty(&new_tr->tr_free_bufs)) {
18038 + lb = list_entry(new_tr->tr_free_bufs.next,
18039 + struct gfs_log_buf, lb_list);
18040 + list_move(&lb->lb_list, &tr->tr_free_bufs);
18041 + new_tr->tr_num_free_bufs--;
18043 + while (!list_empty(&new_tr->tr_free_bmem)) {
18044 + bmem = new_tr->tr_free_bmem.next;
18045 + list_move(bmem, &tr->tr_free_bmem);
18046 + new_tr->tr_num_free_bmem--;
18049 + GFS_ASSERT_SBD(!new_tr->tr_num_free_bufs, sdp,);
18050 + GFS_ASSERT_SBD(!new_tr->tr_num_free_bmem, sdp,);
18056 + * log_flush_internal - flush incore transactions
18057 + * @sdp: the filesystem
18058 + * @gl: The glock structure to flush. If NULL, flush the whole incore log
18063 +log_flush_internal(struct gfs_sbd *sdp, struct gfs_glock *gl)
18065 + struct gfs_trans *trans = NULL, *tr;
18068 + gfs_log_lock(sdp);
18070 + if (list_empty(&sdp->sd_log_incore))
18074 + if (!gl->gl_incore_le.le_trans)
18077 + trans = gl->gl_incore_le.le_trans;
18079 + list_del(&trans->tr_list);
18081 + while (!list_empty(&sdp->sd_log_incore)) {
18082 + tr = list_entry(sdp->sd_log_incore.next,
18083 + struct gfs_trans, tr_list);
18085 + list_del(&tr->tr_list);
18088 + trans_combine(sdp, trans, tr);
18094 + GFS_ASSERT_SBD(trans, sdp,);
18096 + log_refund(sdp, trans);
18098 + /* Actually do the stuff to commit the transaction */
18100 + error = disk_commit(sdp, trans);
18102 + gfs_io_error(sdp);
18104 + add_trans_to_ail(sdp, trans);
18106 + if (log_distance(sdp, sdp->sd_log_head, sdp->sd_log_dump_last) * GFS_DUMPS_PER_LOG >=
18107 + sdp->sd_jdesc.ji_nsegment * sdp->sd_sb.sb_seg_size)
18108 + set_bit(SDF_NEED_LOG_DUMP, &sdp->sd_flags);
18111 + if (list_empty(&sdp->sd_log_incore))
18112 + sdp->sd_vfs->s_dirt = FALSE;
18114 + gfs_log_unlock(sdp);
18116 + /* Dump if we need to. */
18118 + if (test_bit(SDF_NEED_LOG_DUMP, &sdp->sd_flags))
18119 + gfs_log_dump(sdp, FALSE);
18123 + * gfs_log_flush - flush the whole incore log
18124 + * @sdp: the filesystem
18129 +gfs_log_flush(struct gfs_sbd *sdp)
18131 + log_flush_internal(sdp, NULL);
18135 + * gfs_log_flush_glock - flush the incore log for a glock
18141 +gfs_log_flush_glock(struct gfs_glock *gl)
18143 + log_flush_internal(gl->gl_sbd, gl);
18147 + * incore_commit - commit a transaction in-core
18148 + * @sdp: the filesystem
18149 + * @new_tr: the transaction to commit
18151 + * Add the transaction @new_tr to the end of the incore commit list.
18152 + * Pull up and merge any previously committed transactions that share
18153 + * locks. Also pull up any rename transactions that need it.
18157 +incore_commit(struct gfs_sbd *sdp, struct gfs_trans *new_tr)
18159 + struct gfs_log_element *le;
18160 + struct gfs_trans *trans = NULL, *exist_tr;
18161 + struct gfs_log_buf *lb;
18162 + struct list_head *bmem;
18163 + struct list_head *tmp, *head, *next;
18165 + for (head = &new_tr->tr_elements, tmp = head->next;
18167 + tmp = tmp->next) {
18168 + le = list_entry(tmp, struct gfs_log_element, le_list);
18170 + exist_tr = LO_OVERLAP_TRANS(sdp, le);
18174 + if (exist_tr != trans) {
18175 + list_del(&exist_tr->tr_list);
18177 + trans_combine(sdp, trans, exist_tr);
18179 + trans = exist_tr;
18184 + trans->tr_file = __FILE__;
18185 + trans->tr_line = __LINE__;
18186 + trans->tr_seg_reserved += new_tr->tr_seg_reserved;
18187 + trans->tr_flags |= new_tr->tr_flags;
18188 + trans->tr_num_free_bufs += new_tr->tr_num_free_bufs;
18189 + trans->tr_num_free_bmem += new_tr->tr_num_free_bmem;
18191 + while (!list_empty(&new_tr->tr_free_bufs)) {
18192 + lb = list_entry(new_tr->tr_free_bufs.next,
18193 + struct gfs_log_buf, lb_list);
18194 + list_move(&lb->lb_list, &trans->tr_free_bufs);
18195 + new_tr->tr_num_free_bufs--;
18197 + while (!list_empty(&new_tr->tr_free_bmem)) {
18198 + bmem = new_tr->tr_free_bmem.next;
18199 + list_move(bmem, &trans->tr_free_bmem);
18200 + new_tr->tr_num_free_bmem--;
18205 + for (head = &new_tr->tr_elements, tmp = head->next, next = tmp->next;
18207 + tmp = next, next = next->next) {
18208 + le = list_entry(tmp, struct gfs_log_element, le_list);
18209 + LO_INCORE_COMMIT(sdp, trans, le);
18212 + if (trans != new_tr) {
18213 + GFS_ASSERT_SBD(!new_tr->tr_num_free_bufs, sdp,);
18214 + GFS_ASSERT_SBD(!new_tr->tr_num_free_bmem, sdp,);
18215 + GFS_ASSERT_SBD(list_empty(&new_tr->tr_elements), sdp,);
18219 + log_refund(sdp, trans);
18221 + list_add(&trans->tr_list, &sdp->sd_log_incore);
18225 + * gfs_log_commit - Commit a transaction to the log
18226 + * @sdp: the filesystem
18227 + * @tr: the transaction
18229 + * Returns: 0 on success, -EXXX on failure
18233 +gfs_log_commit(struct gfs_sbd *sdp, struct gfs_trans *tr)
18235 + struct gfs_log_buf *lb;
18236 + struct list_head *bmem;
18237 + unsigned int num_mblks = 0, num_eblks = 0, num_bufs = 0, num_bmem = 0;
18238 + unsigned int segments;
18240 + LO_TRANS_SIZE(sdp, tr, &num_mblks, &num_eblks, &num_bufs, &num_bmem);
18242 + GFS_ASSERT_SBD(num_mblks <= tr->tr_mblks_asked &&
18243 + num_eblks <= tr->tr_eblks_asked, sdp,
18244 + printk("type = (%s, %u)\n",
18245 + tr->tr_file, tr->tr_line);
18246 + printk("num_mblks = %u, tr->tr_mblks_asked = %u\n",
18247 + num_mblks, tr->tr_mblks_asked);
18248 + printk("num_eblks = %u, tr->tr_eblks_asked = %u\n",
18249 + num_eblks, tr->tr_eblks_asked););
18251 + segments = gfs_blk2seg(sdp, num_bufs + 1);
18252 + num_bufs += segments + 1;
18253 + num_bmem += segments + 1;
18255 + while (num_bufs--) {
18256 + lb = gmalloc(sizeof(struct gfs_log_buf));
18257 + memset(lb, 0, sizeof(struct gfs_log_buf));
18258 + list_add(&lb->lb_list, &tr->tr_free_bufs);
18259 + tr->tr_num_free_bufs++;
18261 + while (num_bmem--) {
18262 + bmem = gmalloc(sdp->sd_sb.sb_bsize);
18263 + list_add(bmem, &tr->tr_free_bmem);
18264 + tr->tr_num_free_bmem++;
18267 + gfs_log_lock(sdp);
18269 + incore_commit(sdp, tr);
18271 + if (sdp->sd_log_buffers > sdp->sd_tune.gt_incore_log_blocks) {
18272 + gfs_log_unlock(sdp);
18273 + gfs_log_flush(sdp);
18275 + sdp->sd_vfs->s_dirt = TRUE;
18276 + gfs_log_unlock(sdp);
18282 + * gfs_log_dump - make a Log Dump entry in the log
18283 + * @sdp: the filesystem
18284 + * @force: if TRUE, always make the dump even if one has been made recently
18289 +gfs_log_dump(struct gfs_sbd *sdp, int force)
18291 + struct gfs_log_element *le;
18292 + struct gfs_trans tr;
18293 + struct gfs_log_buf *lb;
18294 + struct list_head *bmem;
18295 + unsigned int num_bufs, num_bmem;
18296 + unsigned int segments;
18299 + if (test_and_set_bit(SDF_IN_LOG_DUMP, &sdp->sd_flags)) {
18300 + GFS_ASSERT_SBD(!force, sdp,);
18304 + memset(&tr, 0, sizeof(struct gfs_trans));
18305 + INIT_LIST_HEAD(&tr.tr_elements);
18306 + INIT_LIST_HEAD(&tr.tr_free_bufs);
18307 + INIT_LIST_HEAD(&tr.tr_free_bmem);
18308 + INIT_LIST_HEAD(&tr.tr_bufs);
18309 + tr.tr_flags = TRF_LOG_DUMP;
18310 + tr.tr_file = __FILE__;
18311 + tr.tr_line = __LINE__;
18314 + gfs_log_lock(sdp);
18316 + if (!force && !test_bit(SDF_NEED_LOG_DUMP, &sdp->sd_flags))
18319 + num_bufs = num_bmem = 0;
18320 + LO_DUMP_SIZE(sdp, NULL, &num_bufs, &num_bmem);
18321 + GFS_ASSERT_SBD(num_bufs, sdp,);
18322 + segments = gfs_blk2seg(sdp, num_bufs + 1);
18323 + num_bufs += segments + 1;
18324 + num_bmem += segments + 1;
18326 + if (tr.tr_seg_reserved >= segments &&
18327 + tr.tr_num_free_bufs >= num_bufs &&
18328 + tr.tr_num_free_bmem >= num_bmem)
18331 + gfs_log_unlock(sdp);
18333 + if (tr.tr_seg_reserved < segments) {
18334 + error = gfs_log_reserve(sdp,
18335 + segments - tr.tr_seg_reserved,
18337 + GFS_ASSERT_SBD(!error, sdp,);
18338 + tr.tr_seg_reserved = segments;
18340 + while (tr.tr_num_free_bufs < num_bufs) {
18341 + lb = gmalloc(sizeof(struct gfs_log_buf));
18342 + memset(lb, 0, sizeof(struct gfs_log_buf));
18343 + list_add(&lb->lb_list, &tr.tr_free_bufs);
18344 + tr.tr_num_free_bufs++;
18346 + while (tr.tr_num_free_bmem < num_bmem) {
18347 + bmem = gmalloc(sdp->sd_sb.sb_bsize);
18348 + list_add(bmem, &tr.tr_free_bmem);
18349 + tr.tr_num_free_bmem++;
18353 + if (tr.tr_seg_reserved > segments) {
18354 + spin_lock(&sdp->sd_log_seg_lock);
18355 + sdp->sd_log_seg_free += tr.tr_seg_reserved - segments;
18356 + GFS_ASSERT_SBD(sdp->sd_log_seg_free < sdp->sd_jdesc.ji_nsegment,
18358 + spin_unlock(&sdp->sd_log_seg_lock);
18359 + tr.tr_seg_reserved = segments;
18361 + while (tr.tr_num_free_bufs > num_bufs) {
18362 + lb = list_entry(tr.tr_free_bufs.next,
18363 + struct gfs_log_buf, lb_list);
18364 + list_del(&lb->lb_list);
18366 + tr.tr_num_free_bufs--;
18368 + while (tr.tr_num_free_bmem > num_bmem) {
18369 + bmem = tr.tr_free_bmem.next;
18372 + tr.tr_num_free_bmem--;
18375 + LO_BUILD_DUMP(sdp, &tr);
18377 + error = disk_commit(sdp, &tr);
18379 + gfs_io_error(sdp);
18381 + while (!list_empty(&tr.tr_elements)) {
18382 + le = list_entry(tr.tr_elements.next,
18383 + struct gfs_log_element, le_list);
18384 + LO_CLEAN_DUMP(sdp, le);
18387 + /* If there isn't anything in the AIL, we won't get back the log
18388 + space we reserved unless we do it ourselves. */
18390 + if (list_empty(&sdp->sd_log_ail)) {
18391 + spin_lock(&sdp->sd_log_seg_lock);
18392 + sdp->sd_log_seg_free += tr.tr_seg_reserved;
18393 + GFS_ASSERT_SBD(sdp->sd_log_seg_free < sdp->sd_jdesc.ji_nsegment,
18395 + spin_unlock(&sdp->sd_log_seg_lock);
18398 + clear_bit(SDF_NEED_LOG_DUMP, &sdp->sd_flags);
18401 + gfs_log_unlock(sdp);
18402 + clear_bit(SDF_IN_LOG_DUMP, &sdp->sd_flags);
18406 + * gfs_log_shutdown - write a shutdown header into a journal
18407 + * @sdp: the filesystem
18409 + * Returns: 0 on success, -EXXX on failure
18413 +gfs_log_shutdown(struct gfs_sbd *sdp)
18415 + struct gfs_log_buf *lb;
18417 + struct gfs_log_header head;
18418 + struct gfs_log_descriptor desc;
18419 + unsigned int elements = 0;
18422 + lb = gmalloc(sizeof(struct gfs_log_buf));
18423 + memset(lb, 0, sizeof(struct gfs_log_buf));
18424 + bmem = gmalloc(sdp->sd_sb.sb_bsize);
18426 + gfs_log_lock(sdp);
18428 + GFS_ASSERT_SBD(list_empty(&sdp->sd_log_ail), sdp,);
18429 + GFS_ASSERT_SBD(sdp->sd_log_seg_free == sdp->sd_jdesc.ji_nsegment - 1,
18431 + GFS_ASSERT_SBD(!sdp->sd_log_buffers, sdp,);
18432 + GFS_ASSERT_SBD(gfs_log_is_header(sdp, sdp->sd_log_head - 1), sdp,);
18434 + /* Build a "last" log descriptor */
18436 + memset(&desc, 0, sizeof(struct gfs_log_descriptor));
18437 + desc.ld_header.mh_magic = GFS_MAGIC;
18438 + desc.ld_header.mh_type = GFS_METATYPE_LD;
18439 + desc.ld_header.mh_format = GFS_FORMAT_LD;
18440 + desc.ld_type = GFS_LOG_DESC_LAST;
18441 + desc.ld_length = sdp->sd_sb.sb_seg_size - 1;
18443 + /* Write the descriptor */
18445 + gfs_logbh_init(sdp, &lb->lb_bh, sdp->sd_log_head, bmem);
18446 + memset(bmem, 0, sdp->sd_sb.sb_bsize);
18447 + gfs_desc_out(&desc, lb->lb_bh.b_data);
18448 + error = gfs_logbh_start(sdp, &lb->lb_bh);
18450 + error = gfs_logbh_wait(sdp, &lb->lb_bh);
18451 + gfs_logbh_uninit(sdp, &lb->lb_bh);
18456 + /* Move to the next header */
18458 + while (!gfs_log_is_header(sdp, sdp->sd_log_head))
18459 + log_incr_head(sdp, &sdp->sd_log_head);
18461 + LO_DUMP_SIZE(sdp, &elements, NULL, NULL);
18463 + /* Build the shutdown header */
18465 + memset(&head, 0, sizeof (struct gfs_log_header));
18466 + head.lh_header.mh_magic = GFS_MAGIC;
18467 + head.lh_header.mh_type = GFS_METATYPE_LH;
18468 + head.lh_header.mh_format = GFS_FORMAT_LH;
18469 + head.lh_flags = GFS_LOG_HEAD_UNMOUNT;
18470 + head.lh_first = sdp->sd_log_head;
18471 + head.lh_sequence = sdp->sd_sequence + 1;
18472 + /* Don't care about tail */
18473 + head.lh_last_dump = (elements) ? sdp->sd_log_dump_last : 0;
18475 + /* Write out the shutdown header */
18477 + gfs_logbh_init(sdp, &lb->lb_bh, sdp->sd_log_head, bmem);
18478 + memset(bmem, 0, sdp->sd_sb.sb_bsize);
18479 + gfs_log_header_out(&head, lb->lb_bh.b_data);
18480 + gfs_log_header_out(&head,
18481 + lb->lb_bh.b_data + GFS_BASIC_BLOCK -
18482 + sizeof(struct gfs_log_header));
18483 + error = gfs_logbh_start(sdp, &lb->lb_bh);
18485 + error = gfs_logbh_wait(sdp, &lb->lb_bh);
18486 + gfs_logbh_uninit(sdp, &lb->lb_bh);
18489 + gfs_log_unlock(sdp);
18496 diff -urN linux-orig/fs/gfs/log.h linux-patched/fs/gfs/log.h
18497 --- linux-orig/fs/gfs/log.h 1969-12-31 18:00:00.000000000 -0600
18498 +++ linux-patched/fs/gfs/log.h 2004-06-30 13:27:49.346710434 -0500
18500 +/******************************************************************************
18501 +*******************************************************************************
18503 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
18504 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
18506 +** This copyrighted material is made available to anyone wishing to use,
18507 +** modify, copy, or redistribute it subject to the terms and conditions
18508 +** of the GNU General Public License v.2.
18510 +*******************************************************************************
18511 +******************************************************************************/
18513 +#ifndef __LOG_DOT_H__
18514 +#define __LOG_DOT_H__
18517 + * gfs_log_lock - acquire the right to mess with the log manager
18518 + * @sdp: the filesystem
18522 +static __inline__ void
18523 +gfs_log_lock(struct gfs_sbd *sdp)
18525 + down(&sdp->sd_log_lock);
18529 + * gfs_log_unlock - release the right to mess with the log manager
18530 + * @sdp: the filesystem
18534 +static __inline__ void
18535 +gfs_log_unlock(struct gfs_sbd *sdp)
18537 + up(&sdp->sd_log_lock);
18540 +unsigned int gfs_struct2blk(struct gfs_sbd *sdp, unsigned int nstruct,
18541 + unsigned int ssize);
18542 +unsigned int gfs_blk2seg(struct gfs_sbd *sdp, unsigned int blocks);
18544 +int gfs_log_reserve(struct gfs_sbd *sdp, unsigned int segments, int jump_queue);
18545 +void gfs_log_release(struct gfs_sbd *sdp, unsigned int segments);
18547 +void gfs_ail_start(struct gfs_sbd *sdp, int flags);
18548 +int gfs_ail_empty(struct gfs_sbd *sdp);
18550 +void gfs_log_commit(struct gfs_sbd *sdp, struct gfs_trans *trans);
18551 +void gfs_log_flush(struct gfs_sbd *sdp);
18552 +void gfs_log_flush_glock(struct gfs_glock *gl);
18554 +int gfs_log_shutdown(struct gfs_sbd *sdp);
18556 +void gfs_log_dump(struct gfs_sbd *sdp, int force);
18558 +/* Internal crap used by the log operations */
18561 + * gfs_log_is_header - Discover if a block is a journal header
18562 + * @sdp: The GFS superblock
18563 + * @block: The block number
18565 + * Returns: TRUE if the block is on a journal segment boundary, FALSE otherwise
18568 +static __inline__ int
18569 +gfs_log_is_header(struct gfs_sbd *sdp, uint64_t block)
18571 + return !do_mod(block, sdp->sd_sb.sb_seg_size);
18574 +struct gfs_log_buf *gfs_log_get_buf(struct gfs_sbd *sdp, struct gfs_trans *tr);
18575 +void gfs_log_fake_buf(struct gfs_sbd *sdp, struct gfs_trans *tr, char *data,
18576 + struct buffer_head *unlock);
18578 +#endif /* __LOG_DOT_H__ */
18579 diff -urN linux-orig/fs/gfs/lops.c linux-patched/fs/gfs/lops.c
18580 --- linux-orig/fs/gfs/lops.c 1969-12-31 18:00:00.000000000 -0600
18581 +++ linux-patched/fs/gfs/lops.c 2004-06-30 13:27:49.348709970 -0500
18583 +/******************************************************************************
18584 +*******************************************************************************
18586 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
18587 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
18589 +** This copyrighted material is made available to anyone wishing to use,
18590 +** modify, copy, or redistribute it subject to the terms and conditions
18591 +** of the GNU General Public License v.2.
18593 +*******************************************************************************
18594 +******************************************************************************/
18596 +#include <linux/sched.h>
18597 +#include <linux/slab.h>
18598 +#include <linux/smp_lock.h>
18599 +#include <linux/spinlock.h>
18600 +#include <asm/semaphore.h>
18601 +#include <linux/completion.h>
18602 +#include <linux/buffer_head.h>
18606 +#include "glock.h"
18609 +#include "quota.h"
18610 +#include "recovery.h"
18611 +#include "trans.h"
18612 +#include "unlinked.h"
18615 + * generic_le_add - generic routine to add a log element to a transaction
18616 + * @sdp: the filesystem
18617 + * @le: the log entry
18622 +generic_le_add(struct gfs_sbd *sdp, struct gfs_log_element *le)
18624 + struct gfs_trans *tr;
18626 + GFS_ASSERT_SBD(le->le_ops &&
18628 + list_empty(&le->le_list), sdp,);
18630 + tr = current_transaction;
18631 + GFS_ASSERT_SBD(tr, sdp,);
18633 + le->le_trans = tr;
18634 + list_add(&le->le_list, &tr->tr_elements);
18638 + * glock_trans_end - drop a glock reference
18639 + * @sdp: the filesystem
18640 + * @le: the log element
18645 +glock_trans_end(struct gfs_sbd *sdp, struct gfs_log_element *le)
18647 + struct gfs_glock *gl = container_of(le, struct gfs_glock, gl_new_le);
18649 + GFS_ASSERT_GLOCK(gfs_glock_is_locked_by_me(gl) &&
18650 + gfs_glock_is_held_excl(gl), gl,);
18651 + gfs_glock_put(gl);
18655 + * glock_print - print debug info about a log element
18656 + * @sdp: the filesystem
18657 + * @le: the log element
18658 + * @where: is this a new transaction or an incore transaction
18663 +glock_print(struct gfs_sbd *sdp, struct gfs_log_element *le, unsigned int where)
18665 + struct gfs_glock *gl;
18668 + case TRANS_IS_NEW:
18669 + gl = container_of(le, struct gfs_glock, gl_new_le);
18671 + case TRANS_IS_INCORE:
18672 + gl = container_of(le, struct gfs_glock, gl_incore_le);
18675 + GFS_ASSERT_SBD(FALSE, sdp,);
18678 + printk(" Glock: (%u, %"PRIu64")\n",
18679 + gl->gl_name.ln_type,
18680 + gl->gl_name.ln_number);
18684 + * glock_overlap_trans - Find any incore transactions that might overlap with this LE
18685 + * @sdp: the filesystem
18686 + * @le: the log element
18690 +static struct gfs_trans *
18691 +glock_overlap_trans(struct gfs_sbd *sdp, struct gfs_log_element *le)
18693 + struct gfs_glock *gl = container_of(le, struct gfs_glock, gl_new_le);
18695 + return gl->gl_incore_le.le_trans;
18699 + * glock_incore_commit - commit this LE to the incore log
18700 + * @sdp: the filesystem
18701 + * @tr: the incore transaction this LE is a part of
18702 + * @le: the log element
18707 +glock_incore_commit(struct gfs_sbd *sdp, struct gfs_trans *tr,
18708 + struct gfs_log_element *le)
18710 + struct gfs_glock *gl = container_of(le, struct gfs_glock, gl_new_le);
18712 + if (gl->gl_incore_le.le_trans)
18713 + GFS_ASSERT_GLOCK(gl->gl_incore_le.le_trans == tr, gl,);
18715 + gl->gl_incore_le.le_trans = tr;
18716 + list_add(&gl->gl_incore_le.le_list, &tr->tr_elements);
18717 + if (tr != le->le_trans)
18721 + le->le_trans = NULL;
18722 + list_del_init(&le->le_list);
18726 + * glock_add_to_ail - Add this LE to the AIL
18727 + * @sdp: the filesystem
18728 + * @le: the log element
18733 +glock_add_to_ail(struct gfs_sbd *sdp, struct gfs_log_element *le)
18735 + le->le_trans = NULL;
18736 + list_del_init(&le->le_list);
18740 + * glock_trans_combine - combine two incore transactions
18741 + * @sdp: the filesystem
18742 + * @tr: the surviving transaction
18743 + * @new_tr: the transaction that's going to disappear
18748 +glock_trans_combine(struct gfs_sbd *sdp, struct gfs_trans *tr,
18749 + struct gfs_trans *new_tr)
18751 + tr->tr_num_gl += new_tr->tr_num_gl;
18755 + * buf_print - print debug info about a log element
18756 + * @sdp: the filesystem
18757 + * @le: the log element
18758 + * @where: is this a new transaction or an incore transaction
18763 +buf_print(struct gfs_sbd *sdp, struct gfs_log_element *le, unsigned int where)
18765 + struct gfs_bufdata *bd;
18768 + case TRANS_IS_NEW:
18769 + bd = container_of(le, struct gfs_bufdata, bd_new_le);
18771 + case TRANS_IS_INCORE:
18772 + bd = container_of(le, struct gfs_bufdata, bd_incore_le);
18775 + GFS_ASSERT_SBD(FALSE, sdp,);
18778 + printk(" Buffer: %"PRIu64"\n", (uint64_t)bd->bd_bh->b_blocknr);
18782 + * buf_incore_commit - commit this LE to the incore log
18783 + * @sdp: the filesystem
18784 + * @tr: the incore transaction this LE is a part of
18785 + * @le: the log element
18790 +buf_incore_commit(struct gfs_sbd *sdp, struct gfs_trans *tr,
18791 + struct gfs_log_element *le)
18793 + struct gfs_bufdata *bd = container_of(le, struct gfs_bufdata, bd_new_le);
18795 + if (bd->bd_frozen) {
18796 + kfree(bd->bd_frozen);
18797 + bd->bd_frozen = NULL;
18800 + if (bd->bd_incore_le.le_trans) {
18801 + GFS_ASSERT_SBD(bd->bd_incore_le.le_trans == tr, sdp,);
18802 + gfs_dunpin(sdp, bd->bd_bh, NULL);
18804 + bd->bd_incore_le.le_trans = tr;
18805 + list_add(&bd->bd_incore_le.le_list, &tr->tr_elements);
18806 + if (tr != le->le_trans)
18807 + tr->tr_num_buf++;
18809 + sdp->sd_log_buffers++;
18812 + le->le_trans = NULL;
18813 + list_del_init(&le->le_list);
18817 + * buf_add_to_ail - Add this LE to the AIL
18818 + * @sdp: the filesystem
18819 + * @le: the log element
18824 +buf_add_to_ail(struct gfs_sbd *sdp, struct gfs_log_element *le)
18826 + struct gfs_bufdata *bd = container_of(le,
18827 + struct gfs_bufdata,
18830 + gfs_dunpin(sdp, bd->bd_bh, le->le_trans);
18832 + le->le_trans = NULL;
18833 + list_del_init(&le->le_list);
18835 + GFS_ASSERT_SBD(sdp->sd_log_buffers, sdp,);
18836 + sdp->sd_log_buffers--;
18840 + * buf_trans_size - compute how much space the LE class takes up in a transaction
18841 + * @sdp: the filesystem
18842 + * @tr: the transaction
18843 + * @mblks: the number of regular metadata blocks
18844 + * @eblks: the number of extra blocks
18845 + * @blocks: the number of log blocks
18846 + * @bmem: the number of buffer-sized chunks of memory we need
18851 +buf_trans_size(struct gfs_sbd *sdp, struct gfs_trans *tr,
18852 + unsigned int *mblks, unsigned int *eblks,
18853 + unsigned int *blocks, unsigned int *bmem)
18855 + unsigned int cblks;
18857 + if (tr->tr_num_buf) {
18858 + cblks = gfs_struct2blk(sdp, tr->tr_num_buf,
18859 + sizeof(struct gfs_block_tag));
18862 + *mblks += tr->tr_num_buf;
18864 + *blocks += tr->tr_num_buf + cblks;
18871 + * buf_trans_combine - combine two incore transactions
18872 + * @sdp: the filesystem
18873 + * @tr: the surviving transaction
18874 + * @new_tr: the transaction that's going to disappear
18879 +buf_trans_combine(struct gfs_sbd *sdp, struct gfs_trans *tr,
18880 + struct gfs_trans *new_tr)
18882 + tr->tr_num_buf += new_tr->tr_num_buf;
18886 + * increment_generation - increment the generation number in metadata buffer
18887 + * @sdp: the filesystem
18888 + * @bd: the struct gfs_bufdata structure associated with the buffer
18893 +increment_generation(struct gfs_sbd *sdp, struct gfs_bufdata *bd)
18895 + struct gfs_meta_header *mh, *mh2;
18898 + mh = (struct gfs_meta_header *)bd->bd_bh->b_data;
18900 + tmp64 = gfs64_to_cpu(mh->mh_generation) + 1;
18901 + tmp64 = cpu_to_gfs64(tmp64);
18903 + if (bd->bd_frozen) {
18904 + mh2 = (struct gfs_meta_header *)bd->bd_frozen;
18905 + GFS_ASSERT_SBD(mh->mh_generation == mh2->mh_generation, sdp,);
18906 + mh2->mh_generation = tmp64;
18908 + mh->mh_generation = tmp64;
18912 + * buf_build_bhlist - create the buffers that will make up the ondisk part of a transaction
18913 + * @sdp: the filesystem
18914 + * @tr: the transaction
18919 +buf_build_bhlist(struct gfs_sbd *sdp, struct gfs_trans *tr)
18921 + struct list_head *tmp, *head;
18922 + struct gfs_log_element *le;
18923 + struct gfs_bufdata *bd;
18924 + struct gfs_log_descriptor desc;
18925 + struct gfs_block_tag tag;
18926 + struct gfs_log_buf *clb = NULL;
18927 + unsigned int num_ctl;
18928 + unsigned int offset = sizeof(struct gfs_log_descriptor);
18929 + unsigned int x, bufs;
18931 + if (!tr->tr_num_buf)
18934 + /* set up control buffers for descriptor and tags */
18936 + num_ctl = gfs_struct2blk(sdp, tr->tr_num_buf,
18937 + sizeof(struct gfs_block_tag));
18939 + for (x = 0; x < num_ctl; x++) {
18941 + gfs_log_get_buf(sdp, tr);
18943 + clb = gfs_log_get_buf(sdp, tr);
18946 + memset(&desc, 0, sizeof(struct gfs_log_descriptor));
18947 + desc.ld_header.mh_magic = GFS_MAGIC;
18948 + desc.ld_header.mh_type = GFS_METATYPE_LD;
18949 + desc.ld_header.mh_format = GFS_FORMAT_LD;
18950 + desc.ld_type = GFS_LOG_DESC_METADATA;
18951 + desc.ld_length = num_ctl + tr->tr_num_buf;
18952 + desc.ld_data1 = tr->tr_num_buf;
18953 + gfs_desc_out(&desc, clb->lb_bh.b_data);
18958 + for (head = &tr->tr_elements, tmp = head->next;
18960 + tmp = tmp->next) {
18961 + le = list_entry(tmp, struct gfs_log_element, le_list);
18962 + if (le->le_ops != &gfs_buf_lops)
18964 + bd = container_of(le, struct gfs_bufdata, bd_incore_le);
18966 + gfs_meta_check(sdp, bd->bd_bh);
18968 + gfs_lock_buffer(bd->bd_bh);
18970 + increment_generation(sdp, bd);
18972 + gfs_log_fake_buf(sdp, tr,
18973 + (bd->bd_frozen) ? bd->bd_frozen : bd->bd_bh->b_data,
18976 + if (offset + sizeof(struct gfs_block_tag) > sdp->sd_sb.sb_bsize) {
18977 + clb = list_entry(clb->lb_list.prev,
18978 + struct gfs_log_buf, lb_list);
18979 + if (gfs_log_is_header(sdp, clb->lb_bh.b_blocknr))
18980 + clb = list_entry(clb->lb_list.prev,
18981 + struct gfs_log_buf, lb_list);
18986 + memset(&tag, 0, sizeof(struct gfs_block_tag));
18987 + tag.bt_blkno = bd->bd_bh->b_blocknr;
18989 + gfs_block_tag_out(&tag, clb->lb_bh.b_data + offset);
18991 + offset += sizeof(struct gfs_block_tag);
18995 + GFS_ASSERT_SBD(x == num_ctl, sdp,);
18996 + GFS_ASSERT_SBD(bufs == tr->tr_num_buf, sdp,);
19000 + * buf_before_scan - called before journal replay
19001 + * @sdp: the filesystem
19002 + * @jid: the journal ID about to be replayed
19003 + * @head: the current head of the log
19004 + * @pass: the pass through the journal
19009 +buf_before_scan(struct gfs_sbd *sdp, unsigned int jid,
19010 + struct gfs_log_header *head, unsigned int pass)
19012 + if (pass == GFS_RECPASS_A1)
19013 + sdp->sd_recovery_replays =
19014 + sdp->sd_recovery_skips =
19015 + sdp->sd_recovery_sames = 0;
19019 + * replay_block - Replay a single metadata block
19020 + * @sdp: the filesystem
19021 + * @jdesc: the struct gfs_jindex structure for the journal being replayed
19022 + * @gl: the journal's glock
19023 + * @tag: the block tag describing the inplace location of the block
19024 + * @blkno: the location of the log's copy of the block
19026 + * Returns: 0 on success, -EXXX on failure
19030 +replay_block(struct gfs_sbd *sdp, struct gfs_jindex *jdesc,
19031 + struct gfs_glock *gl, struct gfs_block_tag *tag, uint64_t blkno)
19033 + struct buffer_head *inplace_bh, *log_bh;
19034 + struct gfs_meta_header inplace_mh, log_mh;
19035 + int replay_block = TRUE;
19038 + gfs_replay_check(sdp);
19040 + /* Warning: Using a real buffer here instead of a tempbh can be bad
19041 + on an OS that won't support multiple simultaneous buffers for the
19042 + same block on different glocks. */
19044 + error = gfs_dread(sdp, tag->bt_blkno, gl,
19045 + DIO_START | DIO_WAIT, &inplace_bh);
19048 + gfs_meta_check(sdp, inplace_bh);
19049 + gfs_meta_header_in(&inplace_mh, inplace_bh->b_data);
19051 + error = gfs_dread(sdp, blkno, gl, DIO_START | DIO_WAIT, &log_bh);
19053 + brelse(inplace_bh);
19056 + gfs_meta_check(sdp, log_bh);
19057 + gfs_meta_header_in(&log_mh, log_bh->b_data);
19059 + if (log_mh.mh_generation < inplace_mh.mh_generation) {
19060 + replay_block = FALSE;
19061 + sdp->sd_recovery_skips++;
19062 + } else if (log_mh.mh_generation == inplace_mh.mh_generation) {
19063 + if (memcmp(log_bh->b_data,
19064 + inplace_bh->b_data,
19065 + sdp->sd_sb.sb_bsize) == 0) {
19066 + replay_block = FALSE;
19067 + sdp->sd_recovery_sames++;
19071 + if (replay_block) {
19072 + memcpy(inplace_bh->b_data,
19074 + sdp->sd_sb.sb_bsize);
19076 + error = gfs_replay_buf(gl, inplace_bh);
19078 + sdp->sd_recovery_replays++;
19082 + brelse(inplace_bh);
19088 + * buf_scan_elements - Replay a metadata log descriptor
19089 + * @sdp: the filesystem
19090 + * @jdesc: the struct gfs_jindex structure for the journal being replayed
19091 + * @gl: the journal's glock
19092 + * @start: the starting block of the descriptor
19093 + * @desc: the descriptor structure
19094 + * @pass: the pass through the journal
19096 + * Returns: 0 on success, -EXXX on failure
19100 +buf_scan_elements(struct gfs_sbd *sdp, struct gfs_jindex *jdesc,
19101 + struct gfs_glock *gl, uint64_t start,
19102 + struct gfs_log_descriptor *desc, unsigned int pass)
19104 + struct gfs_block_tag tag;
19105 + struct buffer_head *bh;
19106 + uint64_t cblk = start;
19107 + unsigned int num_tags = desc->ld_data1;
19108 + unsigned int offset = sizeof(struct gfs_log_descriptor);
19112 + if (pass != GFS_RECPASS_A1)
19114 + if (desc->ld_type != GFS_LOG_DESC_METADATA)
19117 + x = gfs_struct2blk(sdp, num_tags, sizeof(struct gfs_block_tag));
19119 + error = gfs_increment_blkno(sdp, jdesc, gl, &start, TRUE);
19125 + GFS_ASSERT_SBD(num_tags, sdp,);
19127 + error = gfs_dread(sdp, cblk, gl, DIO_START | DIO_WAIT, &bh);
19131 + /* Do readahead for the inplace blocks in this control block */
19133 + unsigned int o2 = offset;
19134 + unsigned int nt2 = num_tags;
19136 + while (o2 + sizeof(struct gfs_block_tag) <=
19137 + sdp->sd_sb.sb_bsize) {
19138 + gfs_block_tag_in(&tag, bh->b_data + o2);
19139 + gfs_start_ra(gl, tag.bt_blkno, 1);
19142 + o2 += sizeof(struct gfs_block_tag);
19146 + while (offset + sizeof(struct gfs_block_tag) <=
19147 + sdp->sd_sb.sb_bsize) {
19148 + gfs_block_tag_in(&tag, bh->b_data + offset);
19150 + error = replay_block(sdp, jdesc, gl, &tag, start);
19157 + error = gfs_increment_blkno(sdp, jdesc, gl, &start, TRUE);
19161 + offset += sizeof(struct gfs_block_tag);
19166 + error = gfs_increment_blkno(sdp, jdesc, gl, &cblk, TRUE);
19182 + * buf_after_scan - called after journal replay
19183 + * @sdp: the filesystem
19184 + * @jid: the journal ID that was replayed
19185 + * @pass: the pass through the journal
19190 +buf_after_scan(struct gfs_sbd *sdp, unsigned int jid, unsigned int pass)
19192 + if (pass == GFS_RECPASS_A1) {
19193 + printk("GFS: fsid=%s: jid=%u: Replayed %u of %u blocks\n",
19194 + sdp->sd_fsname, jid,
19195 + sdp->sd_recovery_replays,
19196 + sdp->sd_recovery_replays + sdp->sd_recovery_skips +
19197 + sdp->sd_recovery_sames);
19198 + printk("GFS: fsid=%s: jid=%u: replays = %u, skips = %u, sames = %u\n",
19199 + sdp->sd_fsname, jid, sdp->sd_recovery_replays,
19200 + sdp->sd_recovery_skips, sdp->sd_recovery_sames);
19205 + * unlinked_print - print debug info about a log element
19206 + * @sdp: the filesystem
19207 + * @le: the log element
19208 + * @where: is this a new transaction or an incore transaction
19213 +unlinked_print(struct gfs_sbd *sdp, struct gfs_log_element *le,
19214 + unsigned int where)
19216 + struct gfs_unlinked *ul;
19220 + case TRANS_IS_NEW:
19221 + ul = container_of(le, struct gfs_unlinked, ul_new_le);
19222 + type = (test_bit(ULF_NEW_UL, &ul->ul_flags)) ?
19223 + "unlink" : "dealloc";
19225 + case TRANS_IS_INCORE:
19226 + ul = container_of(le, struct gfs_unlinked, ul_incore_le);
19227 + type = (test_bit(ULF_INCORE_UL, &ul->ul_flags)) ?
19228 + "unlink" : "dealloc";
19231 + GFS_ASSERT_SBD(FALSE, sdp,);
19234 + printk(" unlinked: %"PRIu64"/%"PRIu64", %s\n",
19235 + ul->ul_inum.no_formal_ino, ul->ul_inum.no_addr,
19240 + * unlinked_incore_commit - commit this LE to the incore log
19241 + * @sdp: the filesystem
19242 + * @tr: the incore transaction this LE is a part of
19243 + * @le: the log element
19248 +unlinked_incore_commit(struct gfs_sbd *sdp, struct gfs_trans *tr,
19249 + struct gfs_log_element *le)
19251 + struct gfs_unlinked *ul = container_of(le,
19252 + struct gfs_unlinked,
19254 + int n = !!test_bit(ULF_NEW_UL, &ul->ul_flags);
19255 + int i = !!test_bit(ULF_INCORE_UL, &ul->ul_flags);
19257 + if (ul->ul_incore_le.le_trans) {
19258 + GFS_ASSERT_SBD(ul->ul_incore_le.le_trans == tr, sdp,);
19259 + GFS_ASSERT_SBD(n != i, sdp,);
19261 + ul->ul_incore_le.le_trans = NULL;
19262 + list_del_init(&ul->ul_incore_le.le_list);
19263 + gfs_unlinked_put(sdp, ul);
19266 + GFS_ASSERT_SBD(tr->tr_num_iul, sdp,);
19267 + tr->tr_num_iul--;
19269 + GFS_ASSERT_SBD(tr->tr_num_ida, sdp,);
19270 + tr->tr_num_ida--;
19273 + gfs_unlinked_hold(sdp, ul);
19274 + ul->ul_incore_le.le_trans = tr;
19275 + list_add(&ul->ul_incore_le.le_list, &tr->tr_elements);
19278 + set_bit(ULF_INCORE_UL, &ul->ul_flags);
19279 + if (tr != le->le_trans)
19280 + tr->tr_num_iul++;
19282 + clear_bit(ULF_INCORE_UL, &ul->ul_flags);
19283 + if (tr != le->le_trans)
19284 + tr->tr_num_ida++;
19289 + gfs_unlinked_hold(sdp, ul);
19290 + GFS_ASSERT_SBD(!test_bit(ULF_IC_LIST, &ul->ul_flags), sdp,);
19291 + set_bit(ULF_IC_LIST, &ul->ul_flags);
19292 + atomic_inc(&sdp->sd_unlinked_ic_count);
19294 + GFS_ASSERT_SBD(test_bit(ULF_IC_LIST, &ul->ul_flags), sdp,);
19295 + clear_bit(ULF_IC_LIST, &ul->ul_flags);
19296 + gfs_unlinked_put(sdp, ul);
19297 + GFS_ASSERT_SBD(atomic_read(&sdp->sd_unlinked_ic_count), sdp,);
19298 + atomic_dec(&sdp->sd_unlinked_ic_count);
19301 + le->le_trans = NULL;
19302 + list_del_init(&le->le_list);
19303 + gfs_unlinked_put(sdp, ul);
19307 + * unlinked_add_to_ail - Add this LE to the AIL
19308 + * @sdp: the filesystem
19309 + * @le: the log element
19314 +unlinked_add_to_ail(struct gfs_sbd *sdp, struct gfs_log_element *le)
19316 + struct gfs_unlinked *ul = container_of(le,
19317 + struct gfs_unlinked,
19319 + int i = !!test_bit(ULF_INCORE_UL, &ul->ul_flags);
19322 + gfs_unlinked_hold(sdp, ul);
19323 + GFS_ASSERT_SBD(!test_bit(ULF_OD_LIST, &ul->ul_flags), sdp,);
19324 + set_bit(ULF_OD_LIST, &ul->ul_flags);
19325 + atomic_inc(&sdp->sd_unlinked_od_count);
19327 + GFS_ASSERT_SBD(test_bit(ULF_OD_LIST, &ul->ul_flags), sdp,);
19328 + clear_bit(ULF_OD_LIST, &ul->ul_flags);
19329 + gfs_unlinked_put(sdp, ul);
19330 + GFS_ASSERT_SBD(atomic_read(&sdp->sd_unlinked_od_count), sdp,);
19331 + atomic_dec(&sdp->sd_unlinked_od_count);
19334 + le->le_trans = NULL;
19335 + list_del_init(&le->le_list);
19336 + gfs_unlinked_put(sdp, ul);
19340 + * unlinked_clean_dump - clean up a LE after a log dump
19341 + * @sdp: the filesystem
19342 + * @le: the log element
19347 +unlinked_clean_dump(struct gfs_sbd *sdp, struct gfs_log_element *le)
19349 + le->le_trans = NULL;
19350 + list_del_init(&le->le_list);
19354 + * unlinked_trans_size - compute how much space the LE class takes up in a transaction
19355 + * @sdp: the filesystem
19356 + * @tr: the transaction
19357 + * @mblks: the number of regular metadata blocks
19358 + * @eblks: the number of extra blocks
19359 + * @blocks: the number of log blocks
19360 + * @bmem: the number of buffer-sized chunks of memory we need
19365 +unlinked_trans_size(struct gfs_sbd *sdp, struct gfs_trans *tr,
19366 + unsigned int *mblks, unsigned int *eblks,
19367 + unsigned int *blocks, unsigned int *bmem)
19369 + unsigned int ublks = 0;
19371 + if (tr->tr_num_iul)
19372 + ublks = gfs_struct2blk(sdp, tr->tr_num_iul,
19373 + sizeof(struct gfs_inum));
19374 + if (tr->tr_num_ida)
19375 + ublks += gfs_struct2blk(sdp, tr->tr_num_ida,
19376 + sizeof(struct gfs_inum));
19381 + *blocks += ublks;
19387 + * unlinked_trans_combine - combine two incore transactions
19388 + * @sdp: the filesystem
19389 + * @tr: the surviving transaction
19390 + * @new_tr: the transaction that's going to disappear
19395 +unlinked_trans_combine(struct gfs_sbd *sdp, struct gfs_trans *tr,
19396 + struct gfs_trans *new_tr)
19398 + tr->tr_num_iul += new_tr->tr_num_iul;
19399 + tr->tr_num_ida += new_tr->tr_num_ida;
19403 + * unlinked_build_bhlist - create the buffers that will make up the ondisk part of a transaction
19404 + * @sdp: the filesystem
19405 + * @tr: the transaction
19410 +unlinked_build_bhlist(struct gfs_sbd *sdp, struct gfs_trans *tr)
19412 + struct list_head *tmp, *head;
19413 + struct gfs_log_element *le;
19414 + struct gfs_unlinked *ul;
19415 + struct gfs_log_descriptor desc;
19416 + struct gfs_log_buf *lb;
19417 + unsigned int pass = 2;
19418 + unsigned int type, number;
19419 + unsigned int offset, entries;
19422 + if (tr->tr_flags & TRF_LOG_DUMP) {
19424 + type = GFS_LOG_DESC_IUL;
19425 + number = tr->tr_num_iul;
19430 + type = GFS_LOG_DESC_IUL;
19431 + number = tr->tr_num_iul;
19433 + type = GFS_LOG_DESC_IDA;
19434 + number = tr->tr_num_ida;
19441 + lb = gfs_log_get_buf(sdp, tr);
19443 + memset(&desc, 0, sizeof(struct gfs_log_descriptor));
19444 + desc.ld_header.mh_magic = GFS_MAGIC;
19445 + desc.ld_header.mh_type = GFS_METATYPE_LD;
19446 + desc.ld_header.mh_format = GFS_FORMAT_LD;
19447 + desc.ld_type = type;
19448 + desc.ld_length = gfs_struct2blk(sdp, number, sizeof(struct gfs_inum));
19449 + desc.ld_data1 = (tr->tr_flags & TRF_LOG_DUMP) ? TRUE : FALSE;
19450 + gfs_desc_out(&desc, lb->lb_bh.b_data);
19452 + offset = sizeof(struct gfs_log_descriptor);
19455 + for (head = &tr->tr_elements, tmp = head->next;
19457 + tmp = tmp->next) {
19458 + le = list_entry(tmp, struct gfs_log_element, le_list);
19459 + if (le->le_ops != &gfs_unlinked_lops)
19461 + if (tr->tr_flags & TRF_LOG_DUMP)
19462 + ul = container_of(le,
19463 + struct gfs_unlinked,
19466 + ul = container_of(le,
19467 + struct gfs_unlinked,
19469 + if (!!test_bit(ULF_INCORE_UL, &ul->ul_flags) != pass)
19473 + if (offset + sizeof(struct gfs_inum) > sdp->sd_sb.sb_bsize) {
19475 + lb = gfs_log_get_buf(sdp, tr);
19478 + gfs_inum_out(&ul->ul_inum,
19479 + lb->lb_bh.b_data + offset);
19481 + offset += sizeof(struct gfs_inum);
19485 + GFS_ASSERT_SBD(entries == number, sdp,);
19490 + * unlinked_dump_size - compute how much space the LE class takes up in a log dump
19491 + * @sdp: the filesystem
19492 + * @elements: the number of log elements in the dump
19493 + * @blocks: the number of blocks in the dump
19494 + * @bmem: the number of buffer-sized chunks of memory we need
19499 +unlinked_dump_size(struct gfs_sbd *sdp, unsigned int *elements,
19500 + unsigned int *blocks, unsigned int *bmem)
19502 + unsigned int c = atomic_read(&sdp->sd_unlinked_od_count);
19503 + unsigned int b = gfs_struct2blk(sdp, c, sizeof(struct gfs_inum));
19514 + * unlinked_build_dump - create a transaction that represents a log dump for this LE class
19515 + * @sdp: the filesystem
19516 + * @tr: the transaction to fill
19521 +unlinked_build_dump(struct gfs_sbd *sdp, struct gfs_trans *tr)
19523 + struct list_head *tmp, *head;
19524 + struct gfs_unlinked *ul;
19525 + unsigned int x = 0;
19527 + tr->tr_num_iul = atomic_read(&sdp->sd_unlinked_od_count);
19529 + spin_lock(&sdp->sd_unlinked_lock);
19531 + for (head = &sdp->sd_unlinked_list, tmp = head->next;
19533 + tmp = tmp->next) {
19534 + ul = list_entry(tmp, struct gfs_unlinked, ul_list);
19535 + if (!test_bit(ULF_OD_LIST, &ul->ul_flags))
19538 + GFS_ASSERT_SBD(!ul->ul_ondisk_le.le_trans, sdp,);
19539 + ul->ul_ondisk_le.le_trans = tr;
19540 + list_add(&ul->ul_ondisk_le.le_list, &tr->tr_elements);
19545 + spin_unlock(&sdp->sd_unlinked_lock);
19547 + GFS_ASSERT_SBD(x == atomic_read(&sdp->sd_unlinked_od_count), sdp,);
19551 + * unlinked_before_scan - called before a log dump is recovered
19552 + * @sdp: the filesystem
19553 + * @jid: the journal ID about to be scanned
19554 + * @head: the current head of the log
19555 + * @pass: the pass through the journal
19560 +unlinked_before_scan(struct gfs_sbd *sdp, unsigned int jid,
19561 + struct gfs_log_header *head, unsigned int pass)
19563 + if (pass == GFS_RECPASS_B1)
19564 + clear_bit(SDF_FOUND_UL_DUMP, &sdp->sd_flags);
19568 + * unlinked_scan_elements - scan unlinked inodes from the journal
19569 + * @sdp: the filesystem
19570 + * @jdesc: the struct gfs_jindex structure for the journal being scanned
19571 + * @gl: the journal's glock
19572 + * @start: the starting block of the descriptor
19573 + * @desc: the descriptor structure
19574 + * @pass: the pass through the journal
19576 + * Returns: 0 on success, -EXXX on failure
19580 +unlinked_scan_elements(struct gfs_sbd *sdp, struct gfs_jindex *jdesc,
19581 + struct gfs_glock *gl, uint64_t start,
19582 + struct gfs_log_descriptor *desc, unsigned int pass)
19584 + struct gfs_inum inum;
19585 + struct buffer_head *bh;
19586 + unsigned int offset = sizeof(struct gfs_log_descriptor);
19590 + if (pass != GFS_RECPASS_B1)
19593 + switch (desc->ld_type) {
19594 + case GFS_LOG_DESC_IUL:
19595 + if (test_bit(SDF_FOUND_UL_DUMP, &sdp->sd_flags))
19596 + GFS_ASSERT_SBD(!desc->ld_data1, sdp,);
19598 + GFS_ASSERT_SBD(desc->ld_data1, sdp,);
19599 + set_bit(SDF_FOUND_UL_DUMP, &sdp->sd_flags);
19603 + case GFS_LOG_DESC_IDA:
19604 + GFS_ASSERT_SBD(test_bit(SDF_FOUND_UL_DUMP, &sdp->sd_flags),
19612 + for (x = 0; x < desc->ld_length; x++) {
19613 + error = gfs_dread(sdp, start, gl, DIO_START | DIO_WAIT, &bh);
19618 + offset + sizeof(struct gfs_inum) <= sdp->sd_sb.sb_bsize;
19619 + offset += sizeof(struct gfs_inum)) {
19620 + gfs_inum_in(&inum, bh->b_data + offset);
19622 + if (inum.no_addr)
19623 + gfs_unlinked_merge(sdp, desc->ld_type, &inum);
19628 + error = gfs_increment_blkno(sdp, jdesc, gl, &start, TRUE);
19639 + * unlinked_after_scan - called after a log dump is recovered
19640 + * @sdp: the filesystem
19641 + * @jid: the journal ID that was scanned
19642 + * @pass: the pass through the journal
19647 +unlinked_after_scan(struct gfs_sbd *sdp, unsigned int jid, unsigned int pass)
19649 + if (pass == GFS_RECPASS_B1) {
19650 + GFS_ASSERT_SBD(test_bit(SDF_FOUND_UL_DUMP, &sdp->sd_flags),
19652 + printk("GFS: fsid=%s: Found %d unlinked inodes\n",
19653 + sdp->sd_fsname, atomic_read(&sdp->sd_unlinked_ic_count));
19658 + * quota_print - print debug info about a log element
19659 + * @sdp: the filesystem
19660 + * @le: the log element
19661 + * @where: is this a new transaction or an incore transaction
19666 +quota_print(struct gfs_sbd *sdp, struct gfs_log_element *le, unsigned int where)
19668 + struct gfs_quota_le *ql;
19670 + ql = container_of(le, struct gfs_quota_le, ql_le);
19671 + printk(" quota: %s %u: %"PRId64" blocks\n",
19672 + (test_bit(QDF_USER, &ql->ql_data->qd_flags)) ? "user" : "group",
19673 + ql->ql_data->qd_id, ql->ql_change);
19677 + * quota_incore_commit - commit this LE to the incore log
19678 + * @sdp: the filesystem
19679 + * @tr: the incore transaction this LE is a part of
19680 + * @le: the log element
19685 +quota_incore_commit(struct gfs_sbd *sdp, struct gfs_trans *tr,
19686 + struct gfs_log_element *le)
19688 + struct gfs_quota_le *ql = container_of(le, struct gfs_quota_le, ql_le);
19689 + struct gfs_quota_data *qd = ql->ql_data;
19691 + GFS_ASSERT_SBD(ql->ql_change, sdp,);
19693 + /* Make this change under the sd_quota_lock, so other processes
19694 + checking qd_change_ic don't have to acquire the log lock. */
19696 + spin_lock(&sdp->sd_quota_lock);
19697 + qd->qd_change_new -= ql->ql_change;
19698 + qd->qd_change_ic += ql->ql_change;
19699 + spin_unlock(&sdp->sd_quota_lock);
19701 + if (le->le_trans == tr)
19702 + list_add(&ql->ql_data_list, &qd->qd_le_list);
19704 + struct list_head *tmp, *head;
19705 + struct gfs_quota_le *tmp_ql;
19706 + int found = FALSE;
19708 + for (head = &qd->qd_le_list, tmp = head->next;
19710 + tmp = tmp->next) {
19711 + tmp_ql = list_entry(tmp, struct gfs_quota_le, ql_data_list);
19712 + if (tmp_ql->ql_le.le_trans != tr)
19715 + tmp_ql->ql_change += ql->ql_change;
19717 + list_del(&le->le_list);
19718 + gfs_quota_put(sdp, qd);
19721 + if (!tmp_ql->ql_change) {
19722 + list_del(&tmp_ql->ql_data_list);
19723 + list_del(&tmp_ql->ql_le.le_list);
19724 + gfs_quota_put(sdp, tmp_ql->ql_data);
19734 + le->le_trans = tr;
19735 + list_move(&le->le_list, &tr->tr_elements);
19737 + list_add(&ql->ql_data_list, &qd->qd_le_list);
19743 + * quota_add_to_ail - Add this LE to the AIL
19744 + * @sdp: the filesystem
19745 + * @le: the log element
19750 +quota_add_to_ail(struct gfs_sbd *sdp, struct gfs_log_element *le)
19752 + struct gfs_quota_le *ql = container_of(le, struct gfs_quota_le, ql_le);
19753 + struct gfs_quota_data *qd = ql->ql_data;
19755 + qd->qd_change_od += ql->ql_change;
19756 + if (qd->qd_change_od) {
19757 + if (!test_bit(QDF_OD_LIST, &qd->qd_flags)) {
19758 + gfs_quota_hold(sdp, qd);
19759 + set_bit(QDF_OD_LIST, &qd->qd_flags);
19760 + atomic_inc(&sdp->sd_quota_od_count);
19763 + GFS_ASSERT_SBD(test_bit(QDF_OD_LIST, &qd->qd_flags), sdp,);
19764 + clear_bit(QDF_OD_LIST, &qd->qd_flags);
19765 + gfs_quota_put(sdp, qd);
19766 + GFS_ASSERT_SBD(atomic_read(&sdp->sd_quota_od_count), sdp,);
19767 + atomic_dec(&sdp->sd_quota_od_count);
19770 + list_del(&ql->ql_data_list);
19771 + list_del(&le->le_list);
19772 + gfs_quota_put(sdp, qd);
19777 + * quota_clean_dump - clean up a LE after a log dump
19778 + * @sdp: the filesystem
19779 + * @le: the log element
19784 +quota_clean_dump(struct gfs_sbd *sdp, struct gfs_log_element *le)
19786 + le->le_trans = NULL;
19787 + list_del_init(&le->le_list);
19791 + * quota_trans_size - compute how much space the LE class takes up in a transaction
19792 + * @sdp: the filesystem
19793 + * @tr: the transaction
19794 + * @mblks: the number of regular metadata blocks
19795 + * @eblks: the number of extra blocks
19796 + * @blocks: the number of log blocks
19797 + * @bmem: the number of buffer-sized chunks of memory we need
19802 +quota_trans_size(struct gfs_sbd *sdp, struct gfs_trans *tr,
19803 + unsigned int *mblks, unsigned int *eblks,
19804 + unsigned int *blocks, unsigned int *bmem)
19806 + unsigned int qblks;
19808 + if (tr->tr_num_q) {
19809 + qblks = gfs_struct2blk(sdp, tr->tr_num_q,
19810 + sizeof(struct gfs_quota_tag));
19815 + *blocks += qblks;
19822 + * quota_trans_combine - combine two incore transactions
19823 + * @sdp: the filesystem
19824 + * @tr: the surviving transaction
19825 + * @new_tr: the transaction that's going to disappear
19830 +quota_trans_combine(struct gfs_sbd *sdp, struct gfs_trans *tr,
19831 + struct gfs_trans *new_tr)
19833 + tr->tr_num_q += new_tr->tr_num_q;
19837 + * quota_build_bhlist - create the buffers that will make up the ondisk part of a transaction
19838 + * @sdp: the filesystem
19839 + * @tr: the transaction
19844 +quota_build_bhlist(struct gfs_sbd *sdp, struct gfs_trans *tr)
19846 + struct list_head *tmp, *head;
19847 + struct gfs_log_element *le;
19848 + struct gfs_quota_le *ql;
19849 + struct gfs_log_descriptor desc;
19850 + struct gfs_quota_tag tag;
19851 + struct gfs_log_buf *lb;
19852 + unsigned int offset = sizeof(struct gfs_log_descriptor), entries = 0;
19854 + if (!tr->tr_num_q && !(tr->tr_flags & TRF_LOG_DUMP))
19857 + lb = gfs_log_get_buf(sdp, tr);
19859 + memset(&desc, 0, sizeof(struct gfs_log_descriptor));
19860 + desc.ld_header.mh_magic = GFS_MAGIC;
19861 + desc.ld_header.mh_type = GFS_METATYPE_LD;
19862 + desc.ld_header.mh_format = GFS_FORMAT_LD;
19863 + desc.ld_type = GFS_LOG_DESC_Q;
19864 + desc.ld_length = gfs_struct2blk(sdp, tr->tr_num_q,
19865 + sizeof(struct gfs_quota_tag));
19866 + desc.ld_data1 = tr->tr_num_q;
19867 + desc.ld_data2 = (tr->tr_flags & TRF_LOG_DUMP) ? TRUE : FALSE;
19868 + gfs_desc_out(&desc, lb->lb_bh.b_data);
19870 + for (head = &tr->tr_elements, tmp = head->next;
19872 + tmp = tmp->next) {
19873 + le = list_entry(tmp, struct gfs_log_element, le_list);
19874 + if (le->le_ops != &gfs_quota_lops)
19877 + ql = container_of(le, struct gfs_quota_le, ql_le);
19879 + if (offset + sizeof(struct gfs_quota_tag) >
19880 + sdp->sd_sb.sb_bsize) {
19882 + lb = gfs_log_get_buf(sdp, tr);
19885 + memset(&tag, 0, sizeof(struct gfs_quota_tag));
19886 + tag.qt_change = ql->ql_change;
19887 + tag.qt_flags = (test_bit(QDF_USER, &ql->ql_data->qd_flags)) ?
19888 + GFS_QTF_USER : 0;
19889 + tag.qt_id = ql->ql_data->qd_id;
19891 + gfs_quota_tag_out(&tag, lb->lb_bh.b_data + offset);
19893 + offset += sizeof(struct gfs_quota_tag);
19897 + GFS_ASSERT_SBD(entries == tr->tr_num_q, sdp,);
19901 + * quota_dump_size - compute how much space the LE class takes up in a log dump
19902 + * @sdp: the filesystem
19903 + * @elements: the number of log elements in the dump
19904 + * @blocks: the number of blocks in the dump
19905 + * @bmem: the number of buffer-sized chunks of memory we need
19910 +quota_dump_size(struct gfs_sbd *sdp, unsigned int *elements,
19911 + unsigned int *blocks, unsigned int *bmem)
19913 + unsigned int c = atomic_read(&sdp->sd_quota_od_count);
19914 + unsigned int b = gfs_struct2blk(sdp, c, sizeof(struct gfs_quota_tag));
19925 + * quota_build_dump - create a transaction that represents a log dump for this LE class
19926 + * @sdp: the filesystem
19927 + * @tr: the transaction to fill
19932 +quota_build_dump(struct gfs_sbd *sdp, struct gfs_trans *tr)
19934 + struct list_head *tmp, *head;
19935 + struct gfs_quota_data *qd;
19936 + struct gfs_quota_le *ql;
19937 + unsigned int x = 0;
19939 + tr->tr_num_q = atomic_read(&sdp->sd_quota_od_count);
19941 + spin_lock(&sdp->sd_quota_lock);
19943 + for (head = &sdp->sd_quota_list, tmp = head->next;
19945 + tmp = tmp->next) {
19946 + qd = list_entry(tmp, struct gfs_quota_data, qd_list);
19947 + if (!test_bit(QDF_OD_LIST, &qd->qd_flags))
19950 + ql = &qd->qd_ondisk_ql;
19952 + ql->ql_le.le_ops = &gfs_quota_lops;
19953 + GFS_ASSERT_SBD(!ql->ql_le.le_trans, sdp,);
19954 + ql->ql_le.le_trans = tr;
19955 + list_add(&ql->ql_le.le_list, &tr->tr_elements);
19957 + ql->ql_data = qd;
19958 + ql->ql_change = qd->qd_change_od;
19963 + spin_unlock(&sdp->sd_quota_lock);
19965 + GFS_ASSERT_SBD(x == atomic_read(&sdp->sd_quota_od_count), sdp,);
19969 + * quota_before_scan - called before a log dump is recovered
19970 + * @sdp: the filesystem
19971 + * @jid: the journal ID about to be scanned
19972 + * @head: the current head of the log
19973 + * @pass: the pass through the journal
19978 +quota_before_scan(struct gfs_sbd *sdp, unsigned int jid,
19979 + struct gfs_log_header *head, unsigned int pass)
19981 + if (pass == GFS_RECPASS_B1)
19982 + clear_bit(SDF_FOUND_Q_DUMP, &sdp->sd_flags);
19986 + * quota_scan_elements - scan quota inodes from the journal
19987 + * @sdp: the filesystem
19988 + * @jdesc: the struct gfs_jindex structure for the journal being scanned
19989 + * @gl: the journal's glock
19990 + * @start: the starting block of the descriptor
19991 + * @desc: the descriptor structure
19992 + * @pass: the pass through the journal
19994 + * Returns: 0 on success, -EXXX on failure
19998 +quota_scan_elements(struct gfs_sbd *sdp, struct gfs_jindex *jdesc,
19999 + struct gfs_glock *gl, uint64_t start,
20000 + struct gfs_log_descriptor *desc, unsigned int pass)
20002 + struct gfs_quota_tag tag;
20003 + struct buffer_head *bh;
20004 + unsigned int num_tags = desc->ld_data1;
20005 + unsigned int offset = sizeof(struct gfs_log_descriptor);
20009 + if (pass != GFS_RECPASS_B1)
20011 + if (desc->ld_type != GFS_LOG_DESC_Q)
20014 + if (test_bit(SDF_FOUND_Q_DUMP, &sdp->sd_flags))
20015 + GFS_ASSERT_SBD(!desc->ld_data2, sdp,);
20017 + GFS_ASSERT_SBD(desc->ld_data2, sdp,);
20018 + set_bit(SDF_FOUND_Q_DUMP, &sdp->sd_flags);
20024 + for (x = 0; x < desc->ld_length; x++) {
20025 + error = gfs_dread(sdp, start, gl, DIO_START | DIO_WAIT, &bh);
20029 + while (offset + sizeof(struct gfs_quota_tag) <=
20030 + sdp->sd_sb.sb_bsize) {
20031 + gfs_quota_tag_in(&tag, bh->b_data + offset);
20033 + error = gfs_quota_merge(sdp, &tag);
20040 + offset += sizeof(struct gfs_quota_tag);
20045 + error = gfs_increment_blkno(sdp, jdesc, gl, &start, TRUE);
20061 + * quota_after_scan - called after a log dump is recovered
20062 + * @sdp: the filesystem
20063 + * @jid: the journal ID about to be scanned
20064 + * @pass: the pass through the journal
20069 +quota_after_scan(struct gfs_sbd *sdp, unsigned int jid, unsigned int pass)
20071 + if (pass == GFS_RECPASS_B1) {
20072 + GFS_ASSERT_SBD(!sdp->sd_sb.sb_quota_di.no_formal_ino ||
20073 + test_bit(SDF_FOUND_Q_DUMP, &sdp->sd_flags),
20075 + printk("GFS: fsid=%s: Found quota changes for %d IDs\n",
20076 + sdp->sd_fsname, atomic_read(&sdp->sd_quota_od_count));
20080 +struct gfs_log_operations gfs_glock_lops = {
20081 + .lo_add = generic_le_add,
20082 + .lo_trans_end = glock_trans_end,
20083 + .lo_print = glock_print,
20084 + .lo_overlap_trans = glock_overlap_trans,
20085 + .lo_incore_commit = glock_incore_commit,
20086 + .lo_add_to_ail = glock_add_to_ail,
20087 + .lo_trans_combine = glock_trans_combine,
20088 + .lo_name = "glock"
20091 +struct gfs_log_operations gfs_buf_lops = {
20092 + .lo_add = generic_le_add,
20093 + .lo_print = buf_print,
20094 + .lo_incore_commit = buf_incore_commit,
20095 + .lo_add_to_ail = buf_add_to_ail,
20096 + .lo_trans_size = buf_trans_size,
20097 + .lo_trans_combine = buf_trans_combine,
20098 + .lo_build_bhlist = buf_build_bhlist,
20099 + .lo_before_scan = buf_before_scan,
20100 + .lo_scan_elements = buf_scan_elements,
20101 + .lo_after_scan = buf_after_scan,
20105 +struct gfs_log_operations gfs_unlinked_lops = {
20106 + .lo_add = generic_le_add,
20107 + .lo_print = unlinked_print,
20108 + .lo_incore_commit = unlinked_incore_commit,
20109 + .lo_add_to_ail = unlinked_add_to_ail,
20110 + .lo_clean_dump = unlinked_clean_dump,
20111 + .lo_trans_size = unlinked_trans_size,
20112 + .lo_trans_combine = unlinked_trans_combine,
20113 + .lo_build_bhlist = unlinked_build_bhlist,
20114 + .lo_dump_size = unlinked_dump_size,
20115 + .lo_build_dump = unlinked_build_dump,
20116 + .lo_before_scan = unlinked_before_scan,
20117 + .lo_scan_elements = unlinked_scan_elements,
20118 + .lo_after_scan = unlinked_after_scan,
20119 + .lo_name = "unlinked"
20122 +struct gfs_log_operations gfs_quota_lops = {
20123 + .lo_add = generic_le_add,
20124 + .lo_print = quota_print,
20125 + .lo_incore_commit = quota_incore_commit,
20126 + .lo_add_to_ail = quota_add_to_ail,
20127 + .lo_clean_dump = quota_clean_dump,
20128 + .lo_trans_size = quota_trans_size,
20129 + .lo_trans_combine = quota_trans_combine,
20130 + .lo_build_bhlist = quota_build_bhlist,
20131 + .lo_dump_size = quota_dump_size,
20132 + .lo_build_dump = quota_build_dump,
20133 + .lo_before_scan = quota_before_scan,
20134 + .lo_scan_elements = quota_scan_elements,
20135 + .lo_after_scan = quota_after_scan,
20136 + .lo_name = "quota"
20139 +struct gfs_log_operations *gfs_log_ops[] = {
20142 + &gfs_unlinked_lops,
20146 diff -urN linux-orig/fs/gfs/lops.h linux-patched/fs/gfs/lops.h
20147 --- linux-orig/fs/gfs/lops.h 1969-12-31 18:00:00.000000000 -0600
20148 +++ linux-patched/fs/gfs/lops.h 2004-06-30 13:27:49.348709970 -0500
20150 +/******************************************************************************
20151 +*******************************************************************************
20153 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
20154 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
20156 +** This copyrighted material is made available to anyone wishing to use,
20157 +** modify, copy, or redistribute it subject to the terms and conditions
20158 +** of the GNU General Public License v.2.
20160 +*******************************************************************************
20161 +******************************************************************************/
20163 +#ifndef __LOPS_DOT_H__
20164 +#define __LOPS_DOT_H__
20166 +extern struct gfs_log_operations gfs_glock_lops;
20167 +extern struct gfs_log_operations gfs_buf_lops;
20168 +extern struct gfs_log_operations gfs_unlinked_lops;
20169 +extern struct gfs_log_operations gfs_quota_lops;
20171 +extern struct gfs_log_operations *gfs_log_ops[];
20173 +#define INIT_LE(le, lops) \
20176 + (le)->le_ops = (lops); \
20177 + (le)->le_trans = NULL; \
20178 + INIT_LIST_HEAD(&(le)->le_list); \
20182 +#define LO_ADD(sdp, le) \
20185 + if ((le)->le_ops->lo_add) \
20186 + (le)->le_ops->lo_add((sdp), (le)); \
20190 +#define LO_TRANS_END(sdp, le) \
20193 + if ((le)->le_ops->lo_trans_end) \
20194 + (le)->le_ops->lo_trans_end((sdp), (le)); \
20198 +#define LO_PRINT(sdp, le, where) \
20201 + if ((le)->le_ops->lo_print) \
20202 + (le)->le_ops->lo_print((sdp), (le), (where)); \
20206 +static __inline__ struct gfs_trans *
20207 +LO_OVERLAP_TRANS(struct gfs_sbd *sdp, struct gfs_log_element *le)
20209 + if (le->le_ops->lo_overlap_trans)
20210 + return le->le_ops->lo_overlap_trans(sdp, le);
20215 +#define LO_INCORE_COMMIT(sdp, tr, le) \
20218 + if ((le)->le_ops->lo_incore_commit) \
20219 + (le)->le_ops->lo_incore_commit((sdp), (tr), (le)); \
20223 +#define LO_ADD_TO_AIL(sdp, le) \
20226 + if ((le)->le_ops->lo_add_to_ail) \
20227 + (le)->le_ops->lo_add_to_ail((sdp), (le)); \
20231 +#define LO_CLEAN_DUMP(sdp, le) \
20234 + if ((le)->le_ops->lo_clean_dump) \
20235 + (le)->le_ops->lo_clean_dump((sdp), (le)); \
20239 +#define LO_TRANS_SIZE(sdp, tr, mblks, eblks, blocks, bmem) \
20243 + for (__lops_x = 0; gfs_log_ops[__lops_x]; __lops_x++) \
20244 + if (gfs_log_ops[__lops_x]->lo_trans_size) \
20245 + gfs_log_ops[__lops_x]->lo_trans_size((sdp), (tr), (mblks), (eblks), (blocks), (bmem)); \
20249 +#define LO_TRANS_COMBINE(sdp, tr, new_tr) \
20253 + for (__lops_x = 0; gfs_log_ops[__lops_x]; __lops_x++) \
20254 + if (gfs_log_ops[__lops_x]->lo_trans_combine) \
20255 + gfs_log_ops[__lops_x]->lo_trans_combine((sdp), (tr), (new_tr)); \
20259 +#define LO_BUILD_BHLIST(sdp, tr) \
20263 + for (__lops_x = 0; gfs_log_ops[__lops_x]; __lops_x++) \
20264 + if (gfs_log_ops[__lops_x]->lo_build_bhlist) \
20265 + gfs_log_ops[__lops_x]->lo_build_bhlist((sdp), (tr)); \
20269 +#define LO_DUMP_SIZE(sdp, elements, blocks, bmem) \
20273 + for (__lops_x = 0; gfs_log_ops[__lops_x]; __lops_x++) \
20274 + if (gfs_log_ops[__lops_x]->lo_dump_size) \
20275 + gfs_log_ops[__lops_x]->lo_dump_size((sdp), (elements), (blocks), (bmem)); \
20279 +#define LO_BUILD_DUMP(sdp, tr) \
20283 + for (__lops_x = 0; gfs_log_ops[__lops_x]; __lops_x++) \
20284 + if (gfs_log_ops[__lops_x]->lo_build_dump) \
20285 + gfs_log_ops[__lops_x]->lo_build_dump((sdp), (tr)); \
20289 +#define LO_BEFORE_SCAN(sdp, jid, head, pass) \
20293 + for (__lops_x = 0; gfs_log_ops[__lops_x]; __lops_x++) \
20294 + if (gfs_log_ops[__lops_x]->lo_before_scan) \
20295 + gfs_log_ops[__lops_x]->lo_before_scan((sdp), (jid), (head), (pass)); \
20299 +static __inline__ int
20300 +LO_SCAN_ELEMENTS(struct gfs_sbd *sdp, struct gfs_jindex *jdesc,
20301 + struct gfs_glock *gl, uint64_t start,
20302 + struct gfs_log_descriptor *desc, unsigned int pass)
20307 + for (x = 0; gfs_log_ops[x]; x++)
20308 + if (gfs_log_ops[x]->lo_scan_elements) {
20309 + error = gfs_log_ops[x]->lo_scan_elements(sdp, jdesc, gl,
20310 + start, desc, pass);
20318 +#define LO_AFTER_SCAN(sdp, jid, pass) \
20322 + for (__lops_x = 0; gfs_log_ops[__lops_x]; __lops_x++) \
20323 + if (gfs_log_ops[__lops_x]->lo_after_scan) \
20324 + gfs_log_ops[__lops_x]->lo_after_scan((sdp), (jid), (pass)); \
20328 +#endif /* __LOPS_DOT_H__ */
20329 diff -urN linux-orig/fs/gfs/lvb.c linux-patched/fs/gfs/lvb.c
20330 --- linux-orig/fs/gfs/lvb.c 1969-12-31 18:00:00.000000000 -0600
20331 +++ linux-patched/fs/gfs/lvb.c 2004-06-30 13:27:49.349709738 -0500
20333 +/******************************************************************************
20334 +*******************************************************************************
20336 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
20337 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
20339 +** This copyrighted material is made available to anyone wishing to use,
20340 +** modify, copy, or redistribute it subject to the terms and conditions
20341 +** of the GNU General Public License v.2.
20343 +*******************************************************************************
20344 +******************************************************************************/
20346 +#include <linux/sched.h>
20347 +#include <linux/slab.h>
20348 +#include <linux/smp_lock.h>
20349 +#include <linux/spinlock.h>
20350 +#include <asm/semaphore.h>
20351 +#include <linux/completion.h>
20352 +#include <linux/buffer_head.h>
20356 +#define pv(struct, member, fmt) printk(" "#member" = "fmt"\n", struct->member);
20358 +#define CPIN_08(s1, s2, member, count) {memcpy((s1->member), (s2->member), (count));}
20359 +#define CPOUT_08(s1, s2, member, count) {memcpy((s2->member), (s1->member), (count));}
20360 +#define CPIN_16(s1, s2, member) {(s1->member) = gfs16_to_cpu((s2->member));}
20361 +#define CPOUT_16(s1, s2, member) {(s2->member) = cpu_to_gfs16((s1->member));}
20362 +#define CPIN_32(s1, s2, member) {(s1->member) = gfs32_to_cpu((s2->member));}
20363 +#define CPOUT_32(s1, s2, member) {(s2->member) = cpu_to_gfs32((s1->member));}
20364 +#define CPIN_64(s1, s2, member) {(s1->member) = gfs64_to_cpu((s2->member));}
20365 +#define CPOUT_64(s1, s2, member) {(s2->member) = cpu_to_gfs64((s1->member));}
20368 + * gfs_rgrp_lvb_in - Read in rgrp data
20369 + * @rb: the cpu-order structure
20375 +gfs_rgrp_lvb_in(struct gfs_rgrp_lvb *rb, char *lvb)
20377 + struct gfs_rgrp_lvb *str = (struct gfs_rgrp_lvb *)lvb;
20379 + CPIN_32(rb, str, rb_magic);
20380 + CPIN_32(rb, str, rb_free);
20381 + CPIN_32(rb, str, rb_useddi);
20382 + CPIN_32(rb, str, rb_freedi);
20383 + CPIN_32(rb, str, rb_usedmeta);
20384 + CPIN_32(rb, str, rb_freemeta);
20388 + * gfs_rgrp_lvb_out - Write out rgrp data
20389 + * @rb: the cpu-order structure
20395 +gfs_rgrp_lvb_out(struct gfs_rgrp_lvb *rb, char *lvb)
20397 + struct gfs_rgrp_lvb *str = (struct gfs_rgrp_lvb *)lvb;
20399 + CPOUT_32(rb, str, rb_magic);
20400 + CPOUT_32(rb, str, rb_free);
20401 + CPOUT_32(rb, str, rb_useddi);
20402 + CPOUT_32(rb, str, rb_freedi);
20403 + CPOUT_32(rb, str, rb_usedmeta);
20404 + CPOUT_32(rb, str, rb_freemeta);
20408 + * gfs_rgrp_lvb_print - Print out rgrp data
20409 + * @rb: the cpu-order structure
20410 + * @console - TRUE if this should be printed to the console,
20411 + * FALSE if it should be just printed to the incore debug
20416 +gfs_rgrp_lvb_print(struct gfs_rgrp_lvb *rb)
20418 + pv(rb, rb_magic, "%u");
20419 + pv(rb, rb_free, "%u");
20420 + pv(rb, rb_useddi, "%u");
20421 + pv(rb, rb_freedi, "%u");
20422 + pv(rb, rb_usedmeta, "%u");
20423 + pv(rb, rb_freemeta, "%u");
20427 + * gfs_quota_lvb_in - Read in quota data
20428 + * @rb: the cpu-order structure
20434 +gfs_quota_lvb_in(struct gfs_quota_lvb *qb, char *lvb)
20436 + struct gfs_quota_lvb *str = (struct gfs_quota_lvb *)lvb;
20438 + CPIN_32(qb, str, qb_magic);
20439 + CPIN_32(qb, str, qb_pad);
20440 + CPIN_64(qb, str, qb_limit);
20441 + CPIN_64(qb, str, qb_warn);
20442 + CPIN_64(qb, str, qb_value);
20446 + * gfs_quota_lvb_out - Write out quota data
20447 + * @rb: the cpu-order structure
20453 +gfs_quota_lvb_out(struct gfs_quota_lvb *qb, char *lvb)
20455 + struct gfs_quota_lvb *str = (struct gfs_quota_lvb *)lvb;
20457 + CPOUT_32(qb, str, qb_magic);
20458 + CPOUT_32(qb, str, qb_pad);
20459 + CPOUT_64(qb, str, qb_limit);
20460 + CPOUT_64(qb, str, qb_warn);
20461 + CPOUT_64(qb, str, qb_value);
20465 + * gfs_quota_lvb_print - Print out quota data
20466 + * @rb: the cpu-order structure
20467 + * @console - TRUE if this should be printed to the console,
20468 + * FALSE if it should be just printed to the incore debug
20473 +gfs_quota_lvb_print(struct gfs_quota_lvb *qb)
20475 + pv(qb, qb_magic, "%u");
20476 + pv(qb, qb_pad, "%u");
20477 + pv(qb, qb_limit, "%"PRIu64);
20478 + pv(qb, qb_warn, "%"PRIu64);
20479 + pv(qb, qb_value, "%"PRId64);
20481 diff -urN linux-orig/fs/gfs/lvb.h linux-patched/fs/gfs/lvb.h
20482 --- linux-orig/fs/gfs/lvb.h 1969-12-31 18:00:00.000000000 -0600
20483 +++ linux-patched/fs/gfs/lvb.h 2004-06-30 13:27:49.349709738 -0500
20485 +/******************************************************************************
20486 +*******************************************************************************
20488 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
20489 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
20491 +** This copyrighted material is made available to anyone wishing to use,
20492 +** modify, copy, or redistribute it subject to the terms and conditions
20493 +** of the GNU General Public License v.2.
20495 +*******************************************************************************
20496 +******************************************************************************/
20498 +#ifndef __LVB_DOT_H__
20499 +#define __LVB_DOT_H__
20501 +#define GFS_MIN_LVB_SIZE (32)
20503 +struct gfs_rgrp_lvb {
20504 + uint32_t rb_magic;
20505 + uint32_t rb_free;
20506 + uint32_t rb_useddi;
20507 + uint32_t rb_freedi;
20508 + uint32_t rb_usedmeta;
20509 + uint32_t rb_freemeta;
20512 +struct gfs_quota_lvb {
20513 + uint32_t qb_magic;
20515 + uint64_t qb_limit;
20516 + uint64_t qb_warn;
20517 + int64_t qb_value;
20520 +/* Translation functions */
20522 +void gfs_rgrp_lvb_in(struct gfs_rgrp_lvb *rb, char *lvb);
20523 +void gfs_rgrp_lvb_out(struct gfs_rgrp_lvb *rb, char *lvb);
20524 +void gfs_quota_lvb_in(struct gfs_quota_lvb *qb, char *lvb);
20525 +void gfs_quota_lvb_out(struct gfs_quota_lvb *qb, char *lvb);
20527 +/* Printing functions */
20529 +void gfs_rgrp_lvb_print(struct gfs_rgrp_lvb *rb);
20530 +void gfs_quota_lvb_print(struct gfs_quota_lvb *qb);
20532 +#endif /* __LVB_DOT_H__ */
20533 diff -urN linux-orig/fs/gfs/main.c linux-patched/fs/gfs/main.c
20534 --- linux-orig/fs/gfs/main.c 1969-12-31 18:00:00.000000000 -0600
20535 +++ linux-patched/fs/gfs/main.c 2004-06-30 13:27:49.349709738 -0500
20537 +/******************************************************************************
20538 +*******************************************************************************
20540 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
20541 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
20543 +** This copyrighted material is made available to anyone wishing to use,
20544 +** modify, copy, or redistribute it subject to the terms and conditions
20545 +** of the GNU General Public License v.2.
20547 +*******************************************************************************
20548 +******************************************************************************/
20550 +#include <linux/sched.h>
20551 +#include <linux/slab.h>
20552 +#include <linux/smp_lock.h>
20553 +#include <linux/spinlock.h>
20554 +#include <asm/semaphore.h>
20555 +#include <linux/completion.h>
20556 +#include <linux/buffer_head.h>
20557 +#include <linux/proc_fs.h>
20558 +#include <linux/module.h>
20559 +#include <linux/init.h>
20562 +#include "mount.h"
20563 +#include "ops_fstype.h"
20565 +struct proc_dir_entry *gfs_proc_entry = NULL;
20568 + * init_gfs_fs - Register GFS as a filesystem
20570 + * Returns: 0 on success, error code on failure
20578 + init_MUTEX(&gfs_mount_args_lock);
20580 + gfs_proc_entry = create_proc_read_entry("fs/gfs", S_IFREG | 0200, NULL, NULL, NULL);
20581 + if (!gfs_proc_entry) {
20582 + printk("GFS: can't register /proc/fs/gfs\n");
20586 + gfs_proc_entry->write_proc = gfs_proc_write;
20588 + gfs_random_number = xtime.tv_nsec;
20590 + gfs_glock_cachep = kmem_cache_create("gfs_glock", sizeof(struct gfs_glock),
20593 + if (!gfs_glock_cachep)
20596 + gfs_inode_cachep = kmem_cache_create("gfs_inode", sizeof(struct gfs_inode),
20599 + if (!gfs_inode_cachep)
20602 + gfs_bufdata_cachep = kmem_cache_create("gfs_bufdata", sizeof(struct gfs_bufdata),
20605 + if (!gfs_bufdata_cachep)
20608 + gfs_mhc_cachep = kmem_cache_create("gfs_meta_header_cache", sizeof(struct gfs_meta_header_cache),
20611 + if (!gfs_mhc_cachep)
20614 + error = register_filesystem(&gfs_fs_type);
20618 + printk("GFS %s (built %s %s) installed\n",
20619 + GFS_RELEASE_NAME, __DATE__, __TIME__);
20624 + if (gfs_mhc_cachep)
20625 + kmem_cache_destroy(gfs_mhc_cachep);
20627 + if (gfs_bufdata_cachep)
20628 + kmem_cache_destroy(gfs_bufdata_cachep);
20630 + if (gfs_inode_cachep)
20631 + kmem_cache_destroy(gfs_inode_cachep);
20633 + if (gfs_glock_cachep)
20634 + kmem_cache_destroy(gfs_glock_cachep);
20636 + down(&gfs_mount_args_lock);
20637 + if (gfs_mount_args) {
20638 + kfree(gfs_mount_args);
20639 + gfs_mount_args = NULL;
20641 + up(&gfs_mount_args_lock);
20642 + remove_proc_entry("fs/gfs", NULL);
20649 + * exit_gfs_fs - Unregister the file system
20656 + unregister_filesystem(&gfs_fs_type);
20658 + kmem_cache_destroy(gfs_mhc_cachep);
20659 + kmem_cache_destroy(gfs_bufdata_cachep);
20660 + kmem_cache_destroy(gfs_inode_cachep);
20661 + kmem_cache_destroy(gfs_glock_cachep);
20663 + down(&gfs_mount_args_lock);
20664 + if (gfs_mount_args) {
20665 + kfree(gfs_mount_args);
20666 + gfs_mount_args = NULL;
20668 + up(&gfs_mount_args_lock);
20669 + remove_proc_entry("fs/gfs", NULL);
20672 +MODULE_DESCRIPTION("Global File System " GFS_RELEASE_NAME);
20673 +MODULE_AUTHOR("Red Hat, Inc.");
20674 +MODULE_LICENSE("GPL");
20676 +module_init(init_gfs_fs);
20677 +module_exit(exit_gfs_fs);
20679 diff -urN linux-orig/fs/gfs/mount.c linux-patched/fs/gfs/mount.c
20680 --- linux-orig/fs/gfs/mount.c 1969-12-31 18:00:00.000000000 -0600
20681 +++ linux-patched/fs/gfs/mount.c 2004-06-30 13:27:49.349709738 -0500
20683 +/******************************************************************************
20684 +*******************************************************************************
20686 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
20687 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
20689 +** This copyrighted material is made available to anyone wishing to use,
20690 +** modify, copy, or redistribute it subject to the terms and conditions
20691 +** of the GNU General Public License v.2.
20693 +*******************************************************************************
20694 +******************************************************************************/
20696 +#include <linux/sched.h>
20697 +#include <linux/slab.h>
20698 +#include <linux/smp_lock.h>
20699 +#include <linux/spinlock.h>
20700 +#include <asm/semaphore.h>
20701 +#include <linux/completion.h>
20702 +#include <linux/buffer_head.h>
20703 +#include <linux/module.h>
20704 +#include <asm/uaccess.h>
20707 +#include "mount.h"
20709 +char *gfs_mount_args = NULL;
20710 +struct semaphore gfs_mount_args_lock;
20713 + * gfs_make_args - Parse mount arguments
20717 + * Return: 0 on success, -EXXX on failure
20721 +gfs_make_args(char *data, struct gfs_args *args)
20723 + char *options, *x, *y;
20724 + int do_free = FALSE;
20727 + /* If someone preloaded options, use those instead */
20729 + down(&gfs_mount_args_lock);
20730 + if (gfs_mount_args) {
20731 + data = gfs_mount_args;
20732 + gfs_mount_args = NULL;
20735 + up(&gfs_mount_args_lock);
20737 + /* Set some defaults */
20739 + memset(args, 0, sizeof(struct gfs_args));
20740 + args->ar_num_glockd = GFS_GLOCKD_DEFAULT;
20742 + /* Split the options into tokens with the "," character and
20745 + for (options = data; (x = strsep(&options, ",")); ) {
20749 + y = strchr(x, '=');
20753 + if (!strcmp(x, "lockproto")) {
20755 + printk("GFS: need argument to lockproto\n");
20759 + strncpy(args->ar_lockproto, y, 256);
20760 + args->ar_lockproto[255] = 0;
20763 + else if (!strcmp(x, "locktable")) {
20765 + printk("GFS: need argument to locktable\n");
20769 + strncpy(args->ar_locktable, y, 256);
20770 + args->ar_locktable[255] = 0;
20773 + else if (!strcmp(x, "hostdata")) {
20775 + printk("GFS: need argument to hostdata\n");
20779 + strncpy(args->ar_hostdata, y, 256);
20780 + args->ar_hostdata[255] = 0;
20783 + else if (!strcmp(x, "ignore_local_fs"))
20784 + args->ar_ignore_local_fs = TRUE;
20786 + else if (!strcmp(x, "localflocks"))
20787 + args->ar_localflocks = TRUE;
20789 + else if (!strcmp(x, "localcaching"))
20790 + args->ar_localcaching = TRUE;
20792 + else if (!strcmp(x, "upgrade"))
20793 + args->ar_upgrade = TRUE;
20795 + else if (!strcmp(x, "num_glockd")) {
20797 + printk("GFS: need argument to num_glockd\n");
20801 + sscanf(y, "%u", &args->ar_num_glockd);
20802 + if (!args->ar_num_glockd || args->ar_num_glockd > GFS_GLOCKD_MAX) {
20803 + printk("GFS: 0 < num_glockd <= %u (not %u)\n",
20804 + GFS_GLOCKD_MAX, args->ar_num_glockd);
20810 + else if (!strcmp(x, "acl"))
20811 + args->ar_posixacls = TRUE;
20816 + printk("GFS: unknown option: %s\n", x);
20823 + printk("GFS: invalid mount option(s)\n");
20832 + * gfs_proc_write - Read in some mount options
20834 + * @buffer: a buffer of mount options
20835 + * @count: the length of the mount options
20838 + * Called when someone writes to /proc/fs/gfs.
20839 + * It allows you to specify mount options when you can't do it
20840 + * from mount, i.e. from an initial ramdisk
20842 + * Returns: 0 on success, -EXXX on failure
20846 +gfs_proc_write(struct file *file,
20847 + const char *buffer, unsigned long count,
20853 + if (!try_module_get(THIS_MODULE))
20854 + return -EAGAIN; /* Huh!?! */
20855 + down(&gfs_mount_args_lock);
20857 + if (gfs_mount_args) {
20858 + kfree(gfs_mount_args);
20859 + gfs_mount_args = NULL;
20867 + gfs_mount_args = gmalloc(count + 1);
20870 + if (copy_from_user(gfs_mount_args, buffer, count))
20873 + gfs_mount_args[count] = 0;
20875 + /* Get rid of extra newlines */
20877 + for (p = gfs_mount_args; *p; p++)
20881 + up(&gfs_mount_args_lock);
20882 + module_put(THIS_MODULE);
20887 + kfree(gfs_mount_args);
20888 + gfs_mount_args = NULL;
20891 + up(&gfs_mount_args_lock);
20892 + module_put(THIS_MODULE);
20895 diff -urN linux-orig/fs/gfs/mount.h linux-patched/fs/gfs/mount.h
20896 --- linux-orig/fs/gfs/mount.h 1969-12-31 18:00:00.000000000 -0600
20897 +++ linux-patched/fs/gfs/mount.h 2004-06-30 13:27:49.349709738 -0500
20899 +/******************************************************************************
20900 +*******************************************************************************
20902 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
20903 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
20905 +** This copyrighted material is made available to anyone wishing to use,
20906 +** modify, copy, or redistribute it subject to the terms and conditions
20907 +** of the GNU General Public License v.2.
20909 +*******************************************************************************
20910 +******************************************************************************/
20912 +#ifndef __MOUNT_DOT_H__
20913 +#define __MOUNT_DOT_H__
20915 +int gfs_make_args(char *data, struct gfs_args *args);
20917 +/* Allow args to be passed to GFS when using an initial ram disk */
20919 +extern char *gfs_mount_args;
20920 +extern struct semaphore gfs_mount_args_lock;
20922 +int gfs_proc_write(struct file *file, const char *buffer,
20923 + unsigned long count, void *data);
20925 +#endif /* __MOUNT_DOT_H__ */
20926 diff -urN linux-orig/fs/gfs/ondisk.c linux-patched/fs/gfs/ondisk.c
20927 --- linux-orig/fs/gfs/ondisk.c 1969-12-31 18:00:00.000000000 -0600
20928 +++ linux-patched/fs/gfs/ondisk.c 2004-06-30 13:27:49.350709506 -0500
20930 +/******************************************************************************
20931 +*******************************************************************************
20933 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
20934 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
20936 +** This copyrighted material is made available to anyone wishing to use,
20937 +** modify, copy, or redistribute it subject to the terms and conditions
20938 +** of the GNU General Public License v.2.
20940 +*******************************************************************************
20941 +******************************************************************************/
20943 +#include <linux/sched.h>
20944 +#include <linux/slab.h>
20945 +#include <linux/smp_lock.h>
20946 +#include <linux/spinlock.h>
20947 +#include <asm/semaphore.h>
20948 +#include <linux/completion.h>
20949 +#include <linux/buffer_head.h>
20953 +#define pv(struct, member, fmt) printk(" "#member" = "fmt"\n", struct->member);
20955 +#define WANT_GFS_CONVERSION_FUNCTIONS
20956 +#include <linux/gfs_ondisk.h>
20958 diff -urN linux-orig/fs/gfs/ops_address.c linux-patched/fs/gfs/ops_address.c
20959 --- linux-orig/fs/gfs/ops_address.c 1969-12-31 18:00:00.000000000 -0600
20960 +++ linux-patched/fs/gfs/ops_address.c 2004-06-30 13:27:49.350709506 -0500
20962 +/******************************************************************************
20963 +*******************************************************************************
20965 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
20966 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
20968 +** This copyrighted material is made available to anyone wishing to use,
20969 +** modify, copy, or redistribute it subject to the terms and conditions
20970 +** of the GNU General Public License v.2.
20972 +*******************************************************************************
20973 +******************************************************************************/
20975 +#include <linux/sched.h>
20976 +#include <linux/slab.h>
20977 +#include <linux/smp_lock.h>
20978 +#include <linux/spinlock.h>
20979 +#include <asm/semaphore.h>
20980 +#include <linux/completion.h>
20981 +#include <linux/buffer_head.h>
20982 +#include <linux/pagemap.h>
20988 +#include "glock.h"
20989 +#include "inode.h"
20990 +#include "ops_address.h"
20992 +#include "quota.h"
20993 +#include "trans.h"
20996 + * get_block - Fills in a buffer head with details about a block
20997 + * @inode: The inode
20998 + * @lblock: The block number to look up
20999 + * @bh_result: The buffer head to return the result in
21000 + * @create: Non-zero if we may add block to the file
21006 +get_block(struct inode *inode, sector_t lblock,
21007 + struct buffer_head *bh_result, int create)
21009 + struct gfs_inode *ip = vn2ip(inode);
21010 + int new = create;
21014 + error = gfs_block_map(ip, lblock, &new, &dblock, NULL);
21018 + GFS_ASSERT_INODE(dblock || !create, ip,);
21023 + map_bh(bh_result, inode->i_sb, dblock);
21025 + set_buffer_new(bh_result);
21031 + * get_block_noalloc - Fills in a buffer head with details about a block
21032 + * @inode: The inode
21033 + * @lblock: The block number to look up
21034 + * @bh_result: The buffer head to return the result in
21035 + * @create: Non-zero if we may add block to the file
21041 +get_block_noalloc(struct inode *inode, sector_t lblock,
21042 + struct buffer_head *bh_result, int create)
21046 + error = get_block(inode, lblock, bh_result, FALSE);
21048 + GFS_ASSERT_INODE(!create || buffer_mapped(bh_result),
21066 +get_blocks(struct inode *inode, sector_t lblock,
21067 + unsigned long max_blocks,
21068 + struct buffer_head *bh_result, int create)
21070 + struct gfs_inode *ip = vn2ip(inode);
21071 + int new = create;
21076 + error = gfs_block_map(ip, lblock, &new, &dblock, &extlen);
21080 + GFS_ASSERT_INODE(dblock || !create, ip,);
21085 + map_bh(bh_result, inode->i_sb, dblock);
21087 + set_buffer_new(bh_result);
21089 + if (extlen > max_blocks)
21090 + extlen = max_blocks;
21091 + bh_result->b_size = extlen << inode->i_blkbits;
21097 + * get_blocks_noalloc -
21108 +get_blocks_noalloc(struct inode *inode, sector_t lblock,
21109 + unsigned long max_blocks,
21110 + struct buffer_head *bh_result, int create)
21114 + error = get_blocks(inode, lblock, max_blocks, bh_result, FALSE);
21116 + GFS_ASSERT_INODE(!create || buffer_mapped(bh_result),
21123 + * gfs_writepage - Write complete page
21124 + * @page: Page to write
21130 +gfs_writepage(struct page *page, struct writeback_control *wbc)
21132 + struct gfs_inode *ip = vn2ip(page->mapping->host);
21135 + atomic_inc(&ip->i_sbd->sd_ops_address);
21137 + GFS_ASSERT_INODE(gfs_glock_is_held_excl(ip->i_gl) &&
21138 + !gfs_is_stuffed(ip), ip,);
21140 + error = block_write_full_page(page, get_block_noalloc, wbc);
21142 + gfs_flush_meta_cache(ip);
21144 + if (error == -EIO)
21145 + gfs_io_error_inode(ip);
21151 + * stuffed_readpage - Fill in a Linux page with stuffed file data
21153 + * @page: the page
21159 +stuffed_readpage(struct gfs_inode *ip, struct page *page)
21161 + struct buffer_head *dibh;
21165 + GFS_ASSERT_INODE(PageLocked(page), ip,);
21167 + error = gfs_get_inode_buffer(ip, &dibh);
21169 + kaddr = kmap(page);
21170 + memcpy((char *)kaddr,
21171 + dibh->b_data + sizeof(struct gfs_dinode),
21172 + ip->i_di.di_size);
21173 + memset((char *)kaddr + ip->i_di.di_size,
21175 + PAGE_CACHE_SIZE - ip->i_di.di_size);
21180 + SetPageUptodate(page);
21187 + * readi_readpage - readpage that goes through gfs_internal_read()
21188 + * @page: The page to read
21194 +readi_readpage(struct page *page)
21196 + struct gfs_inode *ip = vn2ip(page->mapping->host);
21200 + kaddr = kmap(page);
21202 + ret = gfs_internal_read(ip, kaddr,
21203 + (uint64_t)page->index << PAGE_CACHE_SHIFT,
21204 + PAGE_CACHE_SIZE);
21206 + if (ret < PAGE_CACHE_SIZE)
21207 + memset(kaddr + ret, 0, PAGE_CACHE_SIZE - ret);
21208 + SetPageUptodate(page);
21214 + unlock_page(page);
21220 + * gfs_readpage - readpage with locking
21221 + * @file: The file to read a page for
21222 + * @page: The page to read
21228 +gfs_readpage(struct file *file, struct page *page)
21230 + struct gfs_inode *ip = vn2ip(page->mapping->host);
21233 + atomic_inc(&ip->i_sbd->sd_ops_address);
21235 + if (!gfs_glock_is_locked_by_me(ip->i_gl)) {
21236 + unlock_page(page);
21237 + bitch_about(ip->i_sbd, &ip->i_sbd->sd_last_unlocked_aop,
21238 + "unlocked readpage request");
21242 + if (!gfs_is_jdata(ip)) {
21243 + if (gfs_is_stuffed(ip) && !page->index) {
21244 + error = stuffed_readpage(ip, page);
21245 + unlock_page(page);
21247 + error = block_read_full_page(page, get_block);
21249 + error = readi_readpage(page);
21251 + if (error == -EIO)
21252 + gfs_io_error_inode(ip);
21258 + * gfs_prepare_write - Prepare to write to a file
21259 + * @file: The file to write to
21260 + * @page: The page which is to be prepared for writing
21261 + * @from: From (byte range within page)
21262 + * @to: To (byte range within page)
21268 +gfs_prepare_write(struct file *file, struct page *page,
21269 + unsigned from, unsigned to)
21271 + struct gfs_inode *ip = vn2ip(page->mapping->host);
21272 + struct gfs_sbd *sdp = ip->i_sbd;
21275 + atomic_inc(&sdp->sd_ops_address);
21277 + if (!gfs_glock_is_locked_by_me(ip->i_gl)) {
21278 + bitch_about(sdp, &sdp->sd_last_unlocked_aop,
21279 + "unlocked prepare_write request");
21283 + if (gfs_is_stuffed(ip)) {
21284 + uint64_t file_size = ((uint64_t)page->index << PAGE_CACHE_SHIFT) + to;
21286 + if (file_size > sdp->sd_sb.sb_bsize - sizeof(struct gfs_dinode)) {
21287 + error = gfs_unstuff_dinode(ip, gfs_unstuffer_page, page);
21289 + error = block_prepare_write(page, from, to, get_block);
21290 + } else if (!PageUptodate(page))
21291 + error = stuffed_readpage(ip, page);
21293 + error = block_prepare_write(page, from, to, get_block);
21295 + if (error == -EIO)
21296 + gfs_io_error_inode(ip);
21302 + * gfs_commit_write - Commit write to a file
21303 + * @file: The file to write to
21304 + * @page: The page containing the data
21305 + * @from: From (byte range within page)
21306 + * @to: To (byte range within page)
21312 +gfs_commit_write(struct file *file, struct page *page,
21313 + unsigned from, unsigned to)
21315 + struct inode *inode = page->mapping->host;
21316 + struct gfs_inode *ip = vn2ip(inode);
21317 + struct gfs_sbd *sdp = ip->i_sbd;
21320 + atomic_inc(&sdp->sd_ops_address);
21322 + if (gfs_is_stuffed(ip)) {
21323 + struct buffer_head *dibh;
21324 + uint64_t file_size = ((uint64_t)page->index << PAGE_CACHE_SHIFT) + to;
21327 + GFS_ASSERT_INODE(PageLocked(page), ip,);
21329 + error = gfs_get_inode_buffer(ip, &dibh);
21333 + gfs_trans_add_bh(ip->i_gl, dibh);
21335 + kaddr = kmap(page);
21336 + memcpy(dibh->b_data + sizeof(struct gfs_dinode) + from,
21337 + (char *)kaddr + from,
21343 + SetPageUptodate(page);
21345 + if (inode->i_size < file_size)
21346 + i_size_write(inode, file_size);
21348 + error = generic_commit_write(file, page, from, to);
21356 + ClearPageUptodate(page);
21362 + * gfs_bmap - Block map function
21363 + * @mapping: Address space info
21364 + * @lblock: The block to map
21366 + * Returns: The disk address for the block or 0 on hole or error
21370 +gfs_bmap(struct address_space *mapping, sector_t lblock)
21372 + struct gfs_inode *ip = vn2ip(mapping->host);
21373 + struct gfs_holder i_gh;
21377 + atomic_inc(&ip->i_sbd->sd_ops_address);
21379 + error = gfs_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh);
21383 + if (!gfs_is_stuffed(ip))
21384 + dblock = generic_block_bmap(mapping, lblock, get_block);
21386 + gfs_glock_dq_uninit(&i_gh);
21392 + * gfs_direct_IO -
21403 +gfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
21404 + loff_t offset, unsigned long nr_segs)
21406 + struct file *file = iocb->ki_filp;
21407 + struct inode *inode = file->f_mapping->host;
21408 + struct gfs_inode *ip = vn2ip(inode);
21409 + get_blocks_t *gb = get_blocks;
21412 + atomic_inc(&ip->i_sbd->sd_ops_address);
21414 + GFS_ASSERT_INODE(gfs_glock_is_locked_by_me(ip->i_gl), ip,);
21415 + GFS_ASSERT_INODE(!gfs_is_stuffed(ip), ip,);
21417 + if (rw == WRITE && !current_transaction)
21418 + gb = get_blocks_noalloc;
21420 + error = blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov,
21421 + offset, nr_segs, gb, NULL);
21423 + if (error == -EIO)
21424 + gfs_io_error_inode(ip);
21429 +struct address_space_operations gfs_file_aops = {
21430 + .writepage = gfs_writepage,
21431 + .readpage = gfs_readpage,
21432 + .sync_page = block_sync_page,
21433 + .prepare_write = gfs_prepare_write,
21434 + .commit_write = gfs_commit_write,
21435 + .bmap = gfs_bmap,
21436 + .direct_IO = gfs_direct_IO,
21438 diff -urN linux-orig/fs/gfs/ops_address.h linux-patched/fs/gfs/ops_address.h
21439 --- linux-orig/fs/gfs/ops_address.h 1969-12-31 18:00:00.000000000 -0600
21440 +++ linux-patched/fs/gfs/ops_address.h 2004-06-30 13:27:49.350709506 -0500
21442 +/******************************************************************************
21443 +*******************************************************************************
21445 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
21446 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
21448 +** This copyrighted material is made available to anyone wishing to use,
21449 +** modify, copy, or redistribute it subject to the terms and conditions
21450 +** of the GNU General Public License v.2.
21452 +*******************************************************************************
21453 +******************************************************************************/
21455 +#ifndef __OPS_ADDRESS_DOT_H__
21456 +#define __OPS_ADDRESS_DOT_H__
21458 +extern struct address_space_operations gfs_file_aops;
21460 +#endif /* __OPS_ADDRESS_DOT_H__ */
21461 diff -urN linux-orig/fs/gfs/ops_dentry.c linux-patched/fs/gfs/ops_dentry.c
21462 --- linux-orig/fs/gfs/ops_dentry.c 1969-12-31 18:00:00.000000000 -0600
21463 +++ linux-patched/fs/gfs/ops_dentry.c 2004-06-30 13:27:49.350709506 -0500
21465 +/******************************************************************************
21466 +*******************************************************************************
21468 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
21469 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
21471 +** This copyrighted material is made available to anyone wishing to use,
21472 +** modify, copy, or redistribute it subject to the terms and conditions
21473 +** of the GNU General Public License v.2.
21475 +*******************************************************************************
21476 +******************************************************************************/
21478 +#include <linux/sched.h>
21479 +#include <linux/slab.h>
21480 +#include <linux/smp_lock.h>
21481 +#include <linux/spinlock.h>
21482 +#include <asm/semaphore.h>
21483 +#include <linux/completion.h>
21484 +#include <linux/buffer_head.h>
21488 +#include "glock.h"
21489 +#include "ops_dentry.h"
21492 + * gfs_drevalidate - Check directory lookup consistency
21493 + * @dentry: the mapping to check
21496 + * Check to make sure the lookup necessary to arrive at this inode from its
21497 + * parent is still good.
21499 + * Returns: 1 if the dentry is ok, 0 if it isn't
21503 +gfs_drevalidate(struct dentry *dentry, struct nameidata *nd)
21505 + struct dentry *parent = dget_parent(dentry);
21506 + struct gfs_inode *dip;
21507 + struct inode *inode;
21508 + struct gfs_holder d_gh;
21509 + struct gfs_inode *ip;
21510 + struct gfs_inum inum;
21511 + unsigned int type;
21516 + dip = vn2ip(parent->d_inode);
21517 + GFS_ASSERT(dip,);
21519 + atomic_inc(&dip->i_sbd->sd_ops_dentry);
21521 + if (dip->i_sbd->sd_args.ar_localcaching)
21524 + inode = dentry->d_inode;
21525 + if (inode && is_bad_inode(inode))
21528 + error = gfs_glock_nq_init(dip->i_gl, LM_ST_SHARED, 0, &d_gh);
21532 + error = gfs_dir_search(dip, &dentry->d_name, &inum, &type);
21536 + goto invalid_gunlock;
21540 + goto valid_gunlock;
21541 + goto invalid_gunlock;
21543 + goto fail_gunlock;
21546 + ip = vn2ip(inode);
21547 + GFS_ASSERT_SBD(ip, dip->i_sbd,);
21549 + if (ip->i_num.no_formal_ino != inum.no_formal_ino)
21550 + goto invalid_gunlock;
21552 + GFS_ASSERT_INODE(ip->i_di.di_type == type, ip,);
21555 + gfs_glock_dq_uninit(&d_gh);
21563 + gfs_glock_dq_uninit(&d_gh);
21566 + if (inode && S_ISDIR(inode->i_mode)) {
21567 + if (have_submounts(dentry))
21569 + shrink_dcache_parent(dentry);
21578 + gfs_glock_dq_uninit(&d_gh);
21586 +struct dentry_operations gfs_dops = {
21587 + .d_revalidate = gfs_drevalidate,
21589 diff -urN linux-orig/fs/gfs/ops_dentry.h linux-patched/fs/gfs/ops_dentry.h
21590 --- linux-orig/fs/gfs/ops_dentry.h 1969-12-31 18:00:00.000000000 -0600
21591 +++ linux-patched/fs/gfs/ops_dentry.h 2004-06-30 13:27:49.351709274 -0500
21593 +/******************************************************************************
21594 +*******************************************************************************
21596 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
21597 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
21599 +** This copyrighted material is made available to anyone wishing to use,
21600 +** modify, copy, or redistribute it subject to the terms and conditions
21601 +** of the GNU General Public License v.2.
21603 +*******************************************************************************
21604 +******************************************************************************/
21606 +#ifndef __OPS_DENTRY_DOT_H__
21607 +#define __OPS_DENTRY_DOT_H__
21609 +extern struct dentry_operations gfs_dops;
21611 +#endif /* __OPS_DENTRY_DOT_H__ */
21612 diff -urN linux-orig/fs/gfs/ops_export.c linux-patched/fs/gfs/ops_export.c
21613 --- linux-orig/fs/gfs/ops_export.c 1969-12-31 18:00:00.000000000 -0600
21614 +++ linux-patched/fs/gfs/ops_export.c 2004-06-30 13:27:49.351709274 -0500
21616 +/******************************************************************************
21617 +*******************************************************************************
21619 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
21620 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
21622 +** This copyrighted material is made available to anyone wishing to use,
21623 +** modify, copy, or redistribute it subject to the terms and conditions
21624 +** of the GNU General Public License v.2.
21626 +*******************************************************************************
21627 +******************************************************************************/
21629 +#include <linux/sched.h>
21630 +#include <linux/slab.h>
21631 +#include <linux/smp_lock.h>
21632 +#include <linux/spinlock.h>
21633 +#include <asm/semaphore.h>
21634 +#include <linux/completion.h>
21635 +#include <linux/buffer_head.h>
21640 +#include "glock.h"
21641 +#include "glops.h"
21642 +#include "inode.h"
21643 +#include "ops_export.h"
21646 +struct inode_cookie
21648 + uint64_t formal_ino;
21653 +struct get_name_filldir
21655 + uint64_t formal_ino;
21660 + * gfs_decode_fh -
21661 + * @param1: description
21662 + * @param2: description
21663 + * @param3: description
21665 + * Function description
21667 + * Returns: what is returned
21671 +gfs_decode_fh(struct super_block *sb, __u32 *fh, int fh_len, int fh_type,
21672 + int (*acceptable)(void *context, struct dentry *dentry),
21675 + struct inode_cookie this, parent;
21677 + atomic_inc(&vfs2sdp(sb)->sd_ops_export);
21679 + if (fh_type != fh_len)
21682 + memset(&parent, 0, sizeof(struct inode_cookie));
21684 + switch (fh_type) {
21686 + parent.gen_valid = TRUE;
21687 + parent.gen = fh[5];
21689 + parent.formal_ino = ((uint64_t)gfs32_to_cpu(fh[3])) << 32;
21690 + parent.formal_ino |= (uint64_t)gfs32_to_cpu(fh[4]);
21692 + this.gen_valid = TRUE;
21693 + this.gen = gfs32_to_cpu(fh[2]);
21694 + this.formal_ino = ((uint64_t)gfs32_to_cpu(fh[0])) << 32;
21695 + this.formal_ino |= (uint64_t)gfs32_to_cpu(fh[1]);
21701 + return gfs_export_ops.find_exported_dentry(sb, &this, &parent,
21702 + acceptable, context);
21706 + * gfs_encode_fh -
21707 + * @param1: description
21708 + * @param2: description
21709 + * @param3: description
21711 + * Function description
21713 + * Returns: what is returned
21717 +gfs_encode_fh(struct dentry *dentry, __u32 *fh, int *len,
21720 + struct inode *inode = dentry->d_inode;
21721 + struct gfs_inode *ip = vn2ip(inode);
21722 + int maxlen = *len;
21724 + atomic_inc(&ip->i_sbd->sd_ops_export);
21729 + fh[0] = cpu_to_gfs32((uint32_t)(ip->i_num.no_formal_ino >> 32));
21730 + fh[1] = cpu_to_gfs32((uint32_t)(ip->i_num.no_formal_ino & 0xFFFFFFFF));
21731 + fh[2] = cpu_to_gfs32(inode->i_generation);
21734 + if (maxlen < 5 || !connectable)
21737 + spin_lock(&dentry->d_lock);
21739 + inode = dentry->d_parent->d_inode;
21740 + ip = vn2ip(inode);
21742 + fh[3] = cpu_to_gfs32((uint32_t)(ip->i_num.no_formal_ino >> 32));
21743 + fh[4] = cpu_to_gfs32((uint32_t)(ip->i_num.no_formal_ino & 0xFFFFFFFF));
21746 + if (maxlen < 6) {
21747 + spin_unlock(&dentry->d_lock);
21751 + fh[5] = cpu_to_gfs32(inode->i_generation);
21753 + spin_unlock(&dentry->d_lock);
21761 + * get_name_filldir -
21762 + * @param1: description
21763 + * @param2: description
21764 + * @param3: description
21766 + * Function description
21768 + * Returns: what is returned
21772 +get_name_filldir(void *opaque,
21773 + const char *name, unsigned int length,
21775 + struct gfs_inum *inum, unsigned int type)
21777 + struct get_name_filldir *gnfd = (struct get_name_filldir *)opaque;
21779 + if (inum->no_formal_ino != gnfd->formal_ino)
21782 + memcpy(gnfd->name, name, length);
21783 + gnfd->name[length] = 0;
21790 + * @param1: description
21791 + * @param2: description
21792 + * @param3: description
21794 + * Function description
21796 + * Returns: what is returned
21799 +int gfs_get_name(struct dentry *parent, char *name,
21800 + struct dentry *child)
21802 + struct inode *dir = parent->d_inode;
21803 + struct inode *inode = child->d_inode;
21804 + struct gfs_inode *dip, *ip;
21805 + struct get_name_filldir gnfd;
21806 + struct gfs_holder gh;
21807 + uint64_t offset = 0;
21813 + atomic_inc(&vfs2sdp(dir->i_sb)->sd_ops_export);
21815 + if (!S_ISDIR(dir->i_mode) || !inode)
21818 + dip = vn2ip(dir);
21819 + ip = vn2ip(inode);
21822 + gnfd.formal_ino = ip->i_num.no_formal_ino;
21823 + gnfd.name = name;
21825 + error = gfs_glock_nq_init(dip->i_gl, LM_ST_SHARED, 0, &gh);
21829 + error = gfs_dir_read(dip, &offset, &gnfd, get_name_filldir);
21831 + gfs_glock_dq_uninit(&gh);
21833 + if (!error & !*name)
21840 + * gfs_get_parent -
21841 + * @param1: description
21842 + * @param2: description
21843 + * @param3: description
21845 + * Function description
21847 + * Returns: what is returned
21851 +gfs_get_parent(struct dentry *child)
21853 + struct gfs_inode *dip = vn2ip(child->d_inode);
21854 + struct gfs_holder d_gh, i_gh;
21855 + struct qstr dotdot = { .name = "..", .len = 2 };
21856 + struct gfs_inode *ip;
21857 + struct inode *inode;
21858 + struct dentry *dentry;
21861 + atomic_inc(&dip->i_sbd->sd_ops_export);
21863 + gfs_holder_init(dip->i_gl, 0, 0, &d_gh);
21864 + error = gfs_lookupi(&d_gh, &dotdot, TRUE, &i_gh);
21872 + ip = gl2ip(i_gh.gh_gl);
21874 + gfs_glock_dq_uninit(&d_gh);
21875 + gfs_glock_dq_uninit(&i_gh);
21877 + inode = gfs_iget(ip, CREATE);
21878 + gfs_inode_put(ip);
21881 + return ERR_PTR(-ENOMEM);
21883 + dentry = d_alloc_anon(inode);
21886 + return ERR_PTR(-ENOMEM);
21892 + gfs_holder_uninit(&d_gh);
21893 + return ERR_PTR(error);
21897 + * gfs_get_dentry -
21898 + * @param1: description
21899 + * @param2: description
21900 + * @param3: description
21902 + * Function description
21904 + * Returns: what is returned
21908 +gfs_get_dentry(struct super_block *sb, void *inump)
21910 + struct gfs_sbd *sdp = vfs2sdp(sb);
21911 + struct inode_cookie *cookie = (struct inode_cookie *)inump;
21912 + struct gfs_inum inum;
21913 + struct gfs_holder i_gh, ri_gh, rgd_gh;
21914 + struct gfs_rgrpd *rgd;
21915 + struct buffer_head *bh;
21916 + struct gfs_dinode *di;
21917 + struct gfs_inode *ip;
21918 + struct inode *inode;
21919 + struct dentry *dentry;
21922 + atomic_inc(&sdp->sd_ops_export);
21924 + if (!cookie->formal_ino ||
21925 + cookie->formal_ino == sdp->sd_jiinode->i_num.no_formal_ino ||
21926 + cookie->formal_ino == sdp->sd_riinode->i_num.no_formal_ino ||
21927 + cookie->formal_ino == sdp->sd_qinode->i_num.no_formal_ino ||
21928 + cookie->formal_ino == sdp->sd_linode->i_num.no_formal_ino)
21929 + return ERR_PTR(-EINVAL);
21931 + inum.no_formal_ino = cookie->formal_ino;
21932 + inum.no_addr = cookie->formal_ino;
21934 + error = gfs_glock_nq_num(sdp,
21935 + inum.no_formal_ino, &gfs_inode_glops,
21936 + LM_ST_SHARED, LM_FLAG_ANY | GL_LOCAL_EXCL,
21939 + return ERR_PTR(error);
21941 + error = gfs_inode_get(i_gh.gh_gl, &inum, NO_CREATE, &ip);
21947 + error = gfs_rindex_hold(sdp, &ri_gh);
21952 + rgd = gfs_blk2rgrpd(sdp, inum.no_addr);
21954 + goto fail_rindex;
21956 + error = gfs_glock_nq_init(rgd->rd_gl, LM_ST_SHARED, 0, &rgd_gh);
21958 + goto fail_rindex;
21961 + if (gfs_get_block_type(rgd, inum.no_addr) != GFS_BLKST_USEDMETA)
21964 + error = gfs_dread(sdp, inum.no_addr, i_gh.gh_gl,
21965 + DIO_START | DIO_WAIT, &bh);
21969 + di = (struct gfs_dinode *)bh->b_data;
21972 + if (gfs32_to_cpu(di->di_header.mh_magic) != GFS_MAGIC ||
21973 + gfs32_to_cpu(di->di_header.mh_type) != GFS_METATYPE_DI ||
21974 + (gfs32_to_cpu(di->di_flags) & GFS_DIF_UNUSED))
21978 + gfs_glock_dq_uninit(&rgd_gh);
21979 + gfs_glock_dq_uninit(&ri_gh);
21981 + error = gfs_inode_get(i_gh.gh_gl, &inum, CREATE, &ip);
21985 + atomic_inc(&sdp->sd_fh2dentry_misses);
21988 + gfs_glock_dq_uninit(&i_gh);
21990 + inode = gfs_iget(ip, CREATE);
21991 + gfs_inode_put(ip);
21994 + return ERR_PTR(-ENOMEM);
21996 + if (cookie->gen_valid && cookie->gen != inode->i_generation) {
21998 + return ERR_PTR(-ESTALE);
22001 + dentry = d_alloc_anon(inode);
22004 + return ERR_PTR(-ENOMEM);
22013 + gfs_glock_dq_uninit(&rgd_gh);
22016 + gfs_glock_dq_uninit(&ri_gh);
22019 + gfs_glock_dq_uninit(&i_gh);
22020 + return ERR_PTR(error);
22023 +struct export_operations gfs_export_ops = {
22024 + .decode_fh = gfs_decode_fh,
22025 + .encode_fh = gfs_encode_fh,
22026 + .get_name = gfs_get_name,
22027 + .get_parent = gfs_get_parent,
22028 + .get_dentry = gfs_get_dentry,
22031 diff -urN linux-orig/fs/gfs/ops_export.h linux-patched/fs/gfs/ops_export.h
22032 --- linux-orig/fs/gfs/ops_export.h 1969-12-31 18:00:00.000000000 -0600
22033 +++ linux-patched/fs/gfs/ops_export.h 2004-06-30 13:27:49.351709274 -0500
22035 +/******************************************************************************
22036 +*******************************************************************************
22038 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
22039 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
22041 +** This copyrighted material is made available to anyone wishing to use,
22042 +** modify, copy, or redistribute it subject to the terms and conditions
22043 +** of the GNU General Public License v.2.
22045 +*******************************************************************************
22046 +******************************************************************************/
22048 +#ifndef __OPS_EXPORT_DOT_H__
22049 +#define __OPS_EXPORT_DOT_H__
22051 +extern struct export_operations gfs_export_ops;
22053 +#endif /* __OPS_EXPORT_DOT_H__ */
22054 diff -urN linux-orig/fs/gfs/ops_file.c linux-patched/fs/gfs/ops_file.c
22055 --- linux-orig/fs/gfs/ops_file.c 1969-12-31 18:00:00.000000000 -0600
22056 +++ linux-patched/fs/gfs/ops_file.c 2004-06-30 13:27:49.352709042 -0500
22058 +/******************************************************************************
22059 +*******************************************************************************
22061 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
22062 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
22064 +** This copyrighted material is made available to anyone wishing to use,
22065 +** modify, copy, or redistribute it subject to the terms and conditions
22066 +** of the GNU General Public License v.2.
22068 +*******************************************************************************
22069 +******************************************************************************/
22071 +#include <linux/sched.h>
22072 +#include <linux/slab.h>
22073 +#include <linux/smp_lock.h>
22074 +#include <linux/spinlock.h>
22075 +#include <asm/semaphore.h>
22076 +#include <linux/completion.h>
22077 +#include <linux/buffer_head.h>
22078 +#include <asm/uaccess.h>
22079 +#include <linux/pagemap.h>
22080 +#include <linux/uio.h>
22081 +#include <linux/blkdev.h>
22082 +#include <linux/mm.h>
22089 +#include "flock.h"
22090 +#include "glock.h"
22091 +#include "glops.h"
22092 +#include "inode.h"
22093 +#include "ioctl.h"
22095 +#include "ops_file.h"
22096 +#include "ops_vm.h"
22097 +#include "quota.h"
22099 +#include "trans.h"
22101 +struct filldir_bad_entry {
22103 + unsigned int fbe_length;
22104 + uint64_t fbe_offset;
22105 + struct gfs_inum fbe_inum;
22106 + unsigned int fbe_type;
22109 +struct filldir_bad {
22110 + struct gfs_sbd *fdb_sbd;
22111 + int fdb_prefetch;
22113 + struct filldir_bad_entry *fdb_entry;
22114 + unsigned int fdb_entry_num;
22115 + unsigned int fdb_entry_off;
22118 + unsigned int fdb_name_size;
22119 + unsigned int fdb_name_off;
22122 +struct filldir_reg {
22123 + struct gfs_sbd *fdr_sbd;
22124 + int fdr_prefetch;
22126 + filldir_t fdr_filldir;
22127 + void *fdr_opaque;
22130 +typedef ssize_t(*do_rw_t) (struct file * file,
22132 + size_t size, loff_t * offset,
22133 + unsigned int num_gh, struct gfs_holder * ghs);
22136 + * gfs_llseek - seek to a location in a file
22137 + * @file: the file
22138 + * @offset: the offset
22139 + * @origin: Where to seek from (SEEK_SET, SEEK_CUR, or SEEK_END)
22141 + * SEEK_END requires the glock for the file because it references the
22144 + * Returns: The new offset, or -EXXX on error
22148 +gfs_llseek(struct file *file, loff_t offset, int origin)
22150 + struct gfs_inode *ip = vn2ip(file->f_mapping->host);
22151 + struct gfs_holder i_gh;
22154 + atomic_inc(&ip->i_sbd->sd_ops_file);
22156 + if (origin == 2) {
22157 + error = gfs_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh);
22159 + error = remote_llseek(file, offset, origin);
22160 + gfs_glock_dq_uninit(&i_gh);
22163 + error = remote_llseek(file, offset, origin);
22168 +#define vma2state(vma) \
22169 +((((vma)->vm_flags & (VM_MAYWRITE | VM_MAYSHARE)) == \
22170 + (VM_MAYWRITE | VM_MAYSHARE)) ? \
22171 + LM_ST_EXCLUSIVE : LM_ST_SHARED) \
22174 + * functionname - summary
22175 + * @param1: description
22176 + * @param2: description
22177 + * @param3: description
22179 + * Function description
22181 + * Returns: what is returned
22185 +walk_vm_hard(struct file *file, char *buf, size_t size, loff_t *offset,
22186 + do_rw_t operation)
22188 + struct gfs_holder *ghs;
22189 + unsigned int num_gh = 0;
22193 + struct super_block *sb = file->f_dentry->d_inode->i_sb;
22194 + struct mm_struct *mm = current->mm;
22195 + struct vm_area_struct *vma;
22196 + unsigned long start = (unsigned long)buf;
22197 + unsigned long end = start + size;
22198 + int dumping = (current->flags & PF_DUMPCORE);
22199 + unsigned int x = 0;
22201 + for (vma = find_vma(mm, start); vma; vma = vma->vm_next) {
22202 + if (end <= vma->vm_start)
22204 + if (vma->vm_file &&
22205 + vma->vm_file->f_dentry->d_inode->i_sb == sb) {
22210 + ghs = kmalloc((num_gh + 1) * sizeof(struct gfs_holder), GFP_KERNEL);
22213 + up_read(&mm->mmap_sem);
22217 + for (vma = find_vma(mm, start); vma; vma = vma->vm_next) {
22218 + if (end <= vma->vm_start)
22220 + if (vma->vm_file) {
22221 + struct inode *inode = vma->vm_file->f_dentry->d_inode;
22222 + if (inode->i_sb == sb)
22223 + gfs_holder_init(vn2ip(inode)->i_gl,
22230 + up_read(&mm->mmap_sem);
22232 + GFS_ASSERT_SBD(x == num_gh, vfs2sdp(sb),);
22235 + count = operation(file, buf, size, offset, num_gh, ghs);
22238 + gfs_holder_uninit(&ghs[num_gh]);
22245 + * walk_vma - Walk the vmas associated with a buffer for read or write.
22246 + * If any of them are gfs, pass the gfs inode down to the read/write
22247 + * worker function so that locks can be acquired in the correct order.
22248 + * @file: The file to read/write from/to
22249 + * @buf: The buffer to copy to/from
22250 + * @size: The amount of data requested
22251 + * @offset: The current file offset
22252 + * @operation: The read or write worker function
22254 + * Outputs: Offset - updated according to number of bytes written
22256 + * Returns: The number of bytes written, -errno on failure
22260 +walk_vm(struct file *file, char *buf, size_t size, loff_t *offset,
22261 + do_rw_t operation)
22263 + if (current->mm) {
22264 + struct super_block *sb = file->f_dentry->d_inode->i_sb;
22265 + struct mm_struct *mm = current->mm;
22266 + struct vm_area_struct *vma;
22267 + unsigned long start = (unsigned long)buf;
22268 + unsigned long end = start + size;
22269 + int dumping = (current->flags & PF_DUMPCORE);
22272 + down_read(&mm->mmap_sem);
22274 + for (vma = find_vma(mm, start); vma; vma = vma->vm_next) {
22275 + if (end <= vma->vm_start)
22277 + if (vma->vm_file &&
22278 + vma->vm_file->f_dentry->d_inode->i_sb == sb)
22283 + up_read(&mm->mmap_sem);
22287 + struct gfs_holder gh;
22288 + return operation(file, buf, size, offset, 0, &gh);
22292 + return walk_vm_hard(file, buf, size, offset, operation);
22296 + * functionname - summary
22297 + * @param1: description
22298 + * @param2: description
22299 + * @param3: description
22301 + * Function description
22303 + * Returns: what is returned
22307 +do_read_readi(struct file *file, char *buf, size_t size, loff_t *offset)
22309 + struct gfs_inode *ip = vn2ip(file->f_mapping->host);
22310 + ssize_t count = 0;
22314 + if (!access_ok(VERIFY_WRITE, buf, size))
22317 + if (!(file->f_flags & O_LARGEFILE)) {
22318 + if (*offset >= 0x7FFFFFFFull)
22320 + if (*offset + size > 0x7FFFFFFFull)
22321 + size = 0x7FFFFFFFull - *offset;
22324 + count = gfs_readi(ip, buf, *offset, size, gfs_copy2user);
22327 + *offset += count;
22333 + * do_read_direct - Read bytes from a file
22334 + * @file: The file to read from
22335 + * @buf: The buffer to copy into
22336 + * @size: The amount of data requested
22337 + * @offset: The current file offset
22338 + * @num_gh: The number of other locks we need to do the read
22339 + * @ghs: the locks we need plus one for our lock
22341 + * Outputs: Offset - updated according to number of bytes read
22343 + * Returns: The number of bytes read, -EXXX on failure
22347 +do_read_direct(struct file *file, char *buf, size_t size, loff_t *offset,
22348 + unsigned int num_gh, struct gfs_holder *ghs)
22350 + struct inode *inode = file->f_mapping->host;
22351 + struct gfs_inode *ip = vn2ip(inode);
22352 + unsigned int state = LM_ST_DEFERRED;
22355 + ssize_t count = 0;
22358 + for (x = 0; x < num_gh; x++)
22359 + if (ghs[x].gh_gl == ip->i_gl) {
22360 + state = LM_ST_SHARED;
22361 + flags |= GL_LOCAL_EXCL;
22365 + gfs_holder_init(ip->i_gl, state, flags, &ghs[num_gh]);
22367 + error = gfs_glock_nq_m(num_gh + 1, ghs);
22372 + if (gfs_is_jdata(ip))
22373 + goto out_gunlock;
22375 + if (gfs_is_stuffed(ip)) {
22376 + size_t mask = bdev_hardsect_size(inode->i_sb->s_bdev) - 1;
22378 + if (((*offset) & mask) || (((unsigned long)buf) & mask))
22379 + goto out_gunlock;
22381 + count = do_read_readi(file, buf, size & ~mask, offset);
22384 + count = generic_file_read(file, buf, size, offset);
22389 + gfs_glock_dq_m(num_gh + 1, ghs);
22392 + gfs_holder_uninit(&ghs[num_gh]);
22394 + return (count) ? count : error;
22398 + * do_read_buf - Read bytes from a file
22399 + * @file: The file to read from
22400 + * @buf: The buffer to copy into
22401 + * @size: The amount of data requested
22402 + * @offset: The current file offset
22403 + * @num_gh: The number of other locks we need to do the read
22404 + * @ghs: the locks we need plus one for our lock
22406 + * Outputs: Offset - updated according to number of bytes read
22408 + * Returns: The number of bytes read, -EXXX on failure
22412 +do_read_buf(struct file *file, char *buf, size_t size, loff_t *offset,
22413 + unsigned int num_gh, struct gfs_holder *ghs)
22415 + struct gfs_inode *ip = vn2ip(file->f_mapping->host);
22416 + ssize_t count = 0;
22419 + gfs_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME, &ghs[num_gh]);
22421 + error = gfs_glock_nq_m_atime(num_gh + 1, ghs);
22425 + if (gfs_is_jdata(ip) ||
22426 + (gfs_is_stuffed(ip) && !test_bit(GIF_PAGED, &ip->i_flags)))
22427 + count = do_read_readi(file, buf, size, offset);
22429 + count = generic_file_read(file, buf, size, offset);
22431 + gfs_glock_dq_m(num_gh + 1, ghs);
22434 + gfs_holder_uninit(&ghs[num_gh]);
22436 + return (count) ? count : error;
22440 + * gfs_read - Read bytes from a file
22441 + * @file: The file to read from
22442 + * @buf: The buffer to copy into
22443 + * @size: The amount of data requested
22444 + * @offset: The current file offset
22446 + * Outputs: Offset - updated according to number of bytes read
22448 + * Returns: The number of bytes read, -EXXX on failure
22452 +gfs_read(struct file *file, char *buf, size_t size, loff_t *offset)
22454 + atomic_inc(&vfs2sdp(file->f_mapping->host->i_sb)->sd_ops_file);
22456 + if (file->f_flags & O_DIRECT)
22457 + return walk_vm(file, buf, size, offset, do_read_direct);
22459 + return walk_vm(file, buf, size, offset, do_read_buf);
22463 + * grope_mapping - touch each page of a mapping that needs to be written
22464 + * @buf: the start of the memory to be written
22465 + * @size: the size of the memory to be written
22467 + * We do this after acquiring the locks on the mapping,
22468 + * but before starting the write transaction. We need to make
22469 + * sure that we don't cause recursive transactions if blocks
22470 + * need to be allocated to the file backing the mapping.
22472 + * Returns: 0 on success, -EXXX on failure
22476 +grope_mapping(char *buf, size_t size)
22478 + unsigned long start = (unsigned long)buf;
22479 + unsigned long stop = start + size;
22482 + while (start < stop) {
22483 + if (copy_from_user(&c, (char *)start, 1))
22486 + start += PAGE_CACHE_SIZE;
22487 + start &= PAGE_CACHE_MASK;
22494 + * do_write_direct_alloc - Write bytes to a file
22495 + * @file: The file to write to
22496 + * @buf: The buffer to copy from
22497 + * @size: The amount of data requested
22498 + * @offset: The current file offset
22500 + * Outputs: Offset - updated according to number of bytes written
22502 + * Returns: The number of bytes written, -EXXX on failure
22506 +do_write_direct_alloc(struct file *file, char *buf, size_t size, loff_t *offset)
22508 + struct inode *inode = file->f_mapping->host;
22509 + struct gfs_inode *ip = vn2ip(inode);
22510 + struct gfs_sbd *sdp = ip->i_sbd;
22511 + struct gfs_alloc *al = NULL;
22512 + struct iovec local_iov = { .iov_base = buf, .iov_len = size };
22513 + struct buffer_head *dibh;
22514 + unsigned int data_blocks, ind_blocks;
22518 + gfs_write_calc_reserv(ip, size, &data_blocks, &ind_blocks);
22520 + al = gfs_alloc_get(ip);
22522 + error = gfs_quota_lock_m(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
22526 + error = gfs_quota_check(ip, ip->i_di.di_uid, ip->i_di.di_gid);
22528 + goto fail_gunlock_q;
22530 + al->al_requested_meta = ind_blocks;
22531 + al->al_requested_data = data_blocks;
22533 + error = gfs_inplace_reserve(ip);
22535 + goto fail_gunlock_q;
22537 + /* Trans may require:
22538 + All blocks for a RG bitmap, whatever indirect blocks we
22539 + need, a modified dinode, and a quota change. */
22541 + error = gfs_trans_begin(sdp,
22542 + 1 + al->al_rgd->rd_ri.ri_length + ind_blocks,
22547 + if ((ip->i_di.di_mode & (S_ISUID | S_ISGID)) && !capable(CAP_FSETID)) {
22548 + error = gfs_get_inode_buffer(ip, &dibh);
22550 + goto fail_end_trans;
22552 + ip->i_di.di_mode &= (ip->i_di.di_mode & S_IXGRP) ? (~(S_ISUID | S_ISGID)) : (~S_ISUID);
22554 + gfs_trans_add_bh(ip->i_gl, dibh);
22555 + gfs_dinode_out(&ip->i_di, dibh->b_data);
22559 + if (gfs_is_stuffed(ip)) {
22560 + error = gfs_unstuff_dinode(ip, gfs_unstuffer_sync, NULL);
22562 + goto fail_end_trans;
22565 + count = generic_file_write_nolock(file, &local_iov, 1, offset);
22568 + goto fail_end_trans;
22571 + error = gfs_get_inode_buffer(ip, &dibh);
22573 + goto fail_end_trans;
22575 + if (ip->i_di.di_size < inode->i_size)
22576 + ip->i_di.di_size = inode->i_size;
22577 + ip->i_di.di_mtime = ip->i_di.di_ctime = get_seconds();
22579 + gfs_trans_add_bh(ip->i_gl, dibh);
22580 + gfs_dinode_out(&ip->i_di, dibh->b_data);
22583 + gfs_trans_end(sdp);
22585 + if (file->f_flags & O_SYNC)
22586 + gfs_log_flush_glock(ip->i_gl);
22588 + gfs_inplace_release(ip);
22589 + gfs_quota_unlock_m(ip);
22590 + gfs_alloc_put(ip);
22595 + gfs_trans_end(sdp);
22598 + gfs_inplace_release(ip);
22601 + gfs_quota_unlock_m(ip);
22604 + gfs_alloc_put(ip);
22610 + * do_write_direct - Write bytes to a file
22611 + * @file: The file to write to
22612 + * @buf: The buffer to copy from
22613 + * @size: The amount of data requested
22614 + * @offset: The current file offset
22615 + * @num_gh: The number of other locks we need to do the write
22616 + * @ghs: the locks we need plus one for our lock
22618 + * Outputs: Offset - updated according to number of bytes written
22620 + * Returns: The number of bytes written, -EXXX on failure
22624 +do_write_direct(struct file *file, char *buf, size_t size, loff_t *offset,
22625 + unsigned int num_gh, struct gfs_holder *ghs)
22627 + struct gfs_inode *ip = vn2ip(file->f_mapping->host);
22628 + struct gfs_sbd *sdp = ip->i_sbd;
22629 + struct gfs_file *fp = vf2fp(file);
22630 + unsigned int state = LM_ST_DEFERRED;
22631 + int alloc_required;
22634 + ssize_t count = 0;
22637 + if (test_bit(GFF_DID_DIRECT_ALLOC, &fp->f_flags))
22638 + state = LM_ST_EXCLUSIVE;
22640 + for (x = 0; x < num_gh; x++)
22641 + if (ghs[x].gh_gl == ip->i_gl) {
22642 + state = LM_ST_EXCLUSIVE;
22647 + gfs_holder_init(ip->i_gl, state, 0, &ghs[num_gh]);
22649 + error = gfs_glock_nq_m(num_gh + 1, ghs);
22654 + if (gfs_is_jdata(ip))
22655 + goto out_gunlock;
22658 + error = grope_mapping(buf, size);
22660 + goto out_gunlock;
22663 + if (file->f_flags & O_APPEND)
22664 + *offset = ip->i_di.di_size;
22666 + if (!(file->f_flags & O_LARGEFILE)) {
22668 + if (*offset >= 0x7FFFFFFFull)
22669 + goto out_gunlock;
22670 + if (*offset + size > 0x7FFFFFFFull)
22671 + size = 0x7FFFFFFFull - *offset;
22674 + if (gfs_is_stuffed(ip) ||
22675 + *offset + size > ip->i_di.di_size ||
22676 + ((ip->i_di.di_mode & (S_ISUID | S_ISGID)) && !capable(CAP_FSETID)))
22677 + alloc_required = TRUE;
22679 + error = gfs_write_alloc_required(ip, *offset, size,
22680 + &alloc_required);
22682 + goto out_gunlock;
22685 + if (alloc_required && state != LM_ST_EXCLUSIVE) {
22686 + gfs_glock_dq_m(num_gh + 1, ghs);
22687 + gfs_holder_uninit(&ghs[num_gh]);
22688 + state = LM_ST_EXCLUSIVE;
22692 + if (alloc_required) {
22693 + set_bit(GFF_DID_DIRECT_ALLOC, &fp->f_flags);
22696 + s = sdp->sd_tune.gt_max_atomic_write;
22700 + error = do_write_direct_alloc(file, buf, s, offset);
22702 + goto out_gunlock;
22709 + struct iovec local_iov = { .iov_base = buf, .iov_len = size };
22710 + struct gfs_holder t_gh;
22712 + clear_bit(GFF_DID_DIRECT_ALLOC, &fp->f_flags);
22714 + error = gfs_glock_nq_init(sdp->sd_trans_gl, LM_ST_SHARED, 0, &t_gh);
22716 + goto out_gunlock;
22718 + count = generic_file_write_nolock(file, &local_iov, 1, offset);
22720 + gfs_glock_dq_uninit(&t_gh);
22726 + gfs_glock_dq_m(num_gh + 1, ghs);
22729 + gfs_holder_uninit(&ghs[num_gh]);
22731 + return (count) ? count : error;
22735 + * do_do_write_buf - Write bytes to a file
22736 + * @file: The file to write to
22737 + * @buf: The buffer to copy from
22738 + * @size: The amount of data requested
22739 + * @offset: The current file offset
22741 + * Outputs: Offset - updated according to number of bytes written
22743 + * Returns: The number of bytes written, -EXXX on failure
22747 +do_do_write_buf(struct file *file, char *buf, size_t size, loff_t *offset)
22749 + struct inode *inode = file->f_mapping->host;
22750 + struct gfs_inode *ip = vn2ip(inode);
22751 + struct gfs_sbd *sdp = ip->i_sbd;
22752 + struct gfs_alloc *al = NULL;
22753 + struct buffer_head *dibh;
22754 + unsigned int data_blocks, ind_blocks;
22755 + int alloc_required, journaled;
22759 + journaled = gfs_is_jdata(ip);
22761 + gfs_write_calc_reserv(ip, size, &data_blocks, &ind_blocks);
22763 + error = gfs_write_alloc_required(ip, *offset, size, &alloc_required);
22767 + if (alloc_required) {
22768 + al = gfs_alloc_get(ip);
22770 + error = gfs_quota_lock_m(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
22774 + error = gfs_quota_check(ip, ip->i_di.di_uid, ip->i_di.di_gid);
22776 + goto fail_gunlock_q;
22779 + al->al_requested_meta = ind_blocks + data_blocks;
22781 + al->al_requested_meta = ind_blocks;
22782 + al->al_requested_data = data_blocks;
22785 + error = gfs_inplace_reserve(ip);
22787 + goto fail_gunlock_q;
22789 + /* Trans may require:
22790 + All blocks for a RG bitmap, whatever indirect blocks we
22791 + need, a modified dinode, and a quota change. */
22793 + error = gfs_trans_begin(sdp,
22794 + 1 + al->al_rgd->rd_ri.ri_length +
22796 + ((journaled) ? data_blocks : 0), 1);
22800 + /* Trans may require:
22801 + A modified dinode. */
22803 + error = gfs_trans_begin(sdp,
22804 + 1 + ((journaled) ? data_blocks : 0), 0);
22809 + if ((ip->i_di.di_mode & (S_ISUID | S_ISGID)) && !capable(CAP_FSETID)) {
22810 + error = gfs_get_inode_buffer(ip, &dibh);
22812 + goto fail_end_trans;
22814 + ip->i_di.di_mode &= (ip->i_di.di_mode & S_IXGRP) ? (~(S_ISUID | S_ISGID)) : (~S_ISUID);
22816 + gfs_trans_add_bh(ip->i_gl, dibh);
22817 + gfs_dinode_out(&ip->i_di, dibh->b_data);
22822 + (gfs_is_stuffed(ip) && !test_bit(GIF_PAGED, &ip->i_flags) &&
22823 + *offset + size <= sdp->sd_sb.sb_bsize - sizeof(struct gfs_dinode))) {
22825 + count = gfs_writei(ip, buf, *offset, size, gfs_copy_from_user);
22828 + goto fail_end_trans;
22831 + *offset += count;
22833 + struct iovec local_iov = { .iov_base = buf, .iov_len = size };
22835 + count = generic_file_write_nolock(file, &local_iov, 1, offset);
22838 + goto fail_end_trans;
22841 + error = gfs_get_inode_buffer(ip, &dibh);
22843 + goto fail_end_trans;
22845 + if (ip->i_di.di_size < inode->i_size)
22846 + ip->i_di.di_size = inode->i_size;
22847 + ip->i_di.di_mtime = ip->i_di.di_ctime = get_seconds();
22849 + gfs_trans_add_bh(ip->i_gl, dibh);
22850 + gfs_dinode_out(&ip->i_di, dibh->b_data);
22854 + gfs_trans_end(sdp);
22856 + if (file->f_flags & O_SYNC)
22857 + gfs_log_flush_glock(ip->i_gl);
22859 + if (alloc_required) {
22860 + GFS_ASSERT_INODE(count != size ||
22861 + al->al_alloced_meta ||
22862 + al->al_alloced_data, ip,);
22863 + gfs_inplace_release(ip);
22864 + gfs_quota_unlock_m(ip);
22865 + gfs_alloc_put(ip);
22871 + gfs_trans_end(sdp);
22874 + if (alloc_required)
22875 + gfs_inplace_release(ip);
22878 + if (alloc_required)
22879 + gfs_quota_unlock_m(ip);
22882 + if (alloc_required)
22883 + gfs_alloc_put(ip);
22889 + * do_write_buf - Write bytes to a file
22890 + * @file: The file to write to
22891 + * @buf: The buffer to copy from
22892 + * @size: The amount of data requested
22893 + * @offset: The current file offset
22894 + * @num_gh: The number of other locks we need to do the write
22895 + * @ghs: the locks we need plus one for our lock
22897 + * Outputs: Offset - updated according to number of bytes written
22899 + * Returns: The number of bytes written, -EXXX on failure
22903 +do_write_buf(struct file *file,
22904 + char *buf, size_t size, loff_t *offset,
22905 + unsigned int num_gh, struct gfs_holder *ghs)
22907 + struct gfs_inode *ip = vn2ip(file->f_mapping->host);
22908 + struct gfs_sbd *sdp = ip->i_sbd;
22910 + ssize_t count = 0;
22913 + gfs_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &ghs[num_gh]);
22915 + error = gfs_glock_nq_m(num_gh + 1, ghs);
22920 + error = grope_mapping(buf, size);
22922 + goto out_gunlock;
22925 + if (file->f_flags & O_APPEND)
22926 + *offset = ip->i_di.di_size;
22928 + if (!(file->f_flags & O_LARGEFILE)) {
22930 + if (*offset >= 0x7FFFFFFFull)
22931 + goto out_gunlock;
22932 + if (*offset + size > 0x7FFFFFFFull)
22933 + size = 0x7FFFFFFFull - *offset;
22937 + s = sdp->sd_tune.gt_max_atomic_write;
22941 + error = do_do_write_buf(file, buf, s, offset);
22943 + goto out_gunlock;
22953 + gfs_glock_dq_m(num_gh + 1, ghs);
22956 + gfs_holder_uninit(&ghs[num_gh]);
22958 + return (count) ? count : error;
22962 + * gfs_write - Write bytes to a file
22963 + * @file: The file to write to
22964 + * @buf: The buffer to copy from
22965 + * @size: The amount of data requested
22966 + * @offset: The current file offset
22968 + * Outputs: Offset - updated according to number of bytes written
22970 + * Returns: The number of bytes written, -EXXX on failure
22974 +gfs_write(struct file *file, const char *buf, size_t size, loff_t *offset)
22976 + struct inode *inode = file->f_mapping->host;
22979 + atomic_inc(&vfs2sdp(inode->i_sb)->sd_ops_file);
22983 + if (!access_ok(VERIFY_READ, buf, size))
22986 + down(&inode->i_sem);
22987 + if (file->f_flags & O_DIRECT)
22988 + count = walk_vm(file, (char *)buf, size, offset, do_write_direct);
22990 + count = walk_vm(file, (char *)buf, size, offset, do_write_buf);
22991 + up(&inode->i_sem);
22997 + * filldir_reg_func - Report a directory entry to the caller of gfs_dir_read()
22998 + * @opaque: opaque data used by the function
22999 + * @name: the name of the directory entry
23000 + * @length: the length of the name
23001 + * @offset: the entry's offset in the directory
23002 + * @inum: the inode number the entry points to
23003 + * @type: the type of inode the entry points to
23005 + * Returns: 0 on success, 1 if buffer full
23009 +filldir_reg_func(void *opaque,
23010 + const char *name, unsigned int length,
23012 + struct gfs_inum *inum, unsigned int type)
23014 + struct filldir_reg *fdr = (struct filldir_reg *)opaque;
23015 + struct gfs_sbd *sdp = fdr->fdr_sbd;
23016 + unsigned int vfs_type;
23020 + case GFS_FILE_NON:
23021 + vfs_type = DT_UNKNOWN;
23023 + case GFS_FILE_REG:
23024 + vfs_type = DT_REG;
23026 + case GFS_FILE_DIR:
23027 + vfs_type = DT_DIR;
23029 + case GFS_FILE_LNK:
23030 + vfs_type = DT_LNK;
23032 + case GFS_FILE_BLK:
23033 + vfs_type = DT_BLK;
23035 + case GFS_FILE_CHR:
23036 + vfs_type = DT_CHR;
23038 + case GFS_FILE_FIFO:
23039 + vfs_type = DT_FIFO;
23041 + case GFS_FILE_SOCK:
23042 + vfs_type = DT_SOCK;
23045 + GFS_ASSERT_SBD(FALSE, sdp,
23046 + printk("type = %u\n", type););
23049 + error = fdr->fdr_filldir(fdr->fdr_opaque, name, length, offset,
23050 + inum->no_formal_ino, vfs_type);
23054 + if (fdr->fdr_prefetch && !(length == 1 && *name == '.')) {
23055 + gfs_glock_prefetch_num(sdp,
23056 + inum->no_formal_ino, &gfs_inode_glops,
23057 + LM_ST_SHARED, LM_FLAG_TRY | LM_FLAG_ANY);
23058 + gfs_glock_prefetch_num(sdp,
23059 + inum->no_addr, &gfs_iopen_glops,
23060 + LM_ST_SHARED, LM_FLAG_TRY);
23067 + * readdir_reg - Read directory entries from a directory
23068 + * @file: The directory to read from
23069 + * @dirent: Buffer for dirents
23070 + * @filldir: Function used to do the copying
23072 + * Returns: 0 on success, -EXXXX on failure
23076 +readdir_reg(struct file *file, void *dirent, filldir_t filldir)
23078 + struct gfs_inode *dip = vn2ip(file->f_mapping->host);
23079 + struct filldir_reg fdr;
23080 + struct gfs_holder d_gh;
23081 + uint64_t offset = file->f_pos;
23084 + fdr.fdr_sbd = dip->i_sbd;
23085 + fdr.fdr_prefetch = GFS_ASYNC_LM(dip->i_sbd);
23086 + fdr.fdr_filldir = filldir;
23087 + fdr.fdr_opaque = dirent;
23089 + gfs_holder_init(dip->i_gl, LM_ST_SHARED, GL_ATIME, &d_gh);
23090 + error = gfs_glock_nq_atime(&d_gh);
23092 + gfs_holder_uninit(&d_gh);
23096 + error = gfs_dir_read(dip, &offset, &fdr, filldir_reg_func);
23098 + gfs_glock_dq_uninit(&d_gh);
23100 + file->f_pos = offset;
23106 + * filldir_bad_func - Report a directory entry to the caller of gfs_dir_read()
23107 + * @opaque: opaque data used by the function
23108 + * @name: the name of the directory entry
23109 + * @length: the length of the name
23110 + * @offset: the entry's offset in the directory
23111 + * @inum: the inode number the entry points to
23112 + * @type: the type of inode the entry points to
23114 + * Returns: 0 on success, 1 if buffer full
23118 +filldir_bad_func(void *opaque,
23119 + const char *name, unsigned int length,
23121 + struct gfs_inum *inum, unsigned int type)
23123 + struct filldir_bad *fdb = (struct filldir_bad *)opaque;
23124 + struct gfs_sbd *sdp = fdb->fdb_sbd;
23125 + struct filldir_bad_entry *fbe;
23127 + if (fdb->fdb_entry_off == fdb->fdb_entry_num ||
23128 + fdb->fdb_name_off + length > fdb->fdb_name_size)
23131 + fbe = &fdb->fdb_entry[fdb->fdb_entry_off];
23132 + fbe->fbe_name = fdb->fdb_name + fdb->fdb_name_off;
23133 + memcpy(fbe->fbe_name, name, length);
23134 + fbe->fbe_length = length;
23135 + fbe->fbe_offset = offset;
23136 + fbe->fbe_inum = *inum;
23137 + fbe->fbe_type = type;
23139 + fdb->fdb_entry_off++;
23140 + fdb->fdb_name_off += length;
23142 + if (fdb->fdb_prefetch && !(length == 1 && *name == '.')) {
23143 + gfs_glock_prefetch_num(sdp,
23144 + inum->no_formal_ino, &gfs_inode_glops,
23145 + LM_ST_SHARED, LM_FLAG_TRY | LM_FLAG_ANY);
23146 + gfs_glock_prefetch_num(sdp,
23147 + inum->no_addr, &gfs_iopen_glops,
23148 + LM_ST_SHARED, LM_FLAG_TRY);
23155 + * readdir_bad - Read directory entries from a directory
23156 + * @file: The directory to read from
23157 + * @dirent: Buffer for dirents
23158 + * @filldir: Function used to do the copying
23160 + * Returns: 0 on success, -EXXXX on failure
23164 +readdir_bad(struct file *file, void *dirent, filldir_t filldir)
23166 + struct gfs_inode *dip = vn2ip(file->f_mapping->host);
23167 + struct gfs_sbd *sdp = dip->i_sbd;
23168 + struct filldir_reg fdr;
23169 + unsigned int entries, size;
23170 + struct filldir_bad *fdb;
23171 + struct gfs_holder d_gh;
23172 + uint64_t offset = file->f_pos;
23174 + struct filldir_bad_entry *fbe;
23177 + entries = sdp->sd_tune.gt_entries_per_readdir;
23178 + size = sizeof(struct filldir_bad) +
23179 + entries * (sizeof(struct filldir_bad_entry) + GFS_FAST_NAME_SIZE);
23181 + fdb = gmalloc(size);
23182 + memset(fdb, 0, size);
23184 + fdb->fdb_sbd = sdp;
23185 + fdb->fdb_prefetch = GFS_ASYNC_LM(sdp);
23186 + fdb->fdb_entry = (struct filldir_bad_entry *)(fdb + 1);
23187 + fdb->fdb_entry_num = entries;
23188 + fdb->fdb_name = ((char *)fdb) + sizeof(struct filldir_bad) +
23189 + entries * sizeof(struct filldir_bad_entry);
23190 + fdb->fdb_name_size = entries * GFS_FAST_NAME_SIZE;
23192 + gfs_holder_init(dip->i_gl, LM_ST_SHARED, GL_ATIME, &d_gh);
23193 + error = gfs_glock_nq_atime(&d_gh);
23195 + gfs_holder_uninit(&d_gh);
23199 + error = gfs_dir_read(dip, &offset, fdb, filldir_bad_func);
23201 + gfs_glock_dq_uninit(&d_gh);
23203 + fdr.fdr_sbd = sdp;
23204 + fdr.fdr_prefetch = FALSE;
23205 + fdr.fdr_filldir = filldir;
23206 + fdr.fdr_opaque = dirent;
23208 + for (x = 0; x < fdb->fdb_entry_off; x++) {
23209 + fbe = &fdb->fdb_entry[x];
23211 + error = filldir_reg_func(&fdr,
23212 + fbe->fbe_name, fbe->fbe_length,
23214 + &fbe->fbe_inum, fbe->fbe_type);
23216 + file->f_pos = fbe->fbe_offset;
23222 + file->f_pos = offset;
23231 + * gfs_readdir - Read directory entries from a directory
23232 + * @file: The directory to read from
23233 + * @dirent: Buffer for dirents
23234 + * @filldir: Function used to do the copying
23236 + * Returns: 0 on success, -EXXXX on failure
23240 +gfs_readdir(struct file *file, void *dirent, filldir_t filldir)
23244 + atomic_inc(&vfs2sdp(file->f_mapping->host->i_sb)->sd_ops_file);
23246 + if (strcmp(current->comm, "nfsd") != 0)
23247 + error = readdir_reg(file, dirent, filldir);
23249 + error = readdir_bad(file, dirent, filldir);
23255 + * gfs_ioctl - do an ioctl on a file
23256 + * @inode: the inode
23257 + * @file: the file pointer
23258 + * @cmd: the ioctl command
23259 + * @arg: the argument
23261 + * Returns: 0 on success, -EXXXX on failure
23265 +gfs_ioctl(struct inode *inode, struct file *file,
23266 + unsigned int cmd, unsigned long arg)
23268 + struct gfs_inode *ip = vn2ip(inode);
23269 + atomic_inc(&ip->i_sbd->sd_ops_file);
23270 + return gfs_ioctli(ip, cmd, (void *)arg);
23274 + * gfs_open - open a file
23275 + * @inode: the inode to open
23276 + * @file: the struct file for this opening
23278 + * Returns: 0 on success, -EXXX on failure
23282 +gfs_open(struct inode *inode, struct file *file)
23284 + struct gfs_inode *ip = vn2ip(inode);
23285 + struct gfs_holder i_gh;
23286 + struct gfs_file *fp;
23289 + atomic_inc(&ip->i_sbd->sd_ops_file);
23291 + fp = gmalloc(sizeof(struct gfs_file));
23292 + memset(fp, 0, sizeof(struct gfs_file));
23294 + init_MUTEX(&fp->f_fl_lock);
23296 + fp->f_inode = ip;
23297 + fp->f_vfile = file;
23299 + GFS_ASSERT_INODE(!vf2fp(file), ip,);
23300 + vf2fp(file) = fp;
23302 + if (ip->i_di.di_type == GFS_FILE_REG) {
23303 + error = gfs_glock_nq_init(ip->i_gl,
23304 + LM_ST_SHARED, LM_FLAG_ANY,
23309 + if (!(file->f_flags & O_LARGEFILE) &&
23310 + ip->i_di.di_size > 0x7FFFFFFFull) {
23312 + goto fail_gunlock;
23315 + /* If this is an exclusive create, make sure our gfs_create()
23316 + says we created the file. The O_EXCL flag isn't passed
23317 + to gfs_create(), so we have to check it here. */
23319 + if (file->f_flags & O_CREAT) {
23320 + if (ip->i_creat_task == current &&
23321 + ip->i_creat_pid == current->pid) {
23322 + ip->i_creat_task = NULL;
23323 + ip->i_creat_pid = 0;
23324 + } else if (file->f_flags & O_EXCL) {
23326 + goto fail_gunlock;
23330 + /* Listen to the Direct I/O flag */
23332 + if (ip->i_di.di_flags & GFS_DIF_DIRECTIO)
23333 + file->f_flags |= O_DIRECT;
23335 + /* Don't let the user open O_DIRECT on a jdata file */
23337 + if ((file->f_flags & O_DIRECT) && gfs_is_jdata(ip)) {
23339 + goto fail_gunlock;
23342 + gfs_glock_dq_uninit(&i_gh);
23348 + gfs_glock_dq_uninit(&i_gh);
23351 + vf2fp(file) = NULL;
23358 + * gfs_close - called to close a struct file
23359 + * @inode: the inode the struct file belongs to
23360 + * @file: the struct file being closed
23362 + * Returns: 0 on success, -EXXX on failure
23366 +gfs_close(struct inode *inode, struct file *file)
23368 + struct gfs_file *fp;
23370 + atomic_inc(&vfs2sdp(inode->i_sb)->sd_ops_file);
23372 + fp = vf2fp(file);
23373 + vf2fp(file) = NULL;
23383 + * gfs_fsync - sync the dirty data for a file (across the cluster)
23384 + * @file: the file that points to the dentry (Huh?)
23385 + * @dentry: the dentry that points to the inode to sync
23387 + * Returns: 0 on success, -EXXX on failure
23391 +gfs_fsync(struct file *file, struct dentry *dentry, int datasync)
23393 + struct gfs_inode *ip = vn2ip(dentry->d_inode);
23394 + struct gfs_holder i_gh;
23397 + atomic_inc(&ip->i_sbd->sd_ops_file);
23399 + error = gfs_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh);
23403 + if (gfs_is_jdata(ip))
23404 + gfs_log_flush_glock(ip->i_gl);
23406 + i_gh.gh_flags |= GL_SYNC;
23408 + gfs_glock_dq_uninit(&i_gh);
23414 + * gfs_lock - acquire/release a flock or posix lock on a file
23415 + * @file: the file pointer
23416 + * @cmd: either modify or retrieve lock state, possibly wait
23417 + * @fl: type and range of lock
23419 + * Returns: 0 on success, -EXXX on failure
23423 +gfs_lock(struct file *file, int cmd, struct file_lock *fl)
23425 + struct gfs_inode *ip = vn2ip(file->f_mapping->host);
23426 + struct gfs_sbd *sdp = ip->i_sbd;
23427 + struct lm_lockname name;
23428 + uint64_t start = fl->fl_start, end = fl->fl_end;
23429 + pid_t pid = fl->fl_pid;
23430 + int plock = (fl->fl_flags & FL_POSIX);
23431 + int flock = (fl->fl_flags & FL_FLOCK);
23432 + int get, set, wait, ex, sh, un;
23435 + atomic_inc(&sdp->sd_ops_file);
23437 + if (sdp->sd_args.ar_localflocks)
23438 + return LOCK_USE_CLNT;
23440 + if ((ip->i_di.di_mode & (S_ISGID | S_IXGRP)) == S_ISGID)
23443 + if (!flock && !plock)
23446 + get = (IS_GETLK(cmd)) ? TRUE : FALSE;
23447 + set = (IS_SETLK(cmd)) ? TRUE : FALSE;
23448 + wait = (IS_SETLKW(cmd)) ? TRUE : FALSE;
23450 + if ((flock && (get || (!set && !wait))) ||
23451 + (plock && (!get && !set && !wait)))
23454 + ex = (fl->fl_type == F_WRLCK) ? TRUE : FALSE;
23455 + sh = (fl->fl_type == F_RDLCK) ? TRUE : FALSE;
23456 + un = (fl->fl_type == F_UNLCK) ? TRUE : FALSE;
23458 + if (!ex && !sh && !un)
23462 + struct gfs_file *fp = vf2fp(file);
23466 + error = gfs_funlock(fp);
23468 + error = gfs_flock(fp, ex, wait);
23470 + name.ln_number = ip->i_num.no_formal_ino;
23471 + name.ln_type = LM_TYPE_PLOCK;
23473 + error = sdp->sd_lockstruct.ls_ops->lm_plock_get(
23474 + sdp->sd_lockstruct.ls_lockspace,
23475 + &name, (unsigned long)fl->fl_owner,
23476 + &start, &end, &ex, (unsigned long*)&pid);
23480 + fl->fl_type = F_UNLCK;
23484 + fl->fl_start = start;
23485 + fl->fl_end = end;
23486 + fl->fl_pid = pid;
23487 + fl->fl_type = (ex) ? F_WRLCK : F_RDLCK;
23491 + error = sdp->sd_lockstruct.ls_ops->lm_punlock(
23492 + sdp->sd_lockstruct.ls_lockspace,
23493 + &name, (unsigned long)fl->fl_owner,
23496 + error = sdp->sd_lockstruct.ls_ops->lm_plock(
23497 + sdp->sd_lockstruct.ls_lockspace,
23498 + &name, (unsigned long)fl->fl_owner,
23499 + wait, ex, start, end);
23506 + * gfs_sendfile - Send bytes to a file or socket
23507 + * @in_file: The file to read from
23508 + * @out_file: The file to write to
23509 + * @count: The amount of data
23510 + * @offset: The beginning file offset
23512 + * Outputs: offset - updated according to number of bytes read
23514 + * Returns: The number of bytes sent, -EXXX on failure
23518 +gfs_sendfile(struct file *in_file, loff_t *offset, size_t count, read_actor_t actor, void __user *target)
23520 + struct gfs_inode *ip = vn2ip(in_file->f_mapping->host);
23521 + struct gfs_holder gh;
23524 + atomic_inc(&ip->i_sbd->sd_ops_file);
23526 + gfs_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME, &gh);
23528 + retval = gfs_glock_nq_atime(&gh);
23532 + if (gfs_is_jdata(ip))
23533 + retval = -ENOSYS;
23535 + retval = generic_file_sendfile(in_file, offset, count, actor, target);
23537 + gfs_glock_dq(&gh);
23540 + gfs_holder_uninit(&gh);
23546 + * gfs_mmap - We don't support shared writable mappings right now
23547 + * @file: The file to map
23548 + * @vma: The VMA which described the mapping
23550 + * Returns: 0 or error code
23554 +gfs_mmap(struct file *file, struct vm_area_struct *vma)
23556 + struct gfs_inode *ip = vn2ip(file->f_mapping->host);
23557 + struct gfs_holder i_gh;
23560 + atomic_inc(&ip->i_sbd->sd_ops_file);
23562 + gfs_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME, &i_gh);
23563 + error = gfs_glock_nq_atime(&i_gh);
23565 + gfs_holder_uninit(&i_gh);
23569 + if (gfs_is_jdata(ip)) {
23570 + if (vma->vm_flags & VM_MAYSHARE)
23573 + vma->vm_ops = &gfs_vm_ops_private;
23575 + /* This is VM_MAYWRITE instead of VM_WRITE because a call
23576 + to mprotect() can turn on VM_WRITE later. */
23578 + if ((vma->vm_flags & (VM_MAYSHARE | VM_MAYWRITE)) == (VM_MAYSHARE | VM_MAYWRITE))
23579 + vma->vm_ops = &gfs_vm_ops_sharewrite;
23581 + vma->vm_ops = &gfs_vm_ops_private;
23584 + gfs_glock_dq_uninit(&i_gh);
23589 +struct file_operations gfs_file_fops = {
23590 + .llseek = gfs_llseek,
23591 + .read = gfs_read,
23592 + .write = gfs_write,
23593 + .ioctl = gfs_ioctl,
23594 + .mmap = gfs_mmap,
23595 + .open = gfs_open,
23596 + .release = gfs_close,
23597 + .fsync = gfs_fsync,
23598 + .lock = gfs_lock,
23599 + .sendfile = gfs_sendfile,
23602 +struct file_operations gfs_dir_fops = {
23603 + .readdir = gfs_readdir,
23604 + .ioctl = gfs_ioctl,
23605 + .open = gfs_open,
23606 + .release = gfs_close,
23607 + .fsync = gfs_fsync,
23608 + .lock = gfs_lock,
23610 diff -urN linux-orig/fs/gfs/ops_file.h linux-patched/fs/gfs/ops_file.h
23611 --- linux-orig/fs/gfs/ops_file.h 1969-12-31 18:00:00.000000000 -0600
23612 +++ linux-patched/fs/gfs/ops_file.h 2004-06-30 13:27:49.352709042 -0500
23614 +/******************************************************************************
23615 +*******************************************************************************
23617 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
23618 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
23620 +** This copyrighted material is made available to anyone wishing to use,
23621 +** modify, copy, or redistribute it subject to the terms and conditions
23622 +** of the GNU General Public License v.2.
23624 +*******************************************************************************
23625 +******************************************************************************/
23627 +#ifndef __OPS_FILE_DOT_H__
23628 +#define __OPS_FILE_DOT_H__
23630 +extern struct file_operations gfs_file_fops;
23631 +extern struct file_operations gfs_dir_fops;
23633 +#endif /* __OPS_FILE_DOT_H__ */
23634 diff -urN linux-orig/fs/gfs/ops_fstype.c linux-patched/fs/gfs/ops_fstype.c
23635 --- linux-orig/fs/gfs/ops_fstype.c 1969-12-31 18:00:00.000000000 -0600
23636 +++ linux-patched/fs/gfs/ops_fstype.c 2004-06-30 13:27:49.353708810 -0500
23638 +/******************************************************************************
23639 +*******************************************************************************
23641 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
23642 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
23644 +** This copyrighted material is made available to anyone wishing to use,
23645 +** modify, copy, or redistribute it subject to the terms and conditions
23646 +** of the GNU General Public License v.2.
23648 +*******************************************************************************
23649 +******************************************************************************/
23651 +#include <linux/sched.h>
23652 +#include <linux/slab.h>
23653 +#include <linux/smp_lock.h>
23654 +#include <linux/spinlock.h>
23655 +#include <asm/semaphore.h>
23656 +#include <linux/completion.h>
23657 +#include <linux/buffer_head.h>
23658 +#include <linux/vmalloc.h>
23659 +#include <linux/blkdev.h>
23662 +#include "daemon.h"
23663 +#include "glock.h"
23664 +#include "glops.h"
23665 +#include "inode.h"
23666 +#include "locking.h"
23667 +#include "mount.h"
23668 +#include "ops_export.h"
23669 +#include "ops_fstype.h"
23670 +#include "ops_super.h"
23671 +#include "quota.h"
23672 +#include "recovery.h"
23674 +#include "super.h"
23675 +#include "unlinked.h"
22678 + * fill_super - Read in superblock
22679 + * @sb: The VFS superblock
22680 + * @data: Mount options
22681 + * @silent: Don't complain if it's not a GFS filesystem
22683 + * Returns: 0 on success, -errno on failure
23687 +fill_super(struct super_block *sb, void *data, int silent)
23689 + struct gfs_sbd *sdp;
23690 + struct gfs_holder mount_gh, sb_gh, ji_gh;
23691 + struct inode *inode;
23692 + int super = TRUE, jindex = TRUE;
23697 + sdp = vmalloc(sizeof(struct gfs_sbd));
23701 + memset(sdp, 0, sizeof(struct gfs_sbd));
23703 + vfs2sdp(sb) = sdp;
23704 + sdp->sd_vfs = sb;
23706 + /* Init rgrp variables */
23708 + INIT_LIST_HEAD(&sdp->sd_rglist);
23709 + init_MUTEX(&sdp->sd_rindex_lock);
23710 + INIT_LIST_HEAD(&sdp->sd_rg_mru_list);
23711 + spin_lock_init(&sdp->sd_rg_mru_lock);
23712 + INIT_LIST_HEAD(&sdp->sd_rg_recent);
23713 + spin_lock_init(&sdp->sd_rg_recent_lock);
23714 + spin_lock_init(&sdp->sd_rg_forward_lock);
23716 + for (x = 0; x < GFS_GL_HASH_SIZE; x++) {
23717 + sdp->sd_gl_hash[x].hb_lock = RW_LOCK_UNLOCKED;
23718 + INIT_LIST_HEAD(&sdp->sd_gl_hash[x].hb_list);
23721 + INIT_LIST_HEAD(&sdp->sd_reclaim_list);
23722 + spin_lock_init(&sdp->sd_reclaim_lock);
23723 + init_waitqueue_head(&sdp->sd_reclaim_wchan);
23725 + for (x = 0; x < GFS_MHC_HASH_SIZE; x++)
23726 + INIT_LIST_HEAD(&sdp->sd_mhc[x]);
23727 + INIT_LIST_HEAD(&sdp->sd_mhc_single);
23728 + spin_lock_init(&sdp->sd_mhc_lock);
23730 + for (x = 0; x < GFS_DEPEND_HASH_SIZE; x++)
23731 + INIT_LIST_HEAD(&sdp->sd_depend[x]);
23732 + spin_lock_init(&sdp->sd_depend_lock);
23734 + init_MUTEX(&sdp->sd_freeze_lock);
23736 + init_MUTEX(&sdp->sd_thread_lock);
23737 + init_completion(&sdp->sd_thread_completion);
23739 + spin_lock_init(&sdp->sd_log_seg_lock);
23740 + INIT_LIST_HEAD(&sdp->sd_log_seg_list);
23741 + init_waitqueue_head(&sdp->sd_log_seg_wait);
23742 + INIT_LIST_HEAD(&sdp->sd_log_ail);
23743 + INIT_LIST_HEAD(&sdp->sd_log_incore);
23744 + init_MUTEX(&sdp->sd_log_lock);
23745 + INIT_LIST_HEAD(&sdp->sd_unlinked_list);
23746 + spin_lock_init(&sdp->sd_unlinked_lock);
23747 + INIT_LIST_HEAD(&sdp->sd_quota_list);
23748 + spin_lock_init(&sdp->sd_quota_lock);
23750 + INIT_LIST_HEAD(&sdp->sd_dirty_j);
23751 + spin_lock_init(&sdp->sd_dirty_j_lock);
23753 + spin_lock_init(&sdp->sd_ail_lock);
23754 + INIT_LIST_HEAD(&sdp->sd_recovery_bufs);
23756 + gfs_init_tune_data(sdp);
23758 + error = gfs_make_args((char *)data, &sdp->sd_args);
23760 + printk("GFS: can't parse mount arguments\n");
23764 + /* Copy out mount flags */
23766 + if (sb->s_flags & (MS_NOATIME | MS_NODIRATIME))
23767 + set_bit(SDF_NOATIME, &sdp->sd_flags);
23768 + if (sb->s_flags & MS_RDONLY)
23769 + set_bit(SDF_ROFS, &sdp->sd_flags);
23771 + /* Set up Virtual Super Block */
23773 + sb->s_magic = GFS_MAGIC;
23774 + sb->s_op = &gfs_super_ops;
23775 + sb->s_export_op = &gfs_export_ops;
23776 + sb->s_flags |= MS_NOATIME | MS_NODIRATIME;
23777 + sb->s_maxbytes = ~0ULL;
23779 + if (sdp->sd_args.ar_posixacls)
23780 + sb->s_flags |= MS_POSIXACL;
23782 + /* Set up the buffer cache and fill in some fake values
23783 + to allow us to read in the superblock. */
23785 + sdp->sd_sb.sb_bsize = sb_min_blocksize(sb, GFS_BASIC_BLOCK);
23786 + sdp->sd_sb.sb_bsize_shift = sb->s_blocksize_bits;
23787 + sdp->sd_fsb2bb_shift = sdp->sd_sb.sb_bsize_shift - GFS_BASIC_BLOCK_SHIFT;
23788 + sdp->sd_fsb2bb = 1 << sdp->sd_fsb2bb_shift;
23790 + GFS_ASSERT_SBD(sizeof(struct gfs_sb) <= sdp->sd_sb.sb_bsize, sdp,);
23792 + error = gfs_mount_lockproto(sdp, silent);
23796 + printk("GFS: fsid=%s: Joined cluster. Now mounting FS...\n",
23799 + if ((sdp->sd_lockstruct.ls_flags & LM_LSFLAG_LOCAL) &&
23800 + !sdp->sd_args.ar_ignore_local_fs) {
23801 + /* Force local [p|f]locks */
23802 + sdp->sd_args.ar_localflocks = TRUE;
23804 + /* Force local read ahead and caching */
23805 + sdp->sd_args.ar_localcaching = TRUE;
23808 + /* Start up the scand thread */
23810 + error = kernel_thread(gfs_scand, sdp, 0);
23812 + printk("GFS: fsid=%s: can't start scand thread: %d\n",
23813 + sdp->sd_fsname, error);
23814 + goto fail_lockproto;
23816 + wait_for_completion(&sdp->sd_thread_completion);
23818 + /* Start up the glockd thread */
23820 + for (sdp->sd_glockd_num = 0;
23821 + sdp->sd_glockd_num < sdp->sd_args.ar_num_glockd;
23822 + sdp->sd_glockd_num++) {
23823 + error = kernel_thread(gfs_glockd, sdp, 0);
23825 + printk("GFS: fsid=%s: can't start glockd thread: %d\n",
23826 + sdp->sd_fsname, error);
23827 + goto fail_glockd;
23829 + wait_for_completion(&sdp->sd_thread_completion);
23832 + error = gfs_glock_nq_num(sdp,
23833 + GFS_MOUNT_LOCK, &gfs_nondisk_glops,
23834 + LM_ST_EXCLUSIVE, LM_FLAG_NOEXP | GL_NOCACHE,
23837 + printk("GFS: fsid=%s: can't acquire mount glock: %d\n",
23838 + sdp->sd_fsname, error);
23839 + goto fail_glockd;
23842 + error = gfs_glock_nq_num(sdp,
23843 + GFS_LIVE_LOCK, &gfs_nondisk_glops,
23844 + LM_ST_SHARED, LM_FLAG_NOEXP | GL_EXACT,
23845 + &sdp->sd_live_gh);
23847 + printk("GFS: fsid=%s: can't acquire live glock: %d\n",
23848 + sdp->sd_fsname, error);
23849 + goto fail_gunlock_mount;
23852 + sdp->sd_live_gh.gh_owner = NULL;
23854 + error = gfs_glock_nq_num(sdp,
23855 + GFS_SB_LOCK, &gfs_meta_glops,
23856 + (sdp->sd_args.ar_upgrade) ? LM_ST_EXCLUSIVE : LM_ST_SHARED,
23859 + printk("GFS: fsid=%s: can't acquire superblock glock: %d\n",
23860 + sdp->sd_fsname, error);
23861 + goto fail_gunlock_live;
23864 + error = gfs_read_sb(sdp, sb_gh.gh_gl, silent);
23866 + printk("GFS: fsid=%s: can't read superblock: %d\n",
23867 + sdp->sd_fsname, error);
23868 + goto fail_gunlock_sb;
23871 + /* Set up the buffer cache and SB for real */
23874 + if (sdp->sd_sb.sb_bsize < bdev_hardsect_size(sb->s_bdev)) {
23875 + printk("GFS: fsid=%s: FS block size (%u) is too small for device block size (%u)\n",
23876 + sdp->sd_fsname, sdp->sd_sb.sb_bsize, bdev_hardsect_size(sb->s_bdev));
23877 + goto fail_gunlock_sb;
23879 + if (sdp->sd_sb.sb_bsize > PAGE_SIZE) {
23880 + printk("GFS: fsid=%s: FS block size (%u) is too big for machine page size (%u)\n",
23881 + sdp->sd_fsname, sdp->sd_sb.sb_bsize,
23882 + (unsigned int)PAGE_SIZE);
23883 + goto fail_gunlock_sb;
23886 + /* Get rid of buffers from the original block size */
23887 + sb_gh.gh_gl->gl_ops->go_inval(sb_gh.gh_gl, DIO_METADATA | DIO_DATA);
23888 + sb_gh.gh_gl->gl_aspace->i_blkbits = sdp->sd_sb.sb_bsize_shift;
23890 + sb_set_blocksize(sb, sdp->sd_sb.sb_bsize);
23892 + /* Read in journal index inode */
23894 + error = gfs_get_jiinode(sdp);
23896 + printk("GFS: fsid=%s: can't get journal index inode: %d\n",
23897 + sdp->sd_fsname, error);
23898 + goto fail_gunlock_sb;
23901 + init_MUTEX(&sdp->sd_jindex_lock);
23903 + /* Get a handle on the transaction glock */
23905 + error = gfs_glock_get(sdp, GFS_TRANS_LOCK, &gfs_trans_glops,
23906 + CREATE, &sdp->sd_trans_gl);
23908 + goto fail_ji_free;
23909 + set_bit(GLF_STICKY, &sdp->sd_trans_gl->gl_flags);
23911 + /* Upgrade version numbers if we need to */
23913 + if (sdp->sd_args.ar_upgrade) {
23914 + error = gfs_do_upgrade(sdp, sb_gh.gh_gl);
23916 + goto fail_trans_gl;
23919 + /* Load in the journal index */
23921 + error = gfs_jindex_hold(sdp, &ji_gh);
23923 + printk("GFS: fsid=%s: can't read journal index: %d\n",
23924 + sdp->sd_fsname, error);
23925 + goto fail_trans_gl;
23929 + if (sdp->sd_lockstruct.ls_jid >= sdp->sd_journals) {
23930 + printk("GFS: fsid=%s: can't mount journal #%u\n",
23931 + sdp->sd_fsname, sdp->sd_lockstruct.ls_jid);
23932 + printk("GFS: fsid=%s: there are only %u journals (0 - %u)\n",
23933 + sdp->sd_fsname, sdp->sd_journals, sdp->sd_journals - 1);
23934 + goto fail_gunlock_ji;
23936 + sdp->sd_jdesc = sdp->sd_jindex[sdp->sd_lockstruct.ls_jid];
23937 + sdp->sd_log_seg_free = sdp->sd_jdesc.ji_nsegment - 1;
23939 + error = gfs_glock_nq_num(sdp,
23940 + sdp->sd_jdesc.ji_addr, &gfs_meta_glops,
23941 + LM_ST_EXCLUSIVE, LM_FLAG_NOEXP,
23942 + &sdp->sd_journal_gh);
23944 + printk("GFS: fsid=%s: can't acquire the journal glock: %d\n",
23945 + sdp->sd_fsname, error);
23946 + goto fail_gunlock_ji;
23949 + if (sdp->sd_lockstruct.ls_first) {
23950 + for (x = 0; x < sdp->sd_journals; x++) {
23951 + error = gfs_recover_journal(sdp,
23952 + x, sdp->sd_jindex + x,
23955 + printk("GFS: fsid=%s: error recovering journal %u: %d\n",
23956 + sdp->sd_fsname, x, error);
23957 + goto fail_gunlock_journal;
23961 + sdp->sd_lockstruct.ls_ops->lm_others_may_mount(sdp->sd_lockstruct.ls_lockspace);
23962 + sdp->sd_lockstruct.ls_first = FALSE;
23964 + error = gfs_recover_journal(sdp,
23965 + sdp->sd_lockstruct.ls_jid, &sdp->sd_jdesc,
23968 + printk("GFS: fsid=%s: error recovering my journal: %d\n",
23969 + sdp->sd_fsname, error);
23970 + goto fail_gunlock_journal;
23974 + gfs_glock_dq_uninit(&ji_gh);
23977 + /* Disown my Journal glock */
23979 + sdp->sd_journal_gh.gh_owner = NULL;
23981 + /* Drop our cache and reread all the things we read before the replay. */
23983 + error = gfs_read_sb(sdp, sb_gh.gh_gl, FALSE);
23985 + printk("GFS: fsid=%s: can't read superblock: %d\n",
23986 + sdp->sd_fsname, error);
23987 + goto fail_gunlock_journal;
23990 + gfs_glock_force_drop(sdp->sd_jiinode->i_gl);
23992 + error = gfs_jindex_hold(sdp, &ji_gh);
23994 + printk("GFS: fsid=%s: can't read journal index: %d\n",
23995 + sdp->sd_fsname, error);
23996 + goto fail_gunlock_journal;
23998 + gfs_glock_dq_uninit(&ji_gh);
24000 + /* Make the FS read/write */
24002 + if (!test_bit(SDF_ROFS, &sdp->sd_flags)) {
24003 + error = gfs_make_fs_rw(sdp);
24005 + printk("GFS: fsid=%s: can't make FS RW: %d\n",
24006 + sdp->sd_fsname, error);
24007 + goto fail_gunlock_journal;
24011 + /* Start up the recover thread */
24013 + error = kernel_thread(gfs_recoverd, sdp, 0);
24015 + printk("GFS: fsid=%s: can't start recoverd thread: %d\n",
24016 + sdp->sd_fsname, error);
24017 + goto fail_recover_dump;
24019 + wait_for_completion(&sdp->sd_thread_completion);
24021 + /* Read in the resource index inode */
24023 + error = gfs_get_riinode(sdp);
24025 + printk("GFS: fsid=%s: can't get resource index inode: %d\n",
24026 + sdp->sd_fsname, error);
24027 + goto fail_recoverd;
24030 + /* Get the root inode */
24032 + error = gfs_get_rootinode(sdp);
24034 + printk("GFS: fsid=%s: can't read in root inode: %d\n",
24035 + sdp->sd_fsname, error);
24036 + goto fail_ri_free;
24039 + /* Read in the quota inode */
24041 + error = gfs_get_qinode(sdp);
24043 + printk("GFS: fsid=%s: can't get quota file inode: %d\n",
24044 + sdp->sd_fsname, error);
24045 + goto fail_root_free;
24048 + /* Read in the license inode */
24050 + error = gfs_get_linode(sdp);
24052 + printk("GFS: fsid=%s: can't get license file inode: %d\n",
24053 + sdp->sd_fsname, error);
24054 + goto fail_qi_free;
24057 + /* We're through with the superblock lock */
24059 + gfs_glock_dq_uninit(&sb_gh);
24062 + /* Get the inode/dentry */
24064 + inode = gfs_iget(sdp->sd_rooti, CREATE);
24066 + printk("GFS: fsid=%s: can't get root inode\n", sdp->sd_fsname);
24068 + goto fail_li_free;
24071 + sb->s_root = d_alloc_root(inode);
24072 + if (!sb->s_root) {
24074 + printk("GFS: fsid=%s: can't get root dentry\n", sdp->sd_fsname);
24076 + goto fail_li_free;
24079 + /* Start up the logd thread */
24081 + sdp->sd_jindex_refresh_time = jiffies;
24083 + error = kernel_thread(gfs_logd, sdp, 0);
24085 + printk("GFS: fsid=%s: can't start logd thread: %d\n",
24086 + sdp->sd_fsname, error);
24089 + wait_for_completion(&sdp->sd_thread_completion);
24091 + /* Start up the quotad thread */
24093 + error = kernel_thread(gfs_quotad, sdp, 0);
24095 + printk("GFS: fsid=%s: can't start quotad thread: %d\n",
24096 + sdp->sd_fsname, error);
24099 + wait_for_completion(&sdp->sd_thread_completion);
24101 + /* Start up the inoded thread */
24103 + error = kernel_thread(gfs_inoded, sdp, 0);
24105 + printk("GFS: fsid=%s: can't start inoded thread: %d\n",
24106 + sdp->sd_fsname, error);
24107 + goto fail_quotad;
24109 + wait_for_completion(&sdp->sd_thread_completion);
24111 + /* Get a handle on the rename lock */
24113 + error = gfs_glock_get(sdp, GFS_RENAME_LOCK, &gfs_nondisk_glops,
24114 + CREATE, &sdp->sd_rename_gl);
24116 + goto fail_inoded;
24118 + gfs_glock_dq_uninit(&mount_gh);
24123 + down(&sdp->sd_thread_lock);
24124 + clear_bit(SDF_INODED_RUN, &sdp->sd_flags);
24125 + wake_up_process(sdp->sd_inoded_process);
24126 + up(&sdp->sd_thread_lock);
24127 + wait_for_completion(&sdp->sd_thread_completion);
24130 + down(&sdp->sd_thread_lock);
24131 + clear_bit(SDF_QUOTAD_RUN, &sdp->sd_flags);
24132 + wake_up_process(sdp->sd_quotad_process);
24133 + up(&sdp->sd_thread_lock);
24134 + wait_for_completion(&sdp->sd_thread_completion);
24137 + down(&sdp->sd_thread_lock);
24138 + clear_bit(SDF_LOGD_RUN, &sdp->sd_flags);
24139 + wake_up_process(sdp->sd_logd_process);
24140 + up(&sdp->sd_thread_lock);
24141 + wait_for_completion(&sdp->sd_thread_completion);
24144 + dput(sb->s_root);
24147 + gfs_inode_put(sdp->sd_linode);
24150 + gfs_inode_put(sdp->sd_qinode);
24153 + gfs_inode_put(sdp->sd_rooti);
24156 + gfs_inode_put(sdp->sd_riinode);
24157 + gfs_clear_rgrpd(sdp);
24160 + down(&sdp->sd_thread_lock);
24161 + clear_bit(SDF_RECOVERD_RUN, &sdp->sd_flags);
24162 + wake_up_process(sdp->sd_recoverd_process);
24163 + up(&sdp->sd_thread_lock);
24164 + wait_for_completion(&sdp->sd_thread_completion);
24166 + fail_recover_dump:
24167 + clear_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags);
24168 + gfs_unlinked_cleanup(sdp);
24169 + gfs_quota_cleanup(sdp);
24171 + fail_gunlock_journal:
24172 + gfs_glock_dq_uninit(&sdp->sd_journal_gh);
24176 + gfs_glock_dq_uninit(&ji_gh);
24179 + gfs_glock_put(sdp->sd_trans_gl);
24182 + gfs_inode_put(sdp->sd_jiinode);
24183 + gfs_clear_journals(sdp);
24187 + gfs_glock_dq_uninit(&sb_gh);
24189 + fail_gunlock_live:
24190 + gfs_glock_dq_uninit(&sdp->sd_live_gh);
24192 + fail_gunlock_mount:
24193 + gfs_glock_dq_uninit(&mount_gh);
24196 + clear_bit(SDF_GLOCKD_RUN, &sdp->sd_flags);
24197 + wake_up(&sdp->sd_reclaim_wchan);
24198 + while (sdp->sd_glockd_num--)
24199 + wait_for_completion(&sdp->sd_thread_completion);
24201 + down(&sdp->sd_thread_lock);
24202 + clear_bit(SDF_SCAND_RUN, &sdp->sd_flags);
24203 + wake_up_process(sdp->sd_scand_process);
24204 + up(&sdp->sd_thread_lock);
24205 + wait_for_completion(&sdp->sd_thread_completion);
24208 + gfs_gl_hash_clear(sdp, TRUE);
24209 + gfs_unmount_lockproto(sdp);
24210 + gfs_clear_dirty_j(sdp);
24211 + while (invalidate_inodes(sb))
24218 + vfs2sdp(sb) = NULL;
24229 + * Returns: the new superblock
24232 +struct super_block *gfs_get_sb(struct file_system_type *fs_type, int flags,
24233 + const char *dev_name, void *data)
24235 + return get_sb_bdev(fs_type, flags, dev_name, data, fill_super);
24238 +struct file_system_type gfs_fs_type = {
24240 + .fs_flags = FS_REQUIRES_DEV,
24241 + .get_sb = gfs_get_sb,
24242 + .kill_sb = kill_block_super,
24243 + .owner = THIS_MODULE,
24245 diff -urN linux-orig/fs/gfs/ops_fstype.h linux-patched/fs/gfs/ops_fstype.h
24246 --- linux-orig/fs/gfs/ops_fstype.h 1969-12-31 18:00:00.000000000 -0600
24247 +++ linux-patched/fs/gfs/ops_fstype.h 2004-06-30 13:27:49.353708810 -0500
24249 +/******************************************************************************
24250 +*******************************************************************************
24252 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
24253 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
24255 +** This copyrighted material is made available to anyone wishing to use,
24256 +** modify, copy, or redistribute it subject to the terms and conditions
24257 +** of the GNU General Public License v.2.
24259 +*******************************************************************************
24260 +******************************************************************************/
24262 +#ifndef __OPS_FSTYPE_DOT_H__
24263 +#define __OPS_FSTYPE_DOT_H__
24265 +extern struct file_system_type gfs_fs_type;
24267 +#endif /* __OPS_FSTYPE_DOT_H__ */
24268 diff -urN linux-orig/fs/gfs/ops_inode.c linux-patched/fs/gfs/ops_inode.c
24269 --- linux-orig/fs/gfs/ops_inode.c 1969-12-31 18:00:00.000000000 -0600
24270 +++ linux-patched/fs/gfs/ops_inode.c 2004-06-30 13:27:49.354708578 -0500
24272 +/******************************************************************************
24273 +*******************************************************************************
24275 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
24276 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
24278 +** This copyrighted material is made available to anyone wishing to use,
24279 +** modify, copy, or redistribute it subject to the terms and conditions
24280 +** of the GNU General Public License v.2.
24282 +*******************************************************************************
24283 +******************************************************************************/
24285 +#include <linux/sched.h>
24286 +#include <linux/slab.h>
24287 +#include <linux/smp_lock.h>
24288 +#include <linux/spinlock.h>
24289 +#include <asm/semaphore.h>
24290 +#include <linux/completion.h>
24291 +#include <linux/buffer_head.h>
24292 +#include <linux/namei.h>
24293 +#include <linux/utsname.h>
24294 +#include <asm/uaccess.h>
24295 +#include <linux/xattr.h>
24296 +#include <linux/mm.h>
24297 +#include <linux/posix_acl.h>
24304 +#include "eattr.h"
24305 +#include "glock.h"
24306 +#include "inode.h"
24307 +#include "ops_dentry.h"
24308 +#include "ops_inode.h"
24310 +#include "quota.h"
24312 +#include "trans.h"
24313 +#include "unlinked.h"
24316 + * gfs_create - Create a file
24317 + * @dir: The directory in which to create the file
24318 + * @dentry: The dentry of the new file
24319 + * @mode: The mode of the new file
24321 + * Returns: 0 on success, -EXXXX on failure
24325 +gfs_create(struct inode *dir, struct dentry *dentry,
24326 + int mode, struct nameidata *nd)
24328 + struct gfs_inode *dip = vn2ip(dir), *ip;
24329 + struct gfs_sbd *sdp = dip->i_sbd;
24330 + struct gfs_holder d_gh, i_gh;
24331 + struct inode *inode;
24335 + atomic_inc(&sdp->sd_ops_inode);
24337 + gfs_unlinked_limit(sdp);
24339 + gfs_holder_init(dip->i_gl, 0, 0, &d_gh);
24342 + error = gfs_createi(&d_gh, &dentry->d_name,
24343 + GFS_FILE_REG, mode,
24347 + else if (error != -EEXIST) {
24348 + gfs_holder_uninit(&d_gh);
24352 + error = gfs_lookupi(&d_gh, &dentry->d_name,
24355 + if (i_gh.gh_gl) {
24360 + gfs_holder_uninit(&d_gh);
24365 + GFS_ASSERT_SBD(i_gh.gh_gl, sdp,);
24366 + ip = gl2ip(i_gh.gh_gl);
24369 + gfs_trans_end(sdp);
24370 + if (dip->i_alloc->al_rgd)
24371 + gfs_inplace_release(dip);
24372 + gfs_quota_unlock_m(dip);
24373 + gfs_unlinked_unlock(sdp, dip->i_alloc->al_ul);
24374 + gfs_alloc_put(dip);
24376 + ip->i_creat_task = current;
24377 + ip->i_creat_pid = current->pid;
24380 + gfs_glock_dq_uninit(&d_gh);
24381 + gfs_glock_dq_uninit(&i_gh);
24383 + inode = gfs_iget(ip, CREATE);
24384 + gfs_inode_put(ip);
24389 + d_instantiate(dentry, inode);
24391 + mark_inode_dirty(inode);
24397 + * lookup_cdpn_sub_at - Maybe lookup a Context Dependent Pathname
24398 + * @sdp: the filesystem
24399 + * @dentry: the original dentry to lookup
24400 + * @new_dentry: the new dentry, if this was a substitutable path.
24405 +lookup_cdpn_sub_at(struct gfs_sbd *sdp, struct dentry *dentry,
24406 + struct dentry **new_dentry)
24408 + struct dentry *parent = dget_parent(dentry);
24409 + char *buf = gmalloc(2 * __NEW_UTS_LEN + 2);
24411 + if (gfs_filecmp(&dentry->d_name, "@hostname", 9))
24412 + *new_dentry = lookup_one_len(system_utsname.nodename,
24414 + strlen(system_utsname.nodename));
24415 + else if (gfs_filecmp(&dentry->d_name, "@mach", 5))
24416 + *new_dentry = lookup_one_len(system_utsname.machine,
24418 + strlen(system_utsname.machine));
24419 + else if (gfs_filecmp(&dentry->d_name, "@os", 3))
24420 + *new_dentry = lookup_one_len(system_utsname.sysname,
24422 + strlen(system_utsname.sysname));
24423 + else if (gfs_filecmp(&dentry->d_name, "@uid", 4))
24424 + *new_dentry = lookup_one_len(buf,
24426 + sprintf(buf, "%u", current->fsuid));
24427 + else if (gfs_filecmp(&dentry->d_name, "@gid", 4))
24428 + *new_dentry = lookup_one_len(buf,
24430 + sprintf(buf, "%u", current->fsgid));
24431 + else if (gfs_filecmp(&dentry->d_name, "@sys", 4))
24432 + *new_dentry = lookup_one_len(buf,
24434 + sprintf(buf, "%s_%s",
24435 + system_utsname.machine,
24436 + system_utsname.sysname));
24437 + else if (gfs_filecmp(&dentry->d_name, "@jid", 4))
24438 + *new_dentry = lookup_one_len(buf,
24440 + sprintf(buf, "%u",
24441 + sdp->sd_lockstruct.ls_jid));
24448 + * lookup_cdpn_sub_brace - Maybe lookup a Context Dependent Pathname
24449 + * @sdp: the filesystem
24450 + * @dentry: the original dentry to lookup
24451 + * @new_dentry: the new dentry, if this was a substitutable path.
24456 +lookup_cdpn_sub_brace(struct gfs_sbd *sdp, struct dentry *dentry,
24457 + struct dentry **new_dentry)
24459 + struct dentry *parent = dget_parent(dentry);
24460 + char *buf = gmalloc(2 * __NEW_UTS_LEN + 2);
24462 + if (gfs_filecmp(&dentry->d_name, "{hostname}", 10))
24463 + *new_dentry = lookup_one_len(system_utsname.nodename,
24465 + strlen(system_utsname.nodename));
24466 + else if (gfs_filecmp(&dentry->d_name, "{mach}", 6))
24467 + *new_dentry = lookup_one_len(system_utsname.machine,
24469 + strlen(system_utsname.machine));
24470 + else if (gfs_filecmp(&dentry->d_name, "{os}", 4))
24471 + *new_dentry = lookup_one_len(system_utsname.sysname,
24473 + strlen(system_utsname.sysname));
24474 + else if (gfs_filecmp(&dentry->d_name, "{uid}", 5))
24475 + *new_dentry = lookup_one_len(buf,
24477 + sprintf(buf, "%u", current->fsuid));
24478 + else if (gfs_filecmp(&dentry->d_name, "{gid}", 5))
24479 + *new_dentry = lookup_one_len(buf,
24481 + sprintf(buf, "%u", current->fsgid));
24482 + else if (gfs_filecmp(&dentry->d_name, "{sys}", 5))
24483 + *new_dentry = lookup_one_len(buf,
24485 + sprintf(buf, "%s_%s",
24486 + system_utsname.machine,
24487 + system_utsname.sysname));
24488 + else if (gfs_filecmp(&dentry->d_name, "{jid}", 5))
24489 + *new_dentry = lookup_one_len(buf,
24491 + sprintf(buf, "%u",
24492 + sdp->sd_lockstruct.ls_jid));
24499 + * gfs_lookup - Look up a filename in a directory and return its inode
24500 + * @dir: The directory inode
24501 + * @dentry: The dentry of the new inode
24503 + * Called by the VFS layer. Lock dir and call gfs_lookupi()
24505 + * Returns: 0 on success, -EXXXX on failure
24508 +static struct dentry *
24509 +gfs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
24511 + struct gfs_inode *dip = vn2ip(dir), *ip;
24512 + struct gfs_holder d_gh, i_gh;
24513 + struct inode *inode = NULL;
24516 + atomic_inc(&dip->i_sbd->sd_ops_inode);
24518 + /* Do Context Dependent Path Name expansion */
24520 + if (*dentry->d_name.name == '@' && dentry->d_name.len > 1) {
24521 + struct dentry *new_dentry = NULL;
24522 + lookup_cdpn_sub_at(dip->i_sbd, dentry, &new_dentry);
24524 + return new_dentry;
24525 + } else if (*dentry->d_name.name == '{' && dentry->d_name.len > 2) {
24526 + struct dentry *new_dentry = NULL;
24527 + lookup_cdpn_sub_brace(dip->i_sbd, dentry, &new_dentry);
24529 + return new_dentry;
24532 + dentry->d_op = &gfs_dops;
24534 + gfs_holder_init(dip->i_gl, 0, 0, &d_gh);
24536 + error = gfs_lookupi(&d_gh, &dentry->d_name, FALSE, &i_gh);
24538 + gfs_holder_uninit(&d_gh);
24539 + return ERR_PTR(error);
24542 + if (i_gh.gh_gl) {
24543 + ip = gl2ip(i_gh.gh_gl);
24545 + gfs_glock_dq_uninit(&d_gh);
24546 + gfs_glock_dq_uninit(&i_gh);
24548 + inode = gfs_iget(ip, CREATE);
24549 + gfs_inode_put(ip);
24552 + return ERR_PTR(-ENOMEM);
24554 + gfs_holder_uninit(&d_gh);
24557 + return d_splice_alias(inode, dentry);
24558 + d_add(dentry, inode);
24563 + * gfs_link - Link to a file
24564 + * @old_dentry: The inode to link
24565 + * @dir: Add link to this directory
24566 + * @dentry: The name of the link
24568 + * Link the inode in "old_dentry" into the directory "dir" with the
24569 + * name in "dentry".
24571 + * Returns: 0 on success, -EXXXX on failure
24575 +gfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry)
24577 + struct gfs_inode *dip = vn2ip(dir);
24578 + struct gfs_sbd *sdp = dip->i_sbd;
24579 + struct inode *inode = old_dentry->d_inode;
24580 + struct gfs_inode *ip = vn2ip(inode);
24581 + struct gfs_alloc *al = NULL;
24582 + struct gfs_holder ghs[2];
24583 + int alloc_required;
24586 + atomic_inc(&sdp->sd_ops_inode);
24588 + if (ip->i_di.di_type == GFS_FILE_DIR)
24591 + gfs_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, &ghs[0]);
24592 + gfs_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &ghs[1]);
24594 + error = gfs_glock_nq_m(2, ghs);
24598 + error = permission(dir, MAY_WRITE | MAY_EXEC, NULL);
24600 + goto fail_gunlock;
24602 + error = gfs_dir_search(dip, &dentry->d_name, NULL, NULL);
24609 + goto fail_gunlock;
24612 + if (!dip->i_di.di_nlink) {
24614 + goto fail_gunlock;
24616 + if (dip->i_di.di_entries == (uint32_t)-1) {
24618 + goto fail_gunlock;
24620 + if (!ip->i_di.di_nlink) {
24622 + goto fail_gunlock;
24624 + if (ip->i_di.di_nlink == (uint32_t)-1) {
24626 + goto fail_gunlock;
24629 + error = gfs_diradd_alloc_required(dip, &dentry->d_name, &alloc_required);
24631 + goto fail_gunlock;
24633 + if (alloc_required) {
24634 + al = gfs_alloc_get(dip);
24636 + error = gfs_quota_lock_m(dip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
24640 + error = gfs_quota_check(dip, dip->i_di.di_uid, dip->i_di.di_gid);
24642 + goto fail_gunlock_q;
24644 + al->al_requested_meta = sdp->sd_max_dirres;
24646 + error = gfs_inplace_reserve(dip);
24648 + goto fail_gunlock_q;
24650 + /* Trans may require:
24651 + two dinode blocks, directory modifications to add an entry,
24652 + RG bitmap blocks to allocate from, and quota change */
24654 + error = gfs_trans_begin(sdp,
24655 + 2 + sdp->sd_max_dirres +
24656 + al->al_rgd->rd_ri.ri_length,
24661 + /* Trans may require:
24662 + Two dinode blocks and a leaf block. */
24664 + error = gfs_trans_begin(sdp, 3, 0);
24669 + error = gfs_dir_add(dip, &dentry->d_name, &ip->i_num, ip->i_di.di_type);
24671 + goto fail_end_trans;
24673 + error = gfs_change_nlink(ip, +1);
24675 + goto fail_end_trans;
24677 + gfs_trans_end(sdp);
24679 + if (alloc_required) {
24680 + GFS_ASSERT_INODE(al->al_alloced_meta, dip,);
24681 + gfs_inplace_release(dip);
24682 + gfs_quota_unlock_m(dip);
24683 + gfs_alloc_put(dip);
24686 + gfs_glock_dq_m(2, ghs);
24688 + gfs_holder_uninit(&ghs[0]);
24689 + gfs_holder_uninit(&ghs[1]);
24691 + atomic_inc(&inode->i_count);
24693 + d_instantiate(dentry, inode);
24694 + mark_inode_dirty(inode);
24699 + gfs_trans_end(sdp);
24702 + if (alloc_required)
24703 + gfs_inplace_release(dip);
24706 + if (alloc_required)
24707 + gfs_quota_unlock_m(dip);
24710 + if (alloc_required)
24711 + gfs_alloc_put(dip);
24714 + gfs_glock_dq_m(2, ghs);
24717 + gfs_holder_uninit(&ghs[0]);
24718 + gfs_holder_uninit(&ghs[1]);
24724 + * gfs_unlink - Unlink a file
24725 + * @dir: The inode of the directory containing the file to unlink
24726 + * @dentry: The file itself
24728 + * Unlink a file. Call gfs_unlinki()
24730 + * Returns: 0 on success, -EXXXX on failure
24734 +gfs_unlink(struct inode *dir, struct dentry *dentry)
24736 + struct gfs_inode *dip = vn2ip(dir);
24737 + struct gfs_sbd *sdp = dip->i_sbd;
24738 + struct gfs_inode *ip = vn2ip(dentry->d_inode);
24739 + struct gfs_holder ghs[2];
24742 + atomic_inc(&sdp->sd_ops_inode);
24744 + gfs_unlinked_limit(sdp);
24746 + gfs_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, &ghs[0]);
24747 + gfs_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &ghs[1]);
24749 + error = gfs_glock_nq_m(2, ghs);
24753 + error = permission(dir, MAY_WRITE | MAY_EXEC, NULL);
24755 + goto fail_gunlock;
24757 + if ((dip->i_di.di_mode & S_ISVTX) &&
24758 + dip->i_di.di_uid != current->fsuid &&
24759 + ip->i_di.di_uid != current->fsuid &&
24760 + !capable(CAP_FOWNER)) {
24762 + goto fail_gunlock;
24765 + error = gfs_revalidate(dip, &dentry->d_name, ip);
24767 + goto fail_gunlock;
24769 + /* Trans may require:
24770 + Two dinode blocks and one modified directory leaf block
24771 + and one unlinked tag. */
24773 + error = gfs_trans_begin(sdp, 3, 1);
24775 + goto fail_gunlock;
24777 + error = gfs_unlinki(dip, &dentry->d_name, ip);
24779 + goto fail_end_trans;
24781 + gfs_trans_end(sdp);
24783 + gfs_glock_dq_m(2, ghs);
24785 + gfs_holder_uninit(&ghs[0]);
24786 + gfs_holder_uninit(&ghs[1]);
24791 + gfs_trans_end(sdp);
24794 + gfs_glock_dq_m(2, ghs);
24797 + gfs_holder_uninit(&ghs[0]);
24798 + gfs_holder_uninit(&ghs[1]);
24804 + * gfs_symlink - Create a symlink
24805 + * @dir: The directory to create the symlink in
24806 + * @dentry: The dentry to put the symlink in
24807 + * @symname: The thing which the link points to
24809 + * Returns: 0 on success, -EXXXX on failure
24813 +gfs_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
24815 + struct gfs_inode *dip = vn2ip(dir), *ip;
24816 + struct gfs_sbd *sdp = dip->i_sbd;
24817 + struct gfs_holder d_gh, i_gh;
24818 + struct inode *inode;
24819 + struct buffer_head *dibh;
24823 + atomic_inc(&sdp->sd_ops_inode);
24825 + gfs_unlinked_limit(sdp);
24827 + /* Must be stuffed with a null terminator for gfs_follow_link() */
24828 + size = strlen(symname);
24829 + if (size > sdp->sd_sb.sb_bsize - sizeof(struct gfs_dinode) - 1)
24830 + return -ENAMETOOLONG;
24832 + gfs_holder_init(dip->i_gl, 0, 0, &d_gh);
24834 + error = gfs_createi(&d_gh, &dentry->d_name,
24835 + GFS_FILE_LNK, 0777,
24838 + gfs_holder_uninit(&d_gh);
24842 + GFS_ASSERT_SBD(i_gh.gh_gl, sdp,);
24843 + ip = gl2ip(i_gh.gh_gl);
24845 + ip->i_di.di_size = size;
24847 + error = gfs_get_inode_buffer(ip, &dibh);
24848 + GFS_ASSERT_INODE(!error, ip,);
24850 + gfs_dinode_out(&ip->i_di, dibh->b_data);
24851 + memcpy(dibh->b_data + sizeof(struct gfs_dinode), symname, size);
24855 + gfs_trans_end(sdp);
24856 + if (dip->i_alloc->al_rgd)
24857 + gfs_inplace_release(dip);
24858 + gfs_quota_unlock_m(dip);
24859 + gfs_unlinked_unlock(sdp, dip->i_alloc->al_ul);
24860 + gfs_alloc_put(dip);
24862 + gfs_glock_dq_uninit(&d_gh);
24863 + gfs_glock_dq_uninit(&i_gh);
24865 + inode = gfs_iget(ip, CREATE);
24866 + gfs_inode_put(ip);
24871 + d_instantiate(dentry, inode);
24872 + mark_inode_dirty(inode);
24878 + * gfs_mkdir - Make a directory
24879 + * @dir: The parent directory of the new one
24880 + * @dentry: The dentry of the new directory
24881 + * @mode: The mode of the new directory
24883 + * Returns: 0 on success, -EXXXX on failure
24887 +gfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
24889 + struct gfs_inode *dip = vn2ip(dir), *ip;
24890 + struct gfs_sbd *sdp = dip->i_sbd;
24891 + struct gfs_holder d_gh, i_gh;
24892 + struct inode *inode;
24893 + struct buffer_head *dibh;
24894 + struct gfs_dinode *di;
24895 + struct gfs_dirent *dent;
24898 + atomic_inc(&sdp->sd_ops_inode);
24900 + gfs_unlinked_limit(sdp);
24902 + gfs_holder_init(dip->i_gl, 0, 0, &d_gh);
24904 + error = gfs_createi(&d_gh, &dentry->d_name,
24905 + GFS_FILE_DIR, mode,
24908 + gfs_holder_uninit(&d_gh);
24912 + GFS_ASSERT_SBD(i_gh.gh_gl, sdp,);
24913 + ip = gl2ip(i_gh.gh_gl);
24915 + ip->i_di.di_nlink = 2;
24916 + ip->i_di.di_size = sdp->sd_sb.sb_bsize - sizeof(struct gfs_dinode);
24917 + ip->i_di.di_flags |= GFS_DIF_JDATA;
24918 + ip->i_di.di_payload_format = GFS_FORMAT_DE;
24919 + ip->i_di.di_entries = 2;
24921 + error = gfs_get_inode_buffer(ip, &dibh);
24922 + GFS_ASSERT_INODE(!error, ip,);
24924 + di = (struct gfs_dinode *)dibh->b_data;
24926 + error = gfs_dirent_alloc(ip, dibh, 1, &dent);
24927 + GFS_ASSERT_INODE(!error, ip,); /* This should never fail */
24929 + dent->de_inum = di->di_num; /* already GFS endian */
24930 + dent->de_hash = gfs_dir_hash(".", 1);
24931 + dent->de_hash = cpu_to_gfs32(dent->de_hash);
24932 + dent->de_type = cpu_to_gfs16(GFS_FILE_DIR);
24933 + memcpy((char *) (dent + 1), ".", 1);
24934 + di->di_entries = cpu_to_gfs32(1);
24936 + error = gfs_dirent_alloc(ip, dibh, 2, &dent);
24937 + GFS_ASSERT_INODE(!error, ip,); /* This should never fail */
24939 + gfs_inum_out(&dip->i_num, (char *) &dent->de_inum);
24940 + dent->de_hash = gfs_dir_hash("..", 2);
24941 + dent->de_hash = cpu_to_gfs32(dent->de_hash);
24942 + dent->de_type = cpu_to_gfs16(GFS_FILE_DIR);
24943 + memcpy((char *) (dent + 1), "..", 2);
24945 + gfs_dinode_out(&ip->i_di, (char *)di);
24949 + error = gfs_change_nlink(dip, +1);
24950 + GFS_ASSERT_INODE(!error, dip,); /* dip already pinned */
24952 + gfs_trans_end(sdp);
24953 + if (dip->i_alloc->al_rgd)
24954 + gfs_inplace_release(dip);
24955 + gfs_quota_unlock_m(dip);
24956 + gfs_unlinked_unlock(sdp, dip->i_alloc->al_ul);
24957 + gfs_alloc_put(dip);
24959 + gfs_glock_dq_uninit(&d_gh);
24960 + gfs_glock_dq_uninit(&i_gh);
24962 + inode = gfs_iget(ip, CREATE);
24963 + gfs_inode_put(ip);
24968 + d_instantiate(dentry, inode);
24969 + mark_inode_dirty(inode);
24975 + * gfs_rmdir - Remove a directory
24976 + * @dir: The parent directory of the directory to be removed
24977 + * @dentry: The dentry of the directory to remove
24979 + * Remove a directory. Call gfs_rmdiri()
24981 + * Returns: 0 on success, -EXXXX on failure
24985 +gfs_rmdir(struct inode *dir, struct dentry *dentry)
24987 + struct gfs_inode *dip = vn2ip(dir);
24988 + struct gfs_sbd *sdp = dip->i_sbd;
24989 + struct gfs_inode *ip = vn2ip(dentry->d_inode);
24990 + struct gfs_holder ghs[2];
24993 + atomic_inc(&sdp->sd_ops_inode);
24995 + gfs_unlinked_limit(sdp);
24997 + gfs_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, &ghs[0]);
24998 + gfs_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &ghs[1]);
25000 + error = gfs_glock_nq_m(2, ghs);
25004 + error = permission(dir, MAY_WRITE | MAY_EXEC, NULL);
25006 + goto fail_gunlock;
25008 + if ((dip->i_di.di_mode & S_ISVTX) &&
25009 + dip->i_di.di_uid != current->fsuid &&
25010 + ip->i_di.di_uid != current->fsuid &&
25011 + !capable(CAP_FOWNER)) {
25013 + goto fail_gunlock;
25016 + error = gfs_revalidate(dip, &dentry->d_name, ip);
25018 + goto fail_gunlock;
25020 + GFS_ASSERT_INODE(ip->i_di.di_entries >= 2, ip,
25021 + gfs_dinode_print(&ip->i_di););
25023 + if (ip->i_di.di_entries > 2) {
25024 + error = -ENOTEMPTY;
25025 + goto fail_gunlock;
25028 + /* Trans may require:
25029 + Two dinode blocks, one directory leaf block containing the
25030 + entry to be rmdired, two leaf blocks containing . and .. of
25031 + the directory being rmdired, and one unlinked tag */
25033 + error = gfs_trans_begin(sdp, 5, 1);
25035 + goto fail_gunlock;
25037 + error = gfs_rmdiri(dip, &dentry->d_name, ip);
25039 + goto fail_end_trans;
25041 + gfs_trans_end(sdp);
25043 + gfs_glock_dq_m(2, ghs);
25045 + gfs_holder_uninit(&ghs[0]);
25046 + gfs_holder_uninit(&ghs[1]);
25051 + gfs_trans_end(sdp);
25054 + gfs_glock_dq_m(2, ghs);
25057 + gfs_holder_uninit(&ghs[0]);
25058 + gfs_holder_uninit(&ghs[1]);
25064 + * gfs_mknod - Make a special file
25065 + * @dir: The directory in which the special file will reside
25066 + * @dentry: The dentry of the special file
25067 + * @mode: The mode of the special file
25068 + * @rdev: The device specification of the special file
25073 +gfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev)
25075 + struct gfs_inode *dip = vn2ip(dir), *ip;
25076 + struct gfs_sbd *sdp = dip->i_sbd;
25077 + struct gfs_holder d_gh, i_gh;
25078 + struct inode *inode;
25079 + struct buffer_head *dibh;
25080 + uint16_t type = 0;
25081 + uint32_t major = 0, minor = 0;
25084 + atomic_inc(&sdp->sd_ops_inode);
25086 + gfs_unlinked_limit(sdp);
25088 + switch (mode & S_IFMT) {
25090 + type = GFS_FILE_BLK;
25091 + major = MAJOR(dev);
25092 + minor = MINOR(dev);
25095 + type = GFS_FILE_CHR;
25096 + major = MAJOR(dev);
25097 + minor = MINOR(dev);
25100 + type = GFS_FILE_FIFO;
25103 + type = GFS_FILE_SOCK;
25106 + GFS_ASSERT_SBD(FALSE, sdp,
25107 + printk("mode = %d\n", mode););
25111 + gfs_holder_init(dip->i_gl, 0, 0, &d_gh);
25113 + error = gfs_createi(&d_gh, &dentry->d_name,
25117 + gfs_holder_uninit(&d_gh);
25121 + GFS_ASSERT_SBD(i_gh.gh_gl, sdp,);
25122 + ip = gl2ip(i_gh.gh_gl);
25124 + ip->i_di.di_major = major;
25125 + ip->i_di.di_minor = minor;
25127 + error = gfs_get_inode_buffer(ip, &dibh);
25128 + GFS_ASSERT_INODE(!error, ip,);
25130 + gfs_dinode_out(&ip->i_di, dibh->b_data);
25134 + gfs_trans_end(sdp);
25135 + if (dip->i_alloc->al_rgd)
25136 + gfs_inplace_release(dip);
25137 + gfs_quota_unlock_m(dip);
25138 + gfs_unlinked_unlock(sdp, dip->i_alloc->al_ul);
25139 + gfs_alloc_put(dip);
25141 + gfs_glock_dq_uninit(&d_gh);
25142 + gfs_glock_dq_uninit(&i_gh);
25144 + inode = gfs_iget(ip, CREATE);
25145 + gfs_inode_put(ip);
25150 + d_instantiate(dentry, inode);
25151 + mark_inode_dirty(inode);
25157 + * gfs_rename - Rename a file
25158 + * @odir: Parent directory of old file name
25159 + * @odentry: The old dentry of the file
25160 + * @ndir: Parent directory of new file name
25161 + * @ndentry: The new dentry of the file
25163 + * Returns: 0 on success, -EXXXX on failure
25167 +gfs_rename(struct inode *odir, struct dentry *odentry,
25168 + struct inode *ndir, struct dentry *ndentry)
25170 + struct gfs_inode *odip = vn2ip(odir);
25171 + struct gfs_inode *ndip = vn2ip(ndir);
25172 + struct gfs_inode *ip = vn2ip(odentry->d_inode);
25173 + struct gfs_inode *nip = NULL;
25174 + struct gfs_sbd *sdp = odip->i_sbd;
25175 + struct qstr name;
25176 + struct gfs_alloc *al;
25177 + struct gfs_holder ghs[4], r_gh;
25178 + unsigned int num_gh;
25179 + int dir_rename = FALSE;
25180 + int alloc_required;
25184 + atomic_inc(&sdp->sd_ops_inode);
25186 + gfs_unlinked_limit(sdp);
25188 + if (ndentry->d_inode) {
25189 + nip = vn2ip(ndentry->d_inode);
25194 +	/* Make sure we aren't trying to move a directory into its subdir */
25196 + if (ip->i_di.di_type == GFS_FILE_DIR && odip != ndip) {
25197 + dir_rename = TRUE;
25199 + error = gfs_glock_nq_init(sdp->sd_rename_gl,
25200 + LM_ST_EXCLUSIVE, 0,
25205 + error = gfs_ok_to_move(ip, ndip);
25210 + gfs_holder_init(odip->i_gl, LM_ST_EXCLUSIVE, 0, &ghs[0]);
25211 + gfs_holder_init(ndip->i_gl, LM_ST_EXCLUSIVE, 0, &ghs[1]);
25215 + gfs_holder_init(nip->i_gl, LM_ST_EXCLUSIVE, 0, &ghs[num_gh++]);
25218 + gfs_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &ghs[num_gh++]);
25220 + error = gfs_glock_nq_m(num_gh, ghs);
25222 + goto fail_uninit;
25224 + /* Check out the old directory */
25226 + error = permission(odir, MAY_WRITE | MAY_EXEC, NULL);
25228 + goto fail_gunlock;
25230 + if ((odip->i_di.di_mode & S_ISVTX) &&
25231 + odip->i_di.di_uid != current->fsuid &&
25232 + ip->i_di.di_uid != current->fsuid &&
25233 + !capable(CAP_FOWNER)) {
25235 + goto fail_gunlock;
25238 + error = gfs_revalidate(odip, &odentry->d_name, ip);
25240 + goto fail_gunlock;
25242 + /* Check out the new directory */
25244 + error = permission(ndir, MAY_WRITE | MAY_EXEC, NULL);
25246 + goto fail_gunlock;
25249 + if ((ndip->i_di.di_mode & S_ISVTX) &&
25250 + ndip->i_di.di_uid != current->fsuid &&
25251 + nip->i_di.di_uid != current->fsuid &&
25252 + !capable(CAP_FOWNER)) {
25254 + goto fail_gunlock;
25257 + error = gfs_revalidate(ndip, &ndentry->d_name, nip);
25259 + goto fail_gunlock;
25261 + if (nip->i_di.di_type == GFS_FILE_DIR) {
25262 + GFS_ASSERT_INODE(nip->i_di.di_entries >= 2, ip,
25263 + gfs_dinode_print(&nip->i_di););
25264 + if (nip->i_di.di_entries > 2) {
25265 + error = -ENOTEMPTY;
25266 + goto fail_gunlock;
25270 + error = gfs_dir_search(ndip, &ndentry->d_name, NULL, NULL);
25278 + goto fail_gunlock;
25281 + if (odip != ndip) {
25282 + if (!ndip->i_di.di_nlink) {
25284 + goto fail_gunlock;
25286 + if (ndip->i_di.di_entries == (uint32_t)-1) {
25288 + goto fail_gunlock;
25290 + if (ip->i_di.di_type == GFS_FILE_DIR &&
25291 + ndip->i_di.di_nlink == (uint32_t)-1) {
25293 + goto fail_gunlock;
25298 + error = gfs_diradd_alloc_required(ndip, &ndentry->d_name, &alloc_required);
25300 + goto fail_gunlock;
25302 + if (alloc_required) {
25303 + al = gfs_alloc_get(ndip);
25305 + error = gfs_quota_lock_m(ndip,
25306 + NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
25310 + error = gfs_quota_check(ndip, ndip->i_di.di_uid, ndip->i_di.di_gid);
25312 + goto fail_gunlock_q;
25314 + al->al_requested_meta = sdp->sd_max_dirres;
25316 + error = gfs_inplace_reserve(ndip);
25318 + goto fail_gunlock_q;
25320 + /* Trans may require:
25321 + Dinodes for the srcdir, srcino, dstdir, dstino. Blocks for
25322 + adding the entry to dstdir. RG bitmaps for that allocation.
25323 + One leaf block in the srcdir for removal of the entry.
25324 + One leaf block for changing .. in srcino (if it's a directory).
25325 + Two leaf blocks for removing . and .. from dstino (if it exists
25326 + and it's a directory), one unlinked tag, and one quota block. */
25328 + error = gfs_trans_begin(sdp,
25329 + 8 + sdp->sd_max_dirres +
25330 + al->al_rgd->rd_ri.ri_length,
25335 + /* Trans may require:
25336 + Dinodes for the srcdir, srcino, dstdir, dstino. One block for
25337 + adding the entry to dstdir.
25338 + One leaf block in the srcdir for removal of the entry.
25339 + One leaf block for changing .. in srcino (if it's a directory).
25340 + Two leaf blocks for removing . and .. from dstino (if it exists
25341 + and it's a directory), and one unlinked tag. */
25343 + error = gfs_trans_begin(sdp, 9, 1);
25348 + /* Remove the target file, if it exists */
25351 + if (nip->i_di.di_type == GFS_FILE_DIR)
25352 + error = gfs_rmdiri(ndip, &ndentry->d_name, nip);
25354 + error = gfs_unlinki(ndip, &ndentry->d_name, nip);
25357 + goto fail_end_trans;
25360 + if (dir_rename) {
25361 + error = gfs_change_nlink(ndip, +1);
25363 + goto fail_end_trans;
25364 + error = gfs_change_nlink(odip, -1);
25366 + goto fail_end_trans;
25369 + name.name = "..";
25371 + error = gfs_dir_mvino(ip, &name, &ndip->i_num, GFS_FILE_DIR);
25373 + goto fail_end_trans;
25376 + error = gfs_dir_del(odip, &odentry->d_name);
25378 + goto fail_end_trans;
25380 + error = gfs_dir_add(ndip, &ndentry->d_name, &ip->i_num, ip->i_di.di_type);
25382 + goto fail_end_trans;
25385 + gfs_trans_add_gl(sdp->sd_rename_gl);
25387 + gfs_trans_end(sdp);
25389 + if (alloc_required) {
25390 + /* Don't check al->al_alloced_meta and friends. */
25391 + gfs_inplace_release(ndip);
25392 + gfs_quota_unlock_m(ndip);
25393 + gfs_alloc_put(ndip);
25396 + gfs_glock_dq_m(num_gh, ghs);
25398 + for (x = 0; x < num_gh; x++)
25399 + gfs_holder_uninit(&ghs[x]);
25402 + gfs_glock_dq_uninit(&r_gh);
25407 + gfs_trans_end(sdp);
25410 + if (alloc_required)
25411 + gfs_inplace_release(ndip);
25414 + if (alloc_required)
25415 + gfs_quota_unlock_m(ndip);
25418 + if (alloc_required)
25419 + gfs_alloc_put(ndip);
25422 + gfs_glock_dq_m(num_gh, ghs);
25425 + for (x = 0; x < num_gh; x++)
25426 + gfs_holder_uninit(&ghs[x]);
25430 + gfs_glock_dq_uninit(&r_gh);
25436 + * gfs_readlink - Read the value of a symlink
25437 + * @dentry: the symlink
25438 + * @buf: the buffer to read the symlink data into
25439 + * @size: the size of the buffer
25441 + * Returns: 0 on success, -EXXX on failure
25445 +gfs_readlink(struct dentry *dentry, char *user_buf, int user_size)
25447 + struct gfs_inode *ip = vn2ip(dentry->d_inode);
25448 + char array[GFS_FAST_NAME_SIZE], *buf = array;
25449 + unsigned int len = GFS_FAST_NAME_SIZE;
25452 + atomic_inc(&ip->i_sbd->sd_ops_inode);
25454 + error = gfs_readlinki(ip, &buf, &len);
25458 + GFS_ASSERT_INODE(len, ip,);
25460 + if (user_size > len - 1)
25461 + user_size = len - 1;
25463 + if (copy_to_user(user_buf, buf, user_size))
25466 + error = user_size;
25468 + if (buf != array)
25475 + * gfs_follow_link - Follow a symbolic link
25476 + * @dentry: The dentry of the link
25477 + * @nd: Data that we pass to vfs_follow_link()
25479 + * This can handle symlinks of any size. It is optimised for symlinks
25480 + * under GFS_FAST_NAME_SIZE.
25482 + * Returns: 0 on success or error code
25486 +gfs_follow_link(struct dentry *dentry, struct nameidata *nd)
25488 + struct gfs_inode *ip = vn2ip(dentry->d_inode);
25489 + char array[GFS_FAST_NAME_SIZE], *buf = array;
25490 + unsigned int len = GFS_FAST_NAME_SIZE;
25493 + atomic_inc(&ip->i_sbd->sd_ops_inode);
25495 + error = gfs_readlinki(ip, &buf, &len);
25497 + error = vfs_follow_link(nd, buf);
25498 + if (buf != array)
25506 + * gfs_permission -
25515 +gfs_permission(struct inode *inode, int mask, struct nameidata *nd)
25517 + struct gfs_inode *ip = vn2ip(inode);
25518 + struct gfs_holder i_gh;
25519 + struct posix_acl *acl;
25520 + umode_t mode = inode->i_mode;
25523 + atomic_inc(&ip->i_sbd->sd_ops_inode);
25525 + error = gfs_glock_nq_init(ip->i_gl,
25526 + LM_ST_SHARED, LM_FLAG_ANY,
25531 + if (mask & MAY_WRITE) {
25532 + if (IS_RDONLY(inode) &&
25533 + (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode))) {
25537 + if (IS_IMMUTABLE(inode)) {
25543 + if (capable(CAP_DAC_OVERRIDE))
25544 + if (!(mask & MAY_EXEC) || (mode & S_IXUGO))
25547 + if (capable(CAP_DAC_READ_SEARCH) &&
25548 + (mask == MAY_READ ||
25549 + (!(mask & MAY_WRITE) && S_ISDIR(mode))))
25552 + if (inode->i_uid == current->fsuid) {
25553 + if ((mask & (mode >> 6)) != mask)
25558 + if ((mask & (mode >> 3)) == mask) {
25559 + error = gfs_getacl(inode, TRUE, &acl);
25561 + error = posix_acl_permission(inode, acl, mask);
25563 + } else if (error && error != -ENODATA)
25566 + if (in_group_p(inode->i_gid)) {
25570 + } else if (in_group_p(inode->i_gid)) {
25575 + if ((mask & mode) == mask)
25581 + gfs_glock_dq_uninit(&i_gh);
25587 + * gfs_setattr - Change attributes on an inode
25588 + * @dentry: The dentry which is changing
25589 + * @attr: The structure describing the change
25591 + * The VFS layer wants to change one or more of an inode's attributes.  Write
25592 + * that change out to disk.
25594 + * Returns: 0 on success, -EXXXX on failure
25598 +gfs_setattr(struct dentry *dentry, struct iattr *attr)
25600 + struct inode *inode = dentry->d_inode;
25601 + struct gfs_inode *ip = vn2ip(inode);
25602 + struct gfs_sbd *sdp = ip->i_sbd;
25603 + struct gfs_holder i_gh;
25604 + struct gfs_alloc *al;
25605 + struct buffer_head *dibh;
25606 + uint32_t ouid, ogid, nuid, ngid;
25609 + atomic_inc(&sdp->sd_ops_inode);
25611 + error = gfs_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &i_gh);
25615 + error = inode_change_ok(inode, attr);
25619 + if (attr->ia_valid & ATTR_SIZE) {
25620 + error = permission(inode, MAY_WRITE, NULL);
25624 + if (attr->ia_size != ip->i_di.di_size) {
25625 + error = vmtruncate(inode, attr->ia_size);
25630 + error = gfs_truncatei(ip, attr->ia_size, gfs_truncator_page);
25634 + if ((sdp->sd_vfs->s_flags & MS_SYNCHRONOUS) &&
25635 + !gfs_is_jdata(ip))
25636 + i_gh.gh_flags |= GL_SYNC;
25639 + else if (attr->ia_valid & (ATTR_UID | ATTR_GID)) {
25640 + ouid = ip->i_di.di_uid;
25641 + ogid = ip->i_di.di_gid;
25642 + nuid = attr->ia_uid;
25643 + ngid = attr->ia_gid;
25645 + if (!(attr->ia_valid & ATTR_UID) || ouid == nuid)
25646 + ouid = nuid = NO_QUOTA_CHANGE;
25647 + if (!(attr->ia_valid & ATTR_GID) || ogid == ngid)
25648 + ogid = ngid = NO_QUOTA_CHANGE;
25650 + al = gfs_alloc_get(ip);
25652 + error = gfs_quota_lock_m(ip, nuid, ngid);
25656 + if (ouid != NO_QUOTA_CHANGE || ogid != NO_QUOTA_CHANGE) {
25657 + error = gfs_quota_check(ip, nuid, ngid);
25659 + goto fail_gunlock_q;
25662 + /* Trans may require:
25663 + one dinode block and one quota change block */
25665 + error = gfs_trans_begin(sdp, 1, 1);
25667 + goto fail_gunlock_q;
25669 + error = gfs_get_inode_buffer(ip, &dibh);
25671 + goto fail_end_trans;
25673 + if (ouid != NO_QUOTA_CHANGE || ogid != NO_QUOTA_CHANGE) {
25674 + gfs_trans_add_quota(sdp, -ip->i_di.di_blocks,
25676 + gfs_trans_add_quota(sdp, ip->i_di.di_blocks,
25680 + inode_setattr(inode, attr);
25681 + gfs_inode_attr_out(ip);
25683 + gfs_trans_add_bh(ip->i_gl, dibh);
25684 + gfs_dinode_out(&ip->i_di, dibh->b_data);
25687 + gfs_trans_end(sdp);
25689 + gfs_quota_unlock_m(ip);
25690 + gfs_alloc_put(ip);
25694 + /* Trans may require:
25695 + one dinode block plus changes for acl. */
25697 + error = gfs_trans_begin(sdp,
25698 + 1 + GFS_MAX_EA_ACL_BLKS, 0);
25702 + error = gfs_get_inode_buffer(ip, &dibh);
25704 + inode_setattr(inode, attr);
25705 + gfs_inode_attr_out(ip);
25707 + if (attr->ia_valid & ATTR_MODE)
25708 + error = gfs_acl_setattr(inode);
25710 + gfs_trans_add_bh(ip->i_gl, dibh);
25711 + gfs_dinode_out(&ip->i_di, dibh->b_data);
25715 + gfs_trans_end(sdp);
25718 + gfs_glock_dq_uninit(&i_gh);
25720 + mark_inode_dirty(inode);
25725 + gfs_trans_end(sdp);
25728 + gfs_quota_unlock_m(ip);
25731 + gfs_alloc_put(ip);
25734 + gfs_glock_dq_uninit(&i_gh);
25740 + * gfs_getattr - Read out an inode's attributes
25742 + * @dentry: The dentry to stat
25743 + * @stat: The inode's stats
25745 + * Returns: 0 on success, -EXXXX on failure
25749 +gfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
25751 + struct inode *inode = dentry->d_inode;
25752 + struct gfs_inode *ip = vn2ip(inode);
25753 + struct gfs_holder gh;
25756 + atomic_inc(&ip->i_sbd->sd_ops_inode);
25758 + error = gfs_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &gh);
25761 + generic_fillattr(inode, stat);
25762 + gfs_glock_dq_uninit(&gh);
25769 + * get_eatype - get the type of the ea, and truncate the type from the name
25770 + * @name: ea name, possibly with type appended
25772 + * Returns: GFS_EATYPE_XXX
25776 +get_eatype(const char *name, char **truncated_name)
25780 + if (strncmp(name, "system.", 7) == 0) {
25781 + type = GFS_EATYPE_SYS;
25782 + *truncated_name = strchr(name, '.') + 1;
25783 + } else if (strncmp(name, "user.", 5) == 0) {
25784 + type = GFS_EATYPE_USR;
25785 + *truncated_name = strchr(name, '.') + 1;
25787 + type = GFS_EATYPE_UNUSED;
25788 + *truncated_name = NULL;
25795 + * gfs_setxattr - Set (or create or replace) an inode's extended attribute
25796 + * @dentry: inode's dentry
25797 + * @name: name of the extended attribute
25798 + * @data: the value of the extended attribute
25799 + * @size: the size of data
25800 + * @flags: used to specify create or replace actions
25802 + * Returns: 0 on success, -EXXX on error
25806 +gfs_setxattr(struct dentry *dentry, const char *name,
25807 + const void *data, size_t size,
25810 + struct inode *inode = dentry->d_inode;
25811 + struct gfs_inode *ip = vn2ip(inode);
25812 + struct gfs_sbd *sdp = ip->i_sbd;
25813 + struct gfs_easet_io req;
25814 + char *truncated_name;
25817 + atomic_inc(&sdp->sd_ops_inode);
25819 + req.es_type = get_eatype(name, &truncated_name);
25821 + if (req.es_type == GFS_EATYPE_UNUSED)
25822 + error = -EOPNOTSUPP;
25824 + req.es_data = data;
25825 + req.es_name = truncated_name;
25826 + req.es_data_len = size;
25827 + req.es_name_len = strlen(truncated_name);
25828 + if (flags & XATTR_CREATE)
25829 + req.es_cmd = GFS_EACMD_CREATE;
25830 + else if (flags & XATTR_REPLACE)
25831 + req.es_cmd = GFS_EACMD_REPLACE;
25833 + req.es_cmd = GFS_EACMD_SET;
25834 + error = gfs_set_eattr(sdp, ip, &req);
25847 + * Returns: 0 on success, -EXXX on error
25851 +gfs_getxattr(struct dentry *dentry, const char *name,
25852 + void *data, size_t size)
25854 + struct inode *inode = dentry->d_inode;
25855 + struct gfs_inode *ip = vn2ip(inode);
25856 + struct gfs_sbd *sdp = ip->i_sbd;
25857 + struct gfs_eaget_io req;
25858 + char *truncated_name;
25861 + atomic_inc(&sdp->sd_ops_inode);
25863 + req.eg_type = get_eatype(name, &truncated_name);
25865 + if (req.eg_type == GFS_EATYPE_UNUSED)
25866 + error = -EOPNOTSUPP;
25868 + req.eg_name = truncated_name;
25869 + req.eg_name_len = strlen(truncated_name);
25870 + req.eg_data = data;
25871 + req.eg_data_len = size;
25872 + req.eg_len = NULL;
25873 + error = gfs_get_eattr(sdp, ip, &req, gfs_ea_memcpy);
25880 + * gfs_listxattr -
25885 + * Returns: 0 on success, -EXXX on error
25889 +gfs_listxattr(struct dentry *dentry, char *buffer, size_t size)
25891 + struct inode *inode = dentry->d_inode;
25892 + struct gfs_inode *ip = vn2ip(inode);
25893 + struct gfs_sbd *sdp = ip->i_sbd;
25894 + struct gfs_eaget_io req;
25896 + atomic_inc(&sdp->sd_ops_inode);
25899 + req.eg_name = NULL;
25900 + req.eg_name_len = 0;
25901 + req.eg_data = buffer;
25902 + req.eg_data_len = size;
25903 + req.eg_len = NULL;
25905 + return gfs_get_eattr(sdp, ip, &req, gfs_ea_memcpy);
25909 + * gfs_removexattr -
25913 + * Returns: 0 on success, -EXXX on error
25917 +gfs_removexattr(struct dentry *dentry, const char *name)
25919 + struct inode *inode = dentry->d_inode;
25920 + struct gfs_inode *ip = vn2ip(inode);
25921 + struct gfs_sbd *sdp = ip->i_sbd;
25922 + struct gfs_easet_io req;
25923 + char *truncated_name;
25926 + atomic_inc(&sdp->sd_ops_inode);
25928 + req.es_type = get_eatype(name, &truncated_name);
25930 + if (req.es_type == GFS_EATYPE_UNUSED)
25931 + error = -EOPNOTSUPP;
25933 + req.es_name = truncated_name;
25934 + req.es_data = NULL;
25935 + req.es_data_len = 0;
25936 + req.es_name_len = strlen(truncated_name);
25937 + req.es_cmd = GFS_EACMD_REMOVE;
25938 + error = gfs_set_eattr(sdp, ip, &req);
25944 +struct inode_operations gfs_file_iops = {
25945 + .permission = gfs_permission,
25946 + .setattr = gfs_setattr,
25947 + .getattr = gfs_getattr,
25948 + .setxattr = gfs_setxattr,
25949 + .getxattr = gfs_getxattr,
25950 + .listxattr = gfs_listxattr,
25951 + .removexattr = gfs_removexattr,
25954 +struct inode_operations gfs_dev_iops = {
25955 + .permission = gfs_permission,
25956 + .setattr = gfs_setattr,
25957 + .getattr = gfs_getattr,
25958 + .setxattr = gfs_setxattr,
25959 + .getxattr = gfs_getxattr,
25960 + .listxattr = gfs_listxattr,
25961 + .removexattr = gfs_removexattr,
25964 +struct inode_operations gfs_dir_iops = {
25965 + .create = gfs_create,
25966 + .lookup = gfs_lookup,
25967 + .link = gfs_link,
25968 + .unlink = gfs_unlink,
25969 + .symlink = gfs_symlink,
25970 + .mkdir = gfs_mkdir,
25971 + .rmdir = gfs_rmdir,
25972 + .mknod = gfs_mknod,
25973 + .rename = gfs_rename,
25974 + .permission = gfs_permission,
25975 + .setattr = gfs_setattr,
25976 + .getattr = gfs_getattr,
25977 + .setxattr = gfs_setxattr,
25978 + .getxattr = gfs_getxattr,
25979 + .listxattr = gfs_listxattr,
25980 + .removexattr = gfs_removexattr,
25983 +struct inode_operations gfs_symlink_iops = {
25984 + .readlink = gfs_readlink,
25985 + .follow_link = gfs_follow_link,
25986 + .permission = gfs_permission,
25987 + .setattr = gfs_setattr,
25988 + .getattr = gfs_getattr,
25989 + .setxattr = gfs_setxattr,
25990 + .getxattr = gfs_getxattr,
25991 + .listxattr = gfs_listxattr,
25992 + .removexattr = gfs_removexattr,
25995 diff -urN linux-orig/fs/gfs/ops_inode.h linux-patched/fs/gfs/ops_inode.h
25996 --- linux-orig/fs/gfs/ops_inode.h 1969-12-31 18:00:00.000000000 -0600
25997 +++ linux-patched/fs/gfs/ops_inode.h 2004-06-30 13:27:49.354708578 -0500
25999 +/******************************************************************************
26000 +*******************************************************************************
26002 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
26003 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
26005 +** This copyrighted material is made available to anyone wishing to use,
26006 +** modify, copy, or redistribute it subject to the terms and conditions
26007 +** of the GNU General Public License v.2.
26009 +*******************************************************************************
26010 +******************************************************************************/
26012 +#ifndef __OPS_INODE_DOT_H__
26013 +#define __OPS_INODE_DOT_H__
26015 +extern struct inode_operations gfs_file_iops;
26016 +extern struct inode_operations gfs_dir_iops;
26017 +extern struct inode_operations gfs_symlink_iops;
26018 +extern struct inode_operations gfs_dev_iops;
26020 +#endif /* __OPS_INODE_DOT_H__ */
26021 diff -urN linux-orig/fs/gfs/ops_super.c linux-patched/fs/gfs/ops_super.c
26022 --- linux-orig/fs/gfs/ops_super.c 1969-12-31 18:00:00.000000000 -0600
26023 +++ linux-patched/fs/gfs/ops_super.c 2004-06-30 13:27:49.354708578 -0500
26025 +/******************************************************************************
26026 +*******************************************************************************
26028 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
26029 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
26031 +** This copyrighted material is made available to anyone wishing to use,
26032 +** modify, copy, or redistribute it subject to the terms and conditions
26033 +** of the GNU General Public License v.2.
26035 +*******************************************************************************
26036 +******************************************************************************/
26038 +#include <linux/sched.h>
26039 +#include <linux/slab.h>
26040 +#include <linux/smp_lock.h>
26041 +#include <linux/spinlock.h>
26042 +#include <asm/semaphore.h>
26043 +#include <linux/completion.h>
26044 +#include <linux/buffer_head.h>
26045 +#include <linux/vmalloc.h>
26046 +#include <linux/statfs.h>
26047 +#include <linux/seq_file.h>
26048 +#include <linux/mount.h>
26052 +#include "glock.h"
26053 +#include "inode.h"
26054 +#include "locking.h"
26056 +#include "ops_super.h"
26058 +#include "quota.h"
26059 +#include "recovery.h"
26061 +#include "super.h"
26064 + * gfs_write_inode - Make sure the inode is stable on the disk
26065 + * @inode: The inode
26066 + * @sync: synchronous write flag
26071 +gfs_write_inode(struct inode *inode, int sync)
26073 + struct gfs_inode *ip = vn2ip(inode);
26075 + atomic_inc(&ip->i_sbd->sd_ops_super);
26077 + if (ip && sync && !gfs_in_panic)
26078 + gfs_log_flush_glock(ip->i_gl);
26082 + * gfs_put_inode - put an inode
26083 + * @inode: The inode
26085 + * If i_nlink is zero, any dirty data for the inode is thrown away.
26086 + * If a process on another machine has the file open, it may need that
26087 + * data. So, sync it out.
26091 +gfs_put_inode(struct inode *inode)
26093 + struct gfs_sbd *sdp = vfs2sdp(inode->i_sb);
26094 + struct gfs_inode *ip = vn2ip(inode);
26096 + atomic_inc(&sdp->sd_ops_super);
26099 + !inode->i_nlink &&
26100 + S_ISREG(inode->i_mode) &&
26101 + !sdp->sd_args.ar_localcaching)
26102 + gfs_sync_page_i(inode, DIO_START | DIO_WAIT);
26106 + * gfs_put_super - Unmount the filesystem
26107 + * @sb: The VFS superblock
26112 +gfs_put_super(struct super_block *sb)
26114 + struct gfs_sbd *sdp = vfs2sdp(sb);
26117 + atomic_inc(&sdp->sd_ops_super);
26119 + /* Unfreeze the filesystem, if we need to */
26121 + down(&sdp->sd_freeze_lock);
26122 + if (sdp->sd_freeze_count)
26123 + gfs_glock_dq_uninit(&sdp->sd_freeze_gh);
26124 + up(&sdp->sd_freeze_lock);
26126 + /* Kill off the inode thread */
26127 + down(&sdp->sd_thread_lock);
26128 + clear_bit(SDF_INODED_RUN, &sdp->sd_flags);
26129 + wake_up_process(sdp->sd_inoded_process);
26130 + up(&sdp->sd_thread_lock);
26131 + wait_for_completion(&sdp->sd_thread_completion);
26133 + /* Kill off the quota thread */
26134 + down(&sdp->sd_thread_lock);
26135 + clear_bit(SDF_QUOTAD_RUN, &sdp->sd_flags);
26136 + wake_up_process(sdp->sd_quotad_process);
26137 + up(&sdp->sd_thread_lock);
26138 + wait_for_completion(&sdp->sd_thread_completion);
26140 + /* Kill off the log thread */
26141 + down(&sdp->sd_thread_lock);
26142 + clear_bit(SDF_LOGD_RUN, &sdp->sd_flags);
26143 + wake_up_process(sdp->sd_logd_process);
26144 + up(&sdp->sd_thread_lock);
26145 + wait_for_completion(&sdp->sd_thread_completion);
26147 + /* Kill off the recoverd thread */
26148 + down(&sdp->sd_thread_lock);
26149 + clear_bit(SDF_RECOVERD_RUN, &sdp->sd_flags);
26150 + wake_up_process(sdp->sd_recoverd_process);
26151 + up(&sdp->sd_thread_lock);
26152 + wait_for_completion(&sdp->sd_thread_completion);
26154 + /* Kill off the glockd threads */
26155 + clear_bit(SDF_GLOCKD_RUN, &sdp->sd_flags);
26156 + wake_up(&sdp->sd_reclaim_wchan);
26157 + while (sdp->sd_glockd_num--)
26158 + wait_for_completion(&sdp->sd_thread_completion);
26160 + /* Kill off the scand thread */
26161 + down(&sdp->sd_thread_lock);
26162 + clear_bit(SDF_SCAND_RUN, &sdp->sd_flags);
26163 + wake_up_process(sdp->sd_scand_process);
26164 + up(&sdp->sd_thread_lock);
26165 + wait_for_completion(&sdp->sd_thread_completion);
26167 + if (!test_bit(SDF_ROFS, &sdp->sd_flags)) {
26168 + gfs_log_flush(sdp);
26169 + gfs_quota_sync(sdp);
26170 + gfs_quota_sync(sdp);
26172 + error = gfs_make_fs_ro(sdp);
26174 + gfs_io_error(sdp);
26177 + /* At this point, we're through modifying the disk */
26179 + /* Release stuff */
26181 + gfs_inode_put(sdp->sd_riinode);
26182 + gfs_inode_put(sdp->sd_jiinode);
26183 + gfs_inode_put(sdp->sd_rooti);
26184 + gfs_inode_put(sdp->sd_qinode);
26185 + gfs_inode_put(sdp->sd_linode);
26187 + gfs_glock_put(sdp->sd_trans_gl);
26188 + gfs_glock_put(sdp->sd_rename_gl);
26190 + gfs_glock_dq_uninit(&sdp->sd_journal_gh);
26192 + gfs_glock_dq_uninit(&sdp->sd_live_gh);
26194 + /* Get rid of rgrp bitmap structures */
26195 + gfs_clear_rgrpd(sdp);
26196 + gfs_clear_journals(sdp);
26198 + /* Take apart glock structures and buffer lists */
26199 + gfs_gl_hash_clear(sdp, TRUE);
26201 + /* Unmount the locking protocol */
26202 + gfs_unmount_lockproto(sdp);
26204 + /* At this point, we're through participating in the lockspace */
26206 + gfs_clear_dirty_j(sdp);
26208 + /* Get rid of any extra inodes */
26209 + while (invalidate_inodes(sb))
26214 + vfs2sdp(sb) = NULL;
26218 + * gfs_write_super - disk commit all incore transactions
26219 + * @sb: the filesystem
26221 + * This function is called every time sync(2) is called.
26222 + * After this exits, all dirty buffers are synced.
26226 +gfs_write_super(struct super_block *sb)
26228 + struct gfs_sbd *sdp = vfs2sdp(sb);
26230 + atomic_inc(&sdp->sd_ops_super);
26232 + if (!gfs_in_panic)
26233 + gfs_log_flush(sdp);
26237 + * gfs_write_super_lockfs - prevent further writes to the filesystem
26238 + * @sb: the VFS structure for the filesystem
26243 +gfs_write_super_lockfs(struct super_block *sb)
26245 + struct gfs_sbd *sdp = vfs2sdp(sb);
26248 + atomic_inc(&sdp->sd_ops_super);
26251 + error = gfs_freeze_fs(sdp);
26257 + printk("GFS: fsid=%s: waiting for recovery before freeze\n",
26262 + printk("GFS: fsid=%s: error freezing FS: %d\n",
26263 + sdp->sd_fsname, error);
26267 + printk("GFS: fsid=%s: retrying...\n", sdp->sd_fsname);
26269 + current->state = TASK_UNINTERRUPTIBLE;
26270 + schedule_timeout(HZ);
26275 + * gfs_unlockfs - reallow writes to the filesystem
26276 + * @sb: the VFS structure for the filesystem
26281 +gfs_unlockfs(struct super_block *sb)
26283 + struct gfs_sbd *sdp = vfs2sdp(sb);
26285 + atomic_inc(&sdp->sd_ops_super);
26287 + gfs_unfreeze_fs(sdp);
26291 + * gfs_statfs - Gather and return stats about the filesystem
26292 + * @sb: The superblock
26293 + * @statfsbuf: The buffer
26295 + * Returns: 0 on success or error code
26299 +gfs_statfs(struct super_block *sb, struct kstatfs *buf)
26301 + struct gfs_sbd *sdp = vfs2sdp(sb);
26302 + struct gfs_usage usage;
26305 + atomic_inc(&sdp->sd_ops_super);
26307 + error = gfs_stat_gfs(sdp, &usage, TRUE);
26311 + memset(buf, 0, sizeof(struct kstatfs));
26313 + buf->f_type = GFS_MAGIC;
26314 + buf->f_bsize = usage.gu_block_size;
26315 + buf->f_blocks = usage.gu_total_blocks;
26316 + buf->f_bfree = usage.gu_free + usage.gu_free_dinode + usage.gu_free_meta;
26317 + buf->f_bavail = usage.gu_free + usage.gu_free_dinode + usage.gu_free_meta;
26318 + buf->f_files = usage.gu_used_dinode + usage.gu_free_dinode + usage.gu_free_meta + usage.gu_free;
26319 + buf->f_ffree = usage.gu_free_dinode + usage.gu_free_meta + usage.gu_free;
26320 + buf->f_namelen = GFS_FNAMESIZE;
26326 + * gfs_remount_fs - called when the FS is remounted
26327 + * @sb: the filesystem
26328 + * @flags: the remount flags
26329 + * @data: extra data passed in (not used right now)
26331 + * Returns: 0 on success, -EXXX on failure
26335 +gfs_remount_fs(struct super_block *sb, int *flags, char *data)
26337 + struct gfs_sbd *sdp = vfs2sdp(sb);
26340 + atomic_inc(&sdp->sd_ops_super);
26342 + if (*flags & (MS_NOATIME | MS_NODIRATIME))
26343 + set_bit(SDF_NOATIME, &sdp->sd_flags);
26345 + clear_bit(SDF_NOATIME, &sdp->sd_flags);
26347 + if (*flags & MS_RDONLY) {
26348 + if (!test_bit(SDF_ROFS, &sdp->sd_flags))
26349 + error = gfs_make_fs_ro(sdp);
26350 + } else if (!(*flags & MS_RDONLY) &&
26351 + test_bit(SDF_ROFS, &sdp->sd_flags)) {
26352 + error = gfs_make_fs_rw(sdp);
26355 + /* Don't let the VFS update atimes. */
26356 + *flags |= MS_NOATIME | MS_NODIRATIME;
26362 + * gfs_clear_inode - Deallocate an inode when VFS is done with it
26363 + * @inode: The VFS inode
26368 +gfs_clear_inode(struct inode *inode)
26370 + struct gfs_inode *ip = vn2ip(inode);
26372 + atomic_inc(&vfs2sdp(inode->i_sb)->sd_ops_super);
26375 + spin_lock(&ip->i_lock);
26376 + ip->i_vnode = NULL;
26377 + vn2ip(inode) = NULL;
26378 + spin_unlock(&ip->i_lock);
26380 + gfs_glock_schedule_for_reclaim(ip->i_gl);
26381 + gfs_inode_put(ip);
26386 + * gfs_show_options - Show mount options for /proc/mounts
26387 + * @s: seq_file structure
26390 + * Returns: 0 on success or error code
26394 +gfs_show_options(struct seq_file *s, struct vfsmount *mnt)
26396 + struct gfs_sbd *sdp = vfs2sdp(mnt->mnt_sb);
26397 + struct gfs_args *args = &sdp->sd_args;
26399 + atomic_inc(&sdp->sd_ops_super);
26401 + if (args->ar_lockproto[0]) {
26402 + seq_printf(s, ",lockproto=");
26403 + seq_puts(s, args->ar_lockproto);
26405 + if (args->ar_locktable[0]) {
26406 + seq_printf(s, ",locktable=");
26407 + seq_puts(s, args->ar_locktable);
26409 + if (args->ar_hostdata[0]) {
26410 + seq_printf(s, ",hostdata=");
26411 + seq_puts(s, args->ar_hostdata);
26413 + if (args->ar_ignore_local_fs)
26414 + seq_printf(s, ",ignore_local_fs");
26415 + if (args->ar_localflocks)
26416 + seq_printf(s, ",localflocks");
26417 + if (args->ar_localcaching)
26418 + seq_printf(s, ",localcaching");
26419 + if (args->ar_upgrade)
26420 + seq_printf(s, ",upgrade");
26421 + if (args->ar_num_glockd != GFS_GLOCKD_DEFAULT)
26422 + seq_printf(s, ",num_glockd=%u", args->ar_num_glockd);
26423 + if (args->ar_posixacls)
26424 + seq_printf(s, ",acl");
26429 +struct super_operations gfs_super_ops = {
26430 + .write_inode = gfs_write_inode,
26431 + .put_inode = gfs_put_inode,
26432 + .put_super = gfs_put_super,
26433 + .write_super = gfs_write_super,
26434 + .write_super_lockfs = gfs_write_super_lockfs,
26435 + .unlockfs = gfs_unlockfs,
26436 + .statfs = gfs_statfs,
26437 + .remount_fs = gfs_remount_fs,
26438 + .clear_inode = gfs_clear_inode,
26439 + .show_options = gfs_show_options,
26441 diff -urN linux-orig/fs/gfs/ops_super.h linux-patched/fs/gfs/ops_super.h
26442 --- linux-orig/fs/gfs/ops_super.h 1969-12-31 18:00:00.000000000 -0600
26443 +++ linux-patched/fs/gfs/ops_super.h 2004-06-30 13:27:49.354708578 -0500
26445 +/******************************************************************************
26446 +*******************************************************************************
26448 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
26449 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
26451 +** This copyrighted material is made available to anyone wishing to use,
26452 +** modify, copy, or redistribute it subject to the terms and conditions
26453 +** of the GNU General Public License v.2.
26455 +*******************************************************************************
26456 +******************************************************************************/
26458 +#ifndef __OPS_SUPER_DOT_H__
26459 +#define __OPS_SUPER_DOT_H__
26461 +extern struct super_operations gfs_super_ops;
26463 +#endif /* __OPS_SUPER_DOT_H__ */
26464 diff -urN linux-orig/fs/gfs/ops_vm.c linux-patched/fs/gfs/ops_vm.c
26465 --- linux-orig/fs/gfs/ops_vm.c 1969-12-31 18:00:00.000000000 -0600
26466 +++ linux-patched/fs/gfs/ops_vm.c 2004-06-30 13:27:49.355708346 -0500
26468 +/******************************************************************************
26469 +*******************************************************************************
26471 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
26472 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
26474 +** This copyrighted material is made available to anyone wishing to use,
26475 +** modify, copy, or redistribute it subject to the terms and conditions
26476 +** of the GNU General Public License v.2.
26478 +*******************************************************************************
26479 +******************************************************************************/
26481 +#include <linux/sched.h>
26482 +#include <linux/slab.h>
26483 +#include <linux/smp_lock.h>
26484 +#include <linux/spinlock.h>
26485 +#include <asm/semaphore.h>
26486 +#include <linux/completion.h>
26487 +#include <linux/buffer_head.h>
26488 +#include <linux/mm.h>
26489 +#include <linux/pagemap.h>
26493 +#include "glock.h"
26494 +#include "inode.h"
26495 +#include "ops_vm.h"
26497 +#include "quota.h"
26499 +#include "trans.h"
26502 + * gfs_private_nopage -
26507 + * Returns: the page
26510 +static struct page *
26511 +gfs_private_nopage(struct vm_area_struct *area,
26512 + unsigned long address, int *type)
26514 + struct gfs_inode *ip = vn2ip(area->vm_file->f_mapping->host);
26515 + struct gfs_holder i_gh;
26516 + struct page *result;
26519 + atomic_inc(&ip->i_sbd->sd_ops_vm);
26521 + error = gfs_glock_nq_init(ip->i_gl, LM_ST_SHARED, 0, &i_gh);
26525 + set_bit(GIF_PAGED, &ip->i_flags);
26527 + result = filemap_nopage(area, address, type);
26529 + gfs_glock_dq_uninit(&i_gh);
26535 + * alloc_page_backing -
26543 +alloc_page_backing(struct gfs_inode *ip, unsigned long index)
26545 + struct gfs_sbd *sdp = ip->i_sbd;
26546 + uint64_t lblock = index << (PAGE_CACHE_SHIFT - sdp->sd_sb.sb_bsize_shift);
26547 + unsigned int blocks = PAGE_CACHE_SIZE >> sdp->sd_sb.sb_bsize_shift;
26548 + struct gfs_alloc *al;
26552 + al = gfs_alloc_get(ip);
26554 + error = gfs_quota_lock_m(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
26558 + error = gfs_quota_check(ip, ip->i_di.di_uid, ip->i_di.di_gid);
26560 + goto out_gunlock_q;
26562 + gfs_write_calc_reserv(ip, PAGE_CACHE_SIZE,
26563 + &al->al_requested_data, &al->al_requested_meta);
26565 + error = gfs_inplace_reserve(ip);
26567 + goto out_gunlock_q;
26569 + /* Trans may require:
26570 + a dinode block, RG bitmaps to allocate from,
26571 + indirect blocks, and a quota block */
26573 + error = gfs_trans_begin(sdp,
26574 + 1 + al->al_rgd->rd_ri.ri_length +
26575 + al->al_requested_meta, 1);
26579 + if (gfs_is_stuffed(ip)) {
26580 + error = gfs_unstuff_dinode(ip, gfs_unstuffer_page, NULL);
26585 + for (x = 0; x < blocks; ) {
26587 + unsigned int extlen;
26590 + error = gfs_block_map(ip, lblock, &new, &dblock, &extlen);
26593 + GFS_ASSERT_INODE(dblock, ip,);
26595 + lblock += extlen;
26599 + GFS_ASSERT_INODE(al->al_alloced_meta || al->al_alloced_data, ip,);
26602 + gfs_trans_end(sdp);
26605 + gfs_inplace_release(ip);
26608 + gfs_quota_unlock_m(ip);
26611 + gfs_alloc_put(ip);
26617 + * gfs_sharewrite_nopage -
26622 + * Returns: the page
26625 +static struct page *
26626 +gfs_sharewrite_nopage(struct vm_area_struct *area,
26627 + unsigned long address, int *type)
26629 + struct gfs_inode *ip = vn2ip(area->vm_file->f_mapping->host);
26630 + struct gfs_holder i_gh;
26631 + struct page *result = NULL;
26632 + unsigned long index = ((address - area->vm_start) >> PAGE_CACHE_SHIFT) + area->vm_pgoff;
26633 + int alloc_required;
26636 + atomic_inc(&ip->i_sbd->sd_ops_vm);
26638 + error = gfs_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &i_gh);
26642 + if (gfs_is_jdata(ip))
26645 + set_bit(GIF_PAGED, &ip->i_flags);
26646 + set_bit(GIF_SW_PAGED, &ip->i_flags);
26648 + error = gfs_write_alloc_required(ip, (uint64_t)index << PAGE_CACHE_SHIFT,
26649 + PAGE_CACHE_SIZE, &alloc_required);
26653 + result = filemap_nopage(area, address, type);
26654 + if (!result || result == NOPAGE_OOM)
26657 + if (alloc_required) {
26658 + error = alloc_page_backing(ip, index);
26660 + page_cache_release(result);
26663 + set_page_dirty(result);
26667 + gfs_glock_dq_uninit(&i_gh);
26672 +struct vm_operations_struct gfs_vm_ops_private = {
26673 + .nopage = gfs_private_nopage,
26676 +struct vm_operations_struct gfs_vm_ops_sharewrite = {
26677 + .nopage = gfs_sharewrite_nopage,
26680 diff -urN linux-orig/fs/gfs/ops_vm.h linux-patched/fs/gfs/ops_vm.h
26681 --- linux-orig/fs/gfs/ops_vm.h 1969-12-31 18:00:00.000000000 -0600
26682 +++ linux-patched/fs/gfs/ops_vm.h 2004-06-30 13:27:49.355708346 -0500
26684 +/******************************************************************************
26685 +*******************************************************************************
26687 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
26688 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
26690 +** This copyrighted material is made available to anyone wishing to use,
26691 +** modify, copy, or redistribute it subject to the terms and conditions
26692 +** of the GNU General Public License v.2.
26694 +*******************************************************************************
26695 +******************************************************************************/
26697 +#ifndef __OPS_VM_DOT_H__
26698 +#define __OPS_VM_DOT_H__
26700 +extern struct vm_operations_struct gfs_vm_ops_private;
26701 +extern struct vm_operations_struct gfs_vm_ops_sharewrite;
26703 +#endif /* __OPS_VM_DOT_H__ */
26704 diff -urN linux-orig/fs/gfs/page.c linux-patched/fs/gfs/page.c
26705 --- linux-orig/fs/gfs/page.c 1969-12-31 18:00:00.000000000 -0600
26706 +++ linux-patched/fs/gfs/page.c 2004-06-30 13:27:49.355708346 -0500
26708 +/******************************************************************************
26709 +*******************************************************************************
26711 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
26712 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
26714 +** This copyrighted material is made available to anyone wishing to use,
26715 +** modify, copy, or redistribute it subject to the terms and conditions
26716 +** of the GNU General Public License v.2.
26718 +*******************************************************************************
26719 +******************************************************************************/
26721 +#include <linux/sched.h>
26722 +#include <linux/slab.h>
26723 +#include <linux/smp_lock.h>
26724 +#include <linux/spinlock.h>
26725 +#include <asm/semaphore.h>
26726 +#include <linux/completion.h>
26727 +#include <linux/buffer_head.h>
26728 +#include <linux/pagemap.h>
26729 +#include <linux/mm.h>
26733 +#include "inode.h"
26737 + * gfs_inval_pte - Sync and invalidate all PTEs associated with a glock
26743 +gfs_inval_pte(struct gfs_glock *gl)
26745 + struct gfs_inode *ip;
26746 + struct inode *inode;
26750 + ip->i_di.di_type != GFS_FILE_REG)
26753 + if (!test_bit(GIF_PAGED, &ip->i_flags))
26756 + inode = gfs_iget(ip, NO_CREATE);
26758 + unmap_shared_mapping_range(inode->i_mapping, 0, 0);
26761 + if (test_bit(GIF_SW_PAGED, &ip->i_flags))
26762 + set_bit(GLF_DIRTY, &gl->gl_flags);
26765 + clear_bit(GIF_SW_PAGED, &ip->i_flags);
26769 + * gfs_inval_page - Invalidate all pages associated with a glock
26775 +gfs_inval_page(struct gfs_glock *gl)
26777 + struct gfs_inode *ip;
26778 + struct inode *inode;
26782 + ip->i_di.di_type != GFS_FILE_REG)
26785 + inode = gfs_iget(ip, NO_CREATE);
26787 + struct address_space *mapping = inode->i_mapping;
26789 + truncate_inode_pages(mapping, 0);
26790 + GFS_ASSERT_INODE(!mapping->nrpages, ip,);
26795 + clear_bit(GIF_PAGED, &ip->i_flags);
26799 + * gfs_sync_page_i - Sync the pages for a struct inode
26800 + * @inode: the inode
26801 + * @flags: DIO_START | DIO_WAIT
26806 +gfs_sync_page_i(struct inode *inode, int flags)
26808 + struct address_space *mapping = inode->i_mapping;
26811 + if (flags & DIO_START)
26812 + error = filemap_fdatawrite(mapping);
26813 + if (!error && (flags & DIO_WAIT))
26814 + filemap_fdatawait(mapping);
26817 + gfs_io_error_inode(vn2ip(inode));
26821 + * gfs_sync_page - sync the pages associated with a glock
26823 + * @flags: DIO_START | DIO_WAIT
26828 +gfs_sync_page(struct gfs_glock *gl, int flags)
26830 + struct gfs_inode *ip;
26831 + struct inode *inode;
26835 + ip->i_di.di_type != GFS_FILE_REG)
26838 + inode = gfs_iget(ip, NO_CREATE);
26840 + gfs_sync_page_i(inode, flags);
26846 + * gfs_unstuffer_page - unstuff a stuffed inode into a block cached by a page
26848 + * @dibh: the dinode buffer
26849 + * @block: the block number that was allocated
26850 + * @private: any locked page held by the caller process
26852 + * Returns: 0 on success, -EXXX on failure
26856 +gfs_unstuffer_page(struct gfs_inode *ip, struct buffer_head *dibh,
26857 + uint64_t block, void *private)
26859 + struct inode *inode = ip->i_vnode;
26860 + struct page *page = (struct page *)private;
26861 + struct buffer_head *bh;
26862 + int release = FALSE;
26864 + if (!page || page->index) {
26865 + RETRY_MALLOC(page = grab_cache_page(inode->i_mapping, 0), page);
26869 + GFS_ASSERT_INODE(PageLocked(page), ip,);
26871 + if (!PageUptodate(page)) {
26872 + void *kaddr = kmap(page);
26875 + dibh->b_data + sizeof(struct gfs_dinode),
26876 + ip->i_di.di_size);
26877 + memset(kaddr + ip->i_di.di_size,
26879 + PAGE_CACHE_SIZE - ip->i_di.di_size);
26882 + SetPageUptodate(page);
26885 + if (!page_has_buffers(page))
26886 + create_empty_buffers(page, 1 << inode->i_blkbits,
26887 + (1 << BH_Uptodate));
26889 + bh = page_buffers(page);
26891 + if (!buffer_mapped(bh))
26892 + map_bh(bh, inode->i_sb, block);
26894 + GFS_ASSERT_INODE(bh->b_bdev == inode->i_sb->s_bdev &&
26895 + bh->b_blocknr == block,
26898 + set_buffer_uptodate(bh);
26899 + mark_buffer_dirty(bh);
26902 + unlock_page(page);
26903 + page_cache_release(page);
26910 + * gfs_truncator_page - truncate a partial data block in the page cache
26912 + * @size: the size the file should be
26914 + * Returns: 0 on success, -EXXX on failure
26918 +gfs_truncator_page(struct gfs_inode *ip, uint64_t size)
26920 + struct inode *inode = ip->i_vnode;
26921 + struct page *page;
26922 + struct buffer_head *bh;
26924 + uint64_t lbn, dbn;
26925 + unsigned long index;
26926 + unsigned int offset;
26927 + unsigned int bufnum;
26931 + lbn = size >> inode->i_blkbits;
26932 + error = gfs_block_map(ip,
26935 + if (error || !dbn)
26938 + index = size >> PAGE_CACHE_SHIFT;
26939 + offset = size & (PAGE_CACHE_SIZE - 1);
26940 + bufnum = lbn - (index << (PAGE_CACHE_SHIFT - inode->i_blkbits));
26942 + /* Not in a transaction here -- a non-disk-I/O error is ok. */
26944 + page = read_cache_page(inode->i_mapping, index,
26945 + (filler_t *)inode->i_mapping->a_ops->readpage,
26947 + if (IS_ERR(page))
26948 + return PTR_ERR(page);
26952 + if (!PageUptodate(page) || PageError(page)) {
26957 + kaddr = kmap(page);
26958 + memset(kaddr + offset, 0, PAGE_CACHE_SIZE - offset);
26961 + if (!page_has_buffers(page))
26962 + create_empty_buffers(page, 1 << inode->i_blkbits,
26963 + (1 << BH_Uptodate));
26965 + for (bh = page_buffers(page); bufnum--; bh = bh->b_this_page)
26966 + /* Do nothing */;
26968 + if (!buffer_mapped(bh))
26969 + map_bh(bh, inode->i_sb, dbn);
26971 + GFS_ASSERT_INODE(bh->b_bdev == inode->i_sb->s_bdev &&
26972 + bh->b_blocknr == dbn,
26975 + set_buffer_uptodate(bh);
26976 + mark_buffer_dirty(bh);
26979 + unlock_page(page);
26980 + page_cache_release(page);
26984 diff -urN linux-orig/fs/gfs/page.h linux-patched/fs/gfs/page.h
26985 --- linux-orig/fs/gfs/page.h 1969-12-31 18:00:00.000000000 -0600
26986 +++ linux-patched/fs/gfs/page.h 2004-06-30 13:27:49.355708346 -0500
26988 +/******************************************************************************
26989 +*******************************************************************************
26991 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
26992 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
26994 +** This copyrighted material is made available to anyone wishing to use,
26995 +** modify, copy, or redistribute it subject to the terms and conditions
26996 +** of the GNU General Public License v.2.
26998 +*******************************************************************************
26999 +******************************************************************************/
27001 +#ifndef __PAGE_DOT_H__
27002 +#define __PAGE_DOT_H__
27004 +void gfs_inval_pte(struct gfs_glock *gl);
27005 +void gfs_inval_page(struct gfs_glock *gl);
27006 +void gfs_sync_page_i(struct inode *inode, int flags);
27007 +void gfs_sync_page(struct gfs_glock *gl, int flags);
27009 +int gfs_unstuffer_page(struct gfs_inode *ip, struct buffer_head *dibh,
27010 + uint64_t block, void *private);
27011 +int gfs_truncator_page(struct gfs_inode *ip, uint64_t size);
27013 +#endif /* __PAGE_DOT_H__ */
27014 diff -urN linux-orig/fs/gfs/quota.c linux-patched/fs/gfs/quota.c
27015 --- linux-orig/fs/gfs/quota.c 1969-12-31 18:00:00.000000000 -0600
27016 +++ linux-patched/fs/gfs/quota.c 2004-06-30 13:27:49.356708115 -0500
27018 +/******************************************************************************
27019 +*******************************************************************************
27021 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
27022 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
27024 +** This copyrighted material is made available to anyone wishing to use,
27025 +** modify, copy, or redistribute it subject to the terms and conditions
27026 +** of the GNU General Public License v.2.
27028 +*******************************************************************************
27029 +******************************************************************************/
27031 +#include <linux/sched.h>
27032 +#include <linux/slab.h>
27033 +#include <linux/smp_lock.h>
27034 +#include <linux/spinlock.h>
27035 +#include <asm/semaphore.h>
27036 +#include <linux/completion.h>
27037 +#include <linux/buffer_head.h>
27038 +#include <linux/tty.h>
27039 +#include <asm/uaccess.h>
27044 +#include "glock.h"
27045 +#include "glops.h"
27047 +#include "quota.h"
27049 +#include "super.h"
27050 +#include "trans.h"
27053 + * gfs_quota_get - Get a structure to represent a quota change
27054 + * @sdp: the filesystem
27055 + * @user: TRUE if this is a user quota
27056 + * @id: the uid or gid
27057 + * @create: if TRUE, create the structure, otherwise return NULL
27058 + * @qdp: the returned quota structure
27060 + * Returns: 0 on success, -EXXX on failure
27064 +gfs_quota_get(struct gfs_sbd *sdp, int user, uint32_t id, int create,
27065 + struct gfs_quota_data **qdp)
27067 + struct gfs_quota_data *qd = NULL, *new_qd = NULL;
27068 + struct list_head *tmp, *head;
27072 + spin_lock(&sdp->sd_quota_lock);
27074 + for (head = &sdp->sd_quota_list, tmp = head->next;
27076 + tmp = tmp->next) {
27077 + qd = list_entry(tmp, struct gfs_quota_data, qd_list);
27078 + if (qd->qd_id == id &&
27079 + !test_bit(QDF_USER, &qd->qd_flags) == !user) {
27088 + if (!qd && new_qd) {
27090 + list_add(&qd->qd_list, &sdp->sd_quota_list);
27094 + spin_unlock(&sdp->sd_quota_lock);
27096 + if (qd || !create) {
27098 + gfs_lvb_unhold(new_qd->qd_gl);
27100 + atomic_dec(&sdp->sd_quota_count);
27105 + new_qd = gmalloc(sizeof(struct gfs_quota_data));
27106 + memset(new_qd, 0, sizeof(struct gfs_quota_data));
27108 + new_qd->qd_count = 1;
27110 + new_qd->qd_id = id;
27112 + set_bit(QDF_USER, &new_qd->qd_flags);
27114 + INIT_LIST_HEAD(&new_qd->qd_le_list);
27116 + error = gfs_glock_get(sdp, 2 * (uint64_t)id + ((user) ? 0 : 1),
27117 + &gfs_quota_glops, CREATE,
27124 + error = gfs_lvb_hold(new_qd->qd_gl);
27126 + gfs_glock_put(new_qd->qd_gl);
27133 + atomic_inc(&sdp->sd_quota_count);
27143 + * gfs_quota_hold - increment the usage count on a struct gfs_quota_data
27144 + * @sdp: the filesystem
27145 + * @qd: the structure
27150 +gfs_quota_hold(struct gfs_sbd *sdp, struct gfs_quota_data *qd)
27152 + spin_lock(&sdp->sd_quota_lock);
27154 + spin_unlock(&sdp->sd_quota_lock);
27158 + * gfs_quota_put - decrement the usage count on a struct gfs_quota_data
27159 + * @sdp: the filesystem
27160 + * @qd: the structure
27162 + * Free the structure if its reference count hits zero.
27167 +gfs_quota_put(struct gfs_sbd *sdp, struct gfs_quota_data *qd)
27169 + spin_lock(&sdp->sd_quota_lock);
27170 + GFS_ASSERT_SBD(qd->qd_count, sdp,);
27172 + spin_unlock(&sdp->sd_quota_lock);
27176 + * quota_find - Find a quota change to sync to the quota file
27177 + * @sdp: the filesystem
27179 + * The returned structure is locked and needs to be unlocked
27180 + * with quota_unlock().
27182 + * Returns: A quota structure, or NULL
27185 +static struct gfs_quota_data *
27186 +quota_find(struct gfs_sbd *sdp)
27188 + struct list_head *tmp, *head;
27189 + struct gfs_quota_data *qd = NULL;
27191 + if (test_bit(SDF_ROFS, &sdp->sd_flags))
27194 + gfs_log_lock(sdp);
27195 + spin_lock(&sdp->sd_quota_lock);
27197 + if (!atomic_read(&sdp->sd_quota_od_count))
27200 + for (head = &sdp->sd_quota_list, tmp = head->next;
27202 + tmp = tmp->next) {
27203 + qd = list_entry(tmp, struct gfs_quota_data, qd_list);
27205 + if (test_bit(QDF_LOCK, &qd->qd_flags))
27207 + if (!test_bit(QDF_OD_LIST, &qd->qd_flags))
27209 + if (qd->qd_sync_gen >= sdp->sd_quota_sync_gen)
27212 + list_move_tail(&qd->qd_list, &sdp->sd_quota_list);
27214 + set_bit(QDF_LOCK, &qd->qd_flags);
27216 + qd->qd_change_sync = qd->qd_change_od;
27224 + spin_unlock(&sdp->sd_quota_lock);
27225 + gfs_log_unlock(sdp);
27231 + * quota_trylock - Try to lock a given quota entry
27232 + * @sdp: the filesystem
27233 + * @qd: the quota data structure
27235 + * Returns: TRUE if the lock was successful, FALSE, otherwise
27239 +quota_trylock(struct gfs_sbd *sdp, struct gfs_quota_data *qd)
27243 + if (test_bit(SDF_ROFS, &sdp->sd_flags))
27246 + gfs_log_lock(sdp);
27247 + spin_lock(&sdp->sd_quota_lock);
27249 + if (test_bit(QDF_LOCK, &qd->qd_flags))
27251 + if (!test_bit(QDF_OD_LIST, &qd->qd_flags))
27254 + list_move_tail(&qd->qd_list, &sdp->sd_quota_list);
27256 + set_bit(QDF_LOCK, &qd->qd_flags);
27258 + qd->qd_change_sync = qd->qd_change_od;
27263 + spin_unlock(&sdp->sd_quota_lock);
27264 + gfs_log_unlock(sdp);
27270 + * quota_unlock - drop and a reference on a quota structure
27271 + * @sdp: the filesystem
27272 + * @qd: the quota inode structure
27277 +quota_unlock(struct gfs_sbd *sdp, struct gfs_quota_data *qd)
27279 + spin_lock(&sdp->sd_quota_lock);
27281 + GFS_ASSERT_SBD(test_bit(QDF_LOCK, &qd->qd_flags), sdp,);
27282 + clear_bit(QDF_LOCK, &qd->qd_flags);
27284 + GFS_ASSERT_SBD(qd->qd_count, sdp,);
27287 + spin_unlock(&sdp->sd_quota_lock);
27291 + * gfs_quota_merge - add/remove a quota change from the in-memory list
27292 + * @sdp: the filesystem
27293 + * @tag: the quota change tag
27295 + * Returns: 0 on success, -EXXX on failure
27299 +gfs_quota_merge(struct gfs_sbd *sdp, struct gfs_quota_tag *tag)
27301 + struct gfs_quota_data *qd;
27304 + error = gfs_quota_get(sdp,
27305 + tag->qt_flags & GFS_QTF_USER, tag->qt_id,
27310 + GFS_ASSERT_SBD(qd->qd_change_ic == qd->qd_change_od, sdp,);
27312 + gfs_log_lock(sdp);
27314 + qd->qd_change_ic += tag->qt_change;
27315 + qd->qd_change_od += tag->qt_change;
27317 + if (qd->qd_change_od) {
27318 + if (!test_bit(QDF_OD_LIST, &qd->qd_flags)) {
27319 + gfs_quota_hold(sdp, qd);
27320 + set_bit(QDF_OD_LIST, &qd->qd_flags);
27321 + atomic_inc(&sdp->sd_quota_od_count);
27324 + GFS_ASSERT_SBD(test_bit(QDF_OD_LIST, &qd->qd_flags), sdp,);
27325 + clear_bit(QDF_OD_LIST, &qd->qd_flags);
27326 + gfs_quota_put(sdp, qd);
27327 + GFS_ASSERT_SBD(atomic_read(&sdp->sd_quota_od_count), sdp,);
27328 + atomic_dec(&sdp->sd_quota_od_count);
27331 + gfs_log_unlock(sdp);
27333 + gfs_quota_put(sdp, qd);
27339 + * gfs_quota_scan - Look for unused struct gfs_quota_data structures to throw away
27340 + * @sdp: the filesystem
27345 +gfs_quota_scan(struct gfs_sbd *sdp)
27347 + struct list_head *head, *tmp, *next;
27348 + struct gfs_quota_data *qd;
27351 + spin_lock(&sdp->sd_quota_lock);
27353 + for (head = &sdp->sd_quota_list, tmp = head->next, next = tmp->next;
27355 + tmp = next, next = next->next) {
27356 + qd = list_entry(tmp, struct gfs_quota_data, qd_list);
27357 + if (!qd->qd_count)
27358 + list_move(&qd->qd_list, &dead);
27361 + spin_unlock(&sdp->sd_quota_lock);
27363 + while (!list_empty(&dead)) {
27364 + qd = list_entry(dead.next, struct gfs_quota_data, qd_list);
27366 + GFS_ASSERT_SBD(!qd->qd_count, sdp,);
27367 + GFS_ASSERT_SBD(!test_bit(QDF_OD_LIST, &qd->qd_flags) &&
27368 + !test_bit(QDF_LOCK, &qd->qd_flags), sdp,);
27369 + GFS_ASSERT_SBD(!qd->qd_change_new && !qd->qd_change_ic &&
27370 + !qd->qd_change_od, sdp,);
27372 + list_del(&qd->qd_list);
27373 + gfs_lvb_unhold(qd->qd_gl);
27375 + atomic_dec(&sdp->sd_quota_count);
27380 + * gfs_quota_cleanup - get rid of any extra struct gfs_quota_data structures
27381 + * @sdp: the filesystem
27386 +gfs_quota_cleanup(struct gfs_sbd *sdp)
27388 + struct gfs_quota_data *qd;
27391 + gfs_log_lock(sdp);
27393 + spin_lock(&sdp->sd_quota_lock);
27395 + while (!list_empty(&sdp->sd_quota_list)) {
27396 + qd = list_entry(sdp->sd_quota_list.next,
27397 + struct gfs_quota_data,
27400 + if (qd->qd_count > 1) {
27401 + spin_unlock(&sdp->sd_quota_lock);
27402 + gfs_log_unlock(sdp);
27403 + current->state = TASK_UNINTERRUPTIBLE;
27404 + schedule_timeout(HZ);
27407 + } else if (qd->qd_count) {
27408 + GFS_ASSERT_SBD(test_bit(QDF_OD_LIST, &qd->qd_flags) &&
27409 + !test_bit(QDF_LOCK, &qd->qd_flags),
27411 + GFS_ASSERT_SBD(qd->qd_change_od &&
27412 + qd->qd_change_od == qd->qd_change_ic,
27414 + GFS_ASSERT_SBD(!qd->qd_change_new, sdp,);
27416 + list_del(&qd->qd_list);
27417 + atomic_dec(&sdp->sd_quota_od_count);
27419 + spin_unlock(&sdp->sd_quota_lock);
27420 + gfs_lvb_unhold(qd->qd_gl);
27422 + atomic_dec(&sdp->sd_quota_count);
27423 + spin_lock(&sdp->sd_quota_lock);
27426 + GFS_ASSERT_SBD(!test_bit(QDF_OD_LIST, &qd->qd_flags) &&
27427 + !test_bit(QDF_LOCK, &qd->qd_flags), sdp,);
27428 + GFS_ASSERT_SBD(!qd->qd_change_new &&
27429 + !qd->qd_change_ic &&
27430 + !qd->qd_change_od, sdp,);
27432 + list_del(&qd->qd_list);
27434 + spin_unlock(&sdp->sd_quota_lock);
27435 + gfs_lvb_unhold(qd->qd_gl);
27437 + atomic_dec(&sdp->sd_quota_count);
27438 + spin_lock(&sdp->sd_quota_lock);
27442 + spin_unlock(&sdp->sd_quota_lock);
27444 + GFS_ASSERT_SBD(!atomic_read(&sdp->sd_quota_od_count), sdp,);
27446 + gfs_log_unlock(sdp);
27450 + * sort_qd - figure out the order between two quota data structures
27451 + * @a: first quota data structure
27452 + * @b: second quota data structure
27454 + * Returns: -1 if @a comes before @b, 0 if @a equals @b, 1 if @b comes before @a
27458 +sort_qd(const void *a, const void *b)
27460 + struct gfs_quota_data *qd_a = *(struct gfs_quota_data **)a;
27461 + struct gfs_quota_data *qd_b = *(struct gfs_quota_data **)b;
27464 + if (!test_bit(QDF_USER, &qd_a->qd_flags) !=
27465 + !test_bit(QDF_USER, &qd_b->qd_flags)) {
27466 + if (test_bit(QDF_USER, &qd_a->qd_flags))
27471 + if (qd_a->qd_id < qd_b->qd_id)
27473 + else if (qd_a->qd_id > qd_b->qd_id)
27481 + * do_quota_sync - Sync a bunch quota changes to the quota file
27482 + * @sdp: the filesystem
27483 + * @qda: an array of struct gfs_quota_data structures to be synced
27484 + * @num_qd: the number of elements in @qda
27486 + * Returns: 0 on success, -EXXX on failure
27490 +do_quota_sync(struct gfs_sbd *sdp, struct gfs_quota_data **qda,
27491 + unsigned int num_qd)
27493 + struct gfs_inode *ip = sdp->sd_qinode;
27494 + struct gfs_alloc *al = NULL;
27495 + struct gfs_holder i_gh, *ghs;
27496 + struct gfs_quota q;
27497 + char buf[sizeof(struct gfs_quota)];
27499 + unsigned int qx, x;
27501 + unsigned int nalloc = 0;
27502 + unsigned int data_blocks, ind_blocks;
27505 + gfs_write_calc_reserv(ip, sizeof(struct gfs_quota), &data_blocks,
27508 + ghs = gmalloc(num_qd * sizeof(struct gfs_holder));
27510 + gfs_sort(qda, num_qd, sizeof (struct gfs_quota_data *), sort_qd);
27511 + for (qx = 0; qx < num_qd; qx++) {
27512 + error = gfs_glock_nq_init(qda[qx]->qd_gl,
27514 + GL_NOCACHE, &ghs[qx]);
27519 + error = gfs_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &i_gh);
27523 + for (x = 0; x < num_qd; x++) {
27524 + offset = (2 * (uint64_t)qda[x]->qd_id +
27525 + ((test_bit(QDF_USER, &qda[x]->qd_flags)) ? 0 : 1)) *
27526 + sizeof(struct gfs_quota);
27528 + error = gfs_write_alloc_required(ip, offset,
27529 + sizeof(struct gfs_quota),
27532 + goto fail_gunlock;
27539 + al = gfs_alloc_get(ip);
27542 + gfs_quota_hold_m(ip, NO_QUOTA_CHANGE,
27543 + NO_QUOTA_CHANGE);
27547 + al->al_requested_meta = nalloc * (data_blocks + ind_blocks);
27549 + error = gfs_inplace_reserve(ip);
27553 + /* Trans may require:
27554 + two (journaled) data blocks, a dinode block, RG bitmaps to allocate from,
27555 + indirect blocks, and a quota block */
27557 + error = gfs_trans_begin(sdp,
27558 + 1 + al->al_rgd->rd_ri.ri_length +
27559 + num_qd * data_blocks +
27560 + nalloc * ind_blocks,
27561 + gfs_struct2blk(sdp, num_qd + 2,
27562 + sizeof(struct gfs_quota_tag)));
27566 + /* Trans may require:
27567 + Data blocks, a dinode block, and quota blocks */
27569 + error = gfs_trans_begin(sdp,
27570 + 1 + data_blocks * num_qd,
27571 + gfs_struct2blk(sdp, num_qd,
27572 + sizeof(struct gfs_quota_tag)));
27574 + goto fail_gunlock;
27577 + for (x = 0; x < num_qd; x++) {
27578 + offset = (2 * (uint64_t)qda[x]->qd_id +
27579 + ((test_bit(QDF_USER, &qda[x]->qd_flags)) ? 0 : 1)) *
27580 + sizeof(struct gfs_quota);
27582 + /* The quota file may not be a multiple of sizeof(struct gfs_quota) bytes. */
27583 + memset(buf, 0, sizeof(struct gfs_quota));
27585 + error = gfs_internal_read(ip, buf, offset,
27586 + sizeof(struct gfs_quota));
27588 + goto fail_end_trans;
27590 + gfs_quota_in(&q, buf);
27591 + q.qu_value += qda[x]->qd_change_sync;
27592 + gfs_quota_out(&q, buf);
27594 + error = gfs_internal_write(ip, buf, offset,
27595 + sizeof(struct gfs_quota));
27597 + goto fail_end_trans;
27598 + else if (error != sizeof(struct gfs_quota)) {
27600 + goto fail_end_trans;
27603 + if (test_bit(QDF_USER, &qda[x]->qd_flags))
27604 + gfs_trans_add_quota(sdp, -qda[x]->qd_change_sync,
27605 + qda[x]->qd_id, NO_QUOTA_CHANGE);
27607 + gfs_trans_add_quota(sdp, -qda[x]->qd_change_sync,
27608 + NO_QUOTA_CHANGE, qda[x]->qd_id);
27610 + memset(&qda[x]->qd_qb, 0, sizeof(struct gfs_quota_lvb));
27611 + qda[x]->qd_qb.qb_magic = GFS_MAGIC;
27612 + qda[x]->qd_qb.qb_limit = q.qu_limit;
27613 + qda[x]->qd_qb.qb_warn = q.qu_warn;
27614 + qda[x]->qd_qb.qb_value = q.qu_value;
27616 + gfs_quota_lvb_out(&qda[x]->qd_qb, qda[x]->qd_gl->gl_lvb);
27617 + clear_bit(GLF_LVB_INVALID, &qda[x]->qd_gl->gl_flags);
27620 + gfs_trans_end(sdp);
27623 + GFS_ASSERT_SBD(al->al_alloced_meta, sdp,);
27624 + gfs_inplace_release(ip);
27625 + gfs_quota_unhold_m(ip);
27626 + gfs_alloc_put(ip);
27629 + gfs_glock_dq_uninit(&i_gh);
27631 + for (x = 0; x < num_qd; x++)
27632 + gfs_glock_dq_uninit(&ghs[x]);
27636 + gfs_log_flush_glock(ip->i_gl);
27641 + gfs_trans_end(sdp);
27645 + gfs_inplace_release(ip);
27649 + gfs_quota_unhold_m(ip);
27653 + gfs_alloc_put(ip);
27656 + gfs_glock_dq_uninit(&i_gh);
27660 + gfs_glock_dq_uninit(&ghs[qx]);
27668 + * glock_q - Acquire a lock for a quota entry
27669 + * @sdp: the filesystem
27670 + * @qd: the quota data structure to glock
27671 + * @force_refresh: If TRUE, always read from the quota file
27672 + * @q_gh: the glock holder for the quota lock
27674 + * Returns: 0 on success, -EXXX on failure
27678 +glock_q(struct gfs_sbd *sdp, struct gfs_quota_data *qd, int force_refresh,
27679 + struct gfs_holder *q_gh)
27681 + struct gfs_holder i_gh;
27682 + struct gfs_quota q;
27683 + char buf[sizeof(struct gfs_quota)];
27687 + error = gfs_glock_nq_init(qd->qd_gl, LM_ST_SHARED, 0, q_gh);
27691 + gfs_quota_lvb_in(&qd->qd_qb, qd->qd_gl->gl_lvb);
27693 + if (force_refresh ||
27694 + qd->qd_qb.qb_magic != GFS_MAGIC ||
27695 + test_bit(GLF_LVB_INVALID, &qd->qd_gl->gl_flags)) {
27696 + gfs_glock_dq_uninit(q_gh);
27697 + error = gfs_glock_nq_init(qd->qd_gl,
27698 + LM_ST_EXCLUSIVE, GL_NOCACHE,
27703 + error = gfs_glock_nq_init(sdp->sd_qinode->i_gl,
27709 + memset(buf, 0, sizeof(struct gfs_quota));
27711 + error = gfs_internal_read(sdp->sd_qinode, buf,
27712 + (2 * (uint64_t)qd->qd_id +
27713 + ((test_bit(QDF_USER, &qd->qd_flags)) ? 0 : 1)) *
27714 + sizeof(struct gfs_quota),
27715 + sizeof(struct gfs_quota));
27717 + goto fail_gunlock;
27719 + gfs_glock_dq_uninit(&i_gh);
27721 + gfs_quota_in(&q, buf);
27723 + memset(&qd->qd_qb, 0, sizeof(struct gfs_quota_lvb));
27724 + qd->qd_qb.qb_magic = GFS_MAGIC;
27725 + qd->qd_qb.qb_limit = q.qu_limit;
27726 + qd->qd_qb.qb_warn = q.qu_warn;
27727 + qd->qd_qb.qb_value = q.qu_value;
27729 + gfs_quota_lvb_out(&qd->qd_qb, qd->qd_gl->gl_lvb);
27730 + clear_bit(GLF_LVB_INVALID, &qd->qd_gl->gl_flags);
27732 + gfs_glock_dq_uninit(q_gh);
27733 + force_refresh = FALSE;
27740 + gfs_glock_dq_uninit(&i_gh);
27743 + gfs_glock_dq_uninit(q_gh);
27749 + * gfs_quota_hold_m - Hold the quota structures for up to 4 IDs
27750 + * @ip: Two of the IDs are the UID and GID from this file
27751 + * @uid: a UID or the constant NO_QUOTA_CHANGE
27752 + * @gid: a GID or the constant NO_QUOTA_CHANGE
27754 + * The struct gfs_quota_data structures representing the locks are
27755 + * stored in the ip->i_alloc->al_qd array.
27757 + * Returns: 0 on success, -EXXX on failure
27761 +gfs_quota_hold_m(struct gfs_inode *ip, uint32_t uid, uint32_t gid)
27763 + struct gfs_sbd *sdp = ip->i_sbd;
27764 + struct gfs_alloc *al = ip->i_alloc;
27765 + unsigned int x = 0;
27768 + GFS_ASSERT_INODE(al && !al->al_qd_num &&
27769 + !test_bit(GIF_QD_LOCKED, &ip->i_flags), ip,);
27771 + if (!sdp->sd_tune.gt_quota_account)
27774 + error = gfs_quota_get(sdp, TRUE, ip->i_di.di_uid,
27775 + CREATE, &al->al_qd[x]);
27780 + error = gfs_quota_get(sdp, FALSE, ip->i_di.di_gid,
27781 + CREATE, &al->al_qd[x]);
27786 + if (uid != NO_QUOTA_CHANGE) {
27787 + error = gfs_quota_get(sdp, TRUE, uid,
27788 + CREATE, &al->al_qd[x]);
27794 + if (gid != NO_QUOTA_CHANGE) {
27795 + error = gfs_quota_get(sdp, FALSE, gid,
27796 + CREATE, &al->al_qd[x]);
27802 + al->al_qd_num = x;
27808 + al->al_qd_num = x;
27809 + gfs_quota_unhold_m(ip);
27816 + * gfs_quota_unhold_m - throw away some quota locks
27817 + * @ip: the inode whose ip->i_alloc->al_qd array holds the structures
27822 +gfs_quota_unhold_m(struct gfs_inode *ip)
27824 + struct gfs_sbd *sdp = ip->i_sbd;
27825 + struct gfs_alloc *al = ip->i_alloc;
27828 + GFS_ASSERT_INODE(al &&
27829 + !test_bit(GIF_QD_LOCKED, &ip->i_flags), ip,);
27831 + for (x = 0; x < al->al_qd_num; x++) {
27832 + gfs_quota_put(sdp, al->al_qd[x]);
27833 + al->al_qd[x] = NULL;
27835 + al->al_qd_num = 0;
27839 + * gfs_quota_lock_m - Acquire the quota locks for up to 4 IDs
27840 + * @ip: Two of the IDs are the UID and GID from this file
27841 + * @uid: a UID or the constant NO_QUOTA_CHANGE
27842 + * @gid: a GID or the constant NO_QUOTA_CHANGE
27844 + * The struct gfs_quota_data structures representing the locks are
27845 + * stored in the ip->i_alloc->al_qd array.
27847 + * Returns: 0 on success, -EXXX on failure
27851 +gfs_quota_lock_m(struct gfs_inode *ip, uint32_t uid, uint32_t gid)
27853 + struct gfs_sbd *sdp = ip->i_sbd;
27854 + struct gfs_alloc *al = ip->i_alloc;
27858 + gfs_quota_hold_m(ip, uid, gid);
27860 + if (!sdp->sd_tune.gt_quota_enforce)
27862 + if (capable(CAP_SYS_RESOURCE))
27865 + gfs_sort(al->al_qd, al->al_qd_num,
27866 + sizeof(struct gfs_quota_data *), sort_qd);
27868 + for (x = 0; x < al->al_qd_num; x++) {
27869 + error = glock_q(sdp, al->al_qd[x], FALSE, &al->al_qd_ghs[x]);
27874 + set_bit(GIF_QD_LOCKED, &ip->i_flags);
27880 + gfs_glock_dq_uninit(&al->al_qd_ghs[x]);
27886 + * gfs_quota_unlock_m - drop some quota locks
27887 + * @ip: the inode whose ip->i_alloc->al_qd array holds the locks
27892 +gfs_quota_unlock_m(struct gfs_inode *ip)
27894 + struct gfs_sbd *sdp = ip->i_sbd;
27895 + struct gfs_alloc *al = ip->i_alloc;
27896 + struct gfs_quota_data *qd, *qda[4];
27898 + unsigned int count = 0;
27902 + if (!test_and_clear_bit(GIF_QD_LOCKED, &ip->i_flags))
27905 + for (x = 0; x < al->al_qd_num; x++) {
27906 + qd = al->al_qd[x];
27908 + spin_lock(&sdp->sd_quota_lock);
27909 + value = qd->qd_change_new + qd->qd_change_ic;
27910 + spin_unlock(&sdp->sd_quota_lock);
27913 + if (!qd->qd_qb.qb_limit)
27915 + else if (qd->qd_qb.qb_value >= (int64_t)qd->qd_qb.qb_limit)
27919 + v = value * gfs_num_journals(sdp) * sdp->sd_tune.gt_quota_scale_num;
27920 + do_div(v, sdp->sd_tune.gt_quota_scale_den);
27921 + v += qd->qd_qb.qb_value;
27922 + if (v < (int64_t)qd->qd_qb.qb_limit)
27926 + gfs_glock_dq_uninit(&al->al_qd_ghs[x]);
27929 + gfs_log_flush(sdp);
27930 + if (quota_trylock(sdp, qd))
27931 + qda[count++] = qd;
27936 + do_quota_sync(sdp, qda, count);
27938 + for (x = 0; x < count; x++)
27939 + quota_unlock(sdp, qda[x]);
27943 + gfs_quota_unhold_m(ip);
27947 + * print_quota_message - print a message to the user's tty about quotas
27948 + * @sdp: the filesystem
27949 + * @qd: the quota ID that the message is about
27950 + * @type: the type of message ("exceeded" or "warning")
27955 +print_quota_message(struct gfs_sbd *sdp, struct gfs_quota_data *qd, char *type)
27957 + char *line = gmalloc(256);
27959 + struct tty_struct *tty;
27961 + len = snprintf(line, 256, "GFS: fsid=%s: quota %s for %s %u\r\n",
27962 + sdp->sd_fsname, type,
27963 + (test_bit(QDF_USER, &qd->qd_flags)) ? "user" : "group",
27966 + if (current->signal) {
27967 + tty = current->signal->tty;
27968 + if (tty && tty->driver->write)
27969 + tty->driver->write(tty, 0, line, len);
27976 + * gfs_quota_check - Check to see if a block allocation is possible
27977 + * @ip: the inode whose ip->i_res.ir_qd array holds the quota locks
27978 + * @uid: the UID the block is allocated for
27979 + * @gid: the GID the block is allocated for
27984 +gfs_quota_check(struct gfs_inode *ip, uint32_t uid, uint32_t gid)
27986 + struct gfs_sbd *sdp = ip->i_sbd;
27987 + struct gfs_alloc *al = ip->i_alloc;
27988 + struct gfs_quota_data *qd;
27996 + for (x = 0; x < al->al_qd_num; x++) {
27997 + qd = al->al_qd[x];
27999 + if (!((qd->qd_id == uid && test_bit(QDF_USER, &qd->qd_flags)) ||
28000 + (qd->qd_id == gid && !test_bit(QDF_USER, &qd->qd_flags))))
28003 + spin_lock(&sdp->sd_quota_lock);
28004 + value = qd->qd_change_new + qd->qd_change_ic;
28005 + spin_unlock(&sdp->sd_quota_lock);
28006 + value += qd->qd_qb.qb_value;
28008 + if (qd->qd_qb.qb_limit && (int64_t)qd->qd_qb.qb_limit < value) {
28009 + print_quota_message(sdp, qd, "exceeded");
28012 + } else if (qd->qd_qb.qb_warn &&
28013 + (int64_t)qd->qd_qb.qb_warn < value &&
28014 + time_after_eq(jiffies,
28015 + qd->qd_last_warn +
28016 + sdp->sd_tune.gt_quota_warn_period * HZ)) {
28017 + print_quota_message(sdp, qd, "warning");
28018 + qd->qd_last_warn = jiffies;
28026 + * gfs_quota_sync - Sync quota changes to the quota file
28027 + * @sdp: the filesystem
28029 + * Returns: 0 on success, -EXXX on failure
28033 +gfs_quota_sync(struct gfs_sbd *sdp)
28035 + struct gfs_quota_data **qda;
28036 + unsigned int max_qd = sdp->sd_tune.gt_quota_simul_sync;
28037 + unsigned int num_qd;
28041 + sdp->sd_quota_sync_gen++;
28043 + qda = gmalloc(max_qd * sizeof(struct gfs_quota_data *));
28045 + memset(qda, 0, max_qd * sizeof(struct gfs_quota_data *));
28051 + qda[num_qd] = quota_find(sdp);
28052 + if (!qda[num_qd])
28055 + if (++num_qd == max_qd)
28060 + error = do_quota_sync(sdp, qda, num_qd);
28062 + for (x = 0; x < num_qd; x++)
28063 + qda[x]->qd_sync_gen =
28064 + sdp->sd_quota_sync_gen;
28066 + for (x = 0; x < num_qd; x++)
28067 + quota_unlock(sdp, qda[x]);
28070 + while (!error && num_qd == max_qd);
28078 + * gfs_quota_refresh - Refresh the LVB for a given quota ID
28079 + * @sdp: the filesystem
28080 + * @arg: a pointer to a struct gfs_quota_name in user space
28082 + * Returns: 0 on success, -EXXX on failure
28086 +gfs_quota_refresh(struct gfs_sbd *sdp, void *arg)
28088 + struct gfs_quota_name qn;
28089 + struct gfs_quota_data *qd;
28090 + struct gfs_holder q_gh;
28093 + if (copy_from_user(&qn, arg, sizeof(struct gfs_quota_name)))
28096 + error = gfs_quota_get(sdp, qn.qn_user, qn.qn_id, CREATE, &qd);
28100 + error = glock_q(sdp, qd, TRUE, &q_gh);
28102 + gfs_glock_dq_uninit(&q_gh);
28104 + gfs_quota_put(sdp, qd);
28110 + * gfs_quota_read - Read the info for a given quota ID
28111 + * @sdp: the filesystem
28112 + * @arg: a pointer to a gfs_quota_refresh_t in user space
28114 + * Returns: 0 on success, -EXXX on failure
28118 +gfs_quota_read(struct gfs_sbd *sdp, void *arg)
28120 + struct gfs_quota_name qn;
28121 + struct gfs_quota_data *qd;
28122 + struct gfs_holder q_gh;
28123 + struct gfs_quota q;
28126 + if (copy_from_user(&qn, arg, sizeof(struct gfs_quota_name)))
28129 + if (((qn.qn_user) ?
28130 + (qn.qn_id != current->fsuid) :
28131 + (!in_group_p(qn.qn_id))) &&
28132 + !capable(CAP_SYS_ADMIN))
28135 + error = gfs_quota_get(sdp, qn.qn_user, qn.qn_id, CREATE, &qd);
28139 + error = glock_q(sdp, qd, FALSE, &q_gh);
28143 + memset(&q, 0, sizeof(struct gfs_quota));
28144 + q.qu_limit = qd->qd_qb.qb_limit;
28145 + q.qu_warn = qd->qd_qb.qb_warn;
28146 + q.qu_value = qd->qd_qb.qb_value;
28148 + spin_lock(&sdp->sd_quota_lock);
28149 + q.qu_value += qd->qd_change_new + qd->qd_change_ic;
28150 + spin_unlock(&sdp->sd_quota_lock);
28152 + gfs_glock_dq_uninit(&q_gh);
28155 + gfs_quota_put(sdp, qd);
28158 + copy_to_user((char *)arg + sizeof(struct gfs_quota_name),
28159 + &q, sizeof(struct gfs_quota)))
28164 diff -urN linux-orig/fs/gfs/quota.h linux-patched/fs/gfs/quota.h
28165 --- linux-orig/fs/gfs/quota.h 1969-12-31 18:00:00.000000000 -0600
28166 +++ linux-patched/fs/gfs/quota.h 2004-06-30 13:27:49.356708115 -0500
28168 +/******************************************************************************
28169 +*******************************************************************************
28171 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
28172 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
28174 +** This copyrighted material is made available to anyone wishing to use,
28175 +** modify, copy, or redistribute it subject to the terms and conditions
28176 +** of the GNU General Public License v.2.
28178 +*******************************************************************************
28179 +******************************************************************************/
28181 +#ifndef __QUOTA_DOT_H__
28182 +#define __QUOTA_DOT_H__
28184 +#define NO_QUOTA_CHANGE ((uint32_t)-1)
28186 +int gfs_quota_get(struct gfs_sbd *sdp, int user, uint32_t id, int create,
28187 + struct gfs_quota_data **qdp);
28188 +void gfs_quota_hold(struct gfs_sbd *sdp, struct gfs_quota_data *qd);
28189 +void gfs_quota_put(struct gfs_sbd *sdp, struct gfs_quota_data *qd);
28191 +int gfs_quota_merge(struct gfs_sbd *sdp, struct gfs_quota_tag *tag);
28192 +void gfs_quota_scan(struct gfs_sbd *sdp);
28193 +void gfs_quota_cleanup(struct gfs_sbd *sdp);
28195 +int gfs_quota_hold_m(struct gfs_inode *ip, uint32_t uid, uint32_t gid);
28196 +void gfs_quota_unhold_m(struct gfs_inode *ip);
28198 +int gfs_quota_lock_m(struct gfs_inode *ip, uint32_t uid, uint32_t gid);
28199 +void gfs_quota_unlock_m(struct gfs_inode *ip);
28201 +int gfs_quota_check(struct gfs_inode *ip, uint32_t uid, uint32_t gid);
28203 +int gfs_quota_sync(struct gfs_sbd *sdp);
28204 +int gfs_quota_refresh(struct gfs_sbd *sdp, void *arg);
28205 +int gfs_quota_read(struct gfs_sbd *sdp, void *arg);
28207 +#endif /* __QUOTA_DOT_H__ */
28208 diff -urN linux-orig/fs/gfs/recovery.c linux-patched/fs/gfs/recovery.c
28209 --- linux-orig/fs/gfs/recovery.c 1969-12-31 18:00:00.000000000 -0600
28210 +++ linux-patched/fs/gfs/recovery.c 2004-06-30 13:27:49.357707883 -0500
28212 +/******************************************************************************
28213 +*******************************************************************************
28215 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
28216 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
28218 +** This copyrighted material is made available to anyone wishing to use,
28219 +** modify, copy, or redistribute it subject to the terms and conditions
28220 +** of the GNU General Public License v.2.
28222 +*******************************************************************************
28223 +******************************************************************************/
28225 +#include <linux/sched.h>
28226 +#include <linux/slab.h>
28227 +#include <linux/smp_lock.h>
28228 +#include <linux/spinlock.h>
28229 +#include <asm/semaphore.h>
28230 +#include <linux/completion.h>
28231 +#include <linux/buffer_head.h>
28235 +#include "glock.h"
28236 +#include "glops.h"
28238 +#include "recovery.h"
28240 +#define bn2seg(bn) (((uint32_t)((bn) - jdesc->ji_addr)) / sdp->sd_sb.sb_seg_size)
28241 +#define seg2bn(seg) ((seg) * sdp->sd_sb.sb_seg_size + jdesc->ji_addr)
28244 + struct list_head dj_list;
28245 + unsigned int dj_jid;
28246 + struct gfs_jindex dj_desc;
28250 + * gfs_add_dirty_j - add a jid to the list of dirty journals
28251 + * @sdp: the filesystem
28252 + * @jid: the journal ID number
28257 +gfs_add_dirty_j(struct gfs_sbd *sdp, unsigned int jid)
28259 + struct dirty_j *dj;
28261 + dj = gmalloc(sizeof(struct dirty_j));
28262 + memset(dj, 0, sizeof(struct dirty_j));
28264 + dj->dj_jid = jid;
28266 + spin_lock(&sdp->sd_dirty_j_lock);
28267 + list_add(&dj->dj_list, &sdp->sd_dirty_j);
28268 + spin_unlock(&sdp->sd_dirty_j_lock);
28272 + * get_dirty_j - return a dirty journal from the list
28273 + * @sdp: the filesystem
28275 + * Returns: a struct dirty_j or NULL
28278 +static struct dirty_j *
28279 +get_dirty_j(struct gfs_sbd *sdp)
28281 + struct dirty_j *dj = NULL;
28283 + spin_lock(&sdp->sd_dirty_j_lock);
28284 + if (!list_empty(&sdp->sd_dirty_j)) {
28285 + dj = list_entry(sdp->sd_dirty_j.prev, struct dirty_j, dj_list);
28286 + list_del(&dj->dj_list);
28288 + spin_unlock(&sdp->sd_dirty_j_lock);
28294 + * gfs_clear_dirty_j - destroy the list of dirty journals
28295 + * @sdp: the filesystem
28300 +gfs_clear_dirty_j(struct gfs_sbd *sdp)
28302 + struct dirty_j *dj;
28304 + dj = get_dirty_j(sdp);
28312 + * gfs_log_header - read the log header for a given segment
28313 + * @sdp: the filesystem
28314 + * @jdesc: the journal
28315 + * @gl: the journal's glock
28316 + * @seg: the segment to look at
28317 + * @lh: the log header to return
28319 + * Read the log header for a given segment in a given journal. Do a few
28320 + * sanity checks on it.
28322 + * Returns: 0 on success, 1 if the header was invalid or incomplete, and -EXXX on error
28326 +get_log_header(struct gfs_sbd *sdp, struct gfs_jindex *jdesc,
28327 + struct gfs_glock *gl, uint32_t seg, struct gfs_log_header *lh)
28329 + struct buffer_head *bh;
28330 + struct gfs_log_header lh2;
28333 + error = gfs_dread(sdp, seg2bn(seg), gl, DIO_START | DIO_WAIT, &bh);
28337 + gfs_log_header_in(lh, bh->b_data);
28338 + gfs_log_header_in(&lh2,
28339 + bh->b_data + GFS_BASIC_BLOCK -
28340 + sizeof(struct gfs_log_header));
28344 + if (memcmp(lh, &lh2, sizeof(struct gfs_log_header)) != 0 ||
28345 + lh->lh_header.mh_magic != GFS_MAGIC ||
28346 + lh->lh_header.mh_type != GFS_METATYPE_LH)
28353 + * find_good_lh - find a good log header
28354 + * @sdp: the filesystem
28355 + * @jdesc: the journal
28356 + * @gl: the journal's glock
28357 + * @seg: the segment to start searching from (it's also filled in with a new value.)
28358 + * @lh: the log header to fill in
28359 + * @forward: if true search forward in the log, else search backward
28361 + * Call get_log_header() to get a log header for a segment, but if the
28362 + * segment is bad, either scan forward or backward until we find a good one.
28364 + * Returns: 0 on success, -EXXX on failure
28368 +find_good_lh(struct gfs_sbd *sdp, struct gfs_jindex *jdesc,
28369 + struct gfs_glock *gl, uint32_t *seg, struct gfs_log_header *lh,
28373 + uint32_t orig_seg = *seg;
28376 + error = get_log_header(sdp, jdesc, gl, *seg, lh);
28381 + if (++*seg == jdesc->ji_nsegment)
28385 + *seg = jdesc->ji_nsegment - 1;
28388 + GFS_ASSERT_SBD(*seg != orig_seg, sdp,);
28393 + * verify_jhead - make sure we've found the head of the log
28394 + * @sdp: the filesystem
28395 + * @jdesc: the journal
28396 + * @gl: the journal's glock
28397 + * @head: this is filled in with the log descriptor of the head
28399 + * At this point, seg and lh should be either the head of the log or just
28400 + * before. Scan forward until we find the head.
28402 + * Returns: 0 on success, -EXXX on failure
28406 +verify_jhead(struct gfs_sbd *sdp, struct gfs_jindex *jdesc,
28407 + struct gfs_glock *gl, struct gfs_log_header *head)
28409 + struct gfs_log_header lh;
28413 + seg = bn2seg(head->lh_first);
28416 + if (++seg == jdesc->ji_nsegment)
28419 + error = get_log_header(sdp, jdesc, gl, seg, &lh);
28425 + if (lh.lh_sequence == head->lh_sequence)
28428 + if (lh.lh_sequence < head->lh_sequence)
28431 + memcpy(head, &lh, sizeof(struct gfs_log_header));
28438 + * gfs_find_jhead - find the head of a log
28439 + * @sdp: the filesystem
28440 + * @jdesc: the journal
28441 + * @gl: the journal's glock
28442 + * @head: the log descriptor for the head of the log is returned here
28444 + * Do a binary search of a journal and find the valid log entry with the
28445 + * highest sequence number. (i.e. the log head)
28447 + * Returns: 0 on success, -EXXX on failure
28451 +gfs_find_jhead(struct gfs_sbd *sdp, struct gfs_jindex *jdesc,
28452 + struct gfs_glock *gl, struct gfs_log_header *head)
28454 + struct gfs_log_header lh1, lh_m;
28455 + uint32_t seg1, seg2, seg_m;
28459 + seg2 = jdesc->ji_nsegment - 1;
28462 + seg_m = (seg1 + seg2) / 2;
28464 + error = find_good_lh(sdp, jdesc, gl, &seg1, &lh1, TRUE);
28468 + if (seg1 == seg_m) {
28469 + error = verify_jhead(sdp, jdesc, gl, &lh1);
28470 + memcpy(head, &lh1, sizeof(struct gfs_log_header));
28474 + error = find_good_lh(sdp, jdesc, gl, &seg_m, &lh_m, FALSE);
28478 + if (lh1.lh_sequence <= lh_m.lh_sequence)
28488 + * gfs_increment_blkno - move to the next block in a journal
28489 + * @sdp: the filesystem
28490 + * @jdesc: the journal
28491 + * @gl: the journal's glock
28492 + * @addr: the block number to increment
28493 + * @skip_header: if this is TRUE, skip log headers
28495 + * Replace @addr with the location of the next block in the log.
28496 + * Take care of journal wrap and skip of log header if necessary.
28498 + * Returns: 0 on success, -EXXX on failure
28502 +gfs_increment_blkno(struct gfs_sbd *sdp, struct gfs_jindex *jdesc,
28503 + struct gfs_glock *gl, uint64_t *addr, int skip_headers)
28505 + struct gfs_log_header header;
28510 + /* Handle journal wrap */
28512 + if (*addr == seg2bn(jdesc->ji_nsegment))
28513 + *addr -= jdesc->ji_nsegment * sdp->sd_sb.sb_seg_size;
28515 + gfs_start_ra(gl, *addr,
28517 + jdesc->ji_nsegment * sdp->sd_sb.sb_seg_size - *addr);
28519 + /* Handle landing on a header block */
28521 + if (skip_headers && !do_mod(*addr, sdp->sd_sb.sb_seg_size)) {
28522 + error = get_log_header(sdp, jdesc, gl, bn2seg(*addr), &header);
28526 + GFS_ASSERT_SBD(!error, sdp,); /* Corrupt headers here are bad */
28527 + GFS_ASSERT_SBD(header.lh_first != *addr, sdp,
28528 + gfs_log_header_print(&header);
28529 + printk("*addr = %"PRIu64"\n", *addr););
28532 + /* Can't wrap here */
28539 + * foreach_descriptor - go through the active part of the log
28540 + * @sdp: the filesystem
28541 + * @jdesc: the journal
28542 + * @gl: the journal's glock
28543 + * @start: the first log header in the active region
28544 + * @end: the last log header (don't process the contents of this entry))
28545 + * @pass: the recovery pass
28547 + * Call a given function once for every log descriptor in the active
28548 + * portion of the log.
28550 + * Returns: 0 on success, -EXXX on failure
28554 +foreach_descriptor(struct gfs_sbd *sdp, struct gfs_jindex *jdesc,
28555 + struct gfs_glock *gl, uint64_t start, uint64_t end,
28556 + unsigned int pass)
28558 + struct gfs_log_header header;
28559 + struct gfs_log_descriptor desc;
28560 + struct buffer_head *bh;
28563 + while (start != end) {
28564 + GFS_ASSERT_SBD(!do_mod(start, sdp->sd_sb.sb_seg_size), sdp,);
28566 + error = get_log_header(sdp, jdesc, gl, bn2seg(start), &header);
28570 + GFS_ASSERT_SBD(!error, sdp,); /* Corrupt headers are bad */
28571 + GFS_ASSERT_SBD(header.lh_first == start, sdp,
28572 + gfs_log_header_print(&header);
28573 + printk("start = %"PRIu64"\n", start););
28578 + error = gfs_dread(sdp, start, gl, DIO_START | DIO_WAIT, &bh);
28582 + gfs_metatype_check(sdp, bh, GFS_METATYPE_LD);
28583 + gfs_desc_in(&desc, bh->b_data);
28587 + if (desc.ld_type != GFS_LOG_DESC_LAST) {
28588 + error = LO_SCAN_ELEMENTS(sdp, jdesc, gl, start,
28593 + while (desc.ld_length--) {
28594 + error = gfs_increment_blkno(sdp, jdesc, gl,
28600 + while (desc.ld_length--) {
28601 + error = gfs_increment_blkno(sdp, jdesc, gl,
28603 + !!desc.ld_length);
28617 + * clean_journal - mark a dirty journal as being clean
28618 + * @sdp: the filesystem
28619 + * @jdesc: the journal
28620 + * @gl: the journal's glock
28621 + * @head: the head journal to start from
28623 + * Returns: 0 on success, -EXXX on failure
28627 +clean_journal(struct gfs_sbd *sdp, struct gfs_jindex *jdesc,
28628 + struct gfs_glock *gl, struct gfs_log_header *head)
28630 + struct gfs_log_header lh;
28631 + struct gfs_log_descriptor desc;
28632 + struct buffer_head *bh;
28637 + seg = bn2seg(head->lh_first);
28640 + if (++seg == jdesc->ji_nsegment)
28643 + error = get_log_header(sdp, jdesc, gl, seg, &lh);
28647 + /* Rewrite corrupt header blocks */
28649 + if (error == 1) {
28650 + bh = gfs_dgetblk(sdp, seg2bn(seg), gl);
28652 + gfs_prep_new_buffer(bh);
28653 + gfs_buffer_clear(bh);
28654 + gfs_log_header_out(head, bh->b_data);
28655 + gfs_log_header_out(head,
28656 + bh->b_data + GFS_BASIC_BLOCK -
28657 + sizeof(struct gfs_log_header));
28659 + error = gfs_dwrite(sdp, bh, DIO_DIRTY | DIO_START | DIO_WAIT);
28665 + /* Stop when we get to the end of the log. */
28667 + if (lh.lh_sequence < head->lh_sequence)
28671 + /* Build a "last" descriptor for the transaction we are
28672 + about to commit by writing the shutdown header. */
28674 + memset(&desc, 0, sizeof(struct gfs_log_descriptor));
28675 + desc.ld_header.mh_magic = GFS_MAGIC;
28676 + desc.ld_header.mh_type = GFS_METATYPE_LD;
28677 + desc.ld_header.mh_format = GFS_FORMAT_LD;
28678 + desc.ld_type = GFS_LOG_DESC_LAST;
28679 + desc.ld_length = 0;
28681 + for (blkno = head->lh_first + 1; blkno != seg2bn(seg);) {
28682 + if (do_mod(blkno, sdp->sd_sb.sb_seg_size))
28683 + desc.ld_length++;
28684 + if (++blkno == seg2bn(jdesc->ji_nsegment))
28685 + blkno -= jdesc->ji_nsegment * sdp->sd_sb.sb_seg_size;
28688 + /* Write the descriptor */
28690 + bh = gfs_dgetblk(sdp, head->lh_first + 1, gl);
28692 + gfs_prep_new_buffer(bh);
28693 + gfs_buffer_clear(bh);
28694 + gfs_desc_out(&desc, bh->b_data);
28696 + error = gfs_dwrite(sdp, bh, DIO_DIRTY | DIO_START | DIO_WAIT);
28701 + /* Build a log header that says the journal is clean */
28703 + memset(&lh, 0, sizeof(struct gfs_log_header));
28704 + lh.lh_header.mh_magic = GFS_MAGIC;
28705 + lh.lh_header.mh_type = GFS_METATYPE_LH;
28706 + lh.lh_header.mh_format = GFS_FORMAT_LH;
28707 + lh.lh_flags = GFS_LOG_HEAD_UNMOUNT;
28708 + lh.lh_first = seg2bn(seg);
28709 + lh.lh_sequence = head->lh_sequence + 1;
28710 + /* Don't care about tail */
28711 + lh.lh_last_dump = head->lh_last_dump;
28713 + /* Write the header */
28715 + bh = gfs_dgetblk(sdp, lh.lh_first, gl);
28717 + gfs_prep_new_buffer(bh);
28718 + gfs_buffer_clear(bh);
28719 + gfs_log_header_out(&lh, bh->b_data);
28720 + gfs_log_header_out(&lh,
28721 + bh->b_data + GFS_BASIC_BLOCK -
28722 + sizeof(struct gfs_log_header));
28724 + error = gfs_dwrite(sdp, bh, DIO_DIRTY | DIO_START | DIO_WAIT);
28731 + * gfs_recover_journal - recover a given journal
28732 + * @sdp: the filesystem
28733 + * @jid: the number of the journal to recover
28734 + * @jdesc: the struct gfs_jindex describing the journal
28735 + * @wait: Don't return until the journal is clean (or an error is encountered)
28737 + * Acquire a journal's lock, check to see if the journal is clean, and
28738 + * do recovery if necessary.
28740 + * Returns: 0 on success, -EXXX on failure
28744 +gfs_recover_journal(struct gfs_sbd *sdp,
28745 + unsigned int jid, struct gfs_jindex *jdesc,
28748 + struct gfs_log_header head;
28749 + struct gfs_holder j_gh, t_gh;
28753 + printk("GFS: fsid=%s: jid=%u: Trying to acquire journal lock...\n",
28754 + sdp->sd_fsname, jid);
28756 + /* Acquire the journal lock so we can do recovery */
28758 + error = gfs_glock_nq_num(sdp,
28759 + jdesc->ji_addr, &gfs_meta_glops,
28762 + ((wait) ? 0 : LM_FLAG_TRY) |
28763 + GL_NOCACHE, &j_gh);
28768 + case GLR_TRYFAILED:
28769 + GFS_ASSERT_SBD(!wait, sdp,);
28770 + printk("GFS: fsid=%s: jid=%u: Busy\n", sdp->sd_fsname, jid);
28777 + printk("GFS: fsid=%s: jid=%u: Looking at journal...\n",
28778 + sdp->sd_fsname, jid);
28780 + error = gfs_find_jhead(sdp, jdesc, j_gh.gh_gl, &head);
28782 + goto fail_gunlock;
28784 + if (!(head.lh_flags & GFS_LOG_HEAD_UNMOUNT)) {
28785 + if (test_bit(SDF_ROFS, &sdp->sd_flags)) {
28786 + printk("GFS: fsid=%s: jid=%u: Can't replay: read-only FS\n",
28787 + sdp->sd_fsname, jid);
28789 + goto fail_gunlock;
28792 + printk("GFS: fsid=%s: jid=%u: Acquiring the transaction lock...\n",
28793 + sdp->sd_fsname, jid);
28797 + /* Acquire an exclusive hold on the transaction lock */
28799 + error = gfs_glock_nq_init(sdp->sd_trans_gl,
28802 + LM_FLAG_PRIORITY |
28806 + goto fail_gunlock;
28808 + if (test_bit(SDF_ROFS, &sdp->sd_flags)) {
28809 + printk("GFS: fsid=%s: jid=%u: Can't replay: read-only FS\n",
28810 + sdp->sd_fsname, jid);
28812 + goto fail_gunlock_tr;
28815 + printk("GFS: fsid=%s: jid=%u: Replaying journal...\n",
28816 + sdp->sd_fsname, jid);
28818 + set_bit(GLF_DIRTY, &j_gh.gh_gl->gl_flags);
28820 + LO_BEFORE_SCAN(sdp, jid, &head, GFS_RECPASS_A1);
28822 + error = foreach_descriptor(sdp, jdesc, j_gh.gh_gl,
28823 + head.lh_tail, head.lh_first,
28826 + goto fail_gunlock_tr;
28828 + LO_AFTER_SCAN(sdp, jid, GFS_RECPASS_A1);
28830 + gfs_replay_wait(sdp);
28832 + error = clean_journal(sdp, jdesc, j_gh.gh_gl, &head);
28834 + goto fail_gunlock_tr;
28836 + gfs_glock_dq_uninit(&t_gh);
28838 + t = DIV_RU(jiffies - t, HZ);
28840 + printk("GFS: fsid=%s: jid=%u: Journal replayed in %lus\n",
28841 + sdp->sd_fsname, jid, t);
28844 + sdp->sd_lockstruct.ls_ops->lm_recovery_done(sdp->sd_lockstruct.ls_lockspace,
28848 + gfs_glock_dq_uninit(&j_gh);
28850 + printk("GFS: fsid=%s: jid=%u: Done\n", sdp->sd_fsname, jid);
28855 + gfs_replay_wait(sdp);
28856 + gfs_glock_dq_uninit(&t_gh);
28859 + gfs_glock_dq_uninit(&j_gh);
28861 + printk("GFS: fsid=%s: jid=%u: %s\n",
28862 + sdp->sd_fsname, jid, (error) ? "Failed" : "Done");
28865 + sdp->sd_lockstruct.ls_ops->lm_recovery_done(sdp->sd_lockstruct.ls_lockspace,
28873 + * gfs_check_journals - Recover any dirty journals
28874 + * @sdp: the filesystem
28879 +gfs_check_journals(struct gfs_sbd *sdp)
28881 + struct dirty_j *dj;
28884 + dj = get_dirty_j(sdp);
28888 + down(&sdp->sd_jindex_lock);
28890 + if (dj->dj_jid != sdp->sd_lockstruct.ls_jid &&
28891 + dj->dj_jid < sdp->sd_journals) {
28892 + memcpy(&dj->dj_desc,
28893 + sdp->sd_jindex + dj->dj_jid,
28894 + sizeof(struct gfs_jindex));
28895 + up(&sdp->sd_jindex_lock);
28897 + gfs_recover_journal(sdp,
28898 + dj->dj_jid, &dj->dj_desc,
28902 + up(&sdp->sd_jindex_lock);
28903 + sdp->sd_lockstruct.ls_ops->lm_recovery_done(sdp->sd_lockstruct.ls_lockspace,
28904 + dj->dj_jid, LM_RD_GAVEUP);
28912 + * gfs_recover_dump - recover the log elements in this machine's journal
28913 + * @sdp: the filesystem
28915 + * Returns: 0 on success, -EXXX on failure
28919 +gfs_recover_dump(struct gfs_sbd *sdp)
28921 + struct gfs_log_header head;
28924 + error = gfs_find_jhead(sdp, &sdp->sd_jdesc, sdp->sd_journal_gh.gh_gl,
28929 + GFS_ASSERT_SBD(head.lh_flags & GFS_LOG_HEAD_UNMOUNT, sdp,);
28930 + if (!head.lh_last_dump)
28933 + printk("GFS: fsid=%s: Scanning for log elements...\n",
28936 + LO_BEFORE_SCAN(sdp, sdp->sd_lockstruct.ls_jid, &head, GFS_RECPASS_B1);
28938 + error = foreach_descriptor(sdp, &sdp->sd_jdesc, sdp->sd_journal_gh.gh_gl,
28939 + head.lh_last_dump, head.lh_first,
28944 + LO_AFTER_SCAN(sdp, sdp->sd_lockstruct.ls_jid, GFS_RECPASS_B1);
28946 + /* We need to make sure if we crash during the next log dump that
28947 + all intermediate headers in the transaction point to the last
28948 + log dump before the one we're making so we don't lose it. */
28950 + sdp->sd_log_dump_last = head.lh_last_dump;
28952 + printk("GFS: fsid=%s: Done\n", sdp->sd_fsname);
28957 + printk("GFS: fsid=%s: Failed\n", sdp->sd_fsname);
28961 diff -urN linux-orig/fs/gfs/recovery.h linux-patched/fs/gfs/recovery.h
28962 --- linux-orig/fs/gfs/recovery.h 1969-12-31 18:00:00.000000000 -0600
28963 +++ linux-patched/fs/gfs/recovery.h 2004-06-30 13:27:49.357707883 -0500
28965 +/******************************************************************************
28966 +*******************************************************************************
28968 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
28969 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
28971 +** This copyrighted material is made available to anyone wishing to use,
28972 +** modify, copy, or redistribute it subject to the terms and conditions
28973 +** of the GNU General Public License v.2.
28975 +*******************************************************************************
28976 +******************************************************************************/
28978 +#ifndef __RECOVERY_DOT_H__
28979 +#define __RECOVERY_DOT_H__
28981 +#define GFS_RECPASS_A1 (12)
28982 +#define GFS_RECPASS_B1 (14)
28984 +void gfs_add_dirty_j(struct gfs_sbd *sdp, unsigned int jid);
28985 +void gfs_clear_dirty_j(struct gfs_sbd *sdp);
28987 +int gfs_find_jhead(struct gfs_sbd *sdp, struct gfs_jindex *jdesc,
28988 + struct gfs_glock *gl, struct gfs_log_header *head);
28989 +int gfs_increment_blkno(struct gfs_sbd *sdp, struct gfs_jindex *jdesc,
28990 + struct gfs_glock *gl, uint64_t *addr,
28991 + int skip_headers);
28993 +int gfs_recover_journal(struct gfs_sbd *sdp,
28994 + unsigned int jid, struct gfs_jindex *jdesc,
28996 +void gfs_check_journals(struct gfs_sbd *sdp);
28998 +int gfs_recover_dump(struct gfs_sbd *sdp);
29000 +#endif /* __RECOVERY_DOT_H__ */
29001 diff -urN linux-orig/fs/gfs/rgrp.c linux-patched/fs/gfs/rgrp.c
29002 --- linux-orig/fs/gfs/rgrp.c 1969-12-31 18:00:00.000000000 -0600
29003 +++ linux-patched/fs/gfs/rgrp.c 2004-06-30 13:27:49.358707651 -0500
29005 +/******************************************************************************
29006 +*******************************************************************************
29008 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
29009 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
29011 +** This copyrighted material is made available to anyone wishing to use,
29012 +** modify, copy, or redistribute it subject to the terms and conditions
29013 +** of the GNU General Public License v.2.
29015 +*******************************************************************************
29016 +******************************************************************************/
29018 +#include <linux/sched.h>
29019 +#include <linux/slab.h>
29020 +#include <linux/smp_lock.h>
29021 +#include <linux/spinlock.h>
29022 +#include <asm/semaphore.h>
29023 +#include <linux/completion.h>
29024 +#include <linux/buffer_head.h>
29030 +#include "glock.h"
29031 +#include "glops.h"
29033 +#include "super.h"
29034 +#include "trans.h"
29037 + * mhc_hash: find the mhc hash bucket for a buffer
29038 + * @bh: the buffer
29040 + * Returns: The bucket number
29043 +static unsigned int
29044 +mhc_hash(struct buffer_head *bh)
29049 + blkno = bh->b_blocknr;
29050 + h = gfs_hash(&blkno, sizeof(uint64_t)) & GFS_MHC_HASH_MASK;
29063 +mhc_trim(struct gfs_sbd *sdp, unsigned int max)
29065 + struct gfs_meta_header_cache *mc;
29068 + spin_lock(&sdp->sd_mhc_lock);
29069 + if (list_empty(&sdp->sd_mhc_single)) {
29070 + spin_unlock(&sdp->sd_mhc_lock);
29073 + mc = list_entry(sdp->sd_mhc_single.prev,
29074 + struct gfs_meta_header_cache,
29076 + list_del(&mc->mc_list_hash);
29077 + list_del(&mc->mc_list_single);
29078 + list_del(&mc->mc_list_rgd);
29079 + spin_unlock(&sdp->sd_mhc_lock);
29081 + kmem_cache_free(gfs_mhc_cachep, mc);
29082 + atomic_dec(&sdp->sd_mhc_count);
29084 + if (atomic_read(&sdp->sd_mhc_count) <= max)
29091 + * gfs_mhc_add - add buffers to the cache of metadata
29093 + * @bh: an array of buffers
29094 + * @num: the number of buffers in the array
29099 +gfs_mhc_add(struct gfs_rgrpd *rgd,
29100 + struct buffer_head **bh, unsigned int num)
29102 + struct gfs_sbd *sdp = rgd->rd_sbd;
29103 + struct gfs_meta_header_cache *mc;
29106 + struct list_head *head;
29108 + for (x = 0; x < num; x++) {
29109 + gfs_meta_check(sdp, bh[x]);
29111 + RETRY_MALLOC(mc = kmem_cache_alloc(gfs_mhc_cachep, GFP_KERNEL), mc);
29112 + memset(mc, 0, sizeof(struct gfs_meta_header_cache));
29114 + mc->mc_block = bh[x]->b_blocknr;
29115 + memcpy(&mc->mc_mh, bh[x]->b_data,
29116 + sizeof(struct gfs_meta_header));
29118 + gen = gfs64_to_cpu(mc->mc_mh.mh_generation) + 2;
29119 + mc->mc_mh.mh_generation = cpu_to_gfs64(gen);
29121 + head = &sdp->sd_mhc[mhc_hash(bh[x])];
29123 + spin_lock(&sdp->sd_mhc_lock);
29124 + list_add(&mc->mc_list_hash, head);
29125 + list_add(&mc->mc_list_single, &sdp->sd_mhc_single);
29126 + list_add(&mc->mc_list_rgd, &rgd->rd_mhc);
29127 + spin_unlock(&sdp->sd_mhc_lock);
29129 + atomic_inc(&sdp->sd_mhc_count);
29132 + if (atomic_read(&sdp->sd_mhc_count) > sdp->sd_tune.gt_max_mhc)
29133 + mhc_trim(sdp, sdp->sd_tune.gt_max_mhc);
29137 + * gfs_mhc_fish - Try to fill in a buffer with data from the cache
29138 + * @sdp: the filesystem
29139 + * @bh: the buffer to fill in
29141 + * Returns: TRUE if the buffer was cached, FALSE otherwise
29145 +gfs_mhc_fish(struct gfs_sbd *sdp, struct buffer_head *bh)
29147 + struct list_head *tmp, *head;
29148 + struct gfs_meta_header_cache *mc;
29150 + head = &sdp->sd_mhc[mhc_hash(bh)];
29152 + spin_lock(&sdp->sd_mhc_lock);
29154 + for (tmp = head->next;
29156 + tmp = tmp->next) {
29157 + mc = list_entry(tmp, struct gfs_meta_header_cache, mc_list_hash);
29158 + if (mc->mc_block != bh->b_blocknr)
29161 + list_del(&mc->mc_list_hash);
29162 + list_del(&mc->mc_list_single);
29163 + list_del(&mc->mc_list_rgd);
29164 + spin_unlock(&sdp->sd_mhc_lock);
29166 + gfs_prep_new_buffer(bh);
29167 + memcpy(bh->b_data, &mc->mc_mh,
29168 + sizeof(struct gfs_meta_header));
29170 + kmem_cache_free(gfs_mhc_cachep, mc);
29171 + atomic_dec(&sdp->sd_mhc_count);
29176 + spin_unlock(&sdp->sd_mhc_lock);
29182 + * gfs_mhc_zap - Get rid of the data in the cache of metadata headers
29188 +gfs_mhc_zap(struct gfs_rgrpd *rgd)
29190 + struct gfs_sbd *sdp = rgd->rd_sbd;
29191 + struct gfs_meta_header_cache *mc;
29193 + spin_lock(&sdp->sd_mhc_lock);
29195 + while (!list_empty(&rgd->rd_mhc)) {
29196 + mc = list_entry(rgd->rd_mhc.next,
29197 + struct gfs_meta_header_cache,
29200 + list_del(&mc->mc_list_hash);
29201 + list_del(&mc->mc_list_single);
29202 + list_del(&mc->mc_list_rgd);
29203 + spin_unlock(&sdp->sd_mhc_lock);
29205 + kmem_cache_free(gfs_mhc_cachep, mc);
29206 + atomic_dec(&sdp->sd_mhc_count);
29208 + spin_lock(&sdp->sd_mhc_lock);
29211 + spin_unlock(&sdp->sd_mhc_lock);
29215 + * depend_hash() - Turn glock number into hash bucket number
29218 + * Returns: The number of the corresponding hash bucket
29221 +static unsigned int
29222 +depend_hash(uint64_t formal_ino)
29226 + h = gfs_hash(&formal_ino, sizeof(uint64_t));
29227 + h &= GFS_DEPEND_HASH_MASK;
29233 + * depend_sync_one -
29240 +depend_sync_one(struct gfs_sbd *sdp, struct gfs_depend *gd)
29242 + struct gfs_glock *gl;
29244 + spin_lock(&sdp->sd_depend_lock);
29245 + list_del(&gd->gd_list_hash);
29246 + spin_unlock(&sdp->sd_depend_lock);
29247 + list_del(&gd->gd_list_rgd);
29249 + gl = gfs_glock_find(sdp,
29250 + &(struct lm_lockname){gd->gd_formal_ino,
29253 + if (gl->gl_ops->go_sync)
29254 + gl->gl_ops->go_sync(gl,
29257 + gfs_glock_put(gl);
29261 + atomic_dec(&sdp->sd_depend_count);
29265 + * depend_sync_old -
29271 +depend_sync_old(struct gfs_rgrpd *rgd)
29273 + struct gfs_sbd *sdp = rgd->rd_sbd;
29274 + struct gfs_depend *gd;
29277 + gd = list_entry(rgd->rd_depend.prev,
29278 + struct gfs_depend,
29281 + if (time_before(jiffies,
29283 + sdp->sd_tune.gt_depend_secs * HZ))
29286 + depend_sync_one(sdp, gd);
29291 + * gfs_depend_add -
29298 +gfs_depend_add(struct gfs_rgrpd *rgd, uint64_t formal_ino)
29300 + struct gfs_sbd *sdp = rgd->rd_sbd;
29301 + struct list_head *head, *tmp;
29302 + struct gfs_depend *gd;
29304 + head = &sdp->sd_depend[depend_hash(formal_ino)];
29306 + spin_lock(&sdp->sd_depend_lock);
29308 + for (tmp = head->next;
29310 + tmp = tmp->next) {
29311 + gd = list_entry(tmp, struct gfs_depend, gd_list_hash);
29312 + if (gd->gd_rgd == rgd &&
29313 + gd->gd_formal_ino == formal_ino) {
29314 + list_move(&gd->gd_list_hash, head);
29315 + spin_unlock(&sdp->sd_depend_lock);
29316 + list_move(&gd->gd_list_rgd, &rgd->rd_depend);
29317 + gd->gd_time = jiffies;
29322 + spin_unlock(&sdp->sd_depend_lock);
29324 + gd = gmalloc(sizeof(struct gfs_depend));
29325 + memset(gd, 0, sizeof(struct gfs_depend));
29327 + gd->gd_rgd = rgd;
29328 + gd->gd_formal_ino = formal_ino;
29329 + gd->gd_time = jiffies;
29331 + spin_lock(&sdp->sd_depend_lock);
29332 + list_add(&gd->gd_list_hash, head);
29333 + spin_unlock(&sdp->sd_depend_lock);
29334 + list_add(&gd->gd_list_rgd, &rgd->rd_depend);
29336 + atomic_inc(&sdp->sd_depend_count);
29338 + depend_sync_old(rgd);
29342 + * gfs_depend_sync -
29348 +gfs_depend_sync(struct gfs_rgrpd *rgd)
29350 + struct gfs_sbd *sdp = rgd->rd_sbd;
29351 + struct gfs_depend *gd;
29353 + while (!list_empty(&rgd->rd_depend)) {
29354 + gd = list_entry(rgd->rd_depend.next,
29355 + struct gfs_depend,
29357 + depend_sync_one(sdp, gd);
29362 + * rgrp_verify - Verify that a resource group is consistent
29363 + * @sdp: the filesystem
29366 + * Somebody should have already called gfs_glock_rg() on this RG.
29370 +rgrp_verify(struct gfs_rgrpd *rgd)
29372 + struct gfs_bitmap *bits = NULL;
29373 + uint32_t length = rgd->rd_ri.ri_length;
29374 + uint32_t count[4], tmp;
29377 + memset(count, 0, 4 * sizeof(uint32_t));
29379 + for (buf = 0; buf < length; buf++) {
29380 + bits = &rgd->rd_bits[buf];
29381 + for (x = 0; x < 4; x++)
29382 + count[x] += gfs_bitcount(rgd,
29383 + rgd->rd_bh[buf]->b_data +
29385 + bits->bi_len, x);
29388 + GFS_ASSERT_RGRPD(count[0] == rgd->rd_rg.rg_free, rgd,
29389 + printk("free data mismatch: %u != %u\n",
29390 + count[0], rgd->rd_rg.rg_free););
29392 + tmp = rgd->rd_ri.ri_data -
29393 + (rgd->rd_rg.rg_usedmeta + rgd->rd_rg.rg_freemeta) -
29394 + (rgd->rd_rg.rg_useddi + rgd->rd_rg.rg_freedi) -
29395 + rgd->rd_rg.rg_free;
29396 + GFS_ASSERT_RGRPD(count[1] == tmp, rgd,
29397 + printk("used data mismatch: %u != %u\n",
29398 + count[1], tmp););
29400 + GFS_ASSERT_RGRPD(count[2] == rgd->rd_rg.rg_freemeta, rgd,
29401 + printk("free metadata mismatch: %u != %u\n",
29402 + count[2], rgd->rd_rg.rg_freemeta););
29404 + tmp = rgd->rd_rg.rg_usedmeta +
29405 + (rgd->rd_rg.rg_useddi + rgd->rd_rg.rg_freedi);
29406 + GFS_ASSERT_RGRPD(count[3] == tmp, rgd,
29407 + printk("used metadata mismatch: %u != %u\n",
29408 + count[3], tmp););
29412 + * gfs_blk2rgrpd - Find resource group for a given data block number
29413 + * @sdp: The GFS superblock
29414 + * @n: The data block number
29416 + * Returns: Ths resource group, or NULL if not found
29419 +struct gfs_rgrpd *
29420 +gfs_blk2rgrpd(struct gfs_sbd *sdp, uint64_t blk)
29422 + struct list_head *tmp, *head;
29423 + struct gfs_rgrpd *rgd = NULL;
29424 + struct gfs_rindex *ri;
29426 + spin_lock(&sdp->sd_rg_mru_lock);
29428 + for (head = &sdp->sd_rg_mru_list, tmp = head->next;
29430 + tmp = tmp->next) {
29431 + rgd = list_entry(tmp, struct gfs_rgrpd, rd_list_mru);
29432 + ri = &rgd->rd_ri;
29434 + if (ri->ri_data1 <= blk && blk < ri->ri_data1 + ri->ri_data) {
29435 + list_move(&rgd->rd_list_mru, &sdp->sd_rg_mru_list);
29436 + spin_unlock(&sdp->sd_rg_mru_lock);
29441 + spin_unlock(&sdp->sd_rg_mru_lock);
29447 + * gfs_rgrpd_get_first - get the first RG
29448 + * @sdp: The GFS superblock
29450 + * Returns: The first rgrp in the filesystem
29453 +struct gfs_rgrpd *
29454 +gfs_rgrpd_get_first(struct gfs_sbd *sdp)
29456 + GFS_ASSERT_SBD(!list_empty(&sdp->sd_rglist), sdp,);
29457 + return list_entry(sdp->sd_rglist.next, struct gfs_rgrpd, rd_list);
29461 + * gfs_rgrpd_get_next - get the next RG
29464 + * Returns: The next rgrp
29467 +struct gfs_rgrpd *
29468 +gfs_rgrpd_get_next(struct gfs_rgrpd *rgd)
29470 + if (rgd->rd_list.next == &rgd->rd_sbd->sd_rglist)
29472 + return list_entry(rgd->rd_list.next, struct gfs_rgrpd, rd_list);
29476 + * clear_rgrpdi - Clear up rgrps
29477 + * @sdp: The GFS superblock
29482 +clear_rgrpdi(struct gfs_sbd *sdp)
29484 + struct gfs_rgrpd *rgd;
29485 + struct gfs_glock *gl;
29487 + sdp->sd_rg_forward = NULL;
29489 + while (!list_empty(&sdp->sd_rg_recent)) {
29490 + rgd = list_entry(sdp->sd_rg_recent.next,
29491 + struct gfs_rgrpd, rd_recent);
29492 + list_del(&rgd->rd_recent);
29495 + while (!list_empty(&sdp->sd_rglist)) {
29496 + rgd = list_entry(sdp->sd_rglist.next,
29497 + struct gfs_rgrpd, rd_list);
29500 + list_del(&rgd->rd_list);
29501 + list_del(&rgd->rd_list_mru);
29504 + gfs_glock_force_drop(gl);
29505 + if (atomic_read(&gl->gl_lvb_count))
29506 + gfs_lvb_unhold(gl);
29507 + gl2rgd(gl) = NULL;
29508 + gfs_glock_put(gl);
29511 + if (rgd->rd_bits)
29512 + kfree(rgd->rd_bits);
29514 + kfree(rgd->rd_bh);
29521 + * gfs_clear_rgrpd - Clear up rgrps
29522 + * @sdp: The GFS superblock
29527 +gfs_clear_rgrpd(struct gfs_sbd *sdp)
29529 + down(&sdp->sd_rindex_lock);
29530 + clear_rgrpdi(sdp);
29531 + up(&sdp->sd_rindex_lock);
29535 + * gfs_compute_bitstructs - Compute the bitmap sizes
29536 + * @rgd: The resource group descriptor
29541 +compute_bitstructs(struct gfs_rgrpd *rgd)
29543 + struct gfs_sbd *sdp = rgd->rd_sbd;
29544 + struct gfs_bitmap *bits;
29545 + uint32_t length = rgd->rd_ri.ri_length;
29546 + uint32_t bytes_left, bytes;
29549 + rgd->rd_bits = gmalloc(length * sizeof(struct gfs_bitmap));
29550 + memset(rgd->rd_bits, 0, length * sizeof(struct gfs_bitmap));
29552 + bytes_left = rgd->rd_ri.ri_bitbytes;
29554 + for (x = 0; x < length; x++) {
29555 + bits = &rgd->rd_bits[x];
29557 + if (length == 1) {
29558 + bytes = bytes_left;
29559 + bits->bi_offset = sizeof(struct gfs_rgrp);
29560 + bits->bi_start = 0;
29561 + bits->bi_len = bytes;
29562 + } else if (x == 0) {
29563 + bytes = sdp->sd_sb.sb_bsize - sizeof(struct gfs_rgrp);
29564 + bits->bi_offset = sizeof(struct gfs_rgrp);
29565 + bits->bi_start = 0;
29566 + bits->bi_len = bytes;
29567 + } else if (x + 1 == length) {
29568 + bytes = bytes_left;
29569 + bits->bi_offset = sizeof(struct gfs_meta_header);
29570 + bits->bi_start = rgd->rd_ri.ri_bitbytes - bytes_left;
29571 + bits->bi_len = bytes;
29573 + bytes = sdp->sd_sb.sb_bsize - sizeof(struct gfs_meta_header);
29574 + bits->bi_offset = sizeof(struct gfs_meta_header);
29575 + bits->bi_start = rgd->rd_ri.ri_bitbytes - bytes_left;
29576 + bits->bi_len = bytes;
29579 + bytes_left -= bytes;
29582 + GFS_ASSERT_RGRPD(!bytes_left, rgd,);
29583 + GFS_ASSERT_RGRPD((rgd->rd_bits[length - 1].bi_start +
29584 + rgd->rd_bits[length - 1].bi_len) * GFS_NBBY ==
29585 + rgd->rd_ri.ri_data, rgd,
29586 + printk("start=%u len=%u offset=%u\n",
29587 + rgd->rd_bits[length - 1].bi_start,
29588 + rgd->rd_bits[length - 1].bi_len,
29589 + rgd->rd_bits[length - 1].bi_offset);
29590 + gfs_rindex_print(&rgd->rd_ri););
29592 + rgd->rd_bh = gmalloc(length * sizeof(struct buffer_head *));
29593 + memset(rgd->rd_bh, 0, length * sizeof(struct buffer_head *));
29597 + * gfs_ri_update - Pull in a new resource index from the disk
29598 + * @gl: The glock covering the rindex inode
29600 + * Returns: 0 on successful update, error code otherwise
29604 +gfs_ri_update(struct gfs_inode *ip)
29606 + struct gfs_sbd *sdp = ip->i_sbd;
29607 + struct gfs_rgrpd *rgd;
29608 + char buf[sizeof(struct gfs_rindex)];
29611 + GFS_ASSERT_SBD(!do_mod(ip->i_di.di_size, sizeof(struct gfs_rindex)),
29614 + clear_rgrpdi(sdp);
29616 + for (sdp->sd_rgcount = 0;; sdp->sd_rgcount++) {
29617 + error = gfs_internal_read(ip, buf,
29618 + sdp->sd_rgcount *
29619 + sizeof(struct gfs_rindex),
29620 + sizeof(struct gfs_rindex));
29623 + if (error != sizeof(struct gfs_rindex)) {
29629 + rgd = gmalloc(sizeof(struct gfs_rgrpd));
29630 + memset(rgd, 0, sizeof(struct gfs_rgrpd));
29632 + INIT_LIST_HEAD(&rgd->rd_mhc);
29633 + INIT_LIST_HEAD(&rgd->rd_depend);
29634 + rgd->rd_sbd = sdp;
29636 + list_add_tail(&rgd->rd_list, &sdp->sd_rglist);
29637 + list_add_tail(&rgd->rd_list_mru, &sdp->sd_rg_mru_list);
29639 + gfs_rindex_in(&rgd->rd_ri, buf);
29641 + compute_bitstructs(rgd);
29643 + error = gfs_glock_get(sdp, rgd->rd_ri.ri_addr, &gfs_rgrp_glops,
29644 + CREATE, &rgd->rd_gl);
29648 + error = gfs_lvb_hold(rgd->rd_gl);
29652 + gl2rgd(rgd->rd_gl) = rgd;
29653 + rgd->rd_rg_vn = rgd->rd_gl->gl_vn - 1;
29656 + sdp->sd_riinode_vn = ip->i_gl->gl_vn;
29661 + clear_rgrpdi(sdp);
29667 + * gfs_rindex_hold - Grab a lock on the rindex
29668 + * @sdp: The GFS superblock
29669 + * @ri_gh: the glock holder
29671 + * We grab a lock in the rindex inode to make sure that it doesn't
29672 + * change whilst we are performing an operation. We keep this lock
29673 + * for quite long periods of time compared to other locks. This
29674 + * doesn't matter, since its shared and it is very, very rarely
29675 + * accessed in the exclusive mode.
29677 + * Returns: 0 on success, error code otherwise
29681 +gfs_rindex_hold(struct gfs_sbd *sdp, struct gfs_holder *ri_gh)
29683 + struct gfs_inode *ip = sdp->sd_riinode;
29684 + struct gfs_glock *gl = ip->i_gl;
29687 + error = gfs_glock_nq_init(gl, LM_ST_SHARED, 0, ri_gh);
29691 + if (sdp->sd_riinode_vn != gl->gl_vn) {
29692 + down(&sdp->sd_rindex_lock);
29693 + if (sdp->sd_riinode_vn != gl->gl_vn) {
29694 + error = gfs_ri_update(ip);
29696 + gfs_glock_dq_uninit(ri_gh);
29698 + up(&sdp->sd_rindex_lock);
29705 + * gfs_rgrp_read - Read in a RG's bitmaps
29706 + * @rgd: the struct gfs_rgrpd describing the RG to read in
29708 + * Read in RG bitmaps. Must call gfs_rgrp_relse() it free the bitmaps.
29710 + * Returns: 0 on success, -EXXX on failure
29714 +gfs_rgrp_read(struct gfs_rgrpd *rgd)
29716 + struct gfs_sbd *sdp = rgd->rd_sbd;
29717 + struct gfs_glock *gl = rgd->rd_gl;
29718 + unsigned int x, length = rgd->rd_ri.ri_length;
29721 + for (x = 0; x < length; x++) {
29722 + GFS_ASSERT_RGRPD(!rgd->rd_bh[x], rgd,);
29723 + rgd->rd_bh[x] = gfs_dgetblk(sdp, rgd->rd_ri.ri_addr + x, gl);
29726 + for (x = 0; x < length; x++) {
29727 + error = gfs_dreread(sdp, rgd->rd_bh[x], DIO_START);
29732 + for (x = length; x--;) {
29733 + error = gfs_dreread(sdp, rgd->rd_bh[x], DIO_WAIT);
29736 + gfs_metatype_check(sdp, rgd->rd_bh[x],
29737 + (x) ? GFS_METATYPE_RB : GFS_METATYPE_RG);
29740 + if (rgd->rd_rg_vn != gl->gl_vn) {
29741 + gfs_rgrp_in(&rgd->rd_rg, (rgd->rd_bh[0])->b_data);
29742 + rgd->rd_rg_vn = gl->gl_vn;
29748 + for (x = 0; x < length; x++) {
29749 + brelse(rgd->rd_bh[x]);
29750 + rgd->rd_bh[x] = NULL;
29757 + * gfs_rgrp_relse - Release RG bitmaps read in with gfs_rgrp_read()
29758 + * @rgd: the struct gfs_rgrpd describing the RG to read in
29763 +gfs_rgrp_relse(struct gfs_rgrpd *rgd)
29765 + int x, length = rgd->rd_ri.ri_length;
29767 + for (x = 0; x < length; x++) {
29768 + brelse(rgd->rd_bh[x]);
29769 + rgd->rd_bh[x] = NULL;
29774 + * gfs_rgrp_lvb_fill - copy RG usage data out of the struct gfs_rgrp into the struct gfs_rgrp_lvb
29775 + * @rgd: the resource group data structure
29780 +gfs_rgrp_lvb_fill(struct gfs_rgrpd *rgd)
29782 + struct gfs_rgrp *rg = &rgd->rd_rg;
29783 + struct gfs_rgrp_lvb *rb = (struct gfs_rgrp_lvb *)rgd->rd_gl->gl_lvb;
29785 + rb->rb_magic = cpu_to_gfs32(GFS_MAGIC);
29786 + rb->rb_free = cpu_to_gfs32(rg->rg_free);
29787 + rb->rb_useddi = cpu_to_gfs32(rg->rg_useddi);
29788 + rb->rb_freedi = cpu_to_gfs32(rg->rg_freedi);
29789 + rb->rb_usedmeta = cpu_to_gfs32(rg->rg_usedmeta);
29790 + rb->rb_freemeta = cpu_to_gfs32(rg->rg_freemeta);
29792 + clear_bit(GLF_LVB_INVALID, &rgd->rd_gl->gl_flags);
29796 + * gfs_rgrp_lvb_init - Init the data of a RG LVB
29797 + * @rgd: the resource group data structure
29799 + * Returns: 0 on success, -EXXX on failure
29803 +gfs_rgrp_lvb_init(struct gfs_rgrpd *rgd)
29805 + struct gfs_glock *gl = rgd->rd_gl;
29806 + struct gfs_holder rgd_gh;
29809 + error = gfs_glock_nq_init(gl, LM_ST_EXCLUSIVE, 0, &rgd_gh);
29811 + gfs_rgrp_lvb_fill(rgd);
29812 + gfs_glock_dq_uninit(&rgd_gh);
29819 + * gfs_alloc_get - allocate a struct gfs_alloc structure for an inode
29822 + * Returns: the struct gfs_alloc
29825 +struct gfs_alloc *
29826 +gfs_alloc_get(struct gfs_inode *ip)
29828 + struct gfs_alloc *al = ip->i_alloc;
29830 + GFS_ASSERT_INODE(!al, ip,);
29832 + al = gmalloc(sizeof(struct gfs_alloc));
29833 + memset(al, 0, sizeof(struct gfs_alloc));
29835 + ip->i_alloc = al;
29841 + * gfs_alloc_put - throw away the struct gfs_alloc for an inode
29847 +gfs_alloc_put(struct gfs_inode *ip)
29849 + struct gfs_alloc *al = ip->i_alloc;
29851 + GFS_ASSERT_INODE(al, ip,);
29853 + ip->i_alloc = NULL;
29858 + * try_rgrp_fit - See if a given reservation will fit in a given RG
29859 + * @rgd: the RG data
29860 + * @al: the struct gfs_alloc structure describing the reservation
29862 + * Sets the $ir_datares field in @res.
29863 + * Sets the $ir_metares field in @res.
29865 + * Returns: 1 on success, 0 on failure
29869 +try_rgrp_fit(struct gfs_rgrpd *rgd, struct gfs_alloc *al)
29871 + uint32_t freeblks = rgd->rd_rg.rg_free;
29872 + uint32_t freemeta = rgd->rd_rg.rg_freemeta;
29873 + uint32_t metares = al->al_requested_meta;
29874 + uint32_t datares = al->al_requested_data;
29876 + /* First take care of the data blocks required */
29878 + if (freeblks < al->al_requested_data)
29881 + freeblks -= al->al_requested_data;
29883 + /* Then take care of the dinodes */
29885 + metares += al->al_requested_di;
29887 + /* Then take care of the metadata blocks */
29889 + while (freemeta < metares) {
29890 + if (freeblks < GFS_META_CLUMP)
29893 + freeblks -= GFS_META_CLUMP;
29894 + freemeta += GFS_META_CLUMP;
29896 + datares += GFS_META_CLUMP;
29899 + al->al_rgd = rgd;
29900 + al->al_reserved_meta = metares;
29901 + al->al_reserved_data = datares;
29907 + * recent_rgrp_first - get first RG from recent list
29908 + * @sdp: The GFS superblock
29909 + * @rglast: address of the rgrp used last
29911 + * Returns: The first rgrp in the recent list
29914 +static struct gfs_rgrpd *
29915 +recent_rgrp_first(struct gfs_sbd *sdp, uint64_t rglast)
29917 + struct list_head *tmp, *head;
29918 + struct gfs_rgrpd *rgd = NULL;
29920 + spin_lock(&sdp->sd_rg_recent_lock);
29922 + if (list_empty(&sdp->sd_rg_recent))
29928 + for (head = &sdp->sd_rg_recent, tmp = head->next;
29930 + tmp = tmp->next) {
29931 + rgd = list_entry(tmp, struct gfs_rgrpd, rd_recent);
29932 + if (rgd->rd_ri.ri_addr == rglast)
29937 + rgd = list_entry(sdp->sd_rg_recent.next, struct gfs_rgrpd, rd_recent);
29940 + spin_unlock(&sdp->sd_rg_recent_lock);
29946 + * recent_rgrp_next - get next RG from recent list
29947 + * @cur_rgd: current rgrp
29949 + * Returns: The next rgrp in the recent list
29952 +static struct gfs_rgrpd *
29953 +recent_rgrp_next(struct gfs_rgrpd *cur_rgd)
29955 + struct gfs_sbd *sdp = cur_rgd->rd_sbd;
29956 + struct list_head *tmp, *head;
29957 + struct gfs_rgrpd *rgd;
29959 + spin_lock(&sdp->sd_rg_recent_lock);
29961 + for (head = &sdp->sd_rg_recent, tmp = head->next;
29963 + tmp = tmp->next) {
29964 + rgd = list_entry(tmp, struct gfs_rgrpd, rd_recent);
29965 + if (rgd == cur_rgd) {
29966 + if (cur_rgd->rd_recent.next != &sdp->sd_rg_recent)
29967 + rgd = list_entry(cur_rgd->rd_recent.next,
29968 + struct gfs_rgrpd, rd_recent);
29979 + spin_unlock(&sdp->sd_rg_recent_lock);
29985 + * recent_rgrp_remove - remove an RG from recent list
29986 + * @rgd: The rgrp to remove
29991 +recent_rgrp_remove(struct gfs_rgrpd *rgd)
29993 + spin_lock(&rgd->rd_sbd->sd_rg_recent_lock);
29994 + list_del(&rgd->rd_recent);
29995 + spin_unlock(&rgd->rd_sbd->sd_rg_recent_lock);
29999 + * recent_rgrp_add - add an RG to recent list
30000 + * @new_rgd: The rgrp to add
30005 +recent_rgrp_add(struct gfs_rgrpd *new_rgd)
30007 + struct gfs_sbd *sdp = new_rgd->rd_sbd;
30008 + struct list_head *tmp, *head;
30009 + struct gfs_rgrpd *rgd = NULL;
30010 + unsigned int count = 0;
30011 + unsigned int max = sdp->sd_rgcount / gfs_num_journals(sdp);
30013 + spin_lock(&sdp->sd_rg_recent_lock);
30015 + for (head = &sdp->sd_rg_recent, tmp = head->next;
30017 + tmp = tmp->next) {
30018 + rgd = list_entry(tmp, struct gfs_rgrpd, rd_recent);
30019 + if (rgd == new_rgd)
30022 + if (++count >= max)
30025 + list_add_tail(&new_rgd->rd_recent, &sdp->sd_rg_recent);
30028 + spin_unlock(&sdp->sd_rg_recent_lock);
30032 + * forward_rgrp_get - get an rgrp to try next from full list
30033 + * @sdp: The GFS superblock
30035 + * Returns: The rgrp to try next
30038 +static struct gfs_rgrpd *
30039 +forward_rgrp_get(struct gfs_sbd *sdp)
30041 + struct gfs_rgrpd *rgd;
30042 + unsigned int journals = gfs_num_journals(sdp);
30043 + unsigned int rg = 0, x;
30045 + spin_lock(&sdp->sd_rg_forward_lock);
30047 + rgd = sdp->sd_rg_forward;
30049 + if (sdp->sd_rgcount >= journals)
30050 + rg = sdp->sd_rgcount *
30051 + sdp->sd_lockstruct.ls_jid /
30054 + for (x = 0, rgd = gfs_rgrpd_get_first(sdp);
30056 + x++, rgd = gfs_rgrpd_get_next(rgd))
30057 + /* Do Nothing */;
30059 + sdp->sd_rg_forward = rgd;
30062 + spin_unlock(&sdp->sd_rg_forward_lock);
30068 + * forward_rgrp_set - set the forward rgrp pointer
30069 + * @sdp: the filesystem
30070 + * @rgd: The new forward rgrp
30075 +forward_rgrp_set(struct gfs_sbd *sdp, struct gfs_rgrpd *rgd)
30077 + spin_lock(&sdp->sd_rg_forward_lock);
30078 + sdp->sd_rg_forward = rgd;
30079 + spin_unlock(&sdp->sd_rg_forward_lock);
30083 + * get_local_rgrp - Choose and lock a rgrp for allocation
30084 + * @ip: the inode to reserve space for
30085 + * @rgp: the chosen and locked rgrp
30087 + * Try to acquire rgrp in way which avoids contending with others.
30089 + * Returns: 0 on success, -EXXX on failure
30093 +get_local_rgrp(struct gfs_inode *ip)
30095 + struct gfs_sbd *sdp = ip->i_sbd;
30096 + struct gfs_rgrpd *rgd, *begin, *next = NULL;
30097 + struct gfs_alloc *al = ip->i_alloc;
30098 + int flags = LM_FLAG_TRY;
30102 + int update_recent = FALSE;
30104 + /* Try recently successful rgrps */
30106 + rgd = recent_rgrp_first(sdp, ip->i_last_rg_alloc);
30109 + error = gfs_glock_nq_init(rgd->rd_gl,
30110 + LM_ST_EXCLUSIVE, LM_FLAG_TRY,
30114 + if (try_rgrp_fit(rgd, al))
30117 + next = recent_rgrp_next(rgd);
30118 + recent_rgrp_remove(rgd);
30119 + gfs_glock_dq_uninit(&al->al_rgd_gh);
30123 + case GLR_TRYFAILED:
30124 + rgd = recent_rgrp_next(rgd);
30128 + GFS_ASSERT_RGRPD(error < 0, rgd,);
30133 + /* Go through full list of rgrps */
30135 + update_recent = TRUE;
30136 + begin = rgd = forward_rgrp_get(sdp);
30139 + error = gfs_glock_nq_init(rgd->rd_gl,
30140 + LM_ST_EXCLUSIVE, flags,
30144 + if (try_rgrp_fit(rgd, al))
30146 + gfs_glock_dq_uninit(&al->al_rgd_gh);
30149 + case GLR_TRYFAILED:
30150 + GFS_ASSERT_RGRPD(flags == LM_FLAG_TRY, rgd,);
30155 + GFS_ASSERT_RGRPD(error < 0, rgd,);
30159 + rgd = gfs_rgrpd_get_next(rgd);
30161 + rgd = gfs_rgrpd_get_first(sdp);
30163 + if (rgd == begin) {
30164 + if (++loops >= 2 || !skipped) {
30172 + ip->i_last_rg_alloc = rgd->rd_ri.ri_addr;
30174 + if (update_recent) {
30175 + recent_rgrp_add(rgd);
30176 + rgd = gfs_rgrpd_get_next(rgd);
30177 + forward_rgrp_set(sdp, rgd);
30184 + * gfs_inplace_reserve_i - Reserve space in the filesystem
30185 + * @ip: the inode to reserve space for
30187 + * Acquire resource group locks to allow for the maximum allocation
30188 + * described by "res".
30190 + * This should probably become more complex again, but for now, let's go
30191 + * for simple (one resource group) reservations.
30193 + * Returns: 0 on success, -EXXX on failure
30197 +gfs_inplace_reserve_i(struct gfs_inode *ip,
30198 + char *file, unsigned int line)
30200 + struct gfs_sbd *sdp = ip->i_sbd;
30201 + struct gfs_alloc *al = ip->i_alloc;
30204 + GFS_ASSERT_INODE(al->al_requested_di ||
30205 + al->al_requested_data ||
30206 + al->al_requested_meta, ip,);
30208 + error = gfs_rindex_hold(sdp, &al->al_ri_gh);
30212 + error = get_local_rgrp(ip);
30214 + gfs_glock_dq_uninit(&al->al_ri_gh);
30218 + gfs_depend_sync(al->al_rgd);
30220 + al->al_file = file;
30221 + al->al_line = line;
30227 + * gfs_inplace_release - release an inplace reservation
30228 + * @ip: the inode the reservation was taken out on
30230 + * Release a reservation made by gfs_inplace_reserve().
30234 +gfs_inplace_release(struct gfs_inode *ip)
30236 + struct gfs_alloc *al = ip->i_alloc;
30238 + GFS_ASSERT_INODE(al->al_alloced_di <= al->al_requested_di, ip,
30239 + printk("al_alloced_di = %u, al_requested_di = %u\n",
30240 + al->al_alloced_di, al->al_requested_di);
30241 + printk("al_file = %s, al_line = %u\n",
30242 + al->al_file, al->al_line););
30243 + GFS_ASSERT_INODE(al->al_alloced_meta <= al->al_reserved_meta, ip,
30244 + printk("al_alloced_meta = %u, al_reserved_meta = %u\n",
30245 + al->al_alloced_meta, al->al_reserved_meta);
30246 + printk("al_file = %s, al_line = %u\n",
30247 + al->al_file, al->al_line););
30248 + GFS_ASSERT_INODE(al->al_alloced_data <= al->al_reserved_data, ip,
30249 + printk("al_alloced_data = %u, al_reserved_data = %u\n",
30250 + al->al_alloced_data, al->al_reserved_data);
30251 + printk("al_file = %s, al_line = %u\n",
30252 + al->al_file, al->al_line););
30254 + al->al_rgd = NULL;
30255 + gfs_glock_dq_uninit(&al->al_rgd_gh);
30256 + gfs_glock_dq_uninit(&al->al_ri_gh);
30260 + * gfs_get_block_type - Check a block in a RG is of given type
30261 + * @rgd: the resource group holding the block
30262 + * @block: the block number
30264 + * Returns: The block type (GFS_BLKST_*)
30268 +gfs_get_block_type(struct gfs_rgrpd *rgd, uint64_t block)
30270 + struct gfs_bitmap *bits = NULL;
30271 + uint32_t length, rgrp_block, buf_block;
30272 + unsigned int buf;
30273 + unsigned char type;
30275 + length = rgd->rd_ri.ri_length;
30276 + rgrp_block = block - rgd->rd_ri.ri_data1;
30278 + for (buf = 0; buf < length; buf++) {
30279 + bits = &rgd->rd_bits[buf];
30280 + if (rgrp_block < (bits->bi_start + bits->bi_len) * GFS_NBBY)
30284 + GFS_ASSERT_RGRPD(buf < length, rgd,);
30285 + buf_block = rgrp_block - bits->bi_start * GFS_NBBY;
30287 + type = gfs_testbit(rgd,
30288 + rgd->rd_bh[buf]->b_data + bits->bi_offset,
30289 + bits->bi_len, buf_block);
30295 + * blkalloc_internal - allocate a single block
30296 + * @rgd: the resource group descriptor
30297 + * @goal: the goal block in the RG
30298 + * @old_state: the type of block to find
30299 + * @new_state: the resulting block type
30301 + * This function never fails.
30303 + * Returns: returns the block allocated
30307 +blkalloc_internal(struct gfs_rgrpd *rgd,
30309 + unsigned char old_state, unsigned char new_state)
30311 + struct gfs_bitmap *bits = NULL;
30312 + uint32_t length = rgd->rd_ri.ri_length;
30313 + uint32_t blk = 0;
30314 + unsigned int buf, x;
30316 + for (buf = 0; buf < length; buf++) {
30317 + bits = &rgd->rd_bits[buf];
30318 + if (goal < (bits->bi_start + bits->bi_len) * GFS_NBBY)
30322 + GFS_ASSERT_RGRPD(buf < length, rgd,);
30323 + goal -= bits->bi_start * GFS_NBBY;
30325 + /* "x <= length" because we're skipping over some of the first
30326 + buffer when the goal is non-zero. */
30328 + for (x = 0; x <= length; x++) {
30329 + blk = gfs_bitfit(rgd,
30330 + rgd->rd_bh[buf]->b_data + bits->bi_offset,
30331 + bits->bi_len, goal, old_state);
30332 + if (blk != BFITNOENT)
30335 + buf = (buf + 1) % length;
30336 + bits = &rgd->rd_bits[buf];
30340 + GFS_ASSERT_RGRPD(x <= length, rgd,);
30342 + gfs_trans_add_bh(rgd->rd_gl, rgd->rd_bh[buf]);
30344 + rgd->rd_bh[buf]->b_data + bits->bi_offset,
30345 + bits->bi_len, blk, new_state);
30347 + return bits->bi_start * GFS_NBBY + blk;
30351 + * blkfree_internal - Free a block
30352 + * @sdp: the filesystem
30353 + * @bstart: the start of a run of blocks to free
30354 + * @blen: the length of the block run
30355 + * @new_state: the new state of the block
30359 +static struct gfs_rgrpd *
30360 +blkfree_internal(struct gfs_sbd *sdp, uint64_t bstart, uint32_t blen,
30361 + unsigned char new_state)
30363 + struct gfs_rgrpd *rgd;
30364 + struct gfs_bitmap *bits = NULL;
30365 + uint32_t length, rgrp_blk, buf_blk;
30366 + unsigned int buf;
30368 + rgd = gfs_blk2rgrpd(sdp, bstart);
30369 + GFS_ASSERT_SBD(rgd, sdp,
30370 + printk("block = %"PRIu64"\n", bstart););
30372 + length = rgd->rd_ri.ri_length;
30373 + rgrp_blk = bstart - rgd->rd_ri.ri_data1;
30376 + for (buf = 0; buf < length; buf++) {
30377 + bits = &rgd->rd_bits[buf];
30378 + if (rgrp_blk < (bits->bi_start + bits->bi_len) * GFS_NBBY)
30382 + GFS_ASSERT_RGRPD(buf < length, rgd,);
30383 + buf_blk = rgrp_blk - bits->bi_start * GFS_NBBY;
30386 + gfs_trans_add_bh(rgd->rd_gl, rgd->rd_bh[buf]);
30388 + rgd->rd_bh[buf]->b_data + bits->bi_offset,
30389 + bits->bi_len, buf_blk, new_state);
30396 + * clump_alloc - Allocate a clump of metadata
30397 + * @rgd: the resource group descriptor
30398 + * @first: returns the first block allocated
30400 + * Returns: 0 on success, -EXXX on failure
30404 +clump_alloc(struct gfs_rgrpd *rgd, uint32_t *first)
30406 + struct gfs_sbd *sdp = rgd->rd_sbd;
30407 + struct gfs_meta_header mh;
30408 + struct buffer_head **bh;
30409 + uint32_t goal, blk;
30413 + memset(&mh, 0, sizeof(struct gfs_meta_header));
30414 + mh.mh_magic = GFS_MAGIC;
30415 + mh.mh_type = GFS_METATYPE_NONE;
30417 + bh = gmalloc(GFS_META_CLUMP * sizeof(struct buffer_head *));
30418 + memset(bh, 0, sizeof(GFS_META_CLUMP * sizeof(struct buffer_head *)));
30420 + goal = rgd->rd_last_alloc_data;
30422 + for (x = 0; x < GFS_META_CLUMP; x++) {
30423 + blk = blkalloc_internal(rgd, goal, GFS_BLKST_FREE,
30424 + GFS_BLKST_FREEMETA);
30428 + bh[x] = gfs_dgetblk(sdp, rgd->rd_ri.ri_data1 + blk, rgd->rd_gl);
30430 + gfs_prep_new_buffer(bh[x]);
30432 + gfs_meta_header_out(&mh, bh[x]->b_data);
30433 + ((struct gfs_meta_header *)bh[x]->b_data)->mh_generation = 0;
30435 + error = gfs_dwrite(sdp, bh[x], DIO_DIRTY | DIO_START);
30442 + rgd->rd_last_alloc_data = goal;
30444 + for (x = 0; x < GFS_META_CLUMP; x++) {
30445 + error = gfs_dwrite(sdp, bh[x], DIO_WAIT);
30450 + gfs_mhc_add(rgd, bh, GFS_META_CLUMP);
30452 + GFS_ASSERT_RGRPD(rgd->rd_rg.rg_free >= GFS_META_CLUMP, rgd,);
30453 + rgd->rd_rg.rg_free -= GFS_META_CLUMP;
30454 + rgd->rd_rg.rg_freemeta += GFS_META_CLUMP;
30457 + for (x = 0; x < GFS_META_CLUMP; x++)
30459 + gfs_dwrite(sdp, bh[x], DIO_WAIT);
30468 + * gfs_blkalloc - Allocate a data block
30469 + * @ip: the inode to allocate the data block for
30470 + * @block: the block allocated
30475 +gfs_blkalloc(struct gfs_inode *ip, uint64_t *block)
30477 + struct gfs_sbd *sdp = ip->i_sbd;
30478 + struct gfs_alloc *al = ip->i_alloc;
30479 + struct gfs_rgrpd *rgd = al->al_rgd;
30480 + uint32_t goal, blk;
30483 + GFS_ASSERT_INODE(rgd, ip,);
30485 + same = (rgd->rd_ri.ri_addr == ip->i_di.di_goal_rgrp);
30486 + goal = (same) ? ip->i_di.di_goal_dblk : rgd->rd_last_alloc_data;
30488 + blk = blkalloc_internal(rgd, goal,
30489 + GFS_BLKST_FREE, GFS_BLKST_USED);
30490 + rgd->rd_last_alloc_data = blk;
30493 + ip->i_di.di_goal_rgrp = rgd->rd_ri.ri_addr;
30494 + ip->i_di.di_goal_mblk = 0;
30496 + ip->i_di.di_goal_dblk = blk;
30498 + *block = rgd->rd_ri.ri_data1 + blk;
30500 + GFS_ASSERT_RGRPD(rgd->rd_rg.rg_free, rgd,);
30501 + rgd->rd_rg.rg_free--;
30503 + gfs_trans_add_bh(rgd->rd_gl, rgd->rd_bh[0]);
30504 + gfs_rgrp_out(&rgd->rd_rg, rgd->rd_bh[0]->b_data);
30506 + al->al_alloced_data++;
30508 + gfs_trans_add_quota(sdp, +1, ip->i_di.di_uid, ip->i_di.di_gid);
30512 + * gfs_metaalloc - Allocate a metadata block to a file
30514 + * @block: the block allocated
30516 + * Returns: 0 on success, -EXXX on failure
30520 +gfs_metaalloc(struct gfs_inode *ip, uint64_t *block)
30522 + struct gfs_sbd *sdp = ip->i_sbd;
30523 + struct gfs_alloc *al = ip->i_alloc;
30524 + struct gfs_rgrpd *rgd = al->al_rgd;
30525 + uint32_t goal, blk;
30529 + GFS_ASSERT_INODE(rgd, ip,);
30531 + same = (rgd->rd_ri.ri_addr == ip->i_di.di_goal_rgrp);
30533 + if (!rgd->rd_rg.rg_freemeta) {
30534 + error = clump_alloc(rgd, &goal);
30538 + al->al_alloced_data += GFS_META_CLUMP;
30540 + goal = (same) ? ip->i_di.di_goal_mblk : rgd->rd_last_alloc_meta;
30542 + blk = blkalloc_internal(rgd, goal,
30543 + GFS_BLKST_FREEMETA, GFS_BLKST_USEDMETA);
30544 + rgd->rd_last_alloc_meta = blk;
30547 + ip->i_di.di_goal_rgrp = rgd->rd_ri.ri_addr;
30548 + ip->i_di.di_goal_dblk = 0;
30550 + ip->i_di.di_goal_mblk = blk;
30552 + *block = rgd->rd_ri.ri_data1 + blk;
30554 + GFS_ASSERT_RGRPD(rgd->rd_rg.rg_freemeta, rgd,);
30555 + rgd->rd_rg.rg_freemeta--;
30556 + rgd->rd_rg.rg_usedmeta++;
30558 + gfs_trans_add_bh(rgd->rd_gl, rgd->rd_bh[0]);
30559 + gfs_rgrp_out(&rgd->rd_rg, rgd->rd_bh[0]->b_data);
30561 + al->al_alloced_meta++;
30563 + gfs_trans_add_quota(sdp, +1, ip->i_di.di_uid, ip->i_di.di_gid);
30569 + * gfs_dialloc - Allocate a dinode
30570 + * @dip: the directory that the inode is going in
30571 + * @block: the block
30577 +gfs_dialloc(struct gfs_inode *dip, uint64_t *block)
30579 + struct gfs_alloc *al = dip->i_alloc;
30580 + struct gfs_rgrpd *rgd = al->al_rgd;
30581 + uint32_t goal, blk;
30584 + GFS_ASSERT_INODE(rgd, dip,);
30586 + if (rgd->rd_rg.rg_freemeta)
30587 + goal = rgd->rd_last_alloc_meta;
30589 + error = clump_alloc(rgd, &goal);
30593 + al->al_alloced_data += GFS_META_CLUMP;
30596 + blk = blkalloc_internal(rgd, goal,
30597 + GFS_BLKST_FREEMETA, GFS_BLKST_USEDMETA);
30598 + rgd->rd_last_alloc_meta = blk;
30600 + *block = rgd->rd_ri.ri_data1 + blk;
30602 + GFS_ASSERT_RGRPD(rgd->rd_rg.rg_freemeta, rgd,);
30603 + rgd->rd_rg.rg_freemeta--;
30604 + rgd->rd_rg.rg_useddi++;
30606 + gfs_trans_add_bh(rgd->rd_gl, rgd->rd_bh[0]);
30607 + gfs_rgrp_out(&rgd->rd_rg, rgd->rd_bh[0]->b_data);
30609 + al->al_alloced_di++;
30610 + al->al_alloced_meta++;
30616 + * gfs_blkfree - free a piece of data
30617 + * @ip: the inode these blocks are being free from
30618 + * @bstart: the start of a run of blocks to free
30619 + * @blen: the length of the block run
30624 +gfs_blkfree(struct gfs_inode *ip, uint64_t bstart, uint32_t blen)
30626 + struct gfs_sbd *sdp = ip->i_sbd;
30627 + struct gfs_rgrpd *rgd;
30629 + rgd = blkfree_internal(sdp, bstart, blen, GFS_BLKST_FREE);
30631 + rgd->rd_rg.rg_free += blen;
30633 + gfs_trans_add_bh(rgd->rd_gl, rgd->rd_bh[0]);
30634 + gfs_rgrp_out(&rgd->rd_rg, rgd->rd_bh[0]->b_data);
30636 + gfs_trans_add_quota(sdp, -(int64_t)blen,
30638 + ip->i_di.di_gid);
30642 + * gfs_metafree - free a piece of metadata
30643 + * @ip: the inode these blocks are being free from
30644 + * @bstart: the start of a run of blocks to free
30645 + * @blen: the length of the block run
30650 +gfs_metafree(struct gfs_inode *ip, uint64_t bstart, uint32_t blen)
30652 + struct gfs_sbd *sdp = ip->i_sbd;
30653 + struct gfs_rgrpd *rgd;
30655 + rgd = blkfree_internal(sdp, bstart, blen, GFS_BLKST_FREEMETA);
30657 + GFS_ASSERT_RGRPD(rgd->rd_rg.rg_usedmeta >= blen, rgd,);
30658 + rgd->rd_rg.rg_usedmeta -= blen;
30659 + rgd->rd_rg.rg_freemeta += blen;
30661 + gfs_trans_add_bh(rgd->rd_gl, rgd->rd_bh[0]);
30662 + gfs_rgrp_out(&rgd->rd_rg, rgd->rd_bh[0]->b_data);
30664 + gfs_trans_add_quota(sdp, -(int64_t)blen,
30666 + ip->i_di.di_gid);
30667 + gfs_wipe_buffers(ip, rgd, bstart, blen);
30671 + * gfs_difree_uninit - free a dinode (bitmap/RG accounting only)
30672 + * @rgd: the resource group that contains the dinode
30673 + * @addr: the dinode address
30678 +gfs_difree_uninit(struct gfs_rgrpd *rgd, uint64_t addr)
30680 + struct gfs_sbd *sdp = rgd->rd_sbd;
30681 + struct gfs_rgrpd *tmp_rgd;
30683 + tmp_rgd = blkfree_internal(sdp, addr, 1,
30684 + GFS_BLKST_FREEMETA);
30685 + GFS_ASSERT_RGRPD(rgd == tmp_rgd, rgd,);
30687 + GFS_ASSERT_RGRPD(rgd->rd_rg.rg_useddi, rgd,);
30688 + rgd->rd_rg.rg_useddi--;
30689 + rgd->rd_rg.rg_freemeta++;
30691 + gfs_trans_add_bh(rgd->rd_gl, rgd->rd_bh[0]);
30692 + gfs_rgrp_out(&rgd->rd_rg, rgd->rd_bh[0]->b_data);
30696 + * gfs_difree - free a dinode
30697 + * @rgd: the resource group that contains the dinode
30698 + * @ip: the inode representing the dinode to free
30703 +gfs_difree(struct gfs_rgrpd *rgd, struct gfs_inode *ip)
30705 + gfs_difree_uninit(rgd, ip->i_num.no_addr);
30707 + gfs_trans_add_quota(ip->i_sbd, -1, ip->i_di.di_uid, ip->i_di.di_gid);
30708 + gfs_wipe_buffers(ip, rgd, ip->i_num.no_addr, 1);
30712 + * gfs_rlist_add - add a RG to a list of RGs
30713 + * @sdp: the filesystem
30714 + * @rlist: the list of resource groups
30715 + * @block: the block
30717 + * Figure out what RG a block belongs to and add that RG to the list
30722 +gfs_rlist_add(struct gfs_sbd *sdp, struct gfs_rgrp_list *rlist, uint64_t block)
30724 + struct gfs_rgrpd *rgd;
30725 + struct gfs_rgrpd **tmp;
30726 + unsigned int new_space;
30729 + GFS_ASSERT_SBD(rlist->rl_rgrps <= rlist->rl_space, sdp,);
30730 + GFS_ASSERT_SBD(!rlist->rl_ghs, sdp,);
30732 + rgd = gfs_blk2rgrpd(sdp, block);
30733 + GFS_ASSERT_SBD(rgd, sdp,
30734 + printk("block = %"PRIu64"\n", block););
30736 + for (x = 0; x < rlist->rl_rgrps; x++)
30737 + if (rlist->rl_rgd[x] == rgd)
30740 + if (rlist->rl_rgrps == rlist->rl_space) {
30741 + new_space = rlist->rl_space + 10;
30743 + tmp = gmalloc(new_space * sizeof(struct gfs_rgrpd *));
30745 + if (rlist->rl_rgd) {
30746 + memcpy(tmp, rlist->rl_rgd,
30747 + rlist->rl_space * sizeof(struct gfs_rgrpd *));
30748 + kfree(rlist->rl_rgd);
30751 + rlist->rl_space = new_space;
30752 + rlist->rl_rgd = tmp;
30755 + rlist->rl_rgd[rlist->rl_rgrps++] = rgd;
30759 + * gfs_rlist_alloc - all RGs have been added to the rlist, allocate holders for them
30760 + * @rlist: the list of resource groups
30761 + * @state: the lock state to acquire the RG lock in
30762 + * @flags: the modifier flags for the holder structures
30767 +gfs_rlist_alloc(struct gfs_rgrp_list *rlist, unsigned int state, int flags)
30771 + rlist->rl_ghs = gmalloc(rlist->rl_rgrps * sizeof(struct gfs_holder));
30772 + for (x = 0; x < rlist->rl_rgrps; x++)
30773 + gfs_holder_init(rlist->rl_rgd[x]->rd_gl,
30775 + &rlist->rl_ghs[x]);
30779 + * gfs_rlist_free - free a resource group list
30780 + * @list: the list of resource groups
30785 +gfs_rlist_free(struct gfs_rgrp_list *rlist)
30789 + if (rlist->rl_rgd)
30790 + kfree(rlist->rl_rgd);
30792 + if (rlist->rl_ghs) {
30793 + for (x = 0; x < rlist->rl_rgrps; x++)
30794 + gfs_holder_uninit(&rlist->rl_ghs[x]);
30795 + kfree(rlist->rl_ghs);
30800 + * gfs_reclaim_metadata - reclaims unused metadata
30801 + * @sdp: the file system
30802 + * @stats: stats on reclamation
30804 + * This function will look through the resource groups and
30805 + * free the unused metadata.
30807 + * Returns: 0 on success, -EXXX on error
30811 +gfs_reclaim_metadata(struct gfs_sbd *sdp, struct gfs_reclaim_stats *stats)
30813 + struct gfs_holder ji_gh, ri_gh, rgd_gh, t_gh;
30814 + struct gfs_rgrpd *rgd;
30815 + struct gfs_rgrp *rg;
30816 + struct gfs_dinode *di;
30817 + struct gfs_inum next;
30818 + struct buffer_head *bh;
30824 + /* Acquire the jindex lock here so we don't deadlock with a
30825 + process writing the jindex inode. :-( */
30827 + error = gfs_jindex_hold(sdp, &ji_gh);
30831 + error = gfs_rindex_hold(sdp, &ri_gh);
30833 + goto fail_jindex_relse;
30835 + for (rgd = gfs_rgrpd_get_first(sdp);
30837 + rgd = gfs_rgrpd_get_next(rgd)) {
30838 + error = gfs_glock_nq_init(rgd->rd_gl,
30839 + LM_ST_EXCLUSIVE, GL_NOCACHE,
30842 + goto fail_rindex_relse;
30844 + rgrp_verify(rgd);
30846 + rg = &rgd->rd_rg;
30848 + if (!rg->rg_freedi && !rg->rg_freemeta) {
30849 + gfs_glock_dq_uninit(&rgd_gh);
30853 + gfs_mhc_zap(rgd);
30854 + gfs_depend_sync(rgd);
30856 + error = gfs_lock_fs_check_clean(sdp, LM_ST_EXCLUSIVE, &t_gh);
30858 + goto fail_gunlock_rg;
30860 + error = gfs_trans_begin(sdp, rgd->rd_ri.ri_length, 0);
30862 + goto fail_unlock_fs;
30864 + next = rg->rg_freedi_list;
30866 + for (x = rg->rg_freedi; x--;) {
30867 + GFS_ASSERT_RGRPD(next.no_formal_ino &&
30868 + next.no_addr, rgd,);
30870 + blkfree_internal(sdp, next.no_addr, 1, GFS_BLKST_FREE);
30872 + error = gfs_dread(sdp, next.no_addr, rgd->rd_gl,
30873 + DIO_FORCE | DIO_START | DIO_WAIT, &bh);
30875 + goto fail_end_trans;
30877 + di = (struct gfs_dinode *)bh->b_data;
30878 + flags = di->di_flags;
30879 + flags = gfs32_to_cpu(flags);
30880 + GFS_ASSERT_RGRPD(flags & GFS_DIF_UNUSED, rgd,);
30882 + gfs_inum_in(&next, (char *)&di->di_next_unused);
30888 + stats->rc_inodes++;
30891 + GFS_ASSERT_RGRPD(!next.no_formal_ino && !next.no_addr, rgd,);
30892 + rg->rg_freedi_list = next;
30895 + for (x = rg->rg_freemeta; x--;) {
30896 + goal = blkalloc_internal(rgd, goal,
30897 + GFS_BLKST_FREEMETA, GFS_BLKST_FREE);
30898 + rg->rg_freemeta--;
30900 + stats->rc_metadata++;
30903 + gfs_trans_add_bh(rgd->rd_gl, rgd->rd_bh[0]);
30904 + gfs_rgrp_out(rg, rgd->rd_bh[0]->b_data);
30906 + gfs_trans_end(sdp);
30908 + gfs_glock_dq_uninit(&t_gh);
30910 + gfs_glock_dq_uninit(&rgd_gh);
30913 + gfs_glock_dq_uninit(&ri_gh);
30915 + gfs_glock_dq_uninit(&ji_gh);
30920 + gfs_trans_end(sdp);
30923 + gfs_glock_dq_uninit(&t_gh);
30926 + gfs_glock_dq_uninit(&rgd_gh);
30928 + fail_rindex_relse:
30929 + gfs_glock_dq_uninit(&ri_gh);
30931 + fail_jindex_relse:
30932 + gfs_glock_dq_uninit(&ji_gh);
30937 diff -urN linux-orig/fs/gfs/rgrp.h linux-patched/fs/gfs/rgrp.h
30938 --- linux-orig/fs/gfs/rgrp.h 1969-12-31 18:00:00.000000000 -0600
30939 +++ linux-patched/fs/gfs/rgrp.h 2004-06-30 13:27:49.358707651 -0500
30941 +/******************************************************************************
30942 +*******************************************************************************
30944 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
30945 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
30947 +** This copyrighted material is made available to anyone wishing to use,
30948 +** modify, copy, or redistribute it subject to the terms and conditions
30949 +** of the GNU General Public License v.2.
30951 +*******************************************************************************
30952 +******************************************************************************/
30954 +#ifndef __RGRP_DOT_H__
30955 +#define __RGRP_DOT_H__
30957 +void gfs_mhc_add(struct gfs_rgrpd *rgd, struct buffer_head **bh,
30958 + unsigned int num);
30959 +int gfs_mhc_fish(struct gfs_sbd *sdp, struct buffer_head *bh);
30960 +void gfs_mhc_zap(struct gfs_rgrpd *rgd);
30962 +void gfs_depend_add(struct gfs_rgrpd *rgd, uint64_t formal_ino);
30963 +void gfs_depend_sync(struct gfs_rgrpd *rgd);
30965 +struct gfs_rgrpd *gfs_blk2rgrpd(struct gfs_sbd *sdp, uint64_t blk);
30966 +struct gfs_rgrpd *gfs_rgrpd_get_first(struct gfs_sbd *sdp);
30967 +struct gfs_rgrpd *gfs_rgrpd_get_next(struct gfs_rgrpd *rgd);
30969 +void gfs_clear_rgrpd(struct gfs_sbd *sdp);
30971 +int gfs_rindex_hold(struct gfs_sbd *sdp, struct gfs_holder *ri_gh);
30973 +int gfs_rgrp_read(struct gfs_rgrpd *rgd);
30974 +void gfs_rgrp_relse(struct gfs_rgrpd *rgd);
30976 +void gfs_rgrp_lvb_fill(struct gfs_rgrpd *rgd);
30977 +int gfs_rgrp_lvb_init(struct gfs_rgrpd *rgd);
30979 +struct gfs_alloc *gfs_alloc_get(struct gfs_inode *ip);
30980 +void gfs_alloc_put(struct gfs_inode *ip);
30982 +int gfs_inplace_reserve_i(struct gfs_inode *ip,
30983 + char *file, unsigned int line);
30984 +#define gfs_inplace_reserve(ip) \
30985 +gfs_inplace_reserve_i((ip), __FILE__, __LINE__)
30987 +void gfs_inplace_release(struct gfs_inode *ip);
30989 +unsigned char gfs_get_block_type(struct gfs_rgrpd *rgd, uint64_t block);
30991 +void gfs_blkalloc(struct gfs_inode *ip, uint64_t *block);
30992 +int gfs_metaalloc(struct gfs_inode *ip, uint64_t *block);
30993 +int gfs_dialloc(struct gfs_inode *dip, uint64_t *block);
30995 +void gfs_blkfree(struct gfs_inode *ip, uint64_t bstart, uint32_t blen);
30996 +void gfs_metafree(struct gfs_inode *ip, uint64_t bstart, uint32_t blen);
30997 +void gfs_difree_uninit(struct gfs_rgrpd *rgd, uint64_t addr);
30998 +void gfs_difree(struct gfs_rgrpd *rgd, struct gfs_inode *ip);
31000 +struct gfs_rgrp_list {
31001 + unsigned int rl_rgrps;
31002 + unsigned int rl_space;
31003 + struct gfs_rgrpd **rl_rgd;
31004 + struct gfs_holder *rl_ghs;
31007 +void gfs_rlist_add(struct gfs_sbd *sdp, struct gfs_rgrp_list *rlist,
31009 +void gfs_rlist_alloc(struct gfs_rgrp_list *rlist, unsigned int state,
31011 +void gfs_rlist_free(struct gfs_rgrp_list *rlist);
31013 +int gfs_reclaim_metadata(struct gfs_sbd *sdp, struct gfs_reclaim_stats *stats);
31015 +#endif /* __RGRP_DOT_H__ */
31016 diff -urN linux-orig/fs/gfs/super.c linux-patched/fs/gfs/super.c
31017 --- linux-orig/fs/gfs/super.c 1969-12-31 18:00:00.000000000 -0600
31018 +++ linux-patched/fs/gfs/super.c 2004-06-30 13:27:49.359707419 -0500
31020 +/******************************************************************************
31021 +*******************************************************************************
31023 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
31024 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
31026 +** This copyrighted material is made available to anyone wishing to use,
31027 +** modify, copy, or redistribute it subject to the terms and conditions
31028 +** of the GNU General Public License v.2.
31030 +*******************************************************************************
31031 +******************************************************************************/
31033 +#include <linux/sched.h>
31034 +#include <linux/slab.h>
31035 +#include <linux/smp_lock.h>
31036 +#include <linux/spinlock.h>
31037 +#include <asm/semaphore.h>
31038 +#include <linux/completion.h>
31039 +#include <linux/buffer_head.h>
31044 +#include "format.h"
31045 +#include "glock.h"
31046 +#include "glops.h"
31047 +#include "inode.h"
31049 +#include "quota.h"
31050 +#include "recovery.h"
31052 +#include "super.h"
31053 +#include "unlinked.h"
31056 + * gfs_init_tune_data - Fill in the struct gfs_tune (sd_tune) in the struct gfs_sbd.
31057 + * @sdp: the filesystem
31062 +gfs_init_tune_data(struct gfs_sbd *sdp)
31064 + struct gfs_tune *gt = &sdp->sd_tune;
31066 + gt->gt_tune_version = GFS_TUNE_VERSION;
31068 + gt->gt_ilimit1 = 100;
31069 + gt->gt_ilimit1_tries = 3;
31070 + gt->gt_ilimit1_min = 1;
31071 + gt->gt_ilimit2 = 500;
31072 + gt->gt_ilimit2_tries = 10;
31073 + gt->gt_ilimit2_min = 3;
31074 + gt->gt_demote_secs = 300;
31075 + gt->gt_incore_log_blocks = 1024;
31076 + gt->gt_jindex_refresh_secs = 60;
31077 + gt->gt_depend_secs = 60;
31078 + gt->gt_scand_secs = 5;
31079 + gt->gt_recoverd_secs = 60;
31080 + gt->gt_logd_secs = 1;
31081 + gt->gt_quotad_secs = 5;
31082 + gt->gt_inoded_secs = 15;
31083 + gt->gt_quota_simul_sync = 64;
31084 + gt->gt_quota_warn_period = 10;
31085 + gt->gt_atime_quantum = 3600;
31086 + gt->gt_quota_quantum = 60;
31087 + gt->gt_quota_scale_num = 1;
31088 + gt->gt_quota_scale_den = 1;
31089 + gt->gt_quota_enforce = 1;
31090 + gt->gt_quota_account = 1;
31091 + gt->gt_new_files_jdata = 0;
31092 + gt->gt_new_files_directio = 0;
31093 + gt->gt_max_atomic_write = 4 << 20;
31094 + gt->gt_max_readahead = 1 << 18;
31095 + gt->gt_lockdump_size = 131072;
31096 + gt->gt_stall_secs = 600;
31097 + gt->gt_complain_secs = 10;
31098 + gt->gt_reclaim_limit = 5000;
31099 + gt->gt_entries_per_readdir = 32;
31100 + gt->gt_prefetch_secs = 10;
31101 + gt->gt_statfs_slots = 64;
31102 + gt->gt_max_mhc = 10000;
31106 + * gfs_check_sb - Check superblock
31107 + * @sdp: the filesystem
31108 + * @sb: The superblock
31109 + * @silent: Don't print a message if the check fails
31111 + * Checks the version code of the FS is one that we understand how to
31112 + * read and that the sizes of the various on-disk structures have not
31117 +gfs_check_sb(struct gfs_sbd *sdp, struct gfs_sb *sb, int silent)
31121 + if (sb->sb_header.mh_magic != GFS_MAGIC ||
31122 + sb->sb_header.mh_type != GFS_METATYPE_SB) {
31124 + printk("GFS: not a GFS filesystem\n");
31128 + /* If format numbers match exactly, we're done. */
31130 + if (sb->sb_fs_format == GFS_FORMAT_FS &&
31131 + sb->sb_multihost_format == GFS_FORMAT_MULTI)
31134 + if (sb->sb_fs_format != GFS_FORMAT_FS) {
31135 + for (x = 0; gfs_old_fs_formats[x]; x++)
31136 + if (gfs_old_fs_formats[x] == sb->sb_fs_format)
31139 + if (!gfs_old_fs_formats[x]) {
31140 + printk("GFS: code version (%u, %u) is incompatible with ondisk format (%u, %u)\n",
31141 + GFS_FORMAT_FS, GFS_FORMAT_MULTI,
31142 + sb->sb_fs_format, sb->sb_multihost_format);
31143 + printk("GFS: I don't know how to upgrade this FS\n");
31148 + if (sb->sb_multihost_format != GFS_FORMAT_MULTI) {
31149 + for (x = 0; gfs_old_multihost_formats[x]; x++)
31150 + if (gfs_old_multihost_formats[x] == sb->sb_multihost_format)
31153 + if (!gfs_old_multihost_formats[x]) {
31154 + printk("GFS: code version (%u, %u) is incompatible with ondisk format (%u, %u)\n",
31155 + GFS_FORMAT_FS, GFS_FORMAT_MULTI,
31156 + sb->sb_fs_format, sb->sb_multihost_format);
31157 + printk("GFS: I don't know how to upgrade this FS\n");
31162 + if (!sdp->sd_args.ar_upgrade) {
31163 + printk("GFS: code version (%u, %u) is incompatible with ondisk format (%u, %u)\n",
31164 + GFS_FORMAT_FS, GFS_FORMAT_MULTI,
31165 + sb->sb_fs_format, sb->sb_multihost_format);
31166 + printk("GFS: Use the \"upgrade\" mount option to upgrade the FS\n");
31167 + printk("GFS: See the manual for more details\n");
31175 + * gfs_read_sb - Read super block
31176 + * @sdp: The GFS superblock
31177 + * @gl: the glock for the superblock (assumed to be held)
31178 + * @silent: Don't print message if mount fails
31183 +gfs_read_sb(struct gfs_sbd *sdp, struct gfs_glock *gl, int silent)
31185 + struct buffer_head *bh;
31186 + uint32_t hash_blocks, ind_blocks, leaf_blocks;
31187 + uint32_t tmp_blocks;
31188 + uint64_t space = 0;
31192 + error = gfs_dread(sdp, GFS_SB_ADDR >> sdp->sd_fsb2bb_shift,
31193 + gl, DIO_FORCE | DIO_START | DIO_WAIT, &bh);
31196 + printk("GFS: fsid=%s: can't read superblock\n",
31201 + GFS_ASSERT_SBD(sizeof(struct gfs_sb) <= bh->b_size, sdp,);
31203 + gfs_sb_in(&sdp->sd_sb, bh->b_data);
31207 + error = gfs_check_sb(sdp, &sdp->sd_sb, silent);
31211 + sdp->sd_fsb2bb_shift = sdp->sd_sb.sb_bsize_shift -
31212 + GFS_BASIC_BLOCK_SHIFT;
31213 + sdp->sd_fsb2bb = 1 << sdp->sd_fsb2bb_shift;
31214 + sdp->sd_diptrs = (sdp->sd_sb.sb_bsize - sizeof(struct gfs_dinode)) /
31215 + sizeof(uint64_t);
31216 + sdp->sd_inptrs = (sdp->sd_sb.sb_bsize - sizeof(struct gfs_indirect)) /
31217 + sizeof(uint64_t);
31218 + sdp->sd_jbsize = sdp->sd_sb.sb_bsize - sizeof(struct gfs_meta_header);
31219 + sdp->sd_hash_bsize = sdp->sd_sb.sb_bsize / 2;
31220 + sdp->sd_hash_bsize_shift = sdp->sd_sb.sb_bsize_shift - 1;
31221 + sdp->sd_hash_ptrs = sdp->sd_hash_bsize / sizeof(uint64_t);
31223 + /* Compute maximum reservation required to add an entry to a directory */
31225 + hash_blocks = DIV_RU(sizeof(uint64_t) * (1 << GFS_DIR_MAX_DEPTH),
31229 + for (tmp_blocks = hash_blocks; tmp_blocks > sdp->sd_diptrs;) {
31230 + tmp_blocks = DIV_RU(tmp_blocks, sdp->sd_inptrs);
31231 + ind_blocks += tmp_blocks;
31234 + leaf_blocks = 2 + GFS_DIR_MAX_DEPTH;
31236 + sdp->sd_max_dirres = hash_blocks + ind_blocks + leaf_blocks;
31238 + sdp->sd_heightsize[0] = sdp->sd_sb.sb_bsize - sizeof(struct gfs_dinode);
31239 + sdp->sd_heightsize[1] = sdp->sd_sb.sb_bsize * sdp->sd_diptrs;
31240 + for (x = 2;; x++) {
31243 + space = sdp->sd_heightsize[x - 1] * sdp->sd_inptrs;
31245 + m = do_div(d, sdp->sd_inptrs);
31247 + if (d != sdp->sd_heightsize[x - 1] || m)
31249 + sdp->sd_heightsize[x] = space;
31251 + sdp->sd_max_height = x;
31252 + GFS_ASSERT_SBD(sdp->sd_max_height <= GFS_MAX_META_HEIGHT, sdp,);
31254 + sdp->sd_jheightsize[0] = sdp->sd_sb.sb_bsize - sizeof(struct gfs_dinode);
31255 + sdp->sd_jheightsize[1] = sdp->sd_jbsize * sdp->sd_diptrs;
31256 + for (x = 2;; x++) {
31259 + space = sdp->sd_jheightsize[x - 1] * sdp->sd_inptrs;
31261 + m = do_div(d, sdp->sd_inptrs);
31263 + if (d != sdp->sd_jheightsize[x - 1] || m)
31265 + sdp->sd_jheightsize[x] = space;
31267 + sdp->sd_max_jheight = x;
31268 + GFS_ASSERT_SBD(sdp->sd_max_jheight <= GFS_MAX_META_HEIGHT, sdp,);
31274 + * gfs_do_upgrade - upgrade a filesystem
31275 + * @sdp: The GFS superblock
31280 +gfs_do_upgrade(struct gfs_sbd *sdp, struct gfs_glock *sb_gl)
31282 + struct gfs_holder ji_gh, t_gh, j_gh;
31283 + struct gfs_log_header lh;
31284 + struct buffer_head *bh;
31288 + /* If format numbers match exactly, we're done. */
31290 + if (sdp->sd_sb.sb_fs_format == GFS_FORMAT_FS &&
31291 + sdp->sd_sb.sb_multihost_format == GFS_FORMAT_MULTI) {
31292 + printk("GFS: fsid=%s: no upgrade necessary\n",
31294 + sdp->sd_args.ar_upgrade = FALSE;
31298 + error = gfs_jindex_hold(sdp, &ji_gh);
31302 + error = gfs_glock_nq_init(sdp->sd_trans_gl,
31303 + LM_ST_EXCLUSIVE, GL_NOCACHE,
31306 + goto fail_ji_relse;
31308 + if (test_bit(SDF_ROFS, &sdp->sd_flags)) {
31309 + printk("GFS: fsid=%s: can't upgrade: read-only FS\n",
31312 + goto fail_gunlock_tr;
31315 + for (x = 0; x < sdp->sd_journals; x++) {
31316 + error = gfs_glock_nq_num(sdp,
31317 + sdp->sd_jindex[x].ji_addr,
31318 + &gfs_meta_glops, LM_ST_SHARED,
31319 + LM_FLAG_TRY | GL_NOCACHE, &j_gh);
31324 + case GLR_TRYFAILED:
31325 + printk("GFS: fsid=%s: journal %u is busy\n",
31326 + sdp->sd_fsname, x);
31330 + goto fail_gunlock_tr;
31333 + error = gfs_find_jhead(sdp, &sdp->sd_jindex[x],
31334 + j_gh.gh_gl, &lh);
31336 + gfs_glock_dq_uninit(&j_gh);
31339 + goto fail_gunlock_tr;
31341 + if (!(lh.lh_flags & GFS_LOG_HEAD_UNMOUNT) || lh.lh_last_dump) {
31342 + printk("GFS: fsid=%s: journal %u is busy\n",
31343 + sdp->sd_fsname, x);
31345 + goto fail_gunlock_tr;
31349 + /* We don't need to journal this change because we're changing
31350 + only one sector of one block. We definitely don't want to have
31351 + the journaling code running at this point. */
31353 + error = gfs_dread(sdp, GFS_SB_ADDR >> sdp->sd_fsb2bb_shift, sb_gl,
31354 + DIO_START | DIO_WAIT, &bh);
31356 + goto fail_gunlock_tr;
31358 + gfs_sb_in(&sdp->sd_sb, bh->b_data);
31360 + error = gfs_check_sb(sdp, &sdp->sd_sb, FALSE);
31361 + GFS_ASSERT_SBD(!error, sdp,);
31363 + sdp->sd_sb.sb_fs_format = GFS_FORMAT_FS;
31364 + sdp->sd_sb.sb_multihost_format = GFS_FORMAT_MULTI;
31366 + gfs_sb_out(&sdp->sd_sb, bh->b_data);
31368 + set_bit(GLF_DIRTY, &sb_gl->gl_flags);
31369 + error = gfs_dwrite(sdp, bh, DIO_DIRTY | DIO_START | DIO_WAIT);
31373 + gfs_glock_dq_uninit(&t_gh);
31375 + gfs_glock_dq_uninit(&ji_gh);
31378 + printk("GFS: fsid=%s: upgrade successful\n",
31380 + sdp->sd_args.ar_upgrade = FALSE;
31386 + gfs_glock_dq_uninit(&t_gh);
31389 + gfs_glock_dq_uninit(&ji_gh);
31392 + if (error == -EBUSY)
31393 + printk("GFS: fsid=%s: can't upgrade: the FS is still busy or contains dirty journals\n",
31396 + printk("GFS: fsid=%s: can't upgrade: %d\n",
31397 + sdp->sd_fsname, error);
31403 + * clear_journalsi - Clear all the journal index information (without locking)
31404 + * @sdp: The GFS superblock
31409 +clear_journalsi(struct gfs_sbd *sdp)
31411 + if (sdp->sd_jindex) {
31412 + kfree(sdp->sd_jindex);
31413 + sdp->sd_jindex = NULL;
31415 + sdp->sd_journals = 0;
31419 + * gfs_clear_journals - Clear all the journal index information
31420 + * @sdp: The GFS superblock
31425 +gfs_clear_journals(struct gfs_sbd *sdp)
31427 + down(&sdp->sd_jindex_lock);
31428 + clear_journalsi(sdp);
31429 + up(&sdp->sd_jindex_lock);
31433 + * gfs_ji_update - Update the journal index information
31434 + * @ip: The journal index inode
31436 + * Returns: 0 on success, error code otherwise
31440 +gfs_ji_update(struct gfs_inode *ip)
31442 + struct gfs_sbd *sdp = ip->i_sbd;
31443 + char buf[sizeof(struct gfs_jindex)];
31447 + GFS_ASSERT_SBD(!do_mod(ip->i_di.di_size, sizeof(struct gfs_jindex)),
31450 + clear_journalsi(sdp);
31452 + sdp->sd_jindex = gmalloc(ip->i_di.di_size);
31453 + memset(sdp->sd_jindex, 0, ip->i_di.di_size);
31455 + for (j = 0;; j++) {
31456 + error = gfs_internal_read(ip, buf,
31457 + j * sizeof(struct gfs_jindex),
31458 + sizeof(struct gfs_jindex));
31461 + if (error != sizeof(struct gfs_jindex)) {
31467 + gfs_jindex_in(sdp->sd_jindex + j, buf);
31470 + GFS_ASSERT_SBD(j * sizeof(struct gfs_jindex) == ip->i_di.di_size,
31473 + sdp->sd_journals = j;
31474 + sdp->sd_jiinode_vn = ip->i_gl->gl_vn;
31479 + clear_journalsi(sdp);
31484 + * gfs_jindex_hold - Grab a lock on the jindex
31485 + * @sdp: The GFS superblock
31486 + * @ji_gh: the holder for the jindex glock
31488 + * This is very similar to the gfs_rindex_hold() function, except that
31489 + * in general we hold the jindex lock for longer periods of time and
31490 + * we grab it far less frequently (in general) than the rgrp lock.
31492 + * Returns: 0 on success, error code otherwise
31496 +gfs_jindex_hold(struct gfs_sbd *sdp, struct gfs_holder *ji_gh)
31498 + struct gfs_inode *ip = sdp->sd_jiinode;
31499 + struct gfs_glock *gl = ip->i_gl;
31502 + error = gfs_glock_nq_init(gl, LM_ST_SHARED, 0, ji_gh);
31506 + if (sdp->sd_jiinode_vn != gl->gl_vn) {
31507 + down(&sdp->sd_jindex_lock);
31508 + if (sdp->sd_jiinode_vn != gl->gl_vn)
31509 + error = gfs_ji_update(ip);
31510 + up(&sdp->sd_jindex_lock);
31514 + gfs_glock_dq_uninit(ji_gh);
31520 + * gfs_get_jiinode - Read in the jindex inode for the superblock
31521 + * @sdp: The GFS superblock
31523 + * Returns: 0 on success, error code otherwise
31527 +gfs_get_jiinode(struct gfs_sbd *sdp)
31529 + struct gfs_holder ji_gh;
31532 + error = gfs_glock_nq_num(sdp,
31533 + sdp->sd_sb.sb_jindex_di.no_formal_ino,
31534 + &gfs_inode_glops,
31535 + LM_ST_SHARED, GL_LOCAL_EXCL,
31540 + error = gfs_inode_get(ji_gh.gh_gl, &sdp->sd_sb.sb_jindex_di,
31541 + CREATE, &sdp->sd_jiinode);
31543 + sdp->sd_jiinode_vn = ji_gh.gh_gl->gl_vn - 1;
31544 + set_bit(GLF_STICKY, &ji_gh.gh_gl->gl_flags);
31547 + gfs_glock_dq_uninit(&ji_gh);
31553 + * gfs_get_riinode - Read in the rindex inode for the superblock
31554 + * @sdp: The GFS superblock
31556 + * Returns: 0 on success, error code otherwise
31560 +gfs_get_riinode(struct gfs_sbd *sdp)
31562 + struct gfs_holder ri_gh;
31565 + error = gfs_glock_nq_num(sdp,
31566 + sdp->sd_sb.sb_rindex_di.no_formal_ino,
31567 + &gfs_inode_glops,
31568 + LM_ST_SHARED, GL_LOCAL_EXCL,
31573 + error = gfs_inode_get(ri_gh.gh_gl, &sdp->sd_sb.sb_rindex_di,
31574 + CREATE, &sdp->sd_riinode);
31576 + sdp->sd_riinode_vn = ri_gh.gh_gl->gl_vn - 1;
31577 + set_bit(GLF_STICKY, &ri_gh.gh_gl->gl_flags);
31580 + gfs_glock_dq_uninit(&ri_gh);
31586 + * gfs_get_rootinode - Read in the root inode
31587 + * @sdp: The GFS superblock
31589 + * Returns: 0 on success, error code otherwise
31593 +gfs_get_rootinode(struct gfs_sbd *sdp)
31595 + struct gfs_holder i_gh;
31598 + error = gfs_glock_nq_num(sdp,
31599 + sdp->sd_sb.sb_root_di.no_formal_ino,
31600 + &gfs_inode_glops,
31601 + LM_ST_SHARED, GL_LOCAL_EXCL,
31606 + error = gfs_inode_get(i_gh.gh_gl, &sdp->sd_sb.sb_root_di,
31607 + CREATE, &sdp->sd_rooti);
31609 + gfs_glock_dq_uninit(&i_gh);
31615 + * gfs_get_qinode - Read in the quota inode
31616 + * @sdp: The GFS superblock
31618 + * Returns: 0 on success, error code otherwise
31622 +gfs_get_qinode(struct gfs_sbd *sdp)
31624 + struct gfs_holder i_gh;
31627 + if (!sdp->sd_sb.sb_quota_di.no_formal_ino) {
31628 + error = gfs_alloc_qinode(sdp);
31633 + error = gfs_glock_nq_num(sdp,
31634 + sdp->sd_sb.sb_quota_di.no_formal_ino,
31635 + &gfs_inode_glops,
31636 + LM_ST_SHARED, GL_LOCAL_EXCL,
31641 + error = gfs_inode_get(i_gh.gh_gl, &sdp->sd_sb.sb_quota_di,
31642 + CREATE, &sdp->sd_qinode);
31644 + gfs_glock_dq_uninit(&i_gh);
31650 + * gfs_get_linode - Read in the license inode
31651 + * @sdp: The GFS superblock
31653 + * Returns: 0 on success, error code otherwise
31657 +gfs_get_linode(struct gfs_sbd *sdp)
31659 + struct gfs_holder i_gh;
31662 + if (!sdp->sd_sb.sb_license_di.no_formal_ino) {
31663 + error = gfs_alloc_linode(sdp);
31668 + error = gfs_glock_nq_num(sdp,
31669 + sdp->sd_sb.sb_license_di.no_formal_ino,
31670 + &gfs_inode_glops,
31671 + LM_ST_SHARED, GL_LOCAL_EXCL,
31676 + error = gfs_inode_get(i_gh.gh_gl, &sdp->sd_sb.sb_license_di,
31677 + CREATE, &sdp->sd_linode);
31679 + gfs_glock_dq_uninit(&i_gh);
31685 + * gfs_make_fs_rw - Turn a RO FS into a RW one
31686 + * @sdp: the filesystem
31688 + * Returns: 0 on success, -EXXX on failure
31692 +gfs_make_fs_rw(struct gfs_sbd *sdp)
31694 + struct gfs_glock *j_gl = sdp->sd_journal_gh.gh_gl;
31695 + struct gfs_holder t_gh;
31696 + struct gfs_log_header head;
31699 + error = gfs_glock_nq_init(sdp->sd_trans_gl,
31701 + GL_LOCAL_EXCL | GL_EXACT,
31706 + j_gl->gl_ops->go_inval(j_gl, DIO_METADATA | DIO_DATA);
31708 + error = gfs_find_jhead(sdp, &sdp->sd_jdesc, j_gl, &head);
31712 + GFS_ASSERT_SBD(head.lh_flags & GFS_LOG_HEAD_UNMOUNT, sdp,);
31714 + /* Initialize some head of the log stuff */
31715 + sdp->sd_sequence = head.lh_sequence;
31716 + sdp->sd_log_head = head.lh_first + 1;
31718 + error = gfs_recover_dump(sdp);
31722 + set_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags);
31723 + clear_bit(SDF_ROFS, &sdp->sd_flags);
31725 + set_bit(GLF_DIRTY, &j_gl->gl_flags);
31726 + gfs_log_dump(sdp, TRUE);
31728 + gfs_glock_dq_uninit(&t_gh);
31733 + t_gh.gh_flags |= GL_NOCACHE;
31734 + gfs_glock_dq_uninit(&t_gh);
31740 + * gfs_make_fs_ro - Turn a RW FS into a RO one
31741 + * @sdp: the filesystem
31743 + * Returns: 0 on success, -EXXX on failure
31747 +gfs_make_fs_ro(struct gfs_sbd *sdp)
31749 + struct gfs_holder t_gh;
31752 + error = gfs_glock_nq_init(sdp->sd_trans_gl,
31754 + GL_LOCAL_EXCL | GL_EXACT | GL_NOCACHE,
31759 + gfs_sync_meta(sdp);
31760 + gfs_log_dump(sdp, TRUE);
31762 + error = gfs_log_shutdown(sdp);
31764 + gfs_io_error(sdp);
31766 + set_bit(SDF_ROFS, &sdp->sd_flags);
31767 + clear_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags);
31769 + gfs_glock_dq_uninit(&t_gh);
31771 + gfs_unlinked_cleanup(sdp);
31772 + gfs_quota_cleanup(sdp);
31778 + * stat_gfs_async - Stat a filesystem using asynchronous locking
31779 + * @sdp: the filesystem
31780 + * @usage: the usage info that will be returned
31781 + * @interruptible: TRUE if we should look for signals.
31783 + * Any error (other than a signal) will cause this routine to fall back
31784 + * to the synchronous version.
31786 + * This really shouldn't busy wait like this.
31788 + * Returns: 0 on success, -EXXX on failure
31792 +stat_gfs_async(struct gfs_sbd *sdp, struct gfs_usage *usage, int interruptible)
31794 + struct gfs_rgrpd *rgd_next = gfs_rgrpd_get_first(sdp), *rgd;
31795 + struct gfs_holder *gha, *gh;
31796 + struct gfs_rgrp_lvb *rb;
31797 + unsigned int slots = sdp->sd_tune.gt_statfs_slots;
31800 + int error = 0, err;
31802 + gha = gmalloc(slots * sizeof(struct gfs_holder));
31803 + memset(gha, 0, slots * sizeof(struct gfs_holder));
31808 + for (x = 0; x < slots; x++) {
31811 + if (gh->gh_gl && gfs_glock_poll(gh)) {
31812 + err = gfs_glock_wait(gh);
31814 + gfs_holder_uninit(gh);
31817 + rgd = gl2rgd(gh->gh_gl);
31819 + rb = (struct gfs_rgrp_lvb *)rgd->rd_gl->gl_lvb;
31820 + if (gfs32_to_cpu(rb->rb_magic) == GFS_MAGIC &&
31821 + !test_bit(GLF_LVB_INVALID, &rgd->rd_gl->gl_flags)) {
31822 + usage->gu_total_blocks += rgd->rd_ri.ri_data;
31823 + usage->gu_free += gfs32_to_cpu(rb->rb_free);
31824 + usage->gu_used_dinode += gfs32_to_cpu(rb->rb_useddi);
31825 + usage->gu_free_dinode += gfs32_to_cpu(rb->rb_freedi);
31826 + usage->gu_used_meta += gfs32_to_cpu(rb->rb_usedmeta);
31827 + usage->gu_free_meta += gfs32_to_cpu(rb->rb_freemeta);
31831 + gfs_glock_dq_uninit(gh);
31837 + else if (rgd_next && !error) {
31838 + gfs_glock_nq_init(rgd_next->rd_gl,
31840 + GL_LOCAL_EXCL | GL_SKIP | GL_ASYNC,
31842 + rgd_next = gfs_rgrpd_get_next(rgd_next);
31846 + if (interruptible && signal_pending(current))
31847 + error = -ERESTARTSYS;
31862 + * gfs_stat_gfs - Do a statfs
31863 + * @sdp: the filesystem
31864 + * @usage: the usage structure
31865 + * @interruptible: Stop if there is a signal pending
31867 + * Returns: 0 on success, -EXXX on failure
31871 +gfs_stat_gfs(struct gfs_sbd *sdp, struct gfs_usage *usage, int interruptible)
31873 + struct gfs_holder ri_gh, rgd_gh;
31874 + struct gfs_rgrpd *rgd;
31875 + struct gfs_rgrp_lvb *rb;
31878 + memset(usage, 0, sizeof(struct gfs_usage));
31879 + usage->gu_block_size = sdp->sd_sb.sb_bsize;
31881 + error = gfs_rindex_hold(sdp, &ri_gh);
31885 + if (GFS_ASYNC_LM(sdp)) {
31886 + error = stat_gfs_async(sdp, usage, interruptible);
31887 + if (!error || error == -ERESTARTSYS)
31890 + memset(usage, 0, sizeof(struct gfs_usage));
31891 + usage->gu_block_size = sdp->sd_sb.sb_bsize;
31894 + for (rgd = gfs_rgrpd_get_first(sdp);
31896 + rgd = gfs_rgrpd_get_next(rgd)) {
31898 + error = gfs_glock_nq_init(rgd->rd_gl,
31900 + GL_LOCAL_EXCL | GL_SKIP,
31905 + rb = (struct gfs_rgrp_lvb *)rgd->rd_gl->gl_lvb;
31906 + if (gfs32_to_cpu(rb->rb_magic) == GFS_MAGIC &&
31907 + !test_bit(GLF_LVB_INVALID, &rgd->rd_gl->gl_flags)) {
31908 + usage->gu_total_blocks += rgd->rd_ri.ri_data;
31909 + usage->gu_free += gfs32_to_cpu(rb->rb_free);
31910 + usage->gu_used_dinode += gfs32_to_cpu(rb->rb_useddi);
31911 + usage->gu_free_dinode += gfs32_to_cpu(rb->rb_freedi);
31912 + usage->gu_used_meta += gfs32_to_cpu(rb->rb_usedmeta);
31913 + usage->gu_free_meta += gfs32_to_cpu(rb->rb_freemeta);
31915 + gfs_glock_dq_uninit(&rgd_gh);
31919 + gfs_glock_dq_uninit(&rgd_gh);
31921 + error = gfs_rgrp_lvb_init(rgd);
31927 + if (interruptible && signal_pending(current)) {
31928 + error = -ERESTARTSYS;
31934 + gfs_glock_dq_uninit(&ri_gh);
31940 + * gfs_lock_fs_check_clean - Stop all writes to the FS and check that all journals are clean
31941 + * @sdp: the file system
31942 + * @state: the state to put the transaction lock into
31943 + * @t_gh: the hold on the transaction lock
31945 + * Returns: 0 on success, -EXXX on error
31949 +gfs_lock_fs_check_clean(struct gfs_sbd *sdp, unsigned int state,
31950 + struct gfs_holder *t_gh)
31952 + struct gfs_holder ji_gh, cl_gh;
31953 + struct gfs_log_header lh;
31957 + error = gfs_jindex_hold(sdp, &ji_gh);
31961 + error = gfs_glock_nq_num(sdp,
31962 + GFS_CRAP_LOCK, &gfs_meta_glops,
31963 + LM_ST_SHARED, GL_NOCACHE,
31968 + error = gfs_glock_nq_init(sdp->sd_trans_gl, state,
31969 + LM_FLAG_PRIORITY | GL_EXACT | GL_NOCACHE,
31972 + goto fail_gunlock_craplock;
31974 + for (x = 0; x < sdp->sd_journals; x++) {
31975 + error = gfs_find_jhead(sdp, &sdp->sd_jindex[x],
31976 + cl_gh.gh_gl, &lh);
31978 + goto fail_gunlock_trans;
31980 + if (!(lh.lh_flags & GFS_LOG_HEAD_UNMOUNT)) {
31982 + goto fail_gunlock_trans;
31986 + gfs_glock_dq_uninit(&cl_gh);
31987 + gfs_glock_dq_uninit(&ji_gh);
31991 + fail_gunlock_trans:
31992 + gfs_glock_dq_uninit(t_gh);
31994 + fail_gunlock_craplock:
31995 + gfs_glock_dq_uninit(&cl_gh);
31998 + gfs_glock_dq_uninit(&ji_gh);
32004 + * gfs_freeze_fs - freezes the file system
32005 + * @sdp: the file system
32007 + * This function flushes data and meta data for all machines by
32008 + * acquiring the transaction log exclusively. All journals are
32009 + * ensured to be in a clean state as well.
32011 + * Returns: 0 on success, -EXXX on error
32015 +gfs_freeze_fs(struct gfs_sbd *sdp)
32019 + down(&sdp->sd_freeze_lock);
32021 + if (!sdp->sd_freeze_count++) {
32022 + error = gfs_lock_fs_check_clean(sdp, LM_ST_DEFERRED,
32023 + &sdp->sd_freeze_gh);
32025 + sdp->sd_freeze_count--;
32027 + sdp->sd_freeze_gh.gh_owner = NULL;
32030 + up(&sdp->sd_freeze_lock);
32036 + * gfs_unfreeze_fs - unfreezes the file system
32037 + * @sdp: the file system
32039 + * This function allows the file system to proceed by unlocking
32040 + * the exclusively held transaction lock. Other GFS nodes are
32041 + * now free to acquire the lock shared and go on with their lives.
32046 +gfs_unfreeze_fs(struct gfs_sbd *sdp)
32048 + down(&sdp->sd_freeze_lock);
32050 + if (sdp->sd_freeze_count && !--sdp->sd_freeze_count)
32051 + gfs_glock_dq_uninit(&sdp->sd_freeze_gh);
32053 + up(&sdp->sd_freeze_lock);
32055 diff -urN linux-orig/fs/gfs/super.h linux-patched/fs/gfs/super.h
32056 --- linux-orig/fs/gfs/super.h 1969-12-31 18:00:00.000000000 -0600
32057 +++ linux-patched/fs/gfs/super.h 2004-06-30 13:27:49.359707419 -0500
32059 +/******************************************************************************
32060 +*******************************************************************************
32062 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
32063 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
32065 +** This copyrighted material is made available to anyone wishing to use,
32066 +** modify, copy, or redistribute it subject to the terms and conditions
32067 +** of the GNU General Public License v.2.
32069 +*******************************************************************************
32070 +******************************************************************************/
32072 +#ifndef __SUPER_DOT_H__
32073 +#define __SUPER_DOT_H__
32075 +void gfs_init_tune_data(struct gfs_sbd *sdp);
32077 +int gfs_check_sb(struct gfs_sbd *sdp, struct gfs_sb *sb, int silent);
32078 +int gfs_read_sb(struct gfs_sbd *sdp, struct gfs_glock *gl, int silent);
32079 +int gfs_do_upgrade(struct gfs_sbd *sdp, struct gfs_glock *gl_sb);
32081 +static __inline__ unsigned int
32082 +gfs_num_journals(struct gfs_sbd *sdp)
32084 + unsigned int num;
32085 + down(&sdp->sd_jindex_lock);
32086 + num = sdp->sd_journals;
32087 + up(&sdp->sd_jindex_lock);
32091 +int gfs_jindex_hold(struct gfs_sbd *sdp, struct gfs_holder *ji_gh);
32092 +void gfs_clear_journals(struct gfs_sbd *sdp);
32094 +int gfs_get_jiinode(struct gfs_sbd *sdp);
32095 +int gfs_get_riinode(struct gfs_sbd *sdp);
32096 +int gfs_get_rootinode(struct gfs_sbd *sdp);
32097 +int gfs_get_qinode(struct gfs_sbd *sdp);
32098 +int gfs_get_linode(struct gfs_sbd *sdp);
32100 +int gfs_make_fs_rw(struct gfs_sbd *sdp);
32101 +int gfs_make_fs_ro(struct gfs_sbd *sdp);
32103 +int gfs_stat_gfs(struct gfs_sbd *sdp, struct gfs_usage *usage,
32104 + int interruptible);
32106 +int gfs_lock_fs_check_clean(struct gfs_sbd *sdp, unsigned int state,
32107 + struct gfs_holder *t_gh);
32108 +int gfs_freeze_fs(struct gfs_sbd *sdp);
32109 +void gfs_unfreeze_fs(struct gfs_sbd *sdp);
32111 +#endif /* __SUPER_DOT_H__ */
32112 diff -urN linux-orig/fs/gfs/trans.c linux-patched/fs/gfs/trans.c
32113 --- linux-orig/fs/gfs/trans.c 1969-12-31 18:00:00.000000000 -0600
32114 +++ linux-patched/fs/gfs/trans.c 2004-06-30 13:27:49.359707419 -0500
32116 +/******************************************************************************
32117 +*******************************************************************************
32119 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
32120 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
32122 +** This copyrighted material is made available to anyone wishing to use,
32123 +** modify, copy, or redistribute it subject to the terms and conditions
32124 +** of the GNU General Public License v.2.
32126 +*******************************************************************************
32127 +******************************************************************************/
32129 +#include <linux/sched.h>
32130 +#include <linux/slab.h>
32131 +#include <linux/smp_lock.h>
32132 +#include <linux/spinlock.h>
32133 +#include <asm/semaphore.h>
32134 +#include <linux/completion.h>
32135 +#include <linux/buffer_head.h>
32139 +#include "glock.h"
32142 +#include "quota.h"
32143 +#include "trans.h"
32144 +#include "unlinked.h"
32147 + * gfs_trans_print - Print a transaction to the console
32148 + * @sdp: the filesystem
32149 + * @tr: The GFS transaction
32150 + * @where: Situation of transaction
32155 +gfs_trans_print(struct gfs_sbd *sdp, struct gfs_trans *tr, unsigned int where)
32157 + struct gfs_log_element *le;
32158 + struct list_head *tmp, *head;
32159 + unsigned int mblks = 0, eblks = 0;
32161 + LO_TRANS_SIZE(sdp, tr, &mblks, &eblks, NULL, NULL);
32163 + printk("Transaction: (%s, %u)\n", tr->tr_file, tr->tr_line);
32164 + printk(" tr_mblks_asked = %u, tr_eblks_asked = %u, tr_seg_reserved = %u\n",
32165 + tr->tr_mblks_asked, tr->tr_eblks_asked, tr->tr_seg_reserved);
32166 + printk(" mblks = %u, eblks = %u\n", mblks, eblks);
32167 + printk(" tr_flags = 0x%.8X\n", tr->tr_flags);
32169 + for (head = &tr->tr_elements, tmp = head->next;
32171 + tmp = tmp->next) {
32172 + le = list_entry(tmp, struct gfs_log_element, le_list);
32173 + LO_PRINT(sdp, le, where);
32176 + printk("End Trans\n");
32180 + * gfs_trans_begin_i - Prepare to start a transaction
32181 + * @sdp: The GFS superblock
32182 + * @meta_blocks: Reserve this many metadata blocks in the log
32183 + * @extra_blocks: Number of non-metadata blocks to reserve
32185 + * Allocate the struct gfs_trans struct. Do in-place and
32186 + * log reservations.
32188 + * Returns: 0 on success, -EXXX on failure
32192 +gfs_trans_begin_i(struct gfs_sbd *sdp,
32193 + unsigned int meta_blocks, unsigned int extra_blocks,
32194 + char *file, unsigned int line)
32196 + struct gfs_trans *tr;
32197 + unsigned int blocks;
32200 + tr = gmalloc(sizeof(struct gfs_trans));
32201 + memset(tr, 0, sizeof(struct gfs_trans));
32203 + INIT_LIST_HEAD(&tr->tr_elements);
32204 + INIT_LIST_HEAD(&tr->tr_free_bufs);
32205 + INIT_LIST_HEAD(&tr->tr_free_bmem);
32206 + INIT_LIST_HEAD(&tr->tr_bufs);
32207 + INIT_LIST_HEAD(&tr->tr_ail_bufs);
32209 + tr->tr_file = file;
32210 + tr->tr_line = line;
32211 + tr->tr_t_gh = gfs_holder_get(sdp->sd_trans_gl, LM_ST_SHARED, 0);
32213 + error = gfs_glock_nq(tr->tr_t_gh);
32217 + if (test_bit(SDF_ROFS, &sdp->sd_flags)) {
32218 + tr->tr_t_gh->gh_flags |= GL_NOCACHE;
32220 + goto fail_gunlock;
32223 + /* Do log reservation */
32225 + tr->tr_mblks_asked = meta_blocks;
32226 + tr->tr_eblks_asked = extra_blocks;
32230 + blocks += gfs_struct2blk(sdp, meta_blocks,
32231 + sizeof(struct gfs_block_tag)) +
32233 + blocks += extra_blocks;
32234 + tr->tr_seg_reserved = gfs_blk2seg(sdp, blocks);
32236 + error = gfs_log_reserve(sdp, tr->tr_seg_reserved, FALSE);
32238 + goto fail_gunlock;
32240 + GFS_ASSERT_SBD(!current_transaction, sdp,);
32241 + current_transaction = tr;
32246 + gfs_glock_dq(tr->tr_t_gh);
32249 + gfs_holder_put(tr->tr_t_gh);
32256 + * gfs_trans_end - End a transaction
32257 + * @sdp: The GFS superblock
32259 + * If buffers were actually added to the transaction,
32264 +gfs_trans_end(struct gfs_sbd *sdp)
32266 + struct gfs_trans *tr;
32267 + struct gfs_holder *t_gh;
32268 + struct list_head *tmp, *head;
32269 + struct gfs_log_element *le;
32271 + tr = current_transaction;
32272 + GFS_ASSERT_SBD(tr, sdp,);
32273 + current_transaction = NULL;
32275 + t_gh = tr->tr_t_gh;
32276 + tr->tr_t_gh = NULL;
32278 + if (list_empty(&tr->tr_elements)) {
32279 + gfs_log_release(sdp, tr->tr_seg_reserved);
32282 + gfs_glock_dq(t_gh);
32283 + gfs_holder_put(t_gh);
32288 + for (head = &tr->tr_elements, tmp = head->next;
32290 + tmp = tmp->next) {
32291 + le = list_entry(tmp, struct gfs_log_element, le_list);
32292 + LO_TRANS_END(sdp, le);
32295 + gfs_log_commit(sdp, tr);
32297 + gfs_glock_dq(t_gh);
32298 + gfs_holder_put(t_gh);
32300 + if (sdp->sd_vfs->s_flags & MS_SYNCHRONOUS)
32301 + gfs_log_flush(sdp);
32305 + * gfs_trans_add_gl - Add a glock to a transaction
32308 + * Add the given glock to this process's transaction
32312 +gfs_trans_add_gl(struct gfs_glock *gl)
32314 + if (!gl->gl_new_le.le_trans) {
32315 + GFS_ASSERT_GLOCK(gfs_glock_is_locked_by_me(gl) &&
32316 + gfs_glock_is_held_excl(gl), gl,);
32317 + gfs_glock_hold(gl); /* Released in glock_trans_end() */
32319 + set_bit(GLF_DIRTY, &gl->gl_flags);
32321 + LO_ADD(gl->gl_sbd, &gl->gl_new_le);
32322 + gl->gl_new_le.le_trans->tr_num_gl++;
32327 + * gfs_trans_add_bh - Add a buffer to the current transaction
32328 + * @gl: the glock the buffer belongs to
32329 + * @bh: The buffer to add
32331 + * Add a buffer to the current transaction. The glock for the buffer
32332 + * should be held. This pins the buffer as well.
32334 + * Call this as many times as you want during transaction formation.
32335 + * It only does its work once.
32340 +gfs_trans_add_bh(struct gfs_glock *gl, struct buffer_head *bh)
32342 + struct gfs_sbd *sdp = gl->gl_sbd;
32343 + struct gfs_bufdata *bd;
32347 + gfs_attach_bufdata(bh, gl);
32351 + if (bd->bd_new_le.le_trans)
32354 + gfs_meta_check(sdp, bh);
32356 + GFS_ASSERT_GLOCK(bd->bd_gl == gl, gl,);
32358 + if (!gl->gl_new_le.le_trans)
32359 + gfs_trans_add_gl(gl);
32361 + gfs_dpin(sdp, bh);
32363 + LO_ADD(sdp, &bd->bd_new_le);
32364 + bd->bd_new_le.le_trans->tr_num_buf++;
32368 + * gfs_trans_add_unlinked - Add an unlinked/dealloced tag to the current transaction
32369 + * @sdp: the filesystem
32370 + * @type: the type of entry
32371 + * @inum: the inode number
32373 + * Returns: the unlinked structure
32376 +struct gfs_unlinked *
32377 +gfs_trans_add_unlinked(struct gfs_sbd *sdp, unsigned int type,
32378 + struct gfs_inum *inum)
32380 + struct gfs_unlinked *ul;
32382 + ul = gfs_unlinked_get(sdp, inum, CREATE);
32384 + LO_ADD(sdp, &ul->ul_new_le);
32387 + case GFS_LOG_DESC_IUL:
32388 + set_bit(ULF_NEW_UL, &ul->ul_flags);
32389 + ul->ul_new_le.le_trans->tr_num_iul++;
32391 + case GFS_LOG_DESC_IDA:
32392 + clear_bit(ULF_NEW_UL, &ul->ul_flags);
32393 + ul->ul_new_le.le_trans->tr_num_ida++;
32396 + GFS_ASSERT_SBD(FALSE, sdp,);
32404 + * gfs_trans_add_quota - Add quota changes to a transaction
32405 + * @sdp: the filesystem
32406 + * @change: The number of blocks allocated (positive) or freed (negative)
32407 + * @uid: the user ID doing the change
32408 + * @gid: the group ID doing the change
32413 +gfs_trans_add_quota(struct gfs_sbd *sdp, int64_t change,
32414 + uint32_t uid, uint32_t gid)
32416 + struct gfs_trans *tr;
32417 + struct list_head *tmp, *head, *next;
32418 + struct gfs_log_element *le;
32419 + struct gfs_quota_le *ql;
32420 + int found_uid, found_gid;
32423 + if (!sdp->sd_tune.gt_quota_account)
32426 + GFS_ASSERT_SBD(change, sdp,);
32428 + found_uid = (uid == NO_QUOTA_CHANGE);
32429 + found_gid = (gid == NO_QUOTA_CHANGE);
32431 + GFS_ASSERT_SBD(!found_uid || !found_gid, sdp,);
32433 + tr = current_transaction;
32434 + GFS_ASSERT_SBD(tr, sdp,);
32436 + for (head = &tr->tr_elements, tmp = head->next, next = tmp->next;
32438 + tmp = next, next = next->next) {
32439 + le = list_entry(tmp, struct gfs_log_element, le_list);
32440 + if (le->le_ops != &gfs_quota_lops)
32443 + ql = container_of(le, struct gfs_quota_le, ql_le);
32445 + if (test_bit(QDF_USER, &ql->ql_data->qd_flags)) {
32446 + if (ql->ql_data->qd_id == uid) {
32447 + ql->ql_change += change;
32449 + spin_lock(&sdp->sd_quota_lock);
32450 + ql->ql_data->qd_change_new += change;
32451 + spin_unlock(&sdp->sd_quota_lock);
32453 + list_del(&le->le_list);
32455 + if (ql->ql_change)
32456 + list_add(&le->le_list,
32457 + &tr->tr_elements);
32459 + gfs_quota_put(sdp, ql->ql_data);
32464 + GFS_ASSERT_SBD(!found_uid, sdp,);
32465 + found_uid = TRUE;
32470 + if (ql->ql_data->qd_id == gid) {
32471 + ql->ql_change += change;
32473 + spin_lock(&sdp->sd_quota_lock);
32474 + ql->ql_data->qd_change_new += change;
32475 + spin_unlock(&sdp->sd_quota_lock);
32477 + list_del(&le->le_list);
32479 + if (ql->ql_change)
32480 + list_add(&le->le_list,
32481 + &tr->tr_elements);
32483 + gfs_quota_put(sdp, ql->ql_data);
32488 + GFS_ASSERT_SBD(!found_gid, sdp,);
32489 + found_gid = TRUE;
32496 + while (!found_uid || !found_gid) {
32497 + ql = gmalloc(sizeof(struct gfs_quota_le));
32498 + memset(ql, 0, sizeof(struct gfs_quota_le));
32500 + INIT_LE(&ql->ql_le, &gfs_quota_lops);
32503 + error = gfs_quota_get(sdp, FALSE, gid,
32506 + found_gid = TRUE;
32508 + error = gfs_quota_get(sdp, TRUE, uid,
32511 + found_uid = TRUE;
32514 + GFS_ASSERT_SBD(!error && ql->ql_data, sdp,);
32516 + ql->ql_change = change;
32518 + spin_lock(&sdp->sd_quota_lock);
32519 + ql->ql_data->qd_change_new += change;
32520 + spin_unlock(&sdp->sd_quota_lock);
32522 + LO_ADD(sdp, &ql->ql_le);
32526 diff -urN linux-orig/fs/gfs/trans.h linux-patched/fs/gfs/trans.h
32527 --- linux-orig/fs/gfs/trans.h 1969-12-31 18:00:00.000000000 -0600
32528 +++ linux-patched/fs/gfs/trans.h 2004-06-30 13:27:49.359707419 -0500
32530 +/******************************************************************************
32531 +*******************************************************************************
32533 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
32534 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
32536 +** This copyrighted material is made available to anyone wishing to use,
32537 +** modify, copy, or redistribute it subject to the terms and conditions
32538 +** of the GNU General Public License v.2.
32540 +*******************************************************************************
32541 +******************************************************************************/
32543 +#ifndef __TRANS_DOT_H__
32544 +#define __TRANS_DOT_H__
32546 +#define TRANS_IS_NEW (53)
32547 +#define TRANS_IS_INCORE (54)
32548 +void gfs_trans_print(struct gfs_sbd *sdp, struct gfs_trans *tr,
32549 + unsigned int where);
32551 +int gfs_trans_begin_i(struct gfs_sbd *sdp,
32552 + unsigned int meta_blocks, unsigned int extra_blocks,
32553 + char *file, unsigned int line);
32554 +#define gfs_trans_begin(sdp, mb, eb) \
32555 +gfs_trans_begin_i((sdp), (mb), (eb), __FILE__, __LINE__)
32557 +void gfs_trans_end(struct gfs_sbd *sdp);
32559 +void gfs_trans_add_gl(struct gfs_glock *gl);
32560 +void gfs_trans_add_bh(struct gfs_glock *gl, struct buffer_head *bh);
32561 +struct gfs_unlinked *gfs_trans_add_unlinked(struct gfs_sbd *sdp, unsigned int type,
32562 + struct gfs_inum *inum);
32563 +void gfs_trans_add_quota(struct gfs_sbd *sdp, int64_t change, uint32_t uid,
32566 +#endif /* __TRANS_DOT_H__ */
32567 diff -urN linux-orig/fs/gfs/unlinked.c linux-patched/fs/gfs/unlinked.c
32568 --- linux-orig/fs/gfs/unlinked.c 1969-12-31 18:00:00.000000000 -0600
32569 +++ linux-patched/fs/gfs/unlinked.c 2004-06-30 13:27:49.360707187 -0500
32571 +/******************************************************************************
32572 +*******************************************************************************
32574 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
32575 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
32577 +** This copyrighted material is made available to anyone wishing to use,
32578 +** modify, copy, or redistribute it subject to the terms and conditions
32579 +** of the GNU General Public License v.2.
32581 +*******************************************************************************
32582 +******************************************************************************/
32584 +#include <linux/sched.h>
32585 +#include <linux/slab.h>
32586 +#include <linux/smp_lock.h>
32587 +#include <linux/spinlock.h>
32588 +#include <asm/semaphore.h>
32589 +#include <linux/completion.h>
32590 +#include <linux/buffer_head.h>
32593 +#include "inode.h"
32596 +#include "unlinked.h"
32599 + * gfs_unlinked_get - Get a structure to represent an unlinked inode
32600 + * @sdp: the filesystem
32601 + * @inum: the inode that's unlinked
32602 + * @create: if TRUE, create the structure, otherwise return NULL
32604 + * Returns: the structure, or NULL
32607 +struct gfs_unlinked *
32608 +gfs_unlinked_get(struct gfs_sbd *sdp, struct gfs_inum *inum, int create)
32610 + struct gfs_unlinked *ul = NULL, *new_ul = NULL;
32611 + struct list_head *tmp, *head;
32614 + spin_lock(&sdp->sd_unlinked_lock);
32616 + for (head = &sdp->sd_unlinked_list, tmp = head->next;
32618 + tmp = tmp->next) {
32619 + ul = list_entry(tmp, struct gfs_unlinked, ul_list);
32620 + if (gfs_inum_equal(&ul->ul_inum, inum)) {
32629 + if (!ul && new_ul) {
32631 + list_add(&ul->ul_list, &sdp->sd_unlinked_list);
32635 + spin_unlock(&sdp->sd_unlinked_lock);
32637 + if (ul || !create) {
32643 + new_ul = gmalloc(sizeof(struct gfs_unlinked));
32644 + memset(new_ul, 0, sizeof(struct gfs_unlinked));
32646 + new_ul->ul_count = 1;
32647 + new_ul->ul_inum = *inum;
32649 + INIT_LE(&new_ul->ul_new_le, &gfs_unlinked_lops);
32650 + INIT_LE(&new_ul->ul_incore_le, &gfs_unlinked_lops);
32651 + INIT_LE(&new_ul->ul_ondisk_le, &gfs_unlinked_lops);
32656 + * gfs_unlinked_hold - increment the usage count on a struct gfs_unlinked
32657 + * @sdp: the filesystem
32658 + * @ul: the structure
32663 +gfs_unlinked_hold(struct gfs_sbd *sdp, struct gfs_unlinked *ul)
32665 + spin_lock(&sdp->sd_unlinked_lock);
32667 + spin_unlock(&sdp->sd_unlinked_lock);
32671 + * gfs_unlinked_put - decrement the usage count on a struct gfs_unlinked
32672 + * @sdp: the filesystem
32673 + * @ul: the structure
32675 + * Free the structure if its reference count hits zero.
32680 +gfs_unlinked_put(struct gfs_sbd *sdp, struct gfs_unlinked *ul)
32682 + spin_lock(&sdp->sd_unlinked_lock);
32684 + GFS_ASSERT_SBD(ul->ul_count, sdp,);
32687 + if (!ul->ul_count) {
32688 + GFS_ASSERT_SBD(!test_bit(ULF_IC_LIST, &ul->ul_flags) &&
32689 + !test_bit(ULF_OD_LIST, &ul->ul_flags) &&
32690 + !test_bit(ULF_LOCK, &ul->ul_flags),
32692 + list_del(&ul->ul_list);
32693 + spin_unlock(&sdp->sd_unlinked_lock);
32696 + spin_unlock(&sdp->sd_unlinked_lock);
32700 + * unlinked_find - Find an inode to try to deallocate
32701 + * @sdp: the filesystem
32703 + * The returned structure is locked and needs to be unlocked
32704 + * with gfs_unlinked_unlock().
32706 + * Returns: An unlinked structure, or NULL
32709 +struct gfs_unlinked *
32710 +unlinked_find(struct gfs_sbd *sdp)
32712 + struct list_head *tmp, *head;
32713 + struct gfs_unlinked *ul = NULL;
32715 + if (test_bit(SDF_ROFS, &sdp->sd_flags))
32718 + gfs_log_lock(sdp);
32719 + spin_lock(&sdp->sd_unlinked_lock);
32721 + if (!atomic_read(&sdp->sd_unlinked_ic_count))
32724 + for (head = &sdp->sd_unlinked_list, tmp = head->next;
32726 + tmp = tmp->next) {
32727 + ul = list_entry(tmp, struct gfs_unlinked, ul_list);
32729 + if (test_bit(ULF_LOCK, &ul->ul_flags))
32731 + if (!test_bit(ULF_IC_LIST, &ul->ul_flags))
32734 + list_move_tail(&ul->ul_list, &sdp->sd_unlinked_list);
32736 + set_bit(ULF_LOCK, &ul->ul_flags);
32745 + spin_unlock(&sdp->sd_unlinked_lock);
32746 + gfs_log_unlock(sdp);
32752 + * gfs_unlinked_lock - lock an unlinked structure
32753 + * @sdp: the filesystem
32754 + * @ul: the unlinked inode structure
32759 +gfs_unlinked_lock(struct gfs_sbd *sdp, struct gfs_unlinked *ul)
32761 + spin_lock(&sdp->sd_unlinked_lock);
32763 + GFS_ASSERT_SBD(!test_bit(ULF_LOCK, &ul->ul_flags), sdp,);
32764 + set_bit(ULF_LOCK, &ul->ul_flags);
32768 + spin_unlock(&sdp->sd_unlinked_lock);
32772 + * gfs_unlinked_unlock - drop a reference on an unlinked structure
32773 + * @sdp: the filesystem
32774 + * @ul: the unlinked inode structure
32779 +gfs_unlinked_unlock(struct gfs_sbd *sdp, struct gfs_unlinked *ul)
32781 + spin_lock(&sdp->sd_unlinked_lock);
32783 + GFS_ASSERT_SBD(test_bit(ULF_LOCK, &ul->ul_flags), sdp,);
32784 + clear_bit(ULF_LOCK, &ul->ul_flags);
32786 + GFS_ASSERT_SBD(ul->ul_count, sdp,);
32789 + if (!ul->ul_count) {
32790 + GFS_ASSERT_SBD(!test_bit(ULF_IC_LIST, &ul->ul_flags) &&
32791 + !test_bit(ULF_OD_LIST, &ul->ul_flags), sdp,);
32792 + list_del(&ul->ul_list);
32793 + spin_unlock(&sdp->sd_unlinked_lock);
32796 + spin_unlock(&sdp->sd_unlinked_lock);
32800 + * gfs_unlinked_merge - add/remove an unlinked inode from the in-memory list
32801 + * @sdp: the filesystem
32802 + * @type: is this a unlink tag or a dealloc tag
32803 + * @inum: the inode number
32808 +gfs_unlinked_merge(struct gfs_sbd *sdp, unsigned int type,
32809 + struct gfs_inum *inum)
32811 + struct gfs_unlinked *ul;
32813 + GFS_ASSERT_SBD(atomic_read(&sdp->sd_unlinked_ic_count) ==
32814 + atomic_read(&sdp->sd_unlinked_od_count), sdp,);
32816 + ul = gfs_unlinked_get(sdp, inum, CREATE);
32818 + gfs_log_lock(sdp);
32821 + case GFS_LOG_DESC_IUL:
32822 + gfs_unlinked_hold(sdp, ul);
32823 + gfs_unlinked_hold(sdp, ul);
32824 + GFS_ASSERT_SBD(!test_bit(ULF_IC_LIST, &ul->ul_flags) &&
32825 + !test_bit(ULF_OD_LIST, &ul->ul_flags), sdp,);
32826 + set_bit(ULF_IC_LIST, &ul->ul_flags);
32827 + set_bit(ULF_OD_LIST, &ul->ul_flags);
32828 + atomic_inc(&sdp->sd_unlinked_ic_count);
32829 + atomic_inc(&sdp->sd_unlinked_od_count);
32833 + case GFS_LOG_DESC_IDA:
32834 + GFS_ASSERT_SBD(test_bit(ULF_IC_LIST, &ul->ul_flags) &&
32835 + test_bit(ULF_OD_LIST, &ul->ul_flags), sdp,);
32836 + clear_bit(ULF_IC_LIST, &ul->ul_flags);
32837 + clear_bit(ULF_OD_LIST, &ul->ul_flags);
32838 + gfs_unlinked_put(sdp, ul);
32839 + gfs_unlinked_put(sdp, ul);
32840 + GFS_ASSERT_SBD(atomic_read(&sdp->sd_unlinked_ic_count), sdp,);
32841 + atomic_dec(&sdp->sd_unlinked_ic_count);
32842 + GFS_ASSERT_SBD(atomic_read(&sdp->sd_unlinked_od_count), sdp,);
32843 + atomic_dec(&sdp->sd_unlinked_od_count);
32848 + gfs_log_unlock(sdp);
32850 + gfs_unlinked_put(sdp, ul);
32854 + * gfs_unlinked_cleanup - get rid of any extra struct gfs_unlinked structures
32855 + * @sdp: the filesystem
32860 +gfs_unlinked_cleanup(struct gfs_sbd *sdp)
32862 + struct gfs_unlinked *ul;
32865 + gfs_log_lock(sdp);
32867 + GFS_ASSERT_SBD(atomic_read(&sdp->sd_unlinked_ic_count) ==
32868 + atomic_read(&sdp->sd_unlinked_od_count), sdp,);
32870 + spin_lock(&sdp->sd_unlinked_lock);
32872 + while (!list_empty(&sdp->sd_unlinked_list)) {
32873 + ul = list_entry(sdp->sd_unlinked_list.next,
32874 + struct gfs_unlinked, ul_list);
32876 + if (ul->ul_count > 2) {
32877 + spin_unlock(&sdp->sd_unlinked_lock);
32878 + gfs_log_unlock(sdp);
32879 + current->state = TASK_UNINTERRUPTIBLE;
32880 + schedule_timeout(HZ);
32883 + GFS_ASSERT_SBD(ul->ul_count == 2, sdp,);
32885 + GFS_ASSERT_SBD(test_bit(ULF_IC_LIST, &ul->ul_flags) &&
32886 + test_bit(ULF_OD_LIST, &ul->ul_flags) &&
32887 + !test_bit(ULF_LOCK, &ul->ul_flags), sdp,);
32889 + list_del(&ul->ul_list);
32891 + atomic_dec(&sdp->sd_unlinked_ic_count);
32892 + atomic_dec(&sdp->sd_unlinked_od_count);
32894 + spin_unlock(&sdp->sd_unlinked_lock);
32896 + spin_lock(&sdp->sd_unlinked_lock);
32899 + spin_unlock(&sdp->sd_unlinked_lock);
32901 + GFS_ASSERT_SBD(!atomic_read(&sdp->sd_unlinked_ic_count) &&
32902 + !atomic_read(&sdp->sd_unlinked_od_count), sdp,);
32904 + gfs_log_unlock(sdp);
32908 + * gfs_unlinked_limit - limit the number of inodes waiting to be deallocated
32909 + * @sdp: the filesystem
32911 + * Returns: 0 on success, -EXXX on failure
32915 +gfs_unlinked_limit(struct gfs_sbd *sdp)
32917 + unsigned int tries = 0, min = 0;
32920 + if (atomic_read(&sdp->sd_unlinked_ic_count) >=
32921 + sdp->sd_tune.gt_ilimit2) {
32922 + tries = sdp->sd_tune.gt_ilimit2_tries;
32923 + min = sdp->sd_tune.gt_ilimit2_min;
32924 + } else if (atomic_read(&sdp->sd_unlinked_ic_count) >=
32925 + sdp->sd_tune.gt_ilimit1) {
32926 + tries = sdp->sd_tune.gt_ilimit1_tries;
32927 + min = sdp->sd_tune.gt_ilimit1_min;
32930 + while (tries--) {
32931 + struct gfs_unlinked *ul = unlinked_find(sdp);
32935 + error = gfs_inode_dealloc(sdp, &ul->ul_inum);
32937 + gfs_unlinked_unlock(sdp, ul);
32942 + } else if (error != 1)
32948 + * gfs_unlinked_dealloc - Go through the list of inodes to be deallocated
32949 + * @sdp: the filesystem
32951 + * Returns: 0 on success, -EXXX on failure
32955 +gfs_unlinked_dealloc(struct gfs_sbd *sdp)
32957 + unsigned int hits, strikes;
32965 + struct gfs_unlinked *ul = unlinked_find(sdp);
32969 + error = gfs_inode_dealloc(sdp, &ul->ul_inum);
32971 + gfs_unlinked_unlock(sdp, ul);
32977 + } else if (error == 1) {
32979 + if (strikes >= atomic_read(&sdp->sd_unlinked_ic_count)) {
32987 + if (!hits || !test_bit(SDF_INODED_RUN, &sdp->sd_flags))
32994 + if (error && error != -EROFS)
32995 + printk("GFS: fsid=%s: error deallocating inodes: %d\n",
32996 + sdp->sd_fsname, error);
32998 diff -urN linux-orig/fs/gfs/unlinked.h linux-patched/fs/gfs/unlinked.h
32999 --- linux-orig/fs/gfs/unlinked.h 1969-12-31 18:00:00.000000000 -0600
33000 +++ linux-patched/fs/gfs/unlinked.h 2004-06-30 13:27:49.360707187 -0500
33002 +/******************************************************************************
33003 +*******************************************************************************
33005 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
33006 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
33008 +** This copyrighted material is made available to anyone wishing to use,
33009 +** modify, copy, or redistribute it subject to the terms and conditions
33010 +** of the GNU General Public License v.2.
33012 +*******************************************************************************
33013 +******************************************************************************/
33015 +#ifndef __UNLINKED_DOT_H__
33016 +#define __UNLINKED_DOT_H__
33018 +struct gfs_unlinked *gfs_unlinked_get(struct gfs_sbd *sdp,
33019 + struct gfs_inum *inum, int create);
33020 +void gfs_unlinked_hold(struct gfs_sbd *sdp, struct gfs_unlinked *ul);
33021 +void gfs_unlinked_put(struct gfs_sbd *sdp, struct gfs_unlinked *ul);
33023 +void gfs_unlinked_lock(struct gfs_sbd *sdp, struct gfs_unlinked *ul);
33024 +void gfs_unlinked_unlock(struct gfs_sbd *sdp, struct gfs_unlinked *ul);
33026 +void gfs_unlinked_merge(struct gfs_sbd *sdp, unsigned int type,
33027 + struct gfs_inum *inum);
33028 +void gfs_unlinked_cleanup(struct gfs_sbd *sdp);
33030 +void gfs_unlinked_limit(struct gfs_sbd *sdp);
33031 +void gfs_unlinked_dealloc(struct gfs_sbd *sdp);
33033 +#endif /* __UNLINKED_DOT_H__ */
33034 diff -urN linux-orig/fs/gfs/util.c linux-patched/fs/gfs/util.c
33035 --- linux-orig/fs/gfs/util.c 1969-12-31 18:00:00.000000000 -0600
33036 +++ linux-patched/fs/gfs/util.c 2004-06-30 13:27:49.360707187 -0500
33038 +/******************************************************************************
33039 +*******************************************************************************
33041 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
33042 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
33044 +** This copyrighted material is made available to anyone wishing to use,
33045 +** modify, copy, or redistribute it subject to the terms and conditions
33046 +** of the GNU General Public License v.2.
33048 +*******************************************************************************
33049 +******************************************************************************/
33051 +#include <linux/sched.h>
33052 +#include <linux/slab.h>
33053 +#include <linux/smp_lock.h>
33054 +#include <linux/spinlock.h>
33055 +#include <asm/semaphore.h>
33056 +#include <linux/completion.h>
33057 +#include <linux/buffer_head.h>
33060 +#include "glock.h"
33062 +uint32_t gfs_random_number;
33064 +volatile int gfs_in_panic = FALSE;
33066 +kmem_cache_t *gfs_glock_cachep = NULL;
33067 +kmem_cache_t *gfs_inode_cachep = NULL;
33068 +kmem_cache_t *gfs_bufdata_cachep = NULL;
33069 +kmem_cache_t *gfs_mhc_cachep = NULL;
33072 + * gfs_random - Generate a random 32-bit number
33074 + * Generate a semi-crappy 32-bit pseudo-random number without using
33075 + * floating point.
33077 + * The PRNG is from "Numerical Recipes in C" (second edition), page 284.
33079 + * Returns: a 32-bit random number
33085 + gfs_random_number = 0x0019660D * gfs_random_number + 0x3C6EF35F;
33086 + return gfs_random_number;
33090 + * hash_more_internal - hash an array of data
33091 + * @data: the data to be hashed
33092 + * @len: the length of data to be hashed
33093 + * @hash: the hash from a previous call
33095 + * Take some data and convert it to a 32-bit hash.
33097 + * This is the 32-bit FNV-1a hash from:
33098 + * http://www.isthe.com/chongo/tech/comp/fnv/
33102 + * Returns: the hash
33105 +static __inline__ uint32_t
33106 +hash_more_internal(const void *data, unsigned int len, uint32_t hash)
33108 + unsigned char *p = (unsigned char *)data;
33109 + unsigned char *e = p + len;
33110 + uint32_t h = hash;
33113 + h ^= (uint32_t)(*p++);
33121 + * gfs_hash - hash an array of data
33122 + * @data: the data to be hashed
33123 + * @len: the length of data to be hashed
33125 + * Take some data and convert it to a 32-bit hash.
33127 + * This is the 32-bit FNV-1a hash from:
33128 + * http://www.isthe.com/chongo/tech/comp/fnv/
33130 + * Returns: the hash
33134 +gfs_hash(const void *data, unsigned int len)
33136 + uint32_t h = 0x811C9DC5;
33137 + h = hash_more_internal(data, len, h);
33142 + * gfs_hash_more - hash an array of data
33143 + * @data: the data to be hashed
33144 + * @len: the length of data to be hashed
33145 + * @hash: the hash from a previous call
33147 + * Take some data and convert it to a 32-bit hash.
33149 + * This is the 32-bit FNV-1a hash from:
33150 + * http://www.isthe.com/chongo/tech/comp/fnv/
33152 + * This version lets you hash together discontinuous regions.
33153 + * For example, to compute the combined hash of the memory in
33154 + * (data1, len1), (data2, len2), and (data3, len3) you:
33156 + * h = gfs_hash(data1, len1);
33157 + * h = gfs_hash_more(data2, len2, h);
33158 + * h = gfs_hash_more(data3, len3, h);
33160 + * Returns: the hash
33164 +gfs_hash_more(const void *data, unsigned int len, uint32_t hash)
33167 + h = hash_more_internal(data, len, hash);
33171 +/* Byte-wise swap two items of size SIZE. */
33173 +#define SWAP(a, b, size) \
33175 + register size_t __size = (size); \
33176 + register char *__a = (a), *__b = (b); \
33178 + char __tmp = *__a; \
33180 + *__b++ = __tmp; \
33181 + } while (__size-- > 1); \
33185 + * gfs_sort - Sort base array using shell sort algorithm
33186 + * @base: the input array
33187 + * @num_elem: number of elements in array
33188 + * @size: size of each element in array
33189 + * @compar: fxn to compare array elements (returns negative
33190 + * for lt, 0 for eq, and positive for gt
33192 + * Sorts the array passed in using the compar fxn to compare elements using
33193 + * the shell sort algorithm
33197 +gfs_sort(void *base, unsigned int num_elem, unsigned int size,
33198 + int (*compar) (const void *, const void *))
33200 + register char *pbase = (char *)base;
33202 + int cols[16] = {1391376, 463792, 198768, 86961, 33936, 13776, 4592,
33203 + 1968, 861, 336, 112, 48, 21, 7, 3, 1};
33205 + for (k = 0; k < 16; k++) {
33207 + for (i = h; i < num_elem; i++) {
33210 + (*compar)((void *)(pbase + size * (j - h)),
33211 + (void *)(pbase + size * j)) > 0) {
33212 + SWAP(pbase + size * j,
33213 + pbase + size * (j - h),
33223 + * @sdp: the filesystem
33224 + * @last: the last time we bitched
33230 +bitch_about(struct gfs_sbd *sdp, unsigned long *last, char *about)
33232 + if (time_after_eq(jiffies, *last + sdp->sd_tune.gt_complain_secs * HZ)) {
33233 + printk("GFS: fsid=%s: %s by program \"%s\"\n",
33234 + sdp->sd_fsname, about, current->comm);
33240 + * gfs_assert_i - Stop the machine
33241 + * @assertion: the assertion that failed
33242 + * @file: the file that called us
33243 + * @line: the line number of the file that called us
33245 + * Don't do ENTER() and EXIT() here.
33250 +gfs_assert_i(char *assertion,
33251 + unsigned int type, void *ptr,
33252 + char *file, unsigned int line)
33254 + gfs_in_panic = TRUE;
33256 + printk("\nGFS: Assertion failed on line %d of file %s\n"
33257 + "GFS: assertion: \"%s\"\n"
33258 + "GFS: time = %lu\n",
33259 + line, file, assertion, get_seconds());
33262 + case GFS_ASSERT_TYPE_SBD:
33264 + struct gfs_sbd *sdp = (struct gfs_sbd *)ptr;
33265 + printk("GFS: fsid=%s\n", sdp->sd_fsname);
33269 + case GFS_ASSERT_TYPE_GLOCK:
33271 + struct gfs_glock *gl = (struct gfs_glock *)ptr;
33272 + struct gfs_sbd *sdp = gl->gl_sbd;
33273 + printk("GFS: fsid=%s: glock = (%u, %"PRIu64")\n",
33275 + gl->gl_name.ln_type,
33276 + gl->gl_name.ln_number);
33280 + case GFS_ASSERT_TYPE_INODE:
33282 + struct gfs_inode *ip = (struct gfs_inode *)ptr;
33283 + struct gfs_sbd *sdp = ip->i_sbd;
33284 + printk("GFS: fsid=%s: inode = %"PRIu64"/%"PRIu64"\n",
33286 + ip->i_num.no_formal_ino, ip->i_num.no_addr);
33290 + case GFS_ASSERT_TYPE_RGRPD:
33292 + struct gfs_rgrpd *rgd = (struct gfs_rgrpd *)ptr;
33293 + struct gfs_sbd *sdp = rgd->rd_sbd;
33294 + printk("GFS: fsid=%s: rgroup = %"PRIu64"\n",
33295 + sdp->sd_fsname, rgd->rd_ri.ri_addr);
33302 + printk("GFS: Record message above and reboot.\n");
33305 + panic("GFS: Record message above and reboot.\n");
33309 + * gfs_io_error_i - handle an I/O error
33310 + * @sdp: the filesystem
33311 + * @ptr: the object the error happened on, e.g. a buffer head (can be NULL)
33313 + * This will do something other than panic, eventually.
33317 +void gfs_io_error_i(struct gfs_sbd *sdp,
33318 + unsigned int type, void *ptr,
33319 + char *file, unsigned int line)
33322 + case GFS_IO_ERROR_TYPE_BH:
33324 + struct buffer_head *bh = (struct buffer_head *)ptr;
33325 + printk("GFS: fsid=%s: I/O error on block %"PRIu64"\n",
33326 + sdp->sd_fsname, (uint64_t)bh->b_blocknr);
33330 + case GFS_IO_ERROR_TYPE_INODE:
33332 + struct gfs_inode *ip = (struct gfs_inode *)ptr;
33333 + printk("GFS: fsid=%s: I/O error in inode %"PRIu64"/%"PRIu64"\n",
33335 + ip->i_num.no_formal_ino, ip->i_num.no_addr);
33340 + printk("GFS: fsid=%s: I/O error\n", sdp->sd_fsname);
33344 + GFS_ASSERT_SBD(FALSE, sdp,);
33348 + * gmalloc - malloc a small amount of memory
33349 + * @size: the number of bytes to malloc
33351 + * Returns: the memory
33355 +gmalloc(unsigned int size)
33358 + RETRY_MALLOC(p = kmalloc(size, GFP_KERNEL), p);
33362 diff -urN linux-orig/fs/gfs/util.h linux-patched/fs/gfs/util.h
33363 --- linux-orig/fs/gfs/util.h 1969-12-31 18:00:00.000000000 -0600
33364 +++ linux-patched/fs/gfs/util.h 2004-06-30 13:27:49.360707187 -0500
33366 +/******************************************************************************
33367 +*******************************************************************************
33369 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
33370 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
33372 +** This copyrighted material is made available to anyone wishing to use,
33373 +** modify, copy, or redistribute it subject to the terms and conditions
33374 +** of the GNU General Public License v.2.
33376 +*******************************************************************************
33377 +******************************************************************************/
33379 +#ifndef __UTIL_DOT_H__
33380 +#define __UTIL_DOT_H__
33383 +/* Utility functions */
33385 +extern uint32_t gfs_random_number;
33386 +uint32_t gfs_random(void);
33388 +uint32_t gfs_hash(const void *data, unsigned int len);
33389 +uint32_t gfs_hash_more(const void *data, unsigned int len, uint32_t hash);
33391 +void gfs_sort(void *base, unsigned int num_elem, unsigned int size,
33392 + int (*compar) (const void *, const void *));
33394 +void bitch_about(struct gfs_sbd *sdp, unsigned long *last, char *about);
33398 +/* Assertion stuff */
33400 +#define GFS_ASSERT_TYPE_NONE (18)
33401 +#define GFS_ASSERT_TYPE_SBD (19)
33402 +#define GFS_ASSERT_TYPE_GLOCK (20)
33403 +#define GFS_ASSERT_TYPE_INODE (21)
33404 +#define GFS_ASSERT_TYPE_RGRPD (22)
33406 +#define GFS_ASSERT(x, todo) \
33412 + gfs_assert_i(#x, GFS_ASSERT_TYPE_NONE, NULL, __FILE__, __LINE__); \
33417 +#define GFS_ASSERT_SBD(x, sdp, todo) \
33422 + struct gfs_sbd *gfs_assert_sbd = (sdp); \
33424 + gfs_assert_i(#x, GFS_ASSERT_TYPE_SBD, gfs_assert_sbd, __FILE__, __LINE__); \
33429 +#define GFS_ASSERT_GLOCK(x, gl, todo) \
33434 + struct gfs_glock *gfs_assert_glock = (gl); \
33436 + gfs_assert_i(#x, GFS_ASSERT_TYPE_GLOCK, gfs_assert_glock, __FILE__, __LINE__); \
33441 +#define GFS_ASSERT_INODE(x, ip, todo) \
33446 + struct gfs_inode *gfs_assert_inode = (ip); \
33448 + gfs_assert_i(#x, GFS_ASSERT_TYPE_INODE, gfs_assert_inode, __FILE__, __LINE__); \
33453 +#define GFS_ASSERT_RGRPD(x, rgd, todo) \
33458 + struct gfs_rgrpd *gfs_assert_rgrpd = (rgd); \
33460 + gfs_assert_i(#x, GFS_ASSERT_TYPE_RGRPD, gfs_assert_rgrpd, __FILE__, __LINE__); \
33465 +extern volatile int gfs_in_panic;
33466 +void gfs_assert_i(char *assertion,
33467 + unsigned int type, void *ptr,
33468 + char *file, unsigned int line) __attribute__ ((noreturn));
33471 +/* I/O error stuff */
33473 +#define GFS_IO_ERROR_TYPE_NONE (118)
33474 +#define GFS_IO_ERROR_TYPE_BH (119)
33475 +#define GFS_IO_ERROR_TYPE_INODE (120)
33477 +#define gfs_io_error(sdp) \
33478 +gfs_io_error_i((sdp), GFS_IO_ERROR_TYPE_NONE, NULL, __FILE__, __LINE__);
33480 +#define gfs_io_error_bh(sdp, bh) \
33483 + struct buffer_head *gfs_io_error_bh = (bh); \
33484 + gfs_io_error_i((sdp), GFS_IO_ERROR_TYPE_BH, gfs_io_error_bh, __FILE__, __LINE__); \
33488 +#define gfs_io_error_inode(ip) \
33491 + struct gfs_inode *gfs_io_error_inode = (ip); \
33492 + gfs_io_error_i((ip)->i_sbd, GFS_IO_ERROR_TYPE_INODE, gfs_io_error_inode, __FILE__, __LINE__); \
33496 +void gfs_io_error_i(struct gfs_sbd *sdp,
33497 + unsigned int type, void *ptr,
33498 + char *file, unsigned int line);
33501 +/* Memory stuff */
33503 +#define RETRY_MALLOC(do_this, until_this) \
33506 + do { do_this; } while (0); \
33507 + if (until_this) \
33509 + printk("GFS: out of memory: %s, %u\n", __FILE__, __LINE__); \
33513 +extern kmem_cache_t *gfs_glock_cachep;
33514 +extern kmem_cache_t *gfs_inode_cachep;
33515 +extern kmem_cache_t *gfs_bufdata_cachep;
33516 +extern kmem_cache_t *gfs_mhc_cachep;
33518 +void *gmalloc(unsigned int size);
33521 +#endif /* __UTIL_DOT_H__ */
33522 diff -urN linux-orig/include/linux/gfs_ioctl.h linux-patched/include/linux/gfs_ioctl.h
33523 --- linux-orig/include/linux/gfs_ioctl.h 1969-12-31 18:00:00.000000000 -0600
33524 +++ linux-patched/include/linux/gfs_ioctl.h 2004-06-30 13:27:49.340711826 -0500
33526 +/******************************************************************************
33527 +*******************************************************************************
33529 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
33530 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
33532 +** This copyrighted material is made available to anyone wishing to use,
33533 +** modify, copy, or redistribute it subject to the terms and conditions
33534 +** of the GNU General Public License v.2.
33536 +*******************************************************************************
33537 +******************************************************************************/
33539 +#ifndef __GFS_IOCTL_DOT_H__
33540 +#define __GFS_IOCTL_DOT_H__
33542 +#define GFS_IOCTL_VERSION (0)
33544 +#define _GFSC_(x) (('G' << 8) | (x))
33547 + Ioctls implemented
33549 + Reserved Ioctls: 3, 7, 8, 9, 10, 4, 13
33553 +#define GFS_STACK_PRINT _GFSC_(40)
33555 +#define GFS_GET_META _GFSC_(31)
33556 +#define GFS_FILE_STAT _GFSC_(30)
33558 +#define GFS_SHRINK _GFSC_(5)
33560 +#define GFS_GET_ARGS _GFSC_(29)
33561 +#define GFS_GET_LOCKSTRUCT _GFSC_(39)
33562 +#define GFS_GET_SUPER _GFSC_(19)
33563 +#define GFS_JREAD _GFSC_(23)
33564 +#define GFS_JWRITE _GFSC_(24)
33565 +#define GFS_JSTAT _GFSC_(20)
33566 +#define GFS_JTRUNC _GFSC_(33)
33568 +#define GFS_LOCK_DUMP _GFSC_(11)
33570 +#define GFS_STATGFS _GFSC_(12)
33572 +#define GFS_FREEZE _GFSC_(14)
33573 +#define GFS_UNFREEZE _GFSC_(15)
33575 +#define GFS_RECLAIM_METADATA _GFSC_(16)
33577 +#define GFS_QUOTA_SYNC _GFSC_(17)
33578 +#define GFS_QUOTA_REFRESH _GFSC_(18)
33579 +#define GFS_QUOTA_READ _GFSC_(32)
33581 +#define GFS_GET_TUNE _GFSC_(21)
33582 +#define GFS_SET_TUNE _GFSC_(22)
33584 +#define GFS_EATTR_GET _GFSC_(26)
33585 +#define GFS_EATTR_SET _GFSC_(27)
33587 +#define GFS_WHERE_ARE_YOU _GFSC_(35)
33589 +#define GFS_SET_FLAG _GFSC_(36)
33590 +#define GFS_CLEAR_FLAG _GFSC_(37)
33592 +#define GFS_GET_COUNTERS _GFSC_(43)
33594 +#define GFS_FILE_FLUSH _GFSC_(42)
33596 +struct gfs_user_buffer {
33598 + unsigned int ub_size;
33599 + unsigned int ub_count;
33602 +/* Structure for jread/jwrite */
33604 +#define GFS_HIDDEN_JINDEX (0x10342345)
33605 +#define GFS_HIDDEN_RINDEX (0x10342346)
33606 +#define GFS_HIDDEN_QUOTA (0x10342347)
33607 +#define GFS_HIDDEN_LICENSE (0x10342348)
33610 + unsigned int jio_file;
33612 + uint32_t jio_size;
33613 + uint64_t jio_offset;
33616 + uint32_t jio_count;
33619 +/* Structure for better GFS-specific df */
33621 +struct gfs_usage {
33622 + unsigned int gu_block_size;
33623 + uint64_t gu_total_blocks;
33624 + uint64_t gu_free;
33625 + uint64_t gu_used_dinode;
33626 + uint64_t gu_free_dinode;
33627 + uint64_t gu_used_meta;
33628 + uint64_t gu_free_meta;
33631 +struct gfs_reclaim_stats {
33632 + uint64_t rc_inodes;
33633 + uint64_t rc_metadata;
33636 +struct gfs_quota_name {
33642 + * You can tune a filesystem, but you can't tune a yak.
33645 +#define GFS_TUNE_VERSION ((GFS_IOCTL_VERSION << 16) | (138))
33648 + unsigned int gt_tune_version;
33650 + unsigned int gt_ilimit1;
33651 + unsigned int gt_ilimit1_tries;
33652 + unsigned int gt_ilimit1_min;
33653 + unsigned int gt_ilimit2;
33654 + unsigned int gt_ilimit2_tries;
33655 + unsigned int gt_ilimit2_min;
33656 + unsigned int gt_demote_secs;
33657 + unsigned int gt_incore_log_blocks;
33658 + unsigned int gt_jindex_refresh_secs;
33659 + unsigned int gt_depend_secs;
33660 + unsigned int gt_scand_secs;
33661 + unsigned int gt_recoverd_secs;
33662 + unsigned int gt_logd_secs;
33663 + unsigned int gt_quotad_secs;
33664 + unsigned int gt_inoded_secs;
33665 + unsigned int gt_quota_simul_sync;
33666 + unsigned int gt_quota_warn_period;
33667 + unsigned int gt_atime_quantum;
33668 + unsigned int gt_quota_quantum;
33669 + unsigned int gt_quota_scale_num;
33670 + unsigned int gt_quota_scale_den;
33671 + unsigned int gt_quota_enforce;
33672 + unsigned int gt_quota_account;
33673 + unsigned int gt_new_files_jdata;
33674 + unsigned int gt_new_files_directio;
33675 + unsigned int gt_max_atomic_write;
33676 + unsigned int gt_max_readahead;
33677 + unsigned int gt_lockdump_size;
33678 + unsigned int gt_stall_secs;
33679 + unsigned int gt_complain_secs;
33680 + unsigned int gt_reclaim_limit;
33681 + unsigned int gt_entries_per_readdir;
33682 + unsigned int gt_prefetch_secs;
33683 + unsigned int gt_statfs_slots;
33684 + unsigned int gt_max_mhc;
33688 + * Extended Attribute Ioctl structures
33690 + * Note: The name_len does not include a null character.
33692 + * Getting and setting EAs return the following errors that aren't
33695 + * ENODATA - No such extended attribute
33696 + * ERANGE - Extended attribute data is too large for the buffer
33697 + * ENOSPC - No space left for extended attributes
33698 + * EEXIST - Extended attribute already exists
33701 +#define GFS_EACMD_SET (0)
33702 +#define GFS_EACMD_CREATE (1)
33703 +#define GFS_EACMD_REPLACE (2)
33704 +#define GFS_EACMD_REMOVE (3)
33706 +struct gfs_eaget_io {
33710 + uint32_t eg_data_len;
33711 + uint8_t eg_name_len;
33712 + uint8_t eg_type; /* GFS_EATYPE_... */
33715 +struct gfs_easet_io {
33716 + const char *es_data;
33718 + uint16_t es_data_len;
33719 + uint8_t es_name_len; /* not counting the NULL */
33720 + uint8_t es_cmd; /* GFS_EACMD_... */
33721 + uint8_t es_type; /* GFS_EATYPE_... */
33724 +#define GFS_GLOCKD_DEFAULT (1)
33725 +#define GFS_GLOCKD_MAX (32)
33728 + char ar_lockproto[256]; /* The name of the Lock Protocol */
33729 + char ar_locktable[256]; /* The name of the Lock Table */
33730 + char ar_hostdata[256]; /* The host specific data */
33732 + int ar_ignore_local_fs; /* Ignore the local_fs field in the struct lm_lockops */
33733 + int ar_localflocks; /* let the VFS do flock|fcntl locks for us */
33734 + int ar_localcaching; /* Local-style caching (dangerous on mulithost) */
33736 + int ar_upgrade; /* Upgrade ondisk/multihost format */
33738 + unsigned int ar_num_glockd;
33740 + int ar_posixacls; /* Enable posix acls */
33743 +#endif /* __GFS_IOCTL_DOT_H__ */
33744 diff -urN linux-orig/include/linux/gfs_ondisk.h linux-patched/include/linux/gfs_ondisk.h
33745 --- linux-orig/include/linux/gfs_ondisk.h 1969-12-31 18:00:00.000000000 -0600
33746 +++ linux-patched/include/linux/gfs_ondisk.h 2004-06-30 13:27:49.341711594 -0500
33748 +/******************************************************************************
33749 +*******************************************************************************
33751 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
33752 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
33754 +** This copyrighted material is made available to anyone wishing to use,
33755 +** modify, copy, or redistribute it subject to the terms and conditions
33756 +** of the GNU General Public License v.2.
33758 +*******************************************************************************
33759 +******************************************************************************/
33763 +* If you add 8 byte fields to these structures, they must be 8 byte
33764 +* aligned. 4 byte field must be 4 byte aligned, etc...
33766 +* All structures must be a multiple of 8 bytes long.
33769 +* We should have forgotten about supporting 512B FS block sizes
33770 +* and made the di_reserved field in the struct gfs_dinode structure
33773 +* de_rec_len in struct gfs_dirent should really have been a 32-bit value
33774 +* as it now limits us to a 64k FS block size (with the current code
33778 +#ifndef __GFS_ONDISK_DOT_H__
33779 +#define __GFS_ONDISK_DOT_H__
33781 +#define GFS_MAGIC (0x01161970)
33782 +#define GFS_BASIC_BLOCK (512)
33783 +#define GFS_BASIC_BLOCK_SHIFT (9)
33784 +#define GFS_DUMPS_PER_LOG (4)
33786 +/* Lock numbers of the LM_TYPE_NONDISK type */
33788 +#define GFS_MOUNT_LOCK (0)
33789 +#define GFS_LIVE_LOCK (1)
33790 +#define GFS_TRANS_LOCK (2)
33791 +#define GFS_RENAME_LOCK (3)
33793 +/* Format numbers for various metadata types */
33795 +#define GFS_FORMAT_SB (100)
33796 +#define GFS_FORMAT_RG (200)
33797 +#define GFS_FORMAT_RB (300)
33798 +#define GFS_FORMAT_DI (400)
33799 +#define GFS_FORMAT_IN (500)
33800 +#define GFS_FORMAT_LF (600)
33801 +#define GFS_FORMAT_JD (700)
33802 +#define GFS_FORMAT_LH (800)
33803 +#define GFS_FORMAT_LD (900)
33804 +/* These don't have actual struct gfs_meta_header structures to go with them */
33805 +#define GFS_FORMAT_JI (1000)
33806 +#define GFS_FORMAT_RI (1100)
33807 +#define GFS_FORMAT_DE (1200)
33808 +#define GFS_FORMAT_QU (1500)
33809 +#define GFS_FORMAT_EA (1600)
33810 +/* These are part of the superblock */
33811 +#define GFS_FORMAT_FS (1309)
33812 +#define GFS_FORMAT_MULTI (1401)
33815 + * An on-disk inode number
33818 +#define gfs_inum_equal(ino1, ino2) \
33819 +(((ino1)->no_formal_ino == (ino2)->no_formal_ino) && \
33820 + ((ino1)->no_addr == (ino2)->no_addr))
33823 + uint64_t no_formal_ino;
33824 + uint64_t no_addr;
33828 + * Generic metadata head structure
33830 + * Every inplace buffer logged in the journal must start with this.
33833 +#define GFS_METATYPE_NONE (0)
33834 +#define GFS_METATYPE_SB (1)
33835 +#define GFS_METATYPE_RG (2)
33836 +#define GFS_METATYPE_RB (3)
33837 +#define GFS_METATYPE_DI (4)
33838 +#define GFS_METATYPE_IN (5)
33839 +#define GFS_METATYPE_LF (6)
33840 +#define GFS_METATYPE_JD (7)
33841 +#define GFS_METATYPE_LH (8)
33842 +#define GFS_METATYPE_LD (9)
33843 +#define GFS_METATYPE_EA (10)
33845 +#define GFS_META_CLUMP (64)
33847 +struct gfs_meta_header {
33848 + uint32_t mh_magic; /* Magic number */
33849 + uint32_t mh_type; /* GFS_METATYPE_XX */
33850 + uint64_t mh_generation; /* Generation number */
33851 + uint32_t mh_format; /* GFS_FORMAT_XX */
33852 + uint32_t mh_incarn;
33856 + * super-block structure
33858 + * It's probably good if SIZEOF_SB <= GFS_BASIC_BLOCK
33861 +/* Address of SuperBlock in GFS basic blocks */
33862 +#define GFS_SB_ADDR (128)
33863 +/* The lock number for the superblock (must be zero) */
33864 +#define GFS_SB_LOCK (0)
33865 +#define GFS_CRAP_LOCK (1)
33867 +/* Requirement: GFS_LOCKNAME_LEN % 8 == 0
33868 + Includes: the fencing zero at the end */
33869 +#define GFS_LOCKNAME_LEN (64)
33872 + /* Order is important */
33873 + struct gfs_meta_header sb_header;
33875 + uint32_t sb_fs_format;
33876 + uint32_t sb_multihost_format;
33877 + uint32_t sb_flags;
33879 + /* Important information */
33880 + uint32_t sb_bsize; /* fundamental fs block size in bytes */
33881 + uint32_t sb_bsize_shift; /* log2(sb_bsize) */
33882 + uint32_t sb_seg_size; /* Journal segment size in FS blocks */
33884 + struct gfs_inum sb_jindex_di; /* journal index inode number (GFS_SB_LOCK) */
33885 + struct gfs_inum sb_rindex_di; /* resource index inode number (GFS_SB_LOCK) */
33886 + struct gfs_inum sb_root_di; /* root directory inode number (GFS_ROOT_LOCK) */
33888 + char sb_lockproto[GFS_LOCKNAME_LEN]; /* Type of locking this FS uses */
33889 + char sb_locktable[GFS_LOCKNAME_LEN]; /* Name of lock table for this FS */
33891 + struct gfs_inum sb_quota_di;
33892 + struct gfs_inum sb_license_di;
33894 + char sb_reserved[96];
33898 + * journal index structure
33901 +struct gfs_jindex {
33902 + uint64_t ji_addr; /* starting block of the journal */
33903 + uint32_t ji_nsegment; /* number of segments in journal */
33906 + char ji_reserved[64];
33910 + * resource index structure
33913 +struct gfs_rindex {
33914 + uint64_t ri_addr; /* rgrp block disk address */
33915 + uint32_t ri_length; /* length of rgrp header in fs blocks */
33918 + uint64_t ri_data1; /* first data location */
33919 + uint32_t ri_data; /* num of data blocks in rgrp */
33921 + uint32_t ri_bitbytes; /* number of bytes in data bitmaps */
33923 + char ri_reserved[64];
33927 + * resource group header structure
33931 +/* Number of blocks per byte in rgrp */
33932 +#define GFS_NBBY (4)
33933 +#define GFS_BIT_SIZE (2)
33934 +#define GFS_BIT_MASK (0x00000003)
33936 +#define GFS_BLKST_FREE (0)
33937 +#define GFS_BLKST_USED (1)
33938 +#define GFS_BLKST_FREEMETA (2)
33939 +#define GFS_BLKST_USEDMETA (3)
33942 + struct gfs_meta_header rg_header;
33944 + uint32_t rg_flags; /* flags */
33946 + uint32_t rg_free; /* number of free data blocks */
33948 + uint32_t rg_useddi; /* number of dinodes */
33949 + uint32_t rg_freedi; /* number of unused dinodes */
33950 + struct gfs_inum rg_freedi_list; /* list of free dinodes */
33952 + uint32_t rg_usedmeta; /* number of used metadata blocks (not including dinodes) */
33953 + uint32_t rg_freemeta; /* number of unused metadata blocks */
33955 + char rg_reserved[64];
33959 + * Quota Structures
33962 +struct gfs_quota {
33963 + uint64_t qu_limit;
33964 + uint64_t qu_warn;
33965 + int64_t qu_value;
33967 + char qu_reserved[64];
33971 + * dinode structure
33974 +#define GFS_MAX_META_HEIGHT (10)
33975 +#define GFS_DIR_MAX_DEPTH (17)
33977 +/* Dinode types */
33978 +#define GFS_FILE_NON (0)
33979 +#define GFS_FILE_REG (1)
33980 +#define GFS_FILE_DIR (2)
33981 +#define GFS_FILE_LNK (5)
33982 +#define GFS_FILE_BLK (7)
33983 +#define GFS_FILE_CHR (8)
33984 +#define GFS_FILE_FIFO (101)
33985 +#define GFS_FILE_SOCK (102)
33987 +/* Dinode flags */
33988 +#define GFS_DIF_JDATA (0x00000001)
33989 +#define GFS_DIF_EXHASH (0x00000002)
33990 +#define GFS_DIF_UNUSED (0x00000004)
33991 +#define GFS_DIF_EA_INDIRECT (0x00000008)
33992 +#define GFS_DIF_DIRECTIO (0x00000010)
33993 +#define GFS_DIF_IMMUTABLE (0x00000020)
33994 +#define GFS_DIF_APPENDONLY (0x00000040)
33995 +#define GFS_DIF_NOATIME (0x00000080)
33996 +#define GFS_DIF_SYNC (0x00000100)
33997 +#define GFS_DIF_INHERIT_DIRECTIO (0x40000000)
33998 +#define GFS_DIF_INHERIT_JDATA (0x80000000)
34000 +struct gfs_dinode {
34001 + struct gfs_meta_header di_header;
34003 + struct gfs_inum di_num;
34005 + uint32_t di_mode; /* mode of file */
34006 + uint32_t di_uid; /* owner's user id */
34007 + uint32_t di_gid; /* owner's group id */
34008 + uint32_t di_nlink; /* number of links to this file */
34009 + uint64_t di_size; /* number of bytes in file */
34010 + uint64_t di_blocks; /* number of blocks in file */
34011 + int64_t di_atime; /* time last accessed */
34012 + int64_t di_mtime; /* time last modified */
34013 + int64_t di_ctime; /* time last changed */
34014 + uint32_t di_major; /* device major number */
34015 + uint32_t di_minor; /* device minor number */
34017 + uint64_t di_rgrp; /* dinode rgrp block number */
34018 + uint64_t di_goal_rgrp; /* rgrp to alloc from next */
34019 + uint32_t di_goal_dblk; /* data block goal */
34020 + uint32_t di_goal_mblk; /* metadata block goal */
34021 + uint32_t di_flags; /* flags */
34022 + uint32_t di_payload_format; /* struct gfs_rindex, struct gfs_jindex, or struct gfs_dirent */
34023 + uint16_t di_type; /* type of file */
34024 + uint16_t di_height; /* height of metadata */
34025 + uint32_t di_incarn; /* incarnation number */
34028 + /* These only apply to directories */
34029 + uint16_t di_depth; /* Number of bits in the table */
34030 + uint32_t di_entries; /* The number of entries in the directory */
34032 + /* This only applies to unused inodes */
34033 + struct gfs_inum di_next_unused;
34035 + uint64_t di_eattr; /* extended attribute block number */
34037 + char di_reserved[56];
34041 + * indirect block header
34044 +struct gfs_indirect {
34045 + struct gfs_meta_header in_header;
34047 + char in_reserved[64];
34051 + * directory structure - many of these per directory file
34054 +#define GFS_FNAMESIZE (255)
34055 +#define GFS_DIRENT_SIZE(name_len) ((sizeof(struct gfs_dirent) + (name_len) + 7) & ~7)
34057 +struct gfs_dirent {
34058 + struct gfs_inum de_inum; /* Inode number */
34059 + uint32_t de_hash; /* hash of the filename */
34060 + uint16_t de_rec_len; /* the length of the dirent */
34061 + uint16_t de_name_len; /* the length of the name */
34062 + uint16_t de_type; /* type of dinode this points to */
34064 + char de_reserved[14];
34068 + * Header of leaf directory nodes
34072 + struct gfs_meta_header lf_header;
34074 + uint16_t lf_depth; /* Depth of leaf */
34075 + uint16_t lf_entries; /* Number of dirents in leaf */
34076 + uint32_t lf_dirent_format; /* Format of the dirents */
34077 + uint64_t lf_next; /* Next leaf, if overflow */
34079 + char lf_reserved[64];
34083 + * Log header structure
34086 +#define GFS_LOG_HEAD_UNMOUNT (0x00000001)
34088 +struct gfs_log_header {
34089 + struct gfs_meta_header lh_header;
34091 + uint32_t lh_flags; /* Flags */
34094 + uint64_t lh_first; /* Block number of first header in this trans */
34095 + uint64_t lh_sequence; /* Sequence number of this transaction */
34097 + uint64_t lh_tail; /* Block number of log tail */
34098 + uint64_t lh_last_dump; /* block number of last dump */
34100 + char lh_reserved[64];
34104 + * Log type descriptor
34107 +#define GFS_LOG_DESC_METADATA (300)
34108 +/* ld_data1 is the number of metadata blocks in the descriptor.
34109 + ld_data2 is unused.
34112 +#define GFS_LOG_DESC_IUL (400)
34113 +/* ld_data1 is TRUE if this is a dump.
34114 + ld_data2 is unused.
34115 + FixMe!!! ld_data1 should be the number of entries.
34116 + ld_data2 should be "TRUE if this is a dump".
34119 +#define GFS_LOG_DESC_IDA (401)
34120 +/* ld_data1 is unused.
34121 + ld_data2 is unused.
34122 + FixMe!!! ld_data1 should be the number of entries.
34125 +#define GFS_LOG_DESC_Q (402)
34126 +/* ld_data1 is the number of quota changes in the descriptor.
34127 + ld_data2 is TRUE if this is a dump.
34130 +#define GFS_LOG_DESC_LAST (500)
34131 +/* ld_data1 is unused.
34132 + ld_data2 is unused.
34135 +struct gfs_log_descriptor {
34136 + struct gfs_meta_header ld_header;
34138 + uint32_t ld_type; /* Type of data in this log chunk */
34139 + uint32_t ld_length; /* Number of buffers in this chunk */
34140 + uint32_t ld_data1; /* descriptor specific field */
34141 + uint32_t ld_data2; /* descriptor specific field */
34143 + char ld_reserved[64];
34147 + * Metadata block tags
34150 +struct gfs_block_tag {
34151 + uint64_t bt_blkno; /* inplace block number */
34152 + uint32_t bt_flags; /* flags */
34157 + * Quota Journal Tag
34160 +#define GFS_QTF_USER (0x00000001)
34162 +struct gfs_quota_tag {
34163 + int64_t qt_change;
34164 + uint32_t qt_flags;
34169 + * Extended attribute header format
34172 +#define GFS_EA_MAX_NAME_LEN (255)
34173 +#define GFS_EA_MAX_DATA_LEN (65535)
34175 +#define GFS_EATYPE_LAST (2)
34177 +#define GFS_EATYPE_UNUSED (0)
34178 +#define GFS_EATYPE_USR (1)
34179 +#define GFS_EATYPE_SYS (2)
34180 +#define GFS_EATYPE_VALID(x) ((x) && (x) <= GFS_EATYPE_LAST) /* this is only
34183 +#define GFS_EAFLAG_LAST (0x01) /* last ea in block */
34185 +struct gfs_ea_header {
34186 + uint32_t ea_rec_len;
34187 + uint32_t ea_data_len;
34188 + uint8_t ea_name_len; /* no NULL pointer after the string */
34189 + uint8_t ea_type; /* GFS_EATYPE_... */
34190 + uint8_t ea_flags;
34191 + uint8_t ea_num_ptrs;
34195 +/* Endian functions */
34197 +#define GFS_ENDIAN_BIG
34199 +#ifdef GFS_ENDIAN_BIG
34201 +#define gfs16_to_cpu be16_to_cpu
34202 +#define gfs32_to_cpu be32_to_cpu
34203 +#define gfs64_to_cpu be64_to_cpu
34205 +#define cpu_to_gfs16 cpu_to_be16
34206 +#define cpu_to_gfs32 cpu_to_be32
34207 +#define cpu_to_gfs64 cpu_to_be64
34209 +#else /* GFS_ENDIAN_BIG */
34211 +#define gfs16_to_cpu le16_to_cpu
34212 +#define gfs32_to_cpu le32_to_cpu
34213 +#define gfs64_to_cpu le64_to_cpu
34215 +#define cpu_to_gfs16 cpu_to_le16
34216 +#define cpu_to_gfs32 cpu_to_le32
34217 +#define cpu_to_gfs64 cpu_to_le64
34219 +#endif /* GFS_ENDIAN_BIG */
34221 +/* Translation functions */
34223 +void gfs_inum_in(struct gfs_inum *no, char *buf);
34224 +void gfs_inum_out(struct gfs_inum *no, char *buf);
34225 +void gfs_meta_header_in(struct gfs_meta_header *mh, char *buf);
34226 +void gfs_meta_header_out(struct gfs_meta_header *mh, char *buf);
34227 +void gfs_sb_in(struct gfs_sb *sb, char *buf);
34228 +void gfs_sb_out(struct gfs_sb *sb, char *buf);
34229 +void gfs_jindex_in(struct gfs_jindex *jindex, char *buf);
34230 +void gfs_jindex_out(struct gfs_jindex *jindex, char *buf);
34231 +void gfs_rindex_in(struct gfs_rindex *rindex, char *buf);
34232 +void gfs_rindex_out(struct gfs_rindex *rindex, char *buf);
34233 +void gfs_rgrp_in(struct gfs_rgrp *rgrp, char *buf);
34234 +void gfs_rgrp_out(struct gfs_rgrp *rgrp, char *buf);
34235 +void gfs_quota_in(struct gfs_quota *quota, char *buf);
34236 +void gfs_quota_out(struct gfs_quota *quota, char *buf);
34237 +void gfs_dinode_in(struct gfs_dinode *dinode, char *buf);
34238 +void gfs_dinode_out(struct gfs_dinode *dinode, char *buf);
34239 +void gfs_indirect_in(struct gfs_indirect *indirect, char *buf);
34240 +void gfs_indirect_out(struct gfs_indirect *indirect, char *buf);
34241 +void gfs_dirent_in(struct gfs_dirent *dirent, char *buf);
34242 +void gfs_dirent_out(struct gfs_dirent *dirent, char *buf);
34243 +void gfs_leaf_in(struct gfs_leaf *leaf, char *buf);
34244 +void gfs_leaf_out(struct gfs_leaf *leaf, char *buf);
34245 +void gfs_log_header_in(struct gfs_log_header *head, char *buf);
34246 +void gfs_log_header_out(struct gfs_log_header *head, char *buf);
34247 +void gfs_desc_in(struct gfs_log_descriptor *desc, char *buf);
34248 +void gfs_desc_out(struct gfs_log_descriptor *desc, char *buf);
34249 +void gfs_block_tag_in(struct gfs_block_tag *btag, char *buf);
34250 +void gfs_block_tag_out(struct gfs_block_tag *btag, char *buf);
34251 +void gfs_quota_tag_in(struct gfs_quota_tag *qtag, char *buf);
34252 +void gfs_quota_tag_out(struct gfs_quota_tag *qtag, char *buf);
34253 +void gfs_ea_header_in(struct gfs_ea_header *qtag, char *buf);
34254 +void gfs_ea_header_out(struct gfs_ea_header *qtag, char *buf);
34256 +/* Printing functions */
34258 +void gfs_inum_print(struct gfs_inum *no);
34259 +void gfs_meta_header_print(struct gfs_meta_header *mh);
34260 +void gfs_sb_print(struct gfs_sb *sb);
34261 +void gfs_jindex_print(struct gfs_jindex *jindex);
34262 +void gfs_rindex_print(struct gfs_rindex *rindex);
34263 +void gfs_rgrp_print(struct gfs_rgrp *rgrp);
34264 +void gfs_quota_print(struct gfs_quota *quota);
34265 +void gfs_dinode_print(struct gfs_dinode *dinode);
34266 +void gfs_indirect_print(struct gfs_indirect *indirect);
34267 +void gfs_dirent_print(struct gfs_dirent *dirent, char *name);
34268 +void gfs_leaf_print(struct gfs_leaf *leaf);
34269 +void gfs_log_header_print(struct gfs_log_header *head);
34270 +void gfs_desc_print(struct gfs_log_descriptor *desc);
34271 +void gfs_block_tag_print(struct gfs_block_tag *tag);
34272 +void gfs_quota_tag_print(struct gfs_quota_tag *tag);
34273 +void gfs_ea_header_print(struct gfs_ea_header *tag);
34275 +/* The hash function for ExHash directories */
34277 +uint32_t gfs_dir_hash(const char *data, int len);
34279 +#endif /* __GFS_ONDISK_DOT_H__ */
34283 +#ifdef WANT_GFS_CONVERSION_FUNCTIONS
34285 +#define CPIN_08(s1, s2, member, count) {memcpy((s1->member), (s2->member), (count));}
34286 +#define CPOUT_08(s1, s2, member, count) {memcpy((s2->member), (s1->member), (count));}
34287 +#define CPIN_16(s1, s2, member) {(s1->member) = gfs16_to_cpu((s2->member));}
34288 +#define CPOUT_16(s1, s2, member) {(s2->member) = cpu_to_gfs16((s1->member));}
34289 +#define CPIN_32(s1, s2, member) {(s1->member) = gfs32_to_cpu((s2->member));}
34290 +#define CPOUT_32(s1, s2, member) {(s2->member) = cpu_to_gfs32((s1->member));}
34291 +#define CPIN_64(s1, s2, member) {(s1->member) = gfs64_to_cpu((s2->member));}
34292 +#define CPOUT_64(s1, s2, member) {(s2->member) = cpu_to_gfs64((s1->member));}
34294 +#define pa(struct, member, count) print_array(#member, struct->member, count);
34297 + * print_array - Print out an array of bytes
34298 + * @title: what to print before the array
34299 + * @buf: the array
34300 + * @count: the number of bytes
34305 +print_array(char *title, char *buf, int count)
34309 + printk(" %s =\n", title);
34310 + for (x = 0; x < count; x++) {
34311 + printk("%.2X ", (unsigned char)buf[x]);
34312 + if (x % 16 == 15)
34320 + * gfs_inum_in - Read in an inode number
34321 + * @no: the cpu-order structure
34322 + * @buf: the disk-order buffer
34327 +gfs_inum_in(struct gfs_inum *no, char *buf)
34329 + struct gfs_inum *str = (struct gfs_inum *)buf;
34331 + CPIN_64(no, str, no_formal_ino);
34332 + CPIN_64(no, str, no_addr);
34336 + * gfs_inum_out - Write out an inode number
34337 + * @no: the cpu-order structure
34338 + * @buf: the disk-order buffer
34343 +gfs_inum_out(struct gfs_inum *no, char *buf)
34345 + struct gfs_inum *str = (struct gfs_inum *)buf;
34347 + CPOUT_64(no, str, no_formal_ino);
34348 + CPOUT_64(no, str, no_addr);
34352 + * gfs_inum_print - Print out an inode number
34353 + * @no: the cpu-order buffer
34358 +gfs_inum_print(struct gfs_inum *no)
34360 + pv(no, no_formal_ino, "%"PRIu64);
34361 + pv(no, no_addr, "%"PRIu64);
34365 + * gfs_meta_header_in - Read in a metadata header
34366 + * @mh: the cpu-order structure
34367 + * @buf: the disk-order buffer
34372 +gfs_meta_header_in(struct gfs_meta_header *mh, char *buf)
34374 + struct gfs_meta_header *str = (struct gfs_meta_header *)buf;
34376 + CPIN_32(mh, str, mh_magic);
34377 + CPIN_32(mh, str, mh_type);
34378 + CPIN_64(mh, str, mh_generation);
34379 + CPIN_32(mh, str, mh_format);
34380 + CPIN_32(mh, str, mh_incarn);
34384 + * gfs_meta_header_out - Write out a metadata header
34385 + * @mh: the cpu-order structure
34386 + * @buf: the disk-order buffer
34388 + * Don't ever change the generation number in this routine.
34389 + * It's done manually in increment_generation().
34393 +gfs_meta_header_out(struct gfs_meta_header *mh, char *buf)
34395 + struct gfs_meta_header *str = (struct gfs_meta_header *)buf;
34397 + CPOUT_32(mh, str, mh_magic);
34398 + CPOUT_32(mh, str, mh_type);
34400 + /* Don't do this!
34401 + mh_generation should only be changed manually. */
34402 + CPOUT_64(mh, str, mh_generation);
34404 + CPOUT_32(mh, str, mh_format);
34405 + CPOUT_32(mh, str, mh_incarn);
34409 + * gfs_meta_header_print - Print out a metadata header
34410 + * @mh: the cpu-order buffer
34415 +gfs_meta_header_print(struct gfs_meta_header *mh)
34417 + pv(mh, mh_magic, "0x%.8X");
34418 + pv(mh, mh_type, "%u");
34419 + pv(mh, mh_generation, "%"PRIu64);
34420 + pv(mh, mh_format, "%u");
34421 + pv(mh, mh_incarn, "%u");
34425 + * gfs_sb_in - Read in a superblock
34426 + * @sb: the cpu-order structure
34427 + * @buf: the disk-order buffer
34432 +gfs_sb_in(struct gfs_sb *sb, char *buf)
34434 + struct gfs_sb *str = (struct gfs_sb *)buf;
34436 + gfs_meta_header_in(&sb->sb_header, buf);
34438 + CPIN_32(sb, str, sb_fs_format);
34439 + CPIN_32(sb, str, sb_multihost_format);
34440 + CPIN_32(sb, str, sb_flags);
34442 + CPIN_32(sb, str, sb_bsize);
34443 + CPIN_32(sb, str, sb_bsize_shift);
34444 + CPIN_32(sb, str, sb_seg_size);
34446 + gfs_inum_in(&sb->sb_jindex_di, (char *)&str->sb_jindex_di);
34447 + gfs_inum_in(&sb->sb_rindex_di, (char *)&str->sb_rindex_di);
34448 + gfs_inum_in(&sb->sb_root_di, (char *)&str->sb_root_di);
34450 + CPIN_08(sb, str, sb_lockproto, GFS_LOCKNAME_LEN);
34451 + CPIN_08(sb, str, sb_locktable, GFS_LOCKNAME_LEN);
34453 + gfs_inum_in(&sb->sb_quota_di, (char *)&str->sb_quota_di);
34454 + gfs_inum_in(&sb->sb_license_di, (char *)&str->sb_license_di);
34456 + CPIN_08(sb, str, sb_reserved, 96);
34460 + * gfs_sb_out - Write out a superblock
34461 + * @sb: the cpu-order structure
34462 + * @buf: the disk-order buffer
34467 +gfs_sb_out(struct gfs_sb *sb, char *buf)
34469 + struct gfs_sb *str = (struct gfs_sb *)buf;
34471 + gfs_meta_header_out(&sb->sb_header, buf);
34473 + CPOUT_32(sb, str, sb_fs_format);
34474 + CPOUT_32(sb, str, sb_multihost_format);
34475 + CPOUT_32(sb, str, sb_flags);
34477 + CPOUT_32(sb, str, sb_bsize);
34478 + CPOUT_32(sb, str, sb_bsize_shift);
34479 + CPOUT_32(sb, str, sb_seg_size);
34481 + gfs_inum_out(&sb->sb_jindex_di, (char *)&str->sb_jindex_di);
34482 + gfs_inum_out(&sb->sb_rindex_di, (char *)&str->sb_rindex_di);
34483 + gfs_inum_out(&sb->sb_root_di, (char *)&str->sb_root_di);
34485 + CPOUT_08(sb, str, sb_lockproto, GFS_LOCKNAME_LEN);
34486 + CPOUT_08(sb, str, sb_locktable, GFS_LOCKNAME_LEN);
34488 + gfs_inum_out(&sb->sb_quota_di, (char *)&str->sb_quota_di);
34489 + gfs_inum_out(&sb->sb_license_di, (char *)&str->sb_license_di);
34491 + CPOUT_08(sb, str, sb_reserved, 96);
34495 + * gfs_sb_print - Print out a superblock
34496 + * @sb: the cpu-order buffer
34501 +gfs_sb_print(struct gfs_sb *sb)
34503 + gfs_meta_header_print(&sb->sb_header);
34505 + pv(sb, sb_fs_format, "%u");
34506 + pv(sb, sb_multihost_format, "%u");
34507 + pv(sb, sb_flags, "%u");
34509 + pv(sb, sb_bsize, "%u");
34510 + pv(sb, sb_bsize_shift, "%u");
34511 + pv(sb, sb_seg_size, "%u");
34513 + gfs_inum_print(&sb->sb_jindex_di);
34514 + gfs_inum_print(&sb->sb_rindex_di);
34515 + gfs_inum_print(&sb->sb_root_di);
34517 + pv(sb, sb_lockproto, "%s");
34518 + pv(sb, sb_locktable, "%s");
34520 + gfs_inum_print(&sb->sb_quota_di);
34521 + gfs_inum_print(&sb->sb_license_di);
34523 + pa(sb, sb_reserved, 96);
34527 + * gfs_jindex_in - Read in a journal index structure
34528 + * @jindex: the cpu-order structure
34529 + * @buf: the disk-order buffer
34534 +gfs_jindex_in(struct gfs_jindex *jindex, char *buf)
34536 + struct gfs_jindex *str = (struct gfs_jindex *)buf;
34538 + CPIN_64(jindex, str, ji_addr);
34539 + CPIN_32(jindex, str, ji_nsegment);
34540 + CPIN_32(jindex, str, ji_pad);
34542 + CPIN_08(jindex, str, ji_reserved, 64);
34546 + * gfs_jindex_out - Write out a journal index structure
34547 + * @jindex: the cpu-order structure
34548 + * @buf: the disk-order buffer
34553 +gfs_jindex_out(struct gfs_jindex *jindex, char *buf)
34555 + struct gfs_jindex *str = (struct gfs_jindex *)buf;
34557 + CPOUT_64(jindex, str, ji_addr);
34558 + CPOUT_32(jindex, str, ji_nsegment);
34559 + CPOUT_32(jindex, str, ji_pad);
34561 + CPOUT_08(jindex, str, ji_reserved, 64);
34565 + * gfs_jindex_print - Print out a journal index structure
34566 + * @ji: the cpu-order buffer
34571 +gfs_jindex_print(struct gfs_jindex *ji)
34573 + pv(ji, ji_addr, "%"PRIu64);
34574 + pv(ji, ji_nsegment, "%u");
34575 + pv(ji, ji_pad, "%u");
34577 + pa(ji, ji_reserved, 64);
34581 + * gfs_rindex_in - Read in a resource index structure
34582 + * @rindex: the cpu-order structure
34583 + * @buf: the disk-order buffer
34588 +gfs_rindex_in(struct gfs_rindex *rindex, char *buf)
34590 + struct gfs_rindex *str = (struct gfs_rindex *)buf;
34592 + CPIN_64(rindex, str, ri_addr);
34593 + CPIN_32(rindex, str, ri_length);
34594 + CPIN_32(rindex, str, ri_pad);
34596 + CPIN_64(rindex, str, ri_data1);
34597 + CPIN_32(rindex, str, ri_data);
34599 + CPIN_32(rindex, str, ri_bitbytes);
34601 + CPIN_08(rindex, str, ri_reserved, 64);
34605 + * gfs_rindex_out - Write out a resource index structure
34606 + * @rindex: the cpu-order structure
34607 + * @buf: the disk-order buffer
34612 +gfs_rindex_out(struct gfs_rindex *rindex, char *buf)
34614 + struct gfs_rindex *str = (struct gfs_rindex *)buf;
34616 + CPOUT_64(rindex, str, ri_addr);
34617 + CPOUT_32(rindex, str, ri_length);
34618 + CPOUT_32(rindex, str, ri_pad);
34620 + CPOUT_64(rindex, str, ri_data1);
34621 + CPOUT_32(rindex, str, ri_data);
34623 + CPOUT_32(rindex, str, ri_bitbytes);
34625 + CPOUT_08(rindex, str, ri_reserved, 64);
34629 + * gfs_rindex_print - Print out a resource index structure
34630 + * @ri: the cpu-order buffer
34635 +gfs_rindex_print(struct gfs_rindex *ri)
34637 + pv(ri, ri_addr, "%"PRIu64);
34638 + pv(ri, ri_length, "%u");
34639 + pv(ri, ri_pad, "%u");
34641 + pv(ri, ri_data1, "%"PRIu64);
34642 + pv(ri, ri_data, "%u");
34644 + pv(ri, ri_bitbytes, "%u");
34646 + pa(ri, ri_reserved, 64);
34650 + * gfs_rgrp_in - Read in a resource group header
34651 + * @rgrp: the cpu-order structure
34652 + * @buf: the disk-order buffer
34657 +gfs_rgrp_in(struct gfs_rgrp *rgrp, char *buf)
34659 + struct gfs_rgrp *str = (struct gfs_rgrp *)buf;
34661 + gfs_meta_header_in(&rgrp->rg_header, buf);
34663 + CPIN_32(rgrp, str, rg_flags);
34665 + CPIN_32(rgrp, str, rg_free);
34667 + CPIN_32(rgrp, str, rg_useddi);
34668 + CPIN_32(rgrp, str, rg_freedi);
34669 + gfs_inum_in(&rgrp->rg_freedi_list, (char *)&str->rg_freedi_list);
34671 + CPIN_32(rgrp, str, rg_usedmeta);
34672 + CPIN_32(rgrp, str, rg_freemeta);
34674 + CPIN_08(rgrp, str, rg_reserved, 64);
34678 + * gfs_rgrp_out - Write out a resource group header
34679 + * @rgrp: the cpu-order structure
34680 + * @buf: the disk-order buffer
34685 +gfs_rgrp_out(struct gfs_rgrp *rgrp, char *buf)
34687 + struct gfs_rgrp *str = (struct gfs_rgrp *)buf;
34689 + gfs_meta_header_out(&rgrp->rg_header, buf);
34691 + CPOUT_32(rgrp, str, rg_flags);
34693 + CPOUT_32(rgrp, str, rg_free);
34695 + CPOUT_32(rgrp, str, rg_useddi);
34696 + CPOUT_32(rgrp, str, rg_freedi);
34697 + gfs_inum_out(&rgrp->rg_freedi_list, (char *)&str->rg_freedi_list);
34699 + CPOUT_32(rgrp, str, rg_usedmeta);
34700 + CPOUT_32(rgrp, str, rg_freemeta);
34702 + CPOUT_08(rgrp, str, rg_reserved, 64);
34706 + * gfs_rgrp_print - Print out a resource group header
34707 + * @rg: the cpu-order buffer
34712 +gfs_rgrp_print(struct gfs_rgrp *rg)
34714 + gfs_meta_header_print(&rg->rg_header);
34716 + pv(rg, rg_flags, "%u");
34718 + pv(rg, rg_free, "%u");
34720 + pv(rg, rg_useddi, "%u");
34721 + pv(rg, rg_freedi, "%u");
34722 + gfs_inum_print(&rg->rg_freedi_list);
34724 + pv(rg, rg_usedmeta, "%u");
34725 + pv(rg, rg_freemeta, "%u");
34727 + pa(rg, rg_reserved, 64);
34731 + * gfs_quota_in - Read in a quota structure
34732 + * @quota: the cpu-order structure
34733 + * @buf: the disk-order buffer
34738 +gfs_quota_in(struct gfs_quota *quota, char *buf)
34740 + struct gfs_quota *str = (struct gfs_quota *)buf;
34742 + CPIN_64(quota, str, qu_limit);
34743 + CPIN_64(quota, str, qu_warn);
34744 + CPIN_64(quota, str, qu_value);
34746 + CPIN_08(quota, str, qu_reserved, 64);
34750 + * gfs_quota_out - Write out a quota structure
34751 + * @quota: the cpu-order structure
34752 + * @buf: the disk-order buffer
34757 +gfs_quota_out(struct gfs_quota *quota, char *buf)
34759 + struct gfs_quota *str = (struct gfs_quota *)buf;
34761 + CPOUT_64(quota, str, qu_limit);
34762 + CPOUT_64(quota, str, qu_warn);
34763 + CPOUT_64(quota, str, qu_value);
34765 + CPOUT_08(quota, str, qu_reserved, 64);
34769 + * gfs_quota_print - Print out a quota structure
34770 + * @quota: the cpu-order buffer
34775 +gfs_quota_print(struct gfs_quota *quota)
34777 + pv(quota, qu_limit, "%"PRIu64);
34778 + pv(quota, qu_warn, "%"PRIu64);
34779 + pv(quota, qu_value, "%"PRId64);
34781 + pa(quota, qu_reserved, 64);
34785 + * gfs_dinode_in - Read in a dinode
34786 + * @dinode: the cpu-order structure
34787 + * @buf: the disk-order buffer
34792 +gfs_dinode_in(struct gfs_dinode *dinode, char *buf)
34794 + struct gfs_dinode *str = (struct gfs_dinode *)buf;
34796 + gfs_meta_header_in(&dinode->di_header, buf);
34798 + gfs_inum_in(&dinode->di_num, (char *)&str->di_num);
34800 + CPIN_32(dinode, str, di_mode);
34801 + CPIN_32(dinode, str, di_uid);
34802 + CPIN_32(dinode, str, di_gid);
34803 + CPIN_32(dinode, str, di_nlink);
34804 + CPIN_64(dinode, str, di_size);
34805 + CPIN_64(dinode, str, di_blocks);
34806 + CPIN_64(dinode, str, di_atime);
34807 + CPIN_64(dinode, str, di_mtime);
34808 + CPIN_64(dinode, str, di_ctime);
34809 + CPIN_32(dinode, str, di_major);
34810 + CPIN_32(dinode, str, di_minor);
34812 + CPIN_64(dinode, str, di_rgrp);
34813 + CPIN_64(dinode, str, di_goal_rgrp);
34814 + CPIN_32(dinode, str, di_goal_dblk);
34815 + CPIN_32(dinode, str, di_goal_mblk);
34816 + CPIN_32(dinode, str, di_flags);
34817 + CPIN_32(dinode, str, di_payload_format);
34818 + CPIN_16(dinode, str, di_type);
34819 + CPIN_16(dinode, str, di_height);
34820 + CPIN_32(dinode, str, di_incarn);
34821 + CPIN_16(dinode, str, di_pad);
34823 + CPIN_16(dinode, str, di_depth);
34824 + CPIN_32(dinode, str, di_entries);
34826 + gfs_inum_in(&dinode->di_next_unused, (char *)&str->di_next_unused);
34828 + CPIN_64(dinode, str, di_eattr);
34830 + CPIN_08(dinode, str, di_reserved, 56);
34834 + * gfs_dinode_out - Write out a dinode
34835 + * @dinode: the cpu-order structure
34836 + * @buf: the disk-order buffer
34841 +gfs_dinode_out(struct gfs_dinode *dinode, char *buf)
34843 + struct gfs_dinode *str = (struct gfs_dinode *)buf;
34845 + gfs_meta_header_out(&dinode->di_header, buf);
34847 + gfs_inum_out(&dinode->di_num, (char *)&str->di_num);
34849 + CPOUT_32(dinode, str, di_mode);
34850 + CPOUT_32(dinode, str, di_uid);
34851 + CPOUT_32(dinode, str, di_gid);
34852 + CPOUT_32(dinode, str, di_nlink);
34853 + CPOUT_64(dinode, str, di_size);
34854 + CPOUT_64(dinode, str, di_blocks);
34855 + CPOUT_64(dinode, str, di_atime);
34856 + CPOUT_64(dinode, str, di_mtime);
34857 + CPOUT_64(dinode, str, di_ctime);
34858 + CPOUT_32(dinode, str, di_major);
34859 + CPOUT_32(dinode, str, di_minor);
34861 + CPOUT_64(dinode, str, di_rgrp);
34862 + CPOUT_64(dinode, str, di_goal_rgrp);
34863 + CPOUT_32(dinode, str, di_goal_dblk);
34864 + CPOUT_32(dinode, str, di_goal_mblk);
34865 + CPOUT_32(dinode, str, di_flags);
34866 + CPOUT_32(dinode, str, di_payload_format);
34867 + CPOUT_16(dinode, str, di_type);
34868 + CPOUT_16(dinode, str, di_height);
34869 + CPOUT_32(dinode, str, di_incarn);
34870 + CPOUT_16(dinode, str, di_pad);
34872 + CPOUT_16(dinode, str, di_depth);
34873 + CPOUT_32(dinode, str, di_entries);
34875 + gfs_inum_out(&dinode->di_next_unused, (char *)&str->di_next_unused);
34877 + CPOUT_64(dinode, str, di_eattr);
34879 + CPOUT_08(dinode, str, di_reserved, 56);
34883 + * gfs_dinode_print - Print out a dinode
34884 + * @di: the cpu-order buffer
34889 +gfs_dinode_print(struct gfs_dinode *di)
34891 + gfs_meta_header_print(&di->di_header);
34893 + gfs_inum_print(&di->di_num);
34895 + pv(di, di_mode, "0%o");
34896 + pv(di, di_uid, "%u");
34897 + pv(di, di_gid, "%u");
34898 + pv(di, di_nlink, "%u");
34899 + pv(di, di_size, "%"PRIu64);
34900 + pv(di, di_blocks, "%"PRIu64);
34901 + pv(di, di_atime, "%"PRId64);
34902 + pv(di, di_mtime, "%"PRId64);
34903 + pv(di, di_ctime, "%"PRId64);
34904 + pv(di, di_major, "%u");
34905 + pv(di, di_minor, "%u");
34907 + pv(di, di_rgrp, "%"PRIu64);
34908 + pv(di, di_goal_rgrp, "%"PRIu64);
34909 + pv(di, di_goal_dblk, "%u");
34910 + pv(di, di_goal_mblk, "%u");
34911 + pv(di, di_flags, "0x%.8X");
34912 + pv(di, di_payload_format, "%u");
34913 + pv(di, di_type, "%u");
34914 + pv(di, di_height, "%u");
34915 + pv(di, di_incarn, "%u");
34916 + pv(di, di_pad, "%u");
34918 + pv(di, di_depth, "%u");
34919 + pv(di, di_entries, "%u");
34921 + gfs_inum_print(&di->di_next_unused);
34923 + pv(di, di_eattr, "%"PRIu64);
34925 + pa(di, di_reserved, 56);
34929 + * gfs_indirect_in - copy in the header of an indirect block
34930 + * @indirect: the in memory copy
34931 + * @buf: the buffer copy
34936 +gfs_indirect_in(struct gfs_indirect *indirect, char *buf)
34938 + struct gfs_indirect *str = (struct gfs_indirect *)buf;
34940 + gfs_meta_header_in(&indirect->in_header, buf);
34942 + CPIN_08(indirect, str, in_reserved, 64);
34946 + * gfs_indirect_out - copy out the header of an indirect block
34947 + * @indirect: the in memory copy
34948 + * @buf: the buffer copy
34953 +gfs_indirect_out(struct gfs_indirect *indirect, char *buf)
34955 + struct gfs_indirect *str = (struct gfs_indirect *)buf;
34957 + gfs_meta_header_out(&indirect->in_header, buf);
34959 + CPOUT_08(indirect, str, in_reserved, 64);
34963 + * gfs_indirect_print - Print out an indirect block header
34964 + * @indirect: the cpu-order buffer
34969 +gfs_indirect_print(struct gfs_indirect *indirect)
34971 + gfs_meta_header_print(&indirect->in_header);
34973 + pa(indirect, in_reserved, 64);
34977 + * gfs_dirent_in - Read in a directory entry
34978 + * @dirent: the cpu-order structure
34979 + * @buf: the disk-order buffer
34984 +gfs_dirent_in(struct gfs_dirent *dirent, char *buf)
34986 + struct gfs_dirent *str = (struct gfs_dirent *)buf;
34988 + gfs_inum_in(&dirent->de_inum, (char *)&str->de_inum);
34989 + CPIN_32(dirent, str, de_hash);
34990 + CPIN_16(dirent, str, de_rec_len);
34991 + CPIN_16(dirent, str, de_name_len);
34992 + CPIN_16(dirent, str, de_type);
34994 + CPIN_08(dirent, str, de_reserved, 14);
34998 + * gfs_dirent_out - Write out a directory entry
34999 + * @dirent: the cpu-order structure
35000 + * @buf: the disk-order buffer
35005 +gfs_dirent_out(struct gfs_dirent *dirent, char *buf)
35007 + struct gfs_dirent *str = (struct gfs_dirent *)buf;
35009 + gfs_inum_out(&dirent->de_inum, (char *)&str->de_inum);
35010 + CPOUT_32(dirent, str, de_hash);
35011 + CPOUT_16(dirent, str, de_rec_len);
35012 + CPOUT_16(dirent, str, de_name_len);
35013 + CPOUT_16(dirent, str, de_type);
35015 + CPOUT_08(dirent, str, de_reserved, 14);
35019 + * gfs_dirent_print - Print out a directory entry
35020 + * @de: the cpu-order buffer
35021 + * @name: the filename
35026 +gfs_dirent_print(struct gfs_dirent *de, char *name)
35028 + char buf[GFS_FNAMESIZE + 1];
35030 + gfs_inum_print(&de->de_inum);
35031 + pv(de, de_hash, "0x%.8X");
35032 + pv(de, de_rec_len, "%u");
35033 + pv(de, de_name_len, "%u");
35034 + pv(de, de_type, "%u");
35036 + pa(de, de_reserved, 14);
35038 + memset(buf, 0, GFS_FNAMESIZE + 1);
35039 + memcpy(buf, name, de->de_name_len);
35040 + printk(" name = %s\n", buf);
35044 + * gfs_leaf_in - Read in a directory leaf header
35045 + * @leaf: the cpu-order structure
35046 + * @buf: the disk-order buffer
35051 +gfs_leaf_in(struct gfs_leaf *leaf, char *buf)
35053 + struct gfs_leaf *str = (struct gfs_leaf *)buf;
35055 + gfs_meta_header_in(&leaf->lf_header, buf);
35057 + CPIN_16(leaf, str, lf_depth);
35058 + CPIN_16(leaf, str, lf_entries);
35059 + CPIN_32(leaf, str, lf_dirent_format);
35060 + CPIN_64(leaf, str, lf_next);
35062 + CPIN_08(leaf, str, lf_reserved, 64);
35066 + * gfs_leaf_out - Write out a directory leaf header
35067 + * @leaf: the cpu-order structure
35068 + * @buf: the disk-order buffer
35073 +gfs_leaf_out(struct gfs_leaf *leaf, char *buf)
35075 + struct gfs_leaf *str = (struct gfs_leaf *)buf;
35077 + gfs_meta_header_out(&leaf->lf_header, buf);
35079 + CPOUT_16(leaf, str, lf_depth);
35080 + CPOUT_16(leaf, str, lf_entries);
35081 + CPOUT_32(leaf, str, lf_dirent_format);
35082 + CPOUT_64(leaf, str, lf_next);
35084 + CPOUT_08(leaf, str, lf_reserved, 64);
35088 + * gfs_leaf_print - Print out a directory leaf header
35089 + * @lf: the cpu-order buffer
35094 +gfs_leaf_print(struct gfs_leaf *lf)
35096 + gfs_meta_header_print(&lf->lf_header);
35098 + pv(lf, lf_depth, "%u");
35099 + pv(lf, lf_entries, "%u");
35100 + pv(lf, lf_dirent_format, "%u");
35101 + pv(lf, lf_next, "%"PRIu64);
35103 + pa(lf, lf_reserved, 64);
35107 + * gfs_log_header_in - Read in a log header
35108 + * @head: the cpu-order structure
35109 + * @buf: the disk-order buffer
35114 +gfs_log_header_in(struct gfs_log_header *head, char *buf)
35116 + struct gfs_log_header *str = (struct gfs_log_header *)buf;
35118 + gfs_meta_header_in(&head->lh_header, buf);
35120 + CPIN_32(head, str, lh_flags);
35121 + CPIN_32(head, str, lh_pad);
35123 + CPIN_64(head, str, lh_first);
35124 + CPIN_64(head, str, lh_sequence);
35126 + CPIN_64(head, str, lh_tail);
35127 + CPIN_64(head, str, lh_last_dump);
35129 + CPIN_08(head, str, lh_reserved, 64);
35133 + * gfs_log_header_out - Write out a log header
35134 + * @head: the cpu-order structure
35135 + * @buf: the disk-order buffer
35140 +gfs_log_header_out(struct gfs_log_header *head, char *buf)
35142 + struct gfs_log_header *str = (struct gfs_log_header *)buf;
35144 + gfs_meta_header_out(&head->lh_header, buf);
35146 + CPOUT_32(head, str, lh_flags);
35147 + CPOUT_32(head, str, lh_pad);
35149 + CPOUT_64(head, str, lh_first);
35150 + CPOUT_64(head, str, lh_sequence);
35152 + CPOUT_64(head, str, lh_tail);
35153 + CPOUT_64(head, str, lh_last_dump);
35155 + CPOUT_08(head, str, lh_reserved, 64);
35159 + * gfs_log_header_print - Print out a log header
35160 + * @lh: the cpu-order buffer
35165 +gfs_log_header_print(struct gfs_log_header *lh)
35167 + gfs_meta_header_print(&lh->lh_header);
35169 + pv(lh, lh_flags, "0x%.8X");
35170 + pv(lh, lh_pad, "%u");
35172 + pv(lh, lh_first, "%"PRIu64);
35173 + pv(lh, lh_sequence, "%"PRIu64);
35175 + pv(lh, lh_tail, "%"PRIu64);
35176 + pv(lh, lh_last_dump, "%"PRIu64);
35178 + pa(lh, lh_reserved, 64);
35182 + * gfs_desc_in - Read in a log descriptor
35183 + * @desc: the cpu-order structure
35184 + * @buf: the disk-order buffer
35189 +gfs_desc_in(struct gfs_log_descriptor *desc, char *buf)
35191 + struct gfs_log_descriptor *str = (struct gfs_log_descriptor *)buf;
35193 + gfs_meta_header_in(&desc->ld_header, buf);
35195 + CPIN_32(desc, str, ld_type);
35196 + CPIN_32(desc, str, ld_length);
35197 + CPIN_32(desc, str, ld_data1);
35198 + CPIN_32(desc, str, ld_data2);
35200 + CPIN_08(desc, str, ld_reserved, 64);
35204 + * gfs_desc_out - Write out a log descriptor
35205 + * @desc: the cpu-order structure
35206 + * @buf: the disk-order buffer
35211 +gfs_desc_out(struct gfs_log_descriptor *desc, char *buf)
35213 + struct gfs_log_descriptor *str = (struct gfs_log_descriptor *)buf;
35215 + gfs_meta_header_out(&desc->ld_header, buf);
35217 + CPOUT_32(desc, str, ld_type);
35218 + CPOUT_32(desc, str, ld_length);
35219 + CPOUT_32(desc, str, ld_data1);
35220 + CPOUT_32(desc, str, ld_data2);
35222 + CPOUT_08(desc, str, ld_reserved, 64);
35226 + * gfs_desc_print - Print out a log descriptor
35227 + * @ld: the cpu-order buffer
35232 +gfs_desc_print(struct gfs_log_descriptor *ld)
35234 + gfs_meta_header_print(&ld->ld_header);
35236 + pv(ld, ld_type, "%u");
35237 + pv(ld, ld_length, "%u");
35238 + pv(ld, ld_data1, "%u");
35239 + pv(ld, ld_data2, "%u");
35241 + pa(ld, ld_reserved, 64);
35245 + * gfs_block_tag_in - Read in a block tag
35246 + * @tag: the cpu-order structure
35247 + * @buf: the disk-order buffer
35252 +gfs_block_tag_in(struct gfs_block_tag *tag, char *buf)
35254 + struct gfs_block_tag *str = (struct gfs_block_tag *)buf;
35256 + CPIN_64(tag, str, bt_blkno);
35257 + CPIN_32(tag, str, bt_flags);
35258 + CPIN_32(tag, str, bt_pad);
35262 + * gfs_block_tag_out - Write out a block tag
35263 + * @tag: the cpu-order structure
35264 + * @buf: the disk-order buffer
35269 +gfs_block_tag_out(struct gfs_block_tag *tag, char *buf)
35271 + struct gfs_block_tag *str = (struct gfs_block_tag *)buf;
35273 + CPOUT_64(tag, str, bt_blkno);
35274 + CPOUT_32(tag, str, bt_flags);
35275 + CPOUT_32(tag, str, bt_pad);
35279 + * gfs_block_tag_print - Print out a block tag
35280 + * @tag: the cpu-order buffer
35285 +gfs_block_tag_print(struct gfs_block_tag *tag)
35287 + pv(tag, bt_blkno, "%"PRIu64);
35288 + pv(tag, bt_flags, "%u");
35289 + pv(tag, bt_pad, "%u");
35293 + * gfs_quota_tag_in - Read in a quota tag
35294 + * @tag: the cpu-order structure
35295 + * @buf: the disk-order buffer
35300 +gfs_quota_tag_in(struct gfs_quota_tag *tag, char *buf)
35302 + struct gfs_quota_tag *str = (struct gfs_quota_tag *)buf;
35304 + CPIN_64(tag, str, qt_change);
35305 + CPIN_32(tag, str, qt_flags);
35306 + CPIN_32(tag, str, qt_id);
35310 + * gfs_quota_tag_out - Write out a quota tag
35311 + * @tag: the cpu-order structure
35312 + * @buf: the disk-order buffer
35317 +gfs_quota_tag_out(struct gfs_quota_tag *tag, char *buf)
35319 + struct gfs_quota_tag *str = (struct gfs_quota_tag *)buf;
35321 + CPOUT_64(tag, str, qt_change);
35322 + CPOUT_32(tag, str, qt_flags);
35323 + CPOUT_32(tag, str, qt_id);
35327 + * gfs_quota_tag_print - Print out a quota tag
35328 + * @tag: the cpu-order buffer
35333 +gfs_quota_tag_print(struct gfs_quota_tag *tag)
35335 + pv(tag, qt_change, "%"PRId64);
35336 + pv(tag, qt_flags, "0x%.8X");
35337 + pv(tag, qt_id, "%u");
35341 + * gfs_ea_header_in - Read in an Extended Attribute header
35342 + * @ea: the cpu-order structure
35343 + * @buf: the disk-order buffer
35348 +gfs_ea_header_in(struct gfs_ea_header *ea, char *buf)
35350 + struct gfs_ea_header *str = (struct gfs_ea_header *)buf;
35352 + CPIN_32(ea, str, ea_rec_len);
35353 + CPIN_32(ea, str, ea_data_len);
35354 + ea->ea_name_len = str->ea_name_len;
35355 + ea->ea_type = str->ea_type;
35356 + ea->ea_flags = str->ea_flags;
35357 + ea->ea_num_ptrs = str->ea_num_ptrs;
35358 + CPIN_32(ea, str, ea_pad);
35362 + * gfs_ea_header_out - Write out an Extended Attribute header
35363 + * @ea: the cpu-order structure
35364 + * @buf: the disk-order buffer
35369 +gfs_ea_header_out(struct gfs_ea_header *ea, char *buf)
35371 + struct gfs_ea_header *str = (struct gfs_ea_header *)buf;
35373 + CPOUT_32(ea, str, ea_rec_len);
35374 + CPOUT_32(ea, str, ea_data_len);
35375 + str->ea_name_len = ea->ea_name_len;
35376 + str->ea_type = ea->ea_type;
35377 + str->ea_flags = ea->ea_flags;
35378 + str->ea_num_ptrs = ea->ea_num_ptrs;
35379 + CPOUT_32(ea, str, ea_pad);
35383 + * gfs_ea_header_print - Print out an Extended Attribute header
35384 + * @ea: the cpu-order buffer
35389 +gfs_ea_header_print(struct gfs_ea_header *ea)
35391 + pv(ea, ea_rec_len, "%u");
35392 + pv(ea, ea_data_len, "%u");
35393 + pv(ea, ea_name_len, "%u");
35394 + pv(ea, ea_type, "%u");
35395 + pv(ea, ea_flags, "%u");
35396 + pv(ea, ea_num_ptrs, "%u");
35397 + pv(ea, ea_pad, "%u");
35400 +static const uint32_t crc_32_tab[] =
35402 + 0x00000000, 0x77073096, 0xee0e612c, 0x990951ba, 0x076dc419, 0x706af48f, 0xe963a535, 0x9e6495a3,
35403 + 0x0edb8832, 0x79dcb8a4, 0xe0d5e91e, 0x97d2d988, 0x09b64c2b, 0x7eb17cbd, 0xe7b82d07, 0x90bf1d91,
35404 + 0x1db71064, 0x6ab020f2, 0xf3b97148, 0x84be41de, 0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7,
35405 + 0x136c9856, 0x646ba8c0, 0xfd62f97a, 0x8a65c9ec, 0x14015c4f, 0x63066cd9, 0xfa0f3d63, 0x8d080df5,
35406 + 0x3b6e20c8, 0x4c69105e, 0xd56041e4, 0xa2677172, 0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b,
35407 + 0x35b5a8fa, 0x42b2986c, 0xdbbbc9d6, 0xacbcf940, 0x32d86ce3, 0x45df5c75, 0xdcd60dcf, 0xabd13d59,
35408 + 0x26d930ac, 0x51de003a, 0xc8d75180, 0xbfd06116, 0x21b4f4b5, 0x56b3c423, 0xcfba9599, 0xb8bda50f,
35409 + 0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924, 0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d,
35410 + 0x76dc4190, 0x01db7106, 0x98d220bc, 0xefd5102a, 0x71b18589, 0x06b6b51f, 0x9fbfe4a5, 0xe8b8d433,
35411 + 0x7807c9a2, 0x0f00f934, 0x9609a88e, 0xe10e9818, 0x7f6a0dbb, 0x086d3d2d, 0x91646c97, 0xe6635c01,
35412 + 0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e, 0x6c0695ed, 0x1b01a57b, 0x8208f4c1, 0xf50fc457,
35413 + 0x65b0d9c6, 0x12b7e950, 0x8bbeb8ea, 0xfcb9887c, 0x62dd1ddf, 0x15da2d49, 0x8cd37cf3, 0xfbd44c65,
35414 + 0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2, 0x4adfa541, 0x3dd895d7, 0xa4d1c46d, 0xd3d6f4fb,
35415 + 0x4369e96a, 0x346ed9fc, 0xad678846, 0xda60b8d0, 0x44042d73, 0x33031de5, 0xaa0a4c5f, 0xdd0d7cc9,
35416 + 0x5005713c, 0x270241aa, 0xbe0b1010, 0xc90c2086, 0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f,
35417 + 0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4, 0x59b33d17, 0x2eb40d81, 0xb7bd5c3b, 0xc0ba6cad,
35418 + 0xedb88320, 0x9abfb3b6, 0x03b6e20c, 0x74b1d29a, 0xead54739, 0x9dd277af, 0x04db2615, 0x73dc1683,
35419 + 0xe3630b12, 0x94643b84, 0x0d6d6a3e, 0x7a6a5aa8, 0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1,
35420 + 0xf00f9344, 0x8708a3d2, 0x1e01f268, 0x6906c2fe, 0xf762575d, 0x806567cb, 0x196c3671, 0x6e6b06e7,
35421 + 0xfed41b76, 0x89d32be0, 0x10da7a5a, 0x67dd4acc, 0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5,
35422 + 0xd6d6a3e8, 0xa1d1937e, 0x38d8c2c4, 0x4fdff252, 0xd1bb67f1, 0xa6bc5767, 0x3fb506dd, 0x48b2364b,
35423 + 0xd80d2bda, 0xaf0a1b4c, 0x36034af6, 0x41047a60, 0xdf60efc3, 0xa867df55, 0x316e8eef, 0x4669be79,
35424 + 0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236, 0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f,
35425 + 0xc5ba3bbe, 0xb2bd0b28, 0x2bb45a92, 0x5cb36a04, 0xc2d7ffa7, 0xb5d0cf31, 0x2cd99e8b, 0x5bdeae1d,
35426 + 0x9b64c2b0, 0xec63f226, 0x756aa39c, 0x026d930a, 0x9c0906a9, 0xeb0e363f, 0x72076785, 0x05005713,
35427 + 0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38, 0x92d28e9b, 0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21,
35428 + 0x86d3d2d4, 0xf1d4e242, 0x68ddb3f8, 0x1fda836e, 0x81be16cd, 0xf6b9265b, 0x6fb077e1, 0x18b74777,
35429 + 0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c, 0x8f659eff, 0xf862ae69, 0x616bffd3, 0x166ccf45,
35430 + 0xa00ae278, 0xd70dd2ee, 0x4e048354, 0x3903b3c2, 0xa7672661, 0xd06016f7, 0x4969474d, 0x3e6e77db,
35431 + 0xaed16a4a, 0xd9d65adc, 0x40df0b66, 0x37d83bf0, 0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9,
35432 + 0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6, 0xbad03605, 0xcdd70693, 0x54de5729, 0x23d967bf,
35433 + 0xb3667a2e, 0xc4614ab8, 0x5d681b02, 0x2a6f2b94, 0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, 0x2d02ef8d
35437 + * gfs_dir_hash - hash an array of data
35438 + * @data: the data to be hashed
35439 + * @len: the length of data to be hashed
35441 + * Take some data and convert it to a 32-bit hash.
35443 + * The hash function is a 32-bit CRC of the data. The algorithm uses
35444 + * the crc_32_tab table above.
35446 + * This may not be the fastest hash function, but it does a fair bit better
35447 + * at providing uniform results than the others I've looked at. That's
35448 + * really important for efficient directories.
35450 + * Returns: the hash
35454 +gfs_dir_hash(const char *data, int len)
35456 + uint32_t hash = 0xFFFFFFFF;
35458 + for (; len--; data++)
35459 + hash = crc_32_tab[(hash ^ *data) & 0xFF] ^ (hash >> 8);
35466 +#endif /* WANT_GFS_CONVERSION_FUNCTIONS */
35468 diff -urN linux-orig/fs/gfs_locking/lock_dlm/group.c linux-patched/fs/gfs_locking/lock_dlm/group.c
35469 --- linux-orig/fs/gfs_locking/lock_dlm/group.c 1969-12-31 18:00:00.000000000 -0600
35470 +++ linux-patched/fs/gfs_locking/lock_dlm/group.c 2004-06-16 12:03:17.967822065 -0500
35472 +/******************************************************************************
35473 +*******************************************************************************
35475 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
35476 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
35478 +** This copyrighted material is made available to anyone wishing to use,
35479 +** modify, copy, or redistribute it subject to the terms and conditions
35480 +** of the GNU General Public License v.2.
35482 +*******************************************************************************
35483 +******************************************************************************/
35485 +#include <linux/socket.h>
35486 +#include <net/sock.h>
35488 +#include "lock_dlm.h"
35489 +#include <cluster/cnxman.h>
35490 +#include <cluster/service.h>
35493 +struct kcl_service_ops mg_ops;
35496 + * Get the node struct for a given nodeid.
35499 +static dlm_node_t *find_node_by_nodeid(dlm_t *dlm, uint32_t nodeid)
35501 + dlm_node_t *node;
35503 + list_for_each_entry(node, &dlm->mg_nodes, list) {
35504 + if (node->nodeid == nodeid)
35511 + * Get the node struct for a given journalid.
35514 +static dlm_node_t *find_node_by_jid(dlm_t *dlm, uint32_t jid)
35516 + dlm_node_t *node;
35518 + list_for_each_entry(node, &dlm->mg_nodes, list) {
35519 + if (node->jid == jid)
35526 + * If the given ID is clear, get it, setting to the given VALUE. The ID is a
35527 + * journalid, the VALUE is our nodeid. When successful, the held ID-lock is
35528 + * returned (in shared mode). As long as this ID-lock is held, the journalid
35532 +static int id_test_and_set(dlm_t *dlm, uint32_t id, uint32_t val,
35533 + dlm_lock_t **lp_set)
35535 + dlm_lock_t *lp = NULL;
35536 + struct lm_lockname name;
35539 + uint32_t exist_val, beval;
35542 + name.ln_type = LM_TYPE_JID;
35543 + name.ln_number = id;
35545 + error = lm_dlm_get_lock(dlm, &name, &lock);
35549 + error = lm_dlm_hold_lvb(lock, &lvb);
35553 + lp = (dlm_lock_t *) lock;
35554 + set_bit(LFL_IDLOCK, &lp->flags);
35558 + error = lm_dlm_lock_sync(lock, LM_ST_UNLOCKED, LM_ST_SHARED,
35559 + LM_FLAG_TRY | LM_FLAG_NOEXP);
35560 + if (error == -EAGAIN) {
35561 + current->state = TASK_UNINTERRUPTIBLE;
35562 + schedule_timeout(HZ);
35566 + goto fail_unhold;
35568 + memcpy(&beval, lvb, sizeof(beval));
35569 + exist_val = be32_to_cpu(beval);
35571 + if (!exist_val) {
35573 + * This id is unused. Attempt to claim it by getting EX mode
35574 + * and writing our nodeid into the lvb.
35576 + error = lm_dlm_lock_sync(lock, LM_ST_SHARED, LM_ST_EXCLUSIVE,
35577 + LM_FLAG_TRY | LM_FLAG_NOEXP);
35578 + if (error == -EAGAIN) {
35579 + lm_dlm_unlock_sync(lock, LM_ST_SHARED);
35580 + current->state = TASK_UNINTERRUPTIBLE;
35581 + schedule_timeout(HZ);
35585 + goto fail_unlock;
35587 + beval = cpu_to_be32(val);
35588 + memcpy(lvb, &beval, sizeof(beval));
35590 + error = lm_dlm_lock_sync(lock, LM_ST_EXCLUSIVE, LM_ST_SHARED,
35592 + DLM_ASSERT(!error,);
35598 + * This id is already used. It has a non-zero nodeid in the lvb
35600 + lm_dlm_unlock_sync(lock, LM_ST_SHARED);
35601 + lm_dlm_unhold_lvb(lock, lvb);
35602 + lm_dlm_put_lock(lock);
35603 + error = exist_val;
35609 + lm_dlm_unlock_sync(lock, LM_ST_SHARED);
35612 + lm_dlm_unhold_lvb(lock, lvb);
35615 + lm_dlm_put_lock(lock);
35622 + * Release a held ID-lock clearing its VALUE. We have to acquire the lock in
35623 + * EX again so we can write out a zeroed lvb.
35626 +static void id_clear(dlm_t *dlm, dlm_lock_t *lp)
35628 + lm_lock_t *lock = (lm_lock_t *) lp;
35632 + * This flag means that DLM_LKF_CONVDEADLK should not be used.
35634 + set_bit(LFL_FORCE_PROMOTE, &lp->flags);
35638 + error = lm_dlm_lock_sync(lock, LM_ST_SHARED, LM_ST_EXCLUSIVE,
35639 + LM_FLAG_TRY | LM_FLAG_NOEXP);
35640 + if (error == -EAGAIN) {
35647 + memset(lp->lvb, 0, DLM_LVB_LEN);
35648 + lm_dlm_unlock_sync(lock, LM_ST_EXCLUSIVE);
35651 + lm_dlm_unhold_lvb(lock, lp->lvb);
35652 + lm_dlm_put_lock(lock);
35656 + * Get the VALUE for a given ID. The ID is a journalid, the VALUE is a nodeid.
35659 +static int id_value(dlm_t *dlm, uint32_t id, uint32_t *val)
35661 + dlm_lock_t *lp = NULL;
35662 + struct lm_lockname name;
35668 + name.ln_type = LM_TYPE_JID;
35669 + name.ln_number = id;
35671 + error = lm_dlm_get_lock(dlm, &name, &lock);
35675 + error = lm_dlm_hold_lvb(lock, &lvb);
35679 + lp = (dlm_lock_t *) lock;
35680 + set_bit(LFL_IDLOCK, &lp->flags);
35684 + error = lm_dlm_lock_sync(lock, LM_ST_UNLOCKED, LM_ST_SHARED,
35685 + LM_FLAG_TRY | LM_FLAG_NOEXP);
35686 + if (error == -EAGAIN) {
35687 + current->state = TASK_UNINTERRUPTIBLE;
35688 + schedule_timeout(HZ);
35694 + memcpy(&beval, lvb, sizeof(beval));
35695 + *val = be32_to_cpu(beval);
35697 + lm_dlm_unlock_sync(lock, LM_ST_SHARED);
35702 + lm_dlm_unhold_lvb(lock, lvb);
35705 + lm_dlm_put_lock(lock);
35712 + * Find an ID with a given VALUE. The ID is a journalid, the VALUE is a
35716 +static int id_find(dlm_t *dlm, uint32_t value, uint32_t *id_out)
35718 + uint32_t val, id;
35719 + int error = 0, found = FALSE;
35721 + for (id = 0; id < dlm->max_nodes; id++) {
35722 + error = id_value(dlm, id, &val);
35726 + if (val == value) {
35734 + if (!error && !found)
35741 + * Get a journalid to use. The journalid must be owned exclusively as long as
35742 + * this fs is mounted. Other nodes must be able to discover our nodeid as the
35743 + * owner of the journalid. The journalid we claim should have the lowest value
35744 + * of all unused journalids.
35747 +static int claim_jid(dlm_t *dlm)
35749 + dlm_node_t *node;
35753 + DLM_ASSERT(dlm->our_nodeid,);
35756 + * Search an arbitrary number (8) past max nodes so we're sure to find
35757 + * one so we can let the GFS handle the "too big jid" error and fail
35761 + for (id = 0; id < dlm->max_nodes + 8; id++) {
35762 + error = id_test_and_set(dlm, id, dlm->our_nodeid, &dlm->jid_lock);
35769 + node = find_node_by_nodeid(dlm, dlm->our_nodeid);
35771 + set_bit(NFL_HAVE_JID, &node->flags);
35776 + * If we have a problem getting a jid, pick a bogus one which should
35777 + * cause GFS to complain and fail to mount.
35781 + printk("lock_dlm: %s: no journal id available (%d)\n",
35782 + dlm->fsname, error);
35783 + dlm->jid = dlm->max_nodes + dlm->our_nodeid;
35786 + log_debug("claim_jid %u", dlm->jid);
35791 + * Release our journalid, allowing it to be used by a node subsequently
35792 + * mounting the fs.
35795 +static void release_jid(dlm_t *dlm)
35797 + id_clear(dlm, dlm->jid_lock);
35798 + dlm->jid_lock = NULL;
35802 + * For all nodes in the mountgroup, find the journalid being used by each.
35805 +static int discover_jids(dlm_t *dlm)
35807 + dlm_node_t *node;
35809 + int error, notfound = 0;
35811 + list_for_each_entry(node, &dlm->mg_nodes, list) {
35812 + if (test_bit(NFL_HAVE_JID, &node->flags))
35815 + error = id_find(dlm, node->nodeid, &id);
35817 + log_debug("jid for node %d not found", node->nodeid);
35823 + set_bit(NFL_HAVE_JID, &node->flags);
35830 + * Discover the nodeid that we've been assigned by the cluster manager.
35833 +static int get_our_nodeid(dlm_t *dlm)
35835 + LIST_HEAD(cur_memb);
35836 + struct kcl_cluster_node *cur_node;
35838 + kcl_get_members(&cur_memb);
35840 + list_for_each_entry(cur_node, &cur_memb, list) {
35841 + if (cur_node->us) {
35842 + dlm->our_nodeid = cur_node->node_id;
35847 + while (!list_empty(&cur_memb)) {
35848 + cur_node = list_entry(cur_memb.next, struct kcl_cluster_node,
35850 + list_del(&cur_node->list);
35858 + * Run in dlm_async thread
35861 +void process_start(dlm_t *dlm, dlm_start_t *ds)
35863 + dlm_node_t *node;
35865 + int last_stop, last_start, error, i, new = FALSE, found;
35868 + log_debug("start c %d type %d e %d", ds->count, ds->type, ds->event_id);
35871 + * gfs won't do journal recoveries once it's sent us an unmount
35874 + if (test_bit(DFL_UMOUNT, &dlm->flags)) {
35875 + log_debug("process_start %d skip for umount", ds->event_id);
35876 + kcl_start_done(dlm->mg_local_id, ds->event_id);
35881 + * check if first start
35884 + if (!test_and_set_bit(DFL_GOT_NODEID, &dlm->flags)) {
35885 + get_our_nodeid(dlm);
35886 + if (ds->count == 1)
35887 + set_bit(DFL_FIRST_MOUNT, &dlm->flags);
35890 + down(&dlm->mg_nodes_lock);
35893 + * find nodes which are gone
35896 + list_for_each_entry(node, &dlm->mg_nodes, list) {
35898 + for (i = 0; i < ds->count; i++) {
35899 + if (node->nodeid != ds->nodeids[i])
35905 + /* node is still a member */
35909 + set_bit(NFL_NOT_MEMBER, &node->flags);
35911 + /* no gfs recovery needed for nodes that left cleanly */
35912 + if (ds->type != SERVICE_NODE_FAILED)
35915 + /* callbacks sent only for nodes in last completed MG */
35916 + if (!test_bit(NFL_LAST_FINISH, &node->flags))
35919 + /* only send a single callback per node */
35920 + if (test_and_set_bit(NFL_SENT_CB, &node->flags))
35923 + dlm->fscb(dlm->fsdata, LM_CB_NEED_RECOVERY, &node->jid);
35924 + set_bit(DFL_NEED_STARTDONE, &dlm->flags);
35925 + log_debug("cb_need_recovery jid %u", node->jid);
35932 + for (i = 0; i < ds->count; i++) {
35933 + nodeid = ds->nodeids[i];
35935 + node = find_node_by_nodeid(dlm, nodeid);
35939 + DLM_RETRY(node = kmalloc(sizeof(dlm_node_t), GFP_KERNEL), node);
35941 + memset(node, 0, sizeof(dlm_node_t));
35943 + node->nodeid = nodeid;
35944 + list_add(&node->list, &dlm->mg_nodes);
35948 + up(&dlm->mg_nodes_lock);
35951 + * get a jid for ourself when started for first time
35954 + if (!test_and_set_bit(DFL_HAVE_JID, &dlm->flags))
35957 + /* give new nodes a little time to claim a jid */
35958 + current->state = TASK_INTERRUPTIBLE;
35959 + schedule_timeout(HZ);
35963 + * find jid's of new nodes
35967 + /* we don't need to do these jid lookups if this start has been
35968 + followed by a stop event (and thus cancelled) */
35970 + spin_lock(&dlm->async_lock);
35971 + last_stop = dlm->mg_last_stop;
35972 + last_start = dlm->mg_last_start;
35973 + spin_unlock(&dlm->async_lock);
35975 + if (last_stop >= ds->event_id)
35978 + error = discover_jids(dlm);
35980 + /* Not all jids were found. Wait for a time to let all
35981 + new nodes claim_jid, then try to scan for jids
35983 + current->state = TASK_INTERRUPTIBLE;
35984 + schedule_timeout(HZ);
35991 + * tell SM we're done if there are no GFS recoveries to wait for
35994 + if (last_start > last_stop) {
35996 + down(&dlm->mg_nodes_lock);
35998 + list_for_each_entry(node, &dlm->mg_nodes, list) {
35999 + if (!test_bit(NFL_SENT_CB, &node->flags))
36004 + up(&dlm->mg_nodes_lock);
36007 + kcl_start_done(dlm->mg_local_id, ds->event_id);
36011 + kfree(ds->nodeids);
36015 +void process_finish(dlm_t *dlm)
36017 + struct list_head *tmp, *tmpsafe;
36018 + dlm_node_t *node;
36021 + spin_lock(&dlm->async_lock);
36022 + clear_bit(DFL_BLOCK_LOCKS, &dlm->flags);
36024 + list_for_each_safe(tmp, tmpsafe, &dlm->delayed) {
36025 + lp = list_entry(tmp, dlm_lock_t, dlist);
36027 + if (lp->type != QUEUE_LOCKS_BLOCKED)
36031 + list_del(&lp->dlist);
36032 + list_add_tail(&lp->slist, &dlm->submit);
36034 + clear_bit(LFL_DLIST, &lp->flags);
36035 + set_bit(LFL_SLIST, &lp->flags);
36037 + spin_unlock(&dlm->async_lock);
36039 + down(&dlm->mg_nodes_lock);
36041 + list_for_each_safe(tmp, tmpsafe, &dlm->mg_nodes) {
36042 + node = list_entry(tmp, dlm_node_t, list);
36044 + if (test_bit(NFL_NOT_MEMBER, &node->flags)) {
36045 + list_del(&node->list);
36048 + set_bit(NFL_LAST_FINISH, &node->flags);
36050 + up(&dlm->mg_nodes_lock);
36052 + wake_up(&dlm->wait);
36056 + * Run in user process
36059 +int init_mountgroup(dlm_t *dlm)
36064 + error = kcl_register_service(dlm->fsname, dlm->fnlen, SERVICE_LEVEL_GFS,
36065 + &mg_ops, TRUE, (void *) dlm, &id);
36069 + dlm->mg_local_id = id;
36071 + /* BLOCK_LOCKS is cleared when the join is finished */
36072 + set_bit(DFL_BLOCK_LOCKS, &dlm->flags);
36074 + error = kcl_join_service(id);
36078 + if (test_bit(DFL_START_ERROR, &dlm->flags))
36084 + kcl_leave_service(dlm->mg_local_id);
36087 + kcl_unregister_service(id);
36090 + printk("lock_dlm: service error %d\n", error);
36094 +void release_mountgroup(dlm_t *dlm)
36096 + int last_start, last_stop;
36098 + /* this flag causes a kcl_start_done() to be sent right away for
36099 + any start callbacks we get from SM */
36101 + log_debug("umount flags %lx", dlm->flags);
36102 + set_bit(DFL_UMOUNT, &dlm->flags);
36104 + /* gfs has done an unmount and will not call jid_recovery_done()
36105 + any longer so make necessary kcl_start_done() calls so
36106 + kcl_leave_service() will complete */
36108 + spin_lock(&dlm->async_lock);
36109 + last_start = dlm->mg_last_start;
36110 + last_stop = dlm->mg_last_stop;
36111 + spin_unlock(&dlm->async_lock);
36113 + if ((last_start > last_stop) &&
36114 + test_and_clear_bit(DFL_NEED_STARTDONE, &dlm->flags)) {
36115 + log_debug("umount doing start_done %d", last_start);
36116 + kcl_start_done(dlm->mg_local_id, last_start);
36119 + kcl_leave_service(dlm->mg_local_id);
36120 + kcl_unregister_service(dlm->mg_local_id);
36121 + release_jid(dlm);
36125 + * Run in GFS thread
36128 +void jid_recovery_done(dlm_t *dlm, unsigned int jid, unsigned int message)
36130 + dlm_node_t *node;
36131 + int last_start, last_stop;
36134 + log_debug("recovery_done jid %u msg %u", jid, message);
36136 + node = find_node_by_jid(dlm, jid);
36140 + log_debug("recovery_done %u,%u f %lx", jid, node->nodeid, node->flags);
36142 + if (!test_bit(NFL_SENT_CB, &node->flags))
36145 + if (!test_bit(NFL_NOT_MEMBER, &node->flags))
36148 + set_bit(NFL_RECOVERY_DONE, &node->flags);
36151 + * when recovery is done for all nodes, we're done with the start
36154 + down(&dlm->mg_nodes_lock);
36156 + list_for_each_entry(node, &dlm->mg_nodes, list) {
36157 + if (test_bit(NFL_SENT_CB, &node->flags) &&
36158 + !test_bit(NFL_RECOVERY_DONE, &node->flags))
36161 + up(&dlm->mg_nodes_lock);
36164 + /* don't send a start_done if there's since been a stop which
36165 + * cancels this start */
36167 + spin_lock(&dlm->async_lock);
36168 + last_start = dlm->mg_last_start;
36169 + last_stop = dlm->mg_last_stop;
36170 + spin_unlock(&dlm->async_lock);
36172 + if (last_start > last_stop) {
36173 + log_debug("recovery_done start_done %d", last_start);
36174 + kcl_start_done(dlm->mg_local_id, last_start);
36175 + clear_bit(DFL_NEED_STARTDONE, &dlm->flags);
36184 + * Run in CMAN SM thread
36187 +static void queue_start(dlm_t *dlm, uint32_t *nodeids, int count,
36188 + int event_id, int type)
36192 + DLM_RETRY(ds = kmalloc(sizeof(dlm_start_t), GFP_KERNEL), ds);
36194 + memset(ds, 0, sizeof(dlm_start_t));
36196 + ds->nodeids = nodeids;
36197 + ds->count = count;
36198 + ds->event_id = event_id;
36201 + spin_lock(&dlm->async_lock);
36202 + dlm->mg_last_start = event_id;
36203 + list_add_tail(&ds->list, &dlm->starts);
36204 + spin_unlock(&dlm->async_lock);
36206 + wake_up(&dlm->wait);
36209 +static int mg_stop(void *data)
36211 + dlm_t *dlm = (dlm_t *) data;
36213 + spin_lock(&dlm->async_lock);
36214 + set_bit(DFL_BLOCK_LOCKS, &dlm->flags);
36215 + dlm->mg_last_stop = dlm->mg_last_start;
36216 + spin_unlock(&dlm->async_lock);
36221 +static int mg_start(void *data, uint32_t *nodeids, int count, int event_id,
36224 + dlm_t *dlm = (dlm_t *) data;
36226 + queue_start(dlm, nodeids, count, event_id, type);
36231 +static void mg_finish(void *data, int event_id)
36233 + dlm_t *dlm = (dlm_t *) data;
36235 + spin_lock(&dlm->async_lock);
36236 + dlm->mg_last_finish = event_id;
36237 + set_bit(DFL_MG_FINISH, &dlm->flags);
36238 + spin_unlock(&dlm->async_lock);
36240 + wake_up(&dlm->wait);
36243 +struct kcl_service_ops mg_ops = {
36245 + .start = mg_start,
36246 + .finish = mg_finish
36248 diff -urN linux-orig/fs/gfs_locking/lock_dlm/lock.c linux-patched/fs/gfs_locking/lock_dlm/lock.c
36249 --- linux-orig/fs/gfs_locking/lock_dlm/lock.c 1969-12-31 18:00:00.000000000 -0600
36250 +++ linux-patched/fs/gfs_locking/lock_dlm/lock.c 2004-06-16 12:03:17.967822065 -0500
36252 +/******************************************************************************
36253 +*******************************************************************************
36255 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
36256 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
36258 +** This copyrighted material is made available to anyone wishing to use,
36259 +** modify, copy, or redistribute it subject to the terms and conditions
36260 +** of the GNU General Public License v.2.
36262 +*******************************************************************************
36263 +******************************************************************************/
36265 +#include "lock_dlm.h"
36268 + * Run in DLM thread
36271 +static void queue_complete(dlm_lock_t *lp)
36273 + dlm_t *dlm = lp->dlm;
36275 + clear_bit(LFL_WAIT_COMPLETE, &lp->flags);
36277 + spin_lock(&dlm->async_lock);
36278 + list_add_tail(&lp->clist, &dlm->complete);
36279 + set_bit(LFL_CLIST, &lp->flags);
36280 + spin_unlock(&dlm->async_lock);
36281 + wake_up(&dlm->wait);
36284 +static void queue_blocking(dlm_lock_t *lp, int mode)
36286 + dlm_t *dlm = lp->dlm;
36288 + if (test_bit(LFL_WAIT_COMPLETE, &lp->flags)) {
36289 + /* We often receive basts for EX while we're promoting
36290 + from SH to EX. */
36291 + /* printk("lock_dlm: bast before complete %x,%"PRIx64" "
36292 + "gr=%d rq=%d bast=%d\n", lp->lockname.ln_type,
36293 + lp->lockname.ln_number, lp->cur, lp->req, mode); */
36297 + spin_lock(&dlm->async_lock);
36299 + if (!lp->bast_mode) {
36300 + list_add_tail(&lp->blist, &dlm->blocking);
36301 + set_bit(LFL_BLIST, &lp->flags);
36302 + lp->bast_mode = mode;
36303 + } else if (lp->bast_mode < mode)
36304 + lp->bast_mode = mode;
36306 + spin_unlock(&dlm->async_lock);
36307 + wake_up(&dlm->wait);
36310 +static __inline__ void lock_ast(void *astargs)
36312 + dlm_lock_t *lp = (dlm_lock_t *) astargs;
36313 + queue_complete(lp);
36316 +static __inline__ void lock_bast(void *astargs, int mode)
36318 + dlm_lock_t *lp = (dlm_lock_t *) astargs;
36319 + queue_blocking(lp, mode);
36323 + * Run in GFS or user thread
36327 + * queue_delayed - add request to queue to be submitted later
36329 + * @type: the reason the lock is blocked
36331 + * Queue of locks which need submitting sometime later. Locks here
36332 + * due to BLOCKED_LOCKS are moved to request queue when recovery is
36333 + * done. Locks here due to an ERROR are moved to request queue after
36334 + * some delay. This could also be called from dlm_async thread.
36337 +void queue_delayed(dlm_lock_t *lp, int type)
36339 + dlm_t *dlm = lp->dlm;
36343 + spin_lock(&dlm->async_lock);
36344 + list_add_tail(&lp->dlist, &dlm->delayed);
36345 + set_bit(LFL_DLIST, &lp->flags);
36346 + spin_unlock(&dlm->async_lock);
36350 + * make_mode - convert to DLM_LOCK_
36351 + * @lmstate: GFS lock state
36353 + * Returns: DLM lock mode
36356 +static int16_t make_mode(int16_t lmstate)
36358 + switch (lmstate) {
36359 + case LM_ST_UNLOCKED:
36360 + return DLM_LOCK_NL;
36361 + case LM_ST_EXCLUSIVE:
36362 + return DLM_LOCK_EX;
36363 + case LM_ST_DEFERRED:
36364 + return DLM_LOCK_CW;
36365 + case LM_ST_SHARED:
36366 + return DLM_LOCK_PR;
36368 + DLM_ASSERT(0, printk("unknown LM state %d\n", lmstate););
36373 + * make_lmstate - convert to LM_ST_
36374 + * @dlmmode: DLM lock mode
36376 + * Returns: GFS lock state
36379 +int16_t make_lmstate(int16_t dlmmode)
36381 + switch (dlmmode) {
36382 + case DLM_LOCK_IV:
36383 + case DLM_LOCK_NL:
36384 + return LM_ST_UNLOCKED;
36385 + case DLM_LOCK_EX:
36386 + return LM_ST_EXCLUSIVE;
36387 + case DLM_LOCK_CW:
36388 + return LM_ST_DEFERRED;
36389 + case DLM_LOCK_PR:
36390 + return LM_ST_SHARED;
36392 + DLM_ASSERT(0, printk("unknown DLM mode %d\n", dlmmode););
36397 + * check_cur_state - verify agreement with GFS on the current lock state
36398 + * @lp: the DLM lock
36399 + * @cur_state: the current lock state from GFS
36401 + * NB: DLM_LOCK_NL and DLM_LOCK_IV are both considered
36402 + * LM_ST_UNLOCKED by GFS.
36406 +static void check_cur_state(dlm_lock_t *lp, unsigned int cur_state)
36408 + int16_t cur = make_mode(cur_state);
36409 + if (lp->cur != DLM_LOCK_IV)
36410 + DLM_ASSERT(lp->cur == cur, printk("%d, %d\n", lp->cur, cur););
36414 + * make_flags - put together necessary DLM flags
36416 + * @gfs_flags: GFS flags
36417 + * @cur: current DLM lock mode
36418 + * @req: requested DLM lock mode
36420 + * Returns: DLM flags
36423 +static unsigned int make_flags(dlm_lock_t *lp, unsigned int gfs_flags,
36424 + int16_t cur, int16_t req)
36426 + unsigned int lkf = 0;
36428 + if (gfs_flags & LM_FLAG_TRY)
36429 + lkf |= DLM_LKF_NOQUEUE;
36431 + if (gfs_flags & LM_FLAG_TRY_1CB) {
36432 + lkf |= DLM_LKF_NOQUEUE;
36433 + lkf |= DLM_LKF_NOQUEUEBAST;
36436 + if (lp->lksb.sb_lkid != 0) {
36437 + lkf |= DLM_LKF_CONVERT;
36439 + if (gfs_flags & LM_FLAG_PRIORITY)
36440 + lkf |= DLM_LKF_EXPEDITE;
36441 + else if (req > cur)
36442 + lkf |= DLM_LKF_QUECVT;
36444 + /* Conversion deadlock avoidance by DLM */
36446 + if (!test_bit(LFL_FORCE_PROMOTE, &lp->flags) &&
36447 + cur > DLM_LOCK_NL && req > DLM_LOCK_NL && cur != req)
36448 + lkf |= DLM_LKF_CONVDEADLK;
36452 + lkf |= DLM_LKF_VALBLK;
36458 + * make_strname - convert GFS lock numbers to string
36459 + * @lockname: the lock type/number
36460 + * @str: the lock string/length
36464 +static __inline__ void make_strname(struct lm_lockname *lockname,
36467 + sprintf(str->name, "%8x%16"PRIx64, lockname->ln_type,
36468 + lockname->ln_number);
36469 + str->namelen = LOCK_DLM_STRNAME_BYTES;
36472 +int create_lp(dlm_t *dlm, struct lm_lockname *name, dlm_lock_t **lpp)
36476 + lp = kmalloc(sizeof(dlm_lock_t), GFP_KERNEL);
36480 + memset(lp, 0, sizeof(dlm_lock_t));
36481 + lp->lockname = *name;
36483 + lp->cur = DLM_LOCK_IV;
36484 + init_completion(&lp->uast_wait);
36490 + * dlm_get_lock - get a lm_lock_t given a description of the lock
36491 + * @lockspace: the lockspace the lock lives in
36492 + * @name: the name of the lock
36493 + * @lockp: return the lm_lock_t here
36495 + * Returns: 0 on success, -EXXX on failure
36498 +int lm_dlm_get_lock(lm_lockspace_t *lockspace, struct lm_lockname *name,
36499 + lm_lock_t **lockp)
36504 + error = create_lp((dlm_t *) lockspace, name, &lp);
36506 + *lockp = (lm_lock_t *) lp;
36510 +int do_unlock(dlm_lock_t *lp)
36514 + init_completion(&lp->uast_wait);
36516 + set_bit(LFL_DLM_UNLOCK, &lp->flags);
36518 + error = dlm_unlock(lp->dlm->gdlm_lsp, lp->lksb.sb_lkid, 0, &lp->lksb,
36521 + DLM_ASSERT(!error, printk("%s: error=%d num=%x,%"PRIx64"\n",
36522 + lp->dlm->fsname, error, lp->lockname.ln_type,
36523 + lp->lockname.ln_number););
36525 + wait_for_completion(&lp->uast_wait);
36527 + spin_lock(&lp->dlm->async_lock);
36528 + if (test_bit(LFL_CLIST, &lp->flags)) {
36529 + printk("lock_dlm: dlm_put_lock lp on clist num=%x,%"PRIx64"\n", lp->lockname.ln_type, lp->lockname.ln_number);
36530 + list_del(&lp->clist);
36532 + if (test_bit(LFL_BLIST, &lp->flags)) {
36533 + printk("lock_dlm: dlm_put_lock lp on blist num=%x,%"PRIx64"\n",
36534 + lp->lockname.ln_type, lp->lockname.ln_number);
36535 + list_del(&lp->blist);
36537 + if (test_bit(LFL_DLIST, &lp->flags)) {
36538 + printk("lock_dlm: dlm_put_lock lp on dlist num=%x,%"PRIx64"\n",
36539 + lp->lockname.ln_type, lp->lockname.ln_number);
36540 + list_del(&lp->dlist);
36542 + if (test_bit(LFL_SLIST, &lp->flags)) {
36543 + printk("lock_dlm: dlm_put_lock lp on slist num=%x,%"PRIx64"\n",
36544 + lp->lockname.ln_type, lp->lockname.ln_number);
36545 + list_del(&lp->slist);
36547 + spin_unlock(&lp->dlm->async_lock);
36553 + * dlm_put_lock - get rid of a lock structure
36554 + * @lock: the lock to throw away
36558 +void lm_dlm_put_lock(lm_lock_t *lock)
36560 + dlm_lock_t *lp = (dlm_lock_t *) lock;
36562 + if (lp->cur != DLM_LOCK_IV) {
36569 + * do_lock - acquire a lock
36570 + * @lp: the DLM lock
36571 + * @range: optional range
36574 +void do_lock(dlm_lock_t *lp, struct dlm_range *range)
36576 + dlm_t *dlm = lp->dlm;
36581 + * When recovery is in progress, delay lock requests for submission
36582 + * once recovery is done. Requests for recovery (NOEXP) and unlocks
36586 + if (test_bit(DFL_BLOCK_LOCKS, &dlm->flags) &&
36587 + !test_bit(LFL_NOBLOCK, &lp->flags) && lp->req != DLM_LOCK_NL) {
36588 + queue_delayed(lp, QUEUE_LOCKS_BLOCKED);
36593 + * Submit the actual lock request.
36596 + make_strname(&lp->lockname, &str);
36598 + set_bit(LFL_WAIT_COMPLETE, &lp->flags);
36600 + error = dlm_lock(dlm->gdlm_lsp, lp->req, &lp->lksb, lp->lkf, str.name,
36601 + str.namelen, 0, lock_ast, (void *) lp,
36602 + lp->posix ? NULL : lock_bast, range);
36604 + if ((error == -EAGAIN) && (lp->lkf & DLM_LKF_NOQUEUE)) {
36605 + lp->lksb.sb_status = -EAGAIN;
36606 + queue_complete(lp);
36610 + DLM_ASSERT(!error,
36611 + printk("%s: num=%x,%"PRIx64" err=%d cur=%d req=%d lkf=%x\n",
36612 + dlm->fsname, lp->lockname.ln_type,
36613 + lp->lockname.ln_number, error, lp->cur, lp->req,
36618 + * lm_dlm_lock - acquire a lock
36619 + * @lock: the lock to manipulate
36620 + * @cur_state: the current state
36621 + * @req_state: the requested state
36622 + * @flags: modifier flags
36624 + * Returns: A bitmap of LM_OUT_* on success, -EXXX on failure
36627 +unsigned int lm_dlm_lock(lm_lock_t *lock, unsigned int cur_state,
36628 + unsigned int req_state, unsigned int flags)
36630 + dlm_lock_t *lp = (dlm_lock_t *) lock;
36632 + if (flags & LM_FLAG_NOEXP)
36633 + set_bit(LFL_NOBLOCK, &lp->flags);
36635 + check_cur_state(lp, cur_state);
36636 + lp->req = make_mode(req_state);
36637 + lp->lkf = make_flags(lp, flags, lp->cur, lp->req);
36639 + do_lock(lp, NULL);
36640 + return LM_OUT_ASYNC;
36643 +int lm_dlm_lock_sync(lm_lock_t *lock, unsigned int cur_state,
36644 + unsigned int req_state, unsigned int flags)
36646 + dlm_lock_t *lp = (dlm_lock_t *) lock;
36648 + init_completion(&lp->uast_wait);
36649 + lm_dlm_lock(lock, cur_state, req_state, flags);
36650 + wait_for_completion(&lp->uast_wait);
36652 + return lp->lksb.sb_status;
36656 + * lm_dlm_unlock - unlock a lock
36657 + * @lock: the lock to manipulate
36658 + * @cur_state: the current state
36660 + * Returns: 0 on success, -EXXX on failure
36663 +unsigned int lm_dlm_unlock(lm_lock_t *lock, unsigned int cur_state)
36665 + dlm_lock_t *lp = (dlm_lock_t *) lock;
36667 + check_cur_state(lp, cur_state);
36668 + lp->req = DLM_LOCK_NL;
36669 + lp->lkf = make_flags(lp, 0, lp->cur, lp->req);
36671 + do_lock(lp, NULL);
36673 + return LM_OUT_ASYNC;
36676 +void lm_dlm_unlock_sync(lm_lock_t *lock, unsigned int cur_state)
36678 + dlm_lock_t *lp = (dlm_lock_t *) lock;
36680 + init_completion(&lp->uast_wait);
36681 + lm_dlm_unlock(lock, cur_state);
36682 + wait_for_completion(&lp->uast_wait);
36686 + * dlm_cancel - cancel a request that is blocked due to DFL_BLOCK_LOCKS
36687 + * @lock: the lock to cancel request for
36691 +void lm_dlm_cancel(lm_lock_t *lock)
36693 + dlm_lock_t *lp = (dlm_lock_t *) lock;
36694 + int dlist = FALSE;
36696 + printk("lock_dlm: cancel num=%x,%"PRIx64"\n",
36697 + lp->lockname.ln_type, lp->lockname.ln_number);
36699 + spin_lock(&lp->dlm->async_lock);
36700 + if (test_and_clear_bit(LFL_DLIST, &lp->flags)) {
36701 + list_del(&lp->dlist);
36705 + spin_unlock(&lp->dlm->async_lock);
36708 + set_bit(LFL_CANCEL, &lp->flags);
36709 + queue_complete(lp);
36714 + * dlm_hold_lvb - hold on to a lock value block
36715 + * @lock: the lock the LVB is associated with
36716 + * @lvbp: return the lvb memory here
36718 + * Returns: 0 on success, -EXXX on failure
36721 +int lm_dlm_hold_lvb(lm_lock_t *lock, char **lvbp)
36723 + dlm_lock_t *lp = (dlm_lock_t *) lock;
36726 + lvb = kmalloc(DLM_LVB_SIZE, GFP_KERNEL);
36730 + memset(lvb, 0, DLM_LVB_SIZE);
36732 + lp->lksb.sb_lvbptr = lvb;
36740 + * dlm_unhold_lvb - release a LVB
36741 + * @lock: the lock the LVB is associated with
36742 + * @lvb: the lock value block
36746 +void lm_dlm_unhold_lvb(lm_lock_t *lock, char *lvb)
36748 + dlm_lock_t *lp = (dlm_lock_t *) lock;
36751 + lp->lksb.sb_lvbptr = NULL;
36755 + * dlm_sync_lvb - sync out the value of a lvb
36756 + * @lock: the lock the LVB is associated with
36757 + * @lvb: the lock value block
36761 +void lm_dlm_sync_lvb(lm_lock_t *lock, char *lvb)
36763 + dlm_lock_t *lp = (dlm_lock_t *) lock;
36765 + if (lp->cur != DLM_LOCK_EX)
36768 + init_completion(&lp->uast_wait);
36769 + set_bit(LFL_SYNC_LVB, &lp->flags);
36771 + lp->req = DLM_LOCK_EX;
36772 + lp->lkf = make_flags(lp, 0, lp->cur, lp->req);
36774 + do_lock(lp, NULL);
36775 + wait_for_completion(&lp->uast_wait);
36779 + * dlm_recovery_done - reset the expired locks for a given jid
36780 + * @lockspace: the lockspace
36785 +void lm_dlm_recovery_done(lm_lockspace_t *lockspace, unsigned int jid,
36786 + unsigned int message)
36788 + jid_recovery_done((dlm_t *) lockspace, jid, message);
36792 + * Run in dlm_async
36796 + * process_submit - make DLM lock requests from dlm_async thread
36801 +void process_submit(dlm_lock_t *lp)
36803 + struct dlm_range range, *r = NULL;
36806 + range.ra_start = lp->posix->start;
36807 + range.ra_end = lp->posix->end;
36813 diff -urN linux-orig/fs/gfs_locking/lock_dlm/lock_dlm.h linux-patched/fs/gfs_locking/lock_dlm/lock_dlm.h
36814 --- linux-orig/fs/gfs_locking/lock_dlm/lock_dlm.h 1969-12-31 18:00:00.000000000 -0600
36815 +++ linux-patched/fs/gfs_locking/lock_dlm/lock_dlm.h 2004-06-16 12:03:17.967822065 -0500
36817 +/******************************************************************************
36818 +*******************************************************************************
36820 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
36821 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
36823 +** This copyrighted material is made available to anyone wishing to use,
36824 +** modify, copy, or redistribute it subject to the terms and conditions
36825 +** of the GNU General Public License v.2.
36827 +*******************************************************************************
36828 +******************************************************************************/
36830 +#ifndef LOCK_DLM_DOT_H
36831 +#define LOCK_DLM_DOT_H
36833 +#include <linux/module.h>
36834 +#include <linux/slab.h>
36835 +#include <linux/spinlock.h>
36836 +#include <linux/module.h>
36837 +#include <linux/types.h>
36838 +#include <linux/string.h>
36839 +#include <linux/list.h>
36840 +#include <linux/lm_interface.h>
36841 +#include <cluster/dlm.h>
36843 +/* We take a shortcut and use lm_lockname structs for internal locks. This
36844 + means we must be careful to keep these types different from those used in
36845 + lm_interface.h. */
36847 +#define LM_TYPE_JID (0x10)
36848 +#define LM_TYPE_PLOCK_UPDATE (0x11)
36850 +#define DLM_LVB_SIZE (DLM_LVB_LEN)
36852 +/* GFS uses 12 bytes to identify a resource (32 bit type + 64 bit number).
36853 + We sprintf these numbers into a 24 byte string of hex values to make them
36854 + human-readable (to make debugging simpler.) */
36856 +#define LOCK_DLM_STRNAME_BYTES (24)
36858 +#define LOCK_DLM_MAX_NODES (128)
36866 +typedef struct dlm dlm_t;
36867 +typedef struct dlm_lock dlm_lock_t;
36868 +typedef struct dlm_node dlm_node_t;
36869 +typedef struct dlm_start dlm_start_t;
36870 +typedef struct strname strname_t;
36872 +#define DFL_FIRST_MOUNT 0
36873 +#define DFL_THREAD_STOP 1
36874 +#define DFL_GOT_NODEID 2
36875 +#define DFL_MG_FINISH 3
36876 +#define DFL_HAVE_JID 4
36877 +#define DFL_BLOCK_LOCKS 5
36878 +#define DFL_START_ERROR 6
36879 +#define DFL_UMOUNT 7
36880 +#define DFL_NEED_STARTDONE 8
36884 + uint32_t our_nodeid;
36885 + unsigned long flags;
36888 + char * clustername;
36893 + dlm_lockspace_t * gdlm_lsp;
36895 + lm_callback_t fscb;
36896 + lm_fsdata_t * fsdata;
36897 + dlm_lock_t * jid_lock;
36899 + spinlock_t async_lock;
36900 + struct list_head complete;
36901 + struct list_head blocking;
36902 + struct list_head delayed;
36903 + struct list_head submit;
36904 + struct list_head starts;
36906 + wait_queue_head_t wait;
36907 + atomic_t threads;
36910 + int mg_last_start;
36911 + int mg_last_stop;
36912 + int mg_last_finish;
36913 + struct list_head mg_nodes;
36914 + struct semaphore mg_nodes_lock;
36916 + struct list_head resources;
36917 + struct semaphore res_lock;
36920 +struct dlm_resource {
36922 + struct list_head list; /* list of resources */
36923 + struct lm_lockname name; /* the resource name */
36924 + struct semaphore sema;
36925 + struct list_head locks; /* one lock for each range */
36927 + dlm_lock_t * update;
36928 + struct list_head async_locks;
36929 + spinlock_t async_spin;
36932 +struct posix_lock {
36933 + struct list_head list; /* resource locks list */
36934 + struct list_head async_list; /* resource async_locks list */
36935 + struct dlm_resource * resource;
36937 + unsigned long owner;
36944 +#define LFL_NOBLOCK 0
36945 +#define LFL_NOCACHE 1
36946 +#define LFL_UNLOCK_RECOVERY 2
36947 +#define LFL_DLM_UNLOCK 3
36948 +#define LFL_TRYFAILED 4
36949 +#define LFL_SYNC_LVB 5
36950 +#define LFL_FORCE_PROMOTE 6
36951 +#define LFL_REREQUEST 7
36952 +#define LFL_WAIT_COMPLETE 8
36953 +#define LFL_CLIST 9
36954 +#define LFL_BLIST 10
36955 +#define LFL_DLIST 11
36956 +#define LFL_SLIST 12
36957 +#define LFL_IDLOCK 13
36958 +#define LFL_CANCEL 14
36962 + struct lm_lockname lockname;
36964 + struct dlm_lksb lksb;
36968 + int16_t prev_req;
36969 + unsigned int lkf;
36970 + unsigned int type;
36971 + unsigned long flags;
36973 + int bast_mode; /* protected by async_lock */
36974 + struct completion uast_wait;
36976 + struct list_head clist; /* complete */
36977 + struct list_head blist; /* blocking */
36978 + struct list_head dlist; /* delayed */
36979 + struct list_head slist; /* submit */
36981 + struct posix_lock * posix;
36984 +#define NFL_SENT_CB 0
36985 +#define NFL_NOT_MEMBER 1
36986 +#define NFL_RECOVERY_DONE 2
36987 +#define NFL_LAST_FINISH 3
36988 +#define NFL_HAVE_JID 4
36993 + unsigned long flags;
36994 + struct list_head list;
36997 +#define QUEUE_LOCKS_BLOCKED 1
36998 +#define QUEUE_ERROR_UNLOCK 2
36999 +#define QUEUE_ERROR_LOCK 3
37000 +#define QUEUE_ERROR_RETRY 4
37003 + unsigned char name[LOCK_DLM_STRNAME_BYTES];
37004 + unsigned short namelen;
37007 +struct dlm_start {
37008 + uint32_t * nodeids;
37012 + struct list_head list;
37023 +#if (BITS_PER_LONG == 64)
37024 +#define PRIu64 "lu"
37025 +#define PRId64 "ld"
37026 +#define PRIo64 "lo"
37027 +#define PRIx64 "lx"
37028 +#define PRIX64 "lX"
37029 +#define SCNu64 "lu"
37030 +#define SCNd64 "ld"
37031 +#define SCNo64 "lo"
37032 +#define SCNx64 "lx"
37033 +#define SCNX64 "lX"
37035 +#define PRIu64 "Lu"
37036 +#define PRId64 "Ld"
37037 +#define PRIo64 "Lo"
37038 +#define PRIx64 "Lx"
37039 +#define PRIX64 "LX"
37040 +#define SCNu64 "Lu"
37041 +#define SCNd64 "Ld"
37042 +#define SCNo64 "Lo"
37043 +#define SCNx64 "Lx"
37044 +#define SCNX64 "LX"
37047 +extern struct lm_lockops lock_dlm_ops;
37051 +int init_mountgroup(dlm_t * dlm);
37052 +void release_mountgroup(dlm_t * dlm);
37053 +void process_start(dlm_t * dlm, dlm_start_t * ds);
37054 +void process_finish(dlm_t * dlm);
37055 +void jid_recovery_done(dlm_t * dlm, unsigned int jid, unsigned int message);
37059 +int init_async_thread(dlm_t * dlm);
37060 +void release_async_thread(dlm_t * dlm);
37064 +int16_t make_lmstate(int16_t dlmmode);
37065 +void queue_delayed(dlm_lock_t * lp, int type);
37066 +void process_submit(dlm_lock_t * lp);
37067 +int create_lp(dlm_t *dlm, struct lm_lockname *name, dlm_lock_t **lpp);
37068 +void do_lock(dlm_lock_t *lp, struct dlm_range *range);
37069 +int do_unlock(dlm_lock_t *lp);
37071 +int lm_dlm_get_lock(lm_lockspace_t * lockspace, struct lm_lockname * name,
37072 + lm_lock_t ** lockp);
37073 +void lm_dlm_put_lock(lm_lock_t * lock);
37074 +unsigned int lm_dlm_lock(lm_lock_t * lock, unsigned int cur_state,
37075 + unsigned int req_state, unsigned int flags);
37076 +int lm_dlm_lock_sync(lm_lock_t * lock, unsigned int cur_state,
37077 + unsigned int req_state, unsigned int flags);
37078 +unsigned int lm_dlm_unlock(lm_lock_t * lock, unsigned int cur_state);
37079 +void lm_dlm_unlock_sync(lm_lock_t * lock, unsigned int cur_state);
37080 +void lm_dlm_cancel(lm_lock_t * lock);
37081 +int lm_dlm_hold_lvb(lm_lock_t * lock, char **lvbp);
37082 +void lm_dlm_unhold_lvb(lm_lock_t * lock, char *lvb);
37083 +void lm_dlm_sync_lvb(lm_lock_t * lock, char *lvb);
37084 +void lm_dlm_recovery_done(lm_lockspace_t * lockspace, unsigned int jid,
37085 + unsigned int message);
37089 +int lm_dlm_plock(lm_lockspace_t *lockspace, struct lm_lockname *name,
37090 + unsigned long owner, int wait, int ex, uint64_t start,
37093 +int lm_dlm_punlock(lm_lockspace_t *lockspace, struct lm_lockname *name,
37094 + unsigned long owner, uint64_t start, uint64_t end);
37096 +int lm_dlm_plock_get(lm_lockspace_t *lockspace, struct lm_lockname *name,
37097 + unsigned long owner, uint64_t *start, uint64_t *end,
37098 + int *ex, unsigned long *rowner);
37102 +void lock_dlm_debug_log(const char *fmt, ...);
37103 +void lock_dlm_debug_dump(void);
37106 +#define LOCK_DLM_DEBUG
37108 +#ifdef LOCK_DLM_DEBUG
37109 +#define log_debug(fmt, args...) lock_dlm_debug_log(fmt, ##args)
37111 +#define log_debug(fmt, args...)
37114 +#define DLM_ASSERT(x, do) \
37118 + lock_dlm_debug_dump(); \
37119 + printk("\nlock_dlm: Assertion failed on line %d of file %s\n" \
37120 + "lock_dlm: assertion: \"%s\"\n" \
37121 + "lock_dlm: time = %lu\n", \
37122 + __LINE__, __FILE__, #x, jiffies); \
37125 + panic("lock_dlm: Record message above and reboot.\n"); \
37129 +#define DLM_RETRY(do_this, until_this) \
37132 + do { do_this; } while (0); \
37133 + if (until_this) \
37135 + printk("lock_dlm: out of memory: %s, %u\n", __FILE__, __LINE__); \
37140 diff -urN linux-orig/fs/gfs_locking/lock_dlm/main.c linux-patched/fs/gfs_locking/lock_dlm/main.c
37141 --- linux-orig/fs/gfs_locking/lock_dlm/main.c 1969-12-31 18:00:00.000000000 -0600
37142 +++ linux-patched/fs/gfs_locking/lock_dlm/main.c 2004-06-16 12:03:17.967822065 -0500
37144 +/******************************************************************************
37145 +*******************************************************************************
37147 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
37148 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
37150 +** This copyrighted material is made available to anyone wishing to use,
37151 +** modify, copy, or redistribute it subject to the terms and conditions
37152 +** of the GNU General Public License v.2.
37154 +*******************************************************************************
37155 +******************************************************************************/
37157 +#include "lock_dlm.h"
37158 +#include <linux/init.h>
37159 +#include <linux/proc_fs.h>
37161 +#if defined(LOCK_DLM_DEBUG)
37162 +#define LOCK_DLM_DEBUG_SIZE (1024)
37163 +#define MAX_DEBUG_MSG_LEN (64)
37165 +#define LOCK_DLM_DEBUG_SIZE (0)
37166 +#define MAX_DEBUG_MSG_LEN (0)
37169 +static char * debug_buf;
37170 +static unsigned int debug_size;
37171 +static unsigned int debug_point;
37172 +static int debug_wrap;
37173 +static spinlock_t debug_lock;
37174 +static struct proc_dir_entry * debug_proc_entry = NULL;
37177 +void lock_dlm_debug_log(const char *fmt, ...)
37180 + int i, n, size, len;
37181 + char buf[MAX_DEBUG_MSG_LEN+1];
37183 + spin_lock(&debug_lock);
37188 + size = MAX_DEBUG_MSG_LEN;
37189 + memset(buf, 0, size+1);
37192 + /* n = snprintf(buf, size, "%s ", dlm->fsname); */
37195 + va_start(va, fmt);
37196 + vsnprintf(buf+n, size, fmt, va);
37199 + len = strlen(buf);
37200 + if (len > MAX_DEBUG_MSG_LEN-1)
37201 + len = MAX_DEBUG_MSG_LEN-1;
37203 + buf[len+1] = '\0';
37205 + for (i = 0; i < strlen(buf); i++) {
37206 + debug_buf[debug_point++] = buf[i];
37208 + if (debug_point == debug_size) {
37214 + spin_unlock(&debug_lock);
37217 +static void debug_setup(int size)
37221 + if (size > PAGE_SIZE)
37222 + size = PAGE_SIZE;
37224 + b = kmalloc(size, GFP_KERNEL);
37226 + spin_lock(&debug_lock);
37228 + kfree(debug_buf);
37231 + debug_size = size;
37235 + memset(debug_buf, 0, debug_size);
37237 + spin_unlock(&debug_lock);
37240 +static void debug_init(void)
37242 + debug_buf = NULL;
37246 + spin_lock_init(&debug_lock);
37247 + debug_setup(LOCK_DLM_DEBUG_SIZE);
37250 +void lock_dlm_debug_dump(void)
37254 + spin_lock(&debug_lock);
37256 + if (debug_wrap) {
37257 + for (i = debug_point; i < debug_size; i++)
37258 + printk("%c", debug_buf[i]);
37260 + for (i = 0; i < debug_point; i++)
37261 + printk("%c", debug_buf[i]);
37263 + spin_unlock(&debug_lock);
37266 +#ifdef CONFIG_PROC_FS
37267 +int lock_dlm_debug_info(char *b, char **start, off_t offset, int length)
37271 + spin_lock(&debug_lock);
37273 + if (debug_wrap) {
37274 + for (i = debug_point; i < debug_size; i++)
37275 + n += sprintf(b + n, "%c", debug_buf[i]);
37277 + for (i = 0; i < debug_point; i++)
37278 + n += sprintf(b + n, "%c", debug_buf[i]);
37280 + spin_unlock(&debug_lock);
37287 + * init_lock_dlm - Initialize the lock_dlm module
37289 + * Returns: 0 on success, -EXXX on failure
37292 +int __init init_lock_dlm(void)
37296 + error = lm_register_proto(&lock_dlm_ops);
37298 + printk("lock_dlm: can't register protocol: (%d)\n", error);
37302 +#ifdef CONFIG_PROC_FS
37303 + debug_proc_entry = create_proc_entry("cluster/lock_dlm_debug", S_IRUGO,
37305 + if (debug_proc_entry)
37306 + debug_proc_entry->get_info = &lock_dlm_debug_info;
37310 + printk("Lock_DLM (built %s %s) installed\n", __DATE__, __TIME__);
37315 + * exit_lock_dlm - cleanup the lock_dlm module
37319 +void __exit exit_lock_dlm(void)
37321 + lm_unregister_proto(&lock_dlm_ops);
37323 +#ifdef CONFIG_PROC_FS
37324 + if (debug_proc_entry)
37325 + remove_proc_entry("cluster/lock_dlm_debug", NULL);
37330 +module_init(init_lock_dlm);
37331 +module_exit(exit_lock_dlm);
37333 +MODULE_DESCRIPTION("GFS DLM Locking Module");
37334 +MODULE_AUTHOR("Red Hat, Inc.");
37335 +MODULE_LICENSE("GPL");
37336 diff -urN linux-orig/fs/gfs_locking/lock_dlm/mount.c linux-patched/fs/gfs_locking/lock_dlm/mount.c
37337 --- linux-orig/fs/gfs_locking/lock_dlm/mount.c 1969-12-31 18:00:00.000000000 -0600
37338 +++ linux-patched/fs/gfs_locking/lock_dlm/mount.c 2004-06-16 12:03:17.967822065 -0500
37340 +/******************************************************************************
37341 +*******************************************************************************
37343 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
37344 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
37346 +** This copyrighted material is made available to anyone wishing to use,
37347 +** modify, copy, or redistribute it subject to the terms and conditions
37348 +** of the GNU General Public License v.2.
37350 +*******************************************************************************
37351 +******************************************************************************/
37353 +#include <linux/socket.h>
37354 +#include <net/sock.h>
37356 +#include "lock_dlm.h"
37357 +#include <cluster/cnxman.h>
37358 +#include <cluster/service.h>
37360 +static int init_cman(dlm_t *dlm)
37363 + char *name = NULL;
37365 + if (!dlm->clustername)
37368 + error = kcl_addref_cluster();
37370 + printk("lock_dlm: cannot get cman reference %d\n", error);
37374 + error = kcl_cluster_name(&name);
37376 + printk("lock_dlm: cannot get cman cluster name %d\n", error);
37380 + if (strcmp(name, dlm->clustername)) {
37382 + printk("lock_dlm: cman cluster name \"%s\" does not match "
37383 + "file system cluster name \"%s\"\n",
37384 + name, dlm->clustername);
37392 + kcl_releaseref_cluster();
37399 +static int release_cman(dlm_t *dlm)
37401 + return kcl_releaseref_cluster();
37404 +static int init_cluster(dlm_t *dlm, char *table_name)
37406 + char *buf, *c, *clname, *fsname;
37407 + int len, error = -1;
37410 + * Parse superblock lock table <clustername>:<fsname>
37413 + len = strlen(table_name) + 1;
37414 + buf = kmalloc(len, GFP_KERNEL);
37417 + memset(buf, 0, len);
37418 + memcpy(buf, table_name, strlen(table_name));
37420 + c = strstr(buf, ":");
37428 + dlm->max_nodes = LOCK_DLM_MAX_NODES;
37430 + len = strlen(clname) + 1;
37431 + c = kmalloc(len, GFP_KERNEL);
37434 + memset(c, 0, len);
37435 + memcpy(c, clname, len-1);
37436 + dlm->cnlen = len-1;
37437 + dlm->clustername = c;
37439 + len = strlen(fsname) + 1;
37440 + c = kmalloc(len, GFP_KERNEL);
37443 + memset(c, 0, len);
37444 + memcpy(c, fsname, len-1);
37445 + dlm->fnlen = len-1;
37448 + error = init_cman(dlm);
37456 + kfree(dlm->fsname);
37458 + kfree(dlm->clustername);
37462 + printk("lock_dlm: init_cluster error %d\n", error);
37466 +static int release_cluster(dlm_t *dlm)
37468 + release_cman(dlm);
37469 + kfree(dlm->clustername);
37470 + kfree(dlm->fsname);
37474 +static int init_fence(dlm_t *dlm)
37477 + struct kcl_service *s, *safe;
37478 + int error, found = FALSE;
37480 + error = kcl_get_services(&head, SERVICE_LEVEL_FENCE);
37484 + list_for_each_entry_safe(s, safe, &head, list) {
37485 + list_del(&s->list);
37486 + if (!found && !strcmp(s->name, "default"))
37496 + printk("lock_dlm: init_fence error %d\n", error);
37500 +static int release_fence(dlm_t *dlm)
37505 +static int init_gdlm(dlm_t *dlm)
37509 + error = dlm_new_lockspace(dlm->fsname, dlm->fnlen, &dlm->gdlm_lsp,
37510 + DLM_LSF_NOTIMERS);
37512 + printk("lock_dlm: new lockspace error %d\n", error);
37517 +static int release_gdlm(dlm_t *dlm)
37519 + dlm_release_lockspace(dlm->gdlm_lsp, 1);
37523 +static dlm_t *init_dlm(lm_callback_t cb, lm_fsdata_t *fsdata)
37527 + dlm = kmalloc(sizeof(dlm_t), GFP_KERNEL);
37531 + memset(dlm, 0, sizeof(dlm_t));
37534 + dlm->fsdata = fsdata;
37536 + spin_lock_init(&dlm->async_lock);
37538 + INIT_LIST_HEAD(&dlm->complete);
37539 + INIT_LIST_HEAD(&dlm->blocking);
37540 + INIT_LIST_HEAD(&dlm->delayed);
37541 + INIT_LIST_HEAD(&dlm->submit);
37542 + INIT_LIST_HEAD(&dlm->starts);
37543 + INIT_LIST_HEAD(&dlm->resources);
37545 + init_waitqueue_head(&dlm->wait);
37547 + INIT_LIST_HEAD(&dlm->mg_nodes);
37548 + init_MUTEX(&dlm->mg_nodes_lock);
37549 + init_MUTEX(&dlm->res_lock);
37555 + * dlm_mount - mount a dlm lockspace
37556 + * @table_name: the name of the space to mount
37557 + * @host_data: host specific data
37558 + * @cb: the callback
37559 + * @lockstruct: the lock structure to fill in
37561 + * Returns: 0 on success, -EXXX on failure
37564 +static int lm_dlm_mount(char *table_name, char *host_data,
37565 + lm_callback_t cb, lm_fsdata_t *fsdata,
37566 + unsigned int min_lvb_size,
37567 + struct lm_lockstruct *lockstruct)
37570 + int error = -ENOMEM;
37572 + if (min_lvb_size > DLM_LVB_SIZE)
37575 + dlm = init_dlm(cb, fsdata);
37579 + error = init_cluster(dlm, table_name);
37583 + error = init_fence(dlm);
37585 + goto out_cluster;
37587 + error = init_gdlm(dlm);
37591 + error = init_async_thread(dlm);
37595 + error = init_mountgroup(dlm);
37599 + lockstruct->ls_jid = dlm->jid;
37600 + lockstruct->ls_first = test_bit(DFL_FIRST_MOUNT, &dlm->flags);
37601 + lockstruct->ls_lockspace = dlm;
37602 + lockstruct->ls_ops = &lock_dlm_ops;
37603 + lockstruct->ls_flags = LM_LSFLAG_ASYNC;
37604 + lockstruct->ls_lvb_size = DLM_LVB_SIZE;
37608 + release_async_thread(dlm);
37611 + release_gdlm(dlm);
37614 + release_fence(dlm);
37617 + release_cluster(dlm);
37627 + * dlm_others_may_mount
37628 + * @lockspace: the lockspace that other nodes may now mount
37632 +static void lm_dlm_others_may_mount(lm_lockspace_t *lockspace)
37634 + /* Do nothing. The first node to join the Mount Group will complete
37635 + * before Service Manager allows another node to join. */
37639 + * dlm_unmount - unmount a lock space
37640 + * @lockspace: the lockspace to unmount
37644 +static void lm_dlm_unmount(lm_lockspace_t *lockspace)
37646 + dlm_t *dlm = (dlm_t *) lockspace;
37648 + release_mountgroup(dlm);
37649 + release_async_thread(dlm);
37650 + release_gdlm(dlm);
37651 + release_fence(dlm);
37652 + release_cluster(dlm);
37656 +struct lm_lockops lock_dlm_ops = {
37657 + lm_proto_name:"lock_dlm",
37658 + lm_mount:lm_dlm_mount,
37659 + lm_others_may_mount:lm_dlm_others_may_mount,
37660 + lm_unmount:lm_dlm_unmount,
37661 + lm_get_lock:lm_dlm_get_lock,
37662 + lm_put_lock:lm_dlm_put_lock,
37663 + lm_lock:lm_dlm_lock,
37664 + lm_unlock:lm_dlm_unlock,
37665 + lm_plock:lm_dlm_plock,
37666 + lm_punlock:lm_dlm_punlock,
37667 + lm_plock_get:lm_dlm_plock_get,
37668 + lm_cancel:lm_dlm_cancel,
37669 + lm_hold_lvb:lm_dlm_hold_lvb,
37670 + lm_unhold_lvb:lm_dlm_unhold_lvb,
37671 + lm_sync_lvb:lm_dlm_sync_lvb,
37672 + lm_recovery_done:lm_dlm_recovery_done,
37673 + lm_owner:THIS_MODULE,
37675 diff -urN linux-orig/fs/gfs_locking/lock_dlm/plock.c linux-patched/fs/gfs_locking/lock_dlm/plock.c
37676 --- linux-orig/fs/gfs_locking/lock_dlm/plock.c 1969-12-31 18:00:00.000000000 -0600
37677 +++ linux-patched/fs/gfs_locking/lock_dlm/plock.c 2004-06-16 12:03:17.967822065 -0500
37679 +/******************************************************************************
37680 +*******************************************************************************
37682 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
37683 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
37685 +** This copyrighted material is made available to anyone wishing to use,
37686 +** modify, copy, or redistribute it subject to the terms and conditions
37687 +** of the GNU General Public License v.2.
37689 +*******************************************************************************
37690 +******************************************************************************/
37692 +#include "lock_dlm.h"
37694 +#define MIN(a,b) (((a) <= (b)) ? (a) : (b))
37695 +#define MAX(a,b) (((a) >= (b)) ? (a) : (b))
37698 +#define NO_CREATE 0
37709 +static int check_conflict(dlm_t *dlm, struct dlm_resource *r,
37710 + struct lm_lockname *name, unsigned long owner,
37711 + uint64_t start, uint64_t end, int ex);
37714 +static int lock_resource(struct dlm_resource *r)
37717 + struct lm_lockname name;
37720 + name.ln_type = LM_TYPE_PLOCK_UPDATE;
37721 + name.ln_number = r->name.ln_number;
37723 + error = create_lp(r->dlm, &name, &lp);
37727 + set_bit(LFL_IDLOCK, &lp->flags);
37728 + lp->req = DLM_LOCK_EX;
37729 + do_lock(lp, NULL);
37730 + wait_for_completion(&lp->uast_wait);
37732 + error = lp->lksb.sb_status;
37742 +static void unlock_resource(struct dlm_resource *r)
37744 + do_unlock(r->update);
37745 + kfree(r->update);
37748 +static struct dlm_resource *search_resource(dlm_t *dlm, struct lm_lockname *name)
37750 + struct dlm_resource *r;
37752 + list_for_each_entry(r, &dlm->resources, list) {
37753 + if (lm_name_equal(&r->name, name))
37759 +static int get_resource(dlm_t *dlm, struct lm_lockname *name, int create,
37760 + struct dlm_resource **res)
37762 + struct dlm_resource *r, *r2;
37763 + int error = -ENOMEM;
37765 + down(&dlm->res_lock);
37766 + r = search_resource(dlm, name);
37769 + up(&dlm->res_lock);
37774 + if (create == NO_CREATE) {
37779 + r = kmalloc(sizeof(struct dlm_resource), GFP_KERNEL);
37783 + memset(r, 0, sizeof(struct dlm_resource));
37787 + INIT_LIST_HEAD(&r->locks);
37788 + INIT_LIST_HEAD(&r->async_locks);
37789 + init_MUTEX(&r->sema);
37790 + spin_lock_init(&r->async_spin);
37792 + down(&dlm->res_lock);
37793 + r2 = search_resource(dlm, name);
37796 + up(&dlm->res_lock);
37802 + list_add_tail(&r->list, &dlm->resources);
37803 + up(&dlm->res_lock);
37812 +static void put_resource(struct dlm_resource *r)
37814 + dlm_t *dlm = r->dlm;
37816 + down(&dlm->res_lock);
37818 + if (r->count == 0) {
37819 + DLM_ASSERT(list_empty(&r->locks), );
37820 + DLM_ASSERT(list_empty(&r->async_locks), );
37821 + list_del(&r->list);
37824 + up(&dlm->res_lock);
37827 +static inline void hold_resource(struct dlm_resource *r)
37829 + down(&r->dlm->res_lock);
37831 + up(&r->dlm->res_lock);
37834 +static inline int ranges_overlap(uint64_t start1, uint64_t end1,
37835 + uint64_t start2, uint64_t end2)
37837 + if (end1 < start2 || start1 > end2)
37843 + * overlap_type - returns a value based on the type of overlap
37844 + * @s1 - start of new lock range
37845 + * @e1 - end of new lock range
37846 + * @s2 - start of existing lock range
37847 + * @e2 - end of existing lock range
37851 +static int overlap_type(uint64_t s1, uint64_t e1, uint64_t s2, uint64_t e2)
37860 + if (s1 == s2 && e1 == e2)
37868 + else if (s1 == s2 && e1 < e2)
37876 + else if (s1 > s2 && e1 == e2)
37884 + else if (s1 > s2 && e1 < e2)
37888 + * ---r1--- or ---r1--- or ---r1---
37889 + * --r2-- --r2-- --r2--
37892 + else if (s1 <= s2 && e1 >= e2)
37900 + else if (s1 > s2 && e1 > e2)
37908 + else if (s1 < s2 && e1 < e2)
37917 +/* shrink the range start2:end2 by the partially overlapping start:end */
37919 +static int shrink_range2(uint64_t *start2, uint64_t *end2,
37920 + uint64_t start, uint64_t end)
37924 + if (*start2 < start)
37925 + *end2 = start - 1;
37926 + else if (*end2 > end)
37927 + *start2 = end + 1;
37933 +static int shrink_range(struct posix_lock *po, uint64_t start, uint64_t end)
37935 + return shrink_range2(&po->start, &po->end, start, end);
37938 +static void put_lock(dlm_lock_t *lp)
37940 + struct posix_lock *po = lp->posix;
37943 + if (po->count == 0) {
37949 +static int create_lock(struct dlm_resource *r, unsigned long owner, int ex,
37950 + uint64_t start, uint64_t end, dlm_lock_t **lpp)
37953 + struct posix_lock *po;
37956 + error = create_lp(r->dlm, &r->name, &lp);
37960 + po = kmalloc(sizeof(struct posix_lock), GFP_KERNEL);
37965 + memset(po, 0, sizeof(struct posix_lock));
37969 + po->resource = r;
37971 + po->start = start;
37973 + po->owner = owner;
37975 + list_add_tail(&po->list, &r->locks);
37981 +static unsigned int make_flags_posix(dlm_lock_t *lp, int wait)
37983 + unsigned int lkf = 0;
37985 + if (wait == NO_WAIT || wait == X_WAIT)
37986 + lkf |= DLM_LKF_NOQUEUE;
37988 + if (lp->lksb.sb_lkid != 0) {
37989 + lkf |= DLM_LKF_CONVERT;
37990 + if (wait == WAIT)
37991 + lkf |= DLM_LKF_EXPEDITE;
37996 +static void do_range_lock(dlm_lock_t *lp)
37998 + struct dlm_range range = { lp->posix->start, lp->posix->end };
37999 + do_lock(lp, &range);
38002 +static void request_lock(dlm_lock_t *lp, int wait)
38004 + log_debug("req %x,%"PRIx64" %s %"PRIx64"-%"PRIx64" %u w %u",
38005 + lp->lockname.ln_type, lp->lockname.ln_number,
38006 + lp->posix->ex ? "ex" : "sh", lp->posix->start,
38007 + lp->posix->end, current->pid, wait);
38009 + set_bit(LFL_IDLOCK, &lp->flags);
38010 + lp->req = lp->posix->ex ? DLM_LOCK_EX : DLM_LOCK_PR;
38011 + lp->lkf = make_flags_posix(lp, wait);
38013 + do_range_lock(lp);
38016 +static void add_async(struct posix_lock *po, struct dlm_resource *r)
38018 + spin_lock(&r->async_spin);
38019 + list_add_tail(&po->async_list, &r->async_locks);
38020 + spin_unlock(&r->async_spin);
38023 +static void del_async(struct posix_lock *po, struct dlm_resource *r)
38025 + spin_lock(&r->async_spin);
38026 + list_del(&po->async_list);
38027 + spin_unlock(&r->async_spin);
38030 +static int wait_async(dlm_lock_t *lp)
38032 + wait_for_completion(&lp->uast_wait);
38033 + del_async(lp->posix, lp->posix->resource);
38034 + return lp->lksb.sb_status;
38037 +static void wait_async_list(struct dlm_resource *r, unsigned long owner)
38039 + struct posix_lock *po;
38040 + int error, found;
38044 + spin_lock(&r->async_spin);
38045 + list_for_each_entry(po, &r->async_locks, async_list) {
38046 + if (po->owner != owner)
38051 + spin_unlock(&r->async_spin);
38054 + DLM_ASSERT(po->lp, );
38055 + error = wait_async(po->lp);
38056 + DLM_ASSERT(!error, );
38061 +static void update_lock(dlm_lock_t *lp, int wait)
38063 + request_lock(lp, wait);
38064 + add_async(lp->posix, lp->posix->resource);
38066 + if (wait == NO_WAIT || wait == X_WAIT) {
38067 + int error = wait_async(lp);
38068 + DLM_ASSERT(!error, printk("error=%d\n", error););
38072 +static void add_lock(struct dlm_resource *r, unsigned long owner, int wait,
38073 + int ex, uint64_t start, uint64_t end)
38078 + error = create_lock(r, owner, ex, start, end, &lp);
38079 + DLM_ASSERT(!error, );
38081 + hold_resource(r);
38082 + update_lock(lp, wait);
38085 +static int remove_lock(dlm_lock_t *lp)
38087 + struct dlm_resource *r = lp->posix->resource;
38089 + log_debug("remove %x,%"PRIx64" %u",
38090 + r->name.ln_type, r->name.ln_number, current->pid);
38098 +/* RN within RE (and starts or ends on RE boundary)
38099 + 1. add new lock for non-overlap area of RE, orig mode
38100 + 2. convert RE to RN range and mode */
38102 +static int lock_case1(struct posix_lock *po, struct dlm_resource *r,
38103 + unsigned long owner, int wait, int ex, uint64_t start,
38106 + uint64_t start2, end2;
38108 + /* non-overlapping area start2:end2 */
38109 + start2 = po->start;
38111 + shrink_range2(&start2, &end2, start, end);
38113 + po->start = start;
38118 + add_lock(r, owner, X_WAIT, SH, start2, end2);
38119 + update_lock(po->lp, wait);
38121 + add_lock(r, owner, WAIT, EX, start2, end2);
38122 + update_lock(po->lp, X_WAIT);
38127 +/* RN within RE (RE overlaps RN on both sides)
38128 + 1. add new lock for front fragment, orig mode
38129 + 2. add new lock for back fragment, orig mode
38130 + 3. convert RE to RN range and mode */
38132 +static int lock_case2(struct posix_lock *po, struct dlm_resource *r,
38133 + unsigned long owner, int wait, int ex, uint64_t start,
38137 + add_lock(r, owner, X_WAIT, SH, po->start, start-1);
38138 + add_lock(r, owner, X_WAIT, SH, end+1, po->end);
38140 + po->start = start;
38144 + update_lock(po->lp, wait);
38146 + add_lock(r, owner, WAIT, EX, po->start, start-1);
38147 + add_lock(r, owner, WAIT, EX, end+1, po->end);
38149 + po->start = start;
38153 + update_lock(po->lp, X_WAIT);
38158 +/* returns ranges from exist list in order of their start values */
38160 +static int next_exist(struct list_head *exist, uint64_t *start, uint64_t *end)
38162 + struct posix_lock *po;
38163 + int first = TRUE, first_call = FALSE;
38165 + if (!*start && !*end)
38166 + first_call = TRUE;
38168 + list_for_each_entry(po, exist, list) {
38169 + if (!first_call && (po->start <= *start))
38173 + *start = po->start;
38176 + } else if (po->start < *start) {
38177 + *start = po->start;
38182 + return (first ? -1 : 0);
38185 +/* adds locks in gaps between existing locks from start to end */
38187 +static int fill_gaps(struct list_head *exist, struct dlm_resource *r,
38188 + unsigned long owner, int wait, int ex, uint64_t start,
38191 + uint64_t exist_start = 0, exist_end = 0;
38193 + /* cover gaps in front of each existing lock */
38195 + if (next_exist(exist, &exist_start, &exist_end))
38197 + if (start < exist_start)
38198 + add_lock(r, owner, wait, ex, start, exist_start-1);
38199 + start = exist_end + 1;
38202 + /* cover gap after last existing lock */
38203 + if (exist_end < end)
38204 + add_lock(r, owner, wait, ex, exist_end+1, end);
38209 +/* RE within RN (possibly more than one RE lock, all within RN) */
38211 +static int lock_case3(struct list_head *exist, struct dlm_resource *r,
38212 + unsigned long owner, int wait, int ex, uint64_t start,
38215 + struct posix_lock *po, *safe;
38217 + fill_gaps(exist, r, owner, wait, ex, start, end);
38222 + /* update existing locks to new mode and put back in locks list */
38223 + list_for_each_entry_safe(po, safe, exist, list) {
38224 + list_move_tail(&po->list, &r->locks);
38225 + if (po->ex == ex)
38228 + update_lock(po->lp, wait);
38234 +/* RE within RN (possibly more than one RE lock, one RE partially overlaps RN)
38235 + 1. add new locks with new mode for RN gaps not covered by RE's
38236 + 2. convert RE locks' mode to new mode
38237 + other steps deal with the partial-overlap fragment and depend on whether
38238 + the request is sh->ex or ex->sh */
38240 +static int lock_case4(struct posix_lock *opo, struct list_head *exist,
38241 + struct dlm_resource *r, unsigned long owner, int wait,
38242 + int ex, uint64_t start, uint64_t end)
38244 + struct posix_lock *po, *safe;
38245 + uint64_t over_start = 0, over_end = 0;
38246 + uint64_t frag_start = 0, frag_end = 0;
38248 + /* fragment (non-overlap) range of opo */
38249 + if (opo->start < start) {
38250 + frag_start = opo->start;
38251 + frag_end = start - 1;
38253 + frag_start = end + 1;
38254 + frag_end = opo->end;
38257 + /* overlap range of opo */
38258 + if (opo->start < start) {
38259 + over_start = start;
38260 + over_end = opo->end;
38262 + over_start = opo->start;
38266 + /* cut off the non-overlap portion of opo so fill_gaps will work */
38267 + opo->start = over_start;
38268 + opo->end = over_end;
38270 + fill_gaps(exist, r, owner, wait, ex, start, end);
38272 + /* update existing locks to new mode and put back in locks list */
38273 + list_for_each_entry_safe(po, safe, exist, list) {
38274 + list_move_tail(&po->list, &r->locks);
38277 + if (po->ex == ex)
38280 + update_lock(po->lp, wait);
38283 + /* deal with the RE that partially overlaps the requested range */
38285 + if (ex == opo->ex)
38289 + /* 1. add a shared lock in the non-overlap range
38290 + 2. convert RE to overlap range and requested mode */
38292 + add_lock(r, owner, X_WAIT, SH, frag_start, frag_end);
38294 + opo->start = over_start;
38295 + opo->end = over_end;
38298 + update_lock(opo->lp, wait);
38300 + /* 1. request a shared lock in the overlap range
38301 + 2. convert RE to non-overlap range
38302 + 3. wait for shared lock to complete */
38304 + add_lock(r, owner, WAIT, SH, over_start, over_end);
38306 + opo->start = frag_start;
38307 + opo->end = frag_end;
38309 + update_lock(opo->lp, X_WAIT);
38315 +/* go through r->locks to find what needs to be done to extend,
38316 + shrink, shift, split, etc existing locks (this often involves adding new
38317 + locks in addition to modifying existing locks. */
38319 +static int plock_internal(struct dlm_resource *r, unsigned long owner,
38320 + int wait, int ex, uint64_t start, uint64_t end)
38322 + LIST_HEAD(exist);
38323 + struct posix_lock *po, *safe, *case4_po = NULL;
38326 + list_for_each_entry_safe(po, safe, &r->locks, list) {
38327 + if (po->owner != owner)
38329 + if (!ranges_overlap(po->start, po->end, start, end))
38332 + /* existing range (RE) overlaps new range (RN) */
38334 + switch(overlap_type(start, end, po->start, po->end)) {
38337 + if (po->ex == ex)
38340 + /* ranges the same - just update the existing lock */
38342 + update_lock(po->lp, wait);
38346 + if (po->ex == ex)
38349 + error = lock_case1(po, r, owner, wait, ex, start, end);
38353 + if (po->ex == ex)
38356 + error = lock_case2(po, r, owner, wait, ex, start, end);
38360 + list_move_tail(&po->list, &exist);
38364 + DLM_ASSERT(!case4_po, );
38366 + list_move_tail(&po->list, &exist);
38376 + error = lock_case4(case4_po, &exist, r, owner, wait, ex,
38378 + else if (!list_empty(&exist))
38379 + error = lock_case3(&exist, r, owner, wait, ex, start, end);
38381 + add_lock(r, owner, wait, ex, start, end);
38387 +static int punlock_internal(struct dlm_resource *r, unsigned long owner,
38388 + uint64_t start, uint64_t end)
38390 + struct posix_lock *po, *safe;
38393 + list_for_each_entry_safe(po, safe, &r->locks, list) {
38394 + if (po->owner != owner)
38396 + if (!ranges_overlap(po->start, po->end, start, end))
38399 + /* existing range (RE) overlaps new range (RN) */
38401 + switch(overlap_type(start, end, po->start, po->end)) {
38404 + /* ranges the same - just remove the existing lock */
38406 + list_del(&po->list);
38407 + remove_lock(po->lp);
38411 + /* RN within RE and starts or ends on RE boundary -
38412 + * shrink and update RE */
38414 + shrink_range(po, start, end);
38415 + update_lock(po->lp, X_WAIT);
38419 + /* RN within RE - shrink and update RE to be front
38420 + * fragment, and add a new lock for back fragment */
38422 + add_lock(r, owner, po->ex ? WAIT : X_WAIT, po->ex,
38425 + po->end = start - 1;
38426 + update_lock(po->lp, X_WAIT);
38430 + /* RE within RN - remove RE, then continue checking
38431 + * because RN could cover other locks */
38433 + list_del(&po->list);
38434 + remove_lock(po->lp);
38438 + /* front of RE in RN, or end of RE in RN - shrink and
38439 + * update RE, then continue because RN could cover
38442 + shrink_range(po, start, end);
38443 + update_lock(po->lp, X_WAIT);
38456 +int lm_dlm_plock(lm_lockspace_t *lockspace, struct lm_lockname *name,
38457 + unsigned long owner, int wait, int ex, uint64_t start,
38460 + dlm_t *dlm = (dlm_t *) lockspace;
38461 + struct dlm_resource *r;
38464 + log_debug("en plock %u %x,%"PRIx64"", current->pid,
38465 + name->ln_type, name->ln_number);
38467 + error = get_resource(dlm, name, CREATE, &r);
38472 + /* Wait, without holding any locks, until this plock request is not
38473 + blocked by plocks of *other* *local* processes. Then, none of the
38474 + dlm requests below will wait on a lock from a local process.
38476 + This should not be necessary since we wait for completion after
38477 + up(). This means a local process p1 can unlock lkb X while local p2
38478 + is waiting for X (in wait_async_list). */
38479 + error = wait_local(r, owner, wait, ex, start, end);
38485 + error = lock_resource(r);
38489 + /* check_conflict() checks for conflicts with plocks from other local
38490 + processes and other nodes. */
38492 + if (!wait && check_conflict(dlm, r, name, owner, start, end, ex)) {
38494 + unlock_resource(r);
38498 + /* If NO_WAIT all requests should return immediately.
38499 + If WAIT all requests go on r->async_locks which we wait on in
38500 + wait_async_locks(). This means DLM should not return -EAGAIN and we
38501 + should never block waiting for a plock to be released (by a local or
38502 + remote process) until we call wait_async_list(). */
38504 + error = plock_internal(r, owner, wait, ex, start, end);
38505 + unlock_resource(r);
38507 + /* wait_async_list() must follow the up() because we must be able
38508 + to punlock a range on this resource while there's a blocked plock
38509 + request to prevent deadlock between nodes (and processes). */
38513 + wait_async_list(r, owner);
38516 + log_debug("ex plock %u error %d", current->pid, error);
38520 +int lm_dlm_punlock(lm_lockspace_t *lockspace, struct lm_lockname *name,
38521 + unsigned long owner, uint64_t start, uint64_t end)
38523 + dlm_t *dlm = (dlm_t *) lockspace;
38524 + struct dlm_resource *r;
38527 + log_debug("en punlock %u %x,%"PRIx64"", current->pid,
38528 + name->ln_type, name->ln_number);
38530 + error = get_resource(dlm, name, NO_CREATE, &r);
38535 + error = lock_resource(r);
38539 + error = punlock_internal(r, owner, start, end);
38540 + unlock_resource(r);
38544 + wait_async_list(r, owner);
38547 + log_debug("ex punlock %u error %d", current->pid, error);
38551 +static void query_ast(void *astargs)
38553 + dlm_lock_t *lp = (dlm_lock_t *) astargs;;
38554 + complete(&lp->uast_wait);
38557 +static int get_conflict_global(dlm_t *dlm, struct lm_lockname *name,
38558 + unsigned long owner, uint64_t *start,
38559 + uint64_t *end, int *ex, unsigned long *rowner)
38562 + struct dlm_queryinfo qinfo;
38563 + struct dlm_lockinfo *lki;
38564 + int query = 0, s, error;
38566 + /* acquire a null lock on which base the query */
38568 + error = create_lp(dlm, name, &lp);
38572 + lp->req = DLM_LOCK_NL;
38573 + set_bit(LFL_IDLOCK, &lp->flags);
38574 + do_lock(lp, NULL);
38575 + wait_for_completion(&lp->uast_wait);
38577 + /* do query, repeating if insufficient space */
38579 + query = DLM_LOCK_THIS | DLM_QUERY_QUEUE_GRANTED |
38580 + DLM_QUERY_LOCKS_HIGHER;
38582 + for (s = 16; s < dlm->max_nodes + 1; s += 16) {
38584 + lki = kmalloc(s * sizeof(struct dlm_lockinfo), GFP_KERNEL);
38589 + memset(lki, 0, s * sizeof(struct dlm_lockinfo));
38590 + memset(&qinfo, 0, sizeof(qinfo));
38591 + qinfo.gqi_locksize = s;
38592 + qinfo.gqi_lockinfo = lki;
38594 + init_completion(&lp->uast_wait);
38595 + error = dlm_query(dlm->gdlm_lsp, &lp->lksb, query, &qinfo,
38596 + query_ast, (void *) lp);
38601 + wait_for_completion(&lp->uast_wait);
38602 + error = lp->lksb.sb_status;
38607 + if (error != -E2BIG)
38611 + /* check query results for blocking locks */
38613 + for (s = 0; s < qinfo.gqi_lockcount; s++) {
38615 + lki = &qinfo.gqi_lockinfo[s];
38617 + if (!ranges_overlap(*start, *end, lki->lki_grrange.ra_start,
38618 + lki->lki_grrange.ra_end))
38621 + if (lki->lki_node == dlm->our_nodeid)
38624 + if (lki->lki_grmode == DLM_LOCK_EX || *ex) {
38625 + *start = lki->lki_grrange.ra_start;
38626 + *end = lki->lki_grrange.ra_end;
38627 + *ex = (lki->lki_grmode == DLM_LOCK_EX) ? 1 : 0;
38628 + *rowner = lki->lki_node;
38634 + kfree(qinfo.gqi_lockinfo);
38643 +static int get_conflict_local(dlm_t *dlm, struct dlm_resource *r,
38644 + struct lm_lockname *name, unsigned long owner,
38645 + uint64_t *start, uint64_t *end, int *ex,
38646 + unsigned long *rowner)
38648 + struct posix_lock *po;
38649 + int found = FALSE;
38651 + list_for_each_entry(po, &r->locks, list) {
38652 + if (po->owner == owner)
38654 + if (!ranges_overlap(po->start, po->end, *start, *end))
38657 + if (*ex || po->ex) {
38658 + *start = po->start;
38661 + *rowner = po->owner;
38669 +int lm_dlm_plock_get(lm_lockspace_t *lockspace, struct lm_lockname *name,
38670 + unsigned long owner, uint64_t *start, uint64_t *end,
38671 + int *ex, unsigned long *rowner)
38673 + dlm_t *dlm = (dlm_t *) lockspace;
38674 + struct dlm_resource *r;
38675 + int error, found;
38677 + error = get_resource(dlm, name, NO_CREATE, &r);
38680 + found = get_conflict_local(dlm, r, name, owner, start, end, ex,
38688 + error = get_conflict_global(dlm, name, owner, start, end, ex, rowner);
38693 +static int check_conflict(dlm_t *dlm, struct dlm_resource *r,
38694 + struct lm_lockname *name, unsigned long owner,
38695 + uint64_t start, uint64_t end, int ex)
38697 + uint64_t get_start = start, get_end = end;
38698 + unsigned long get_owner = 0;
38699 + int get_ex = ex, error;
38701 + error = get_conflict_local(dlm, r, name, owner,
38702 + &get_start, &get_end, &get_ex, &get_owner);
38706 + error = get_conflict_global(dlm, name, owner,
38707 + &get_start, &get_end, &get_ex, &get_owner);
38709 + log_debug("check_conflict %d %"PRIx64"-%"PRIx64" %"PRIx64"-%"PRIx64" "
38710 + "ex %d %d own %lu %lu pid %u", error, start, end,
38711 + get_start, get_end, ex, get_ex, owner, get_owner,
38716 diff -urN linux-orig/fs/gfs_locking/lock_dlm/thread.c linux-patched/fs/gfs_locking/lock_dlm/thread.c
38717 --- linux-orig/fs/gfs_locking/lock_dlm/thread.c 1969-12-31 18:00:00.000000000 -0600
38718 +++ linux-patched/fs/gfs_locking/lock_dlm/thread.c 2004-06-16 12:03:17.967822065 -0500
38720 +/******************************************************************************
38721 +*******************************************************************************
38723 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
38724 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
38726 +** This copyrighted material is made available to anyone wishing to use,
38727 +** modify, copy, or redistribute it subject to the terms and conditions
38728 +** of the GNU General Public License v.2.
38730 +*******************************************************************************
38731 +******************************************************************************/
38733 +#include "lock_dlm.h"
38736 + * Run in dlm_async thread
38740 + * queue_submit - add lock request to queue for dlm_async thread
38743 + * A lock placed on this queue is re-submitted to DLM as soon as
38744 + * dlm_async thread gets to it.
38747 +static void queue_submit(dlm_lock_t *lp)
38749 + dlm_t *dlm = lp->dlm;
38751 + spin_lock(&dlm->async_lock);
38752 + list_add_tail(&lp->slist, &dlm->submit);
38753 + set_bit(LFL_SLIST, &lp->flags);
38754 + spin_unlock(&dlm->async_lock);
38755 + wake_up(&dlm->wait);
38759 + * process_blocking - processing of blocking callback
38764 +static void process_blocking(dlm_lock_t *lp, int bast_mode)
38766 + dlm_t *dlm = lp->dlm;
38769 + switch (make_lmstate(bast_mode)) {
38770 + case LM_ST_EXCLUSIVE:
38771 + cb = LM_CB_NEED_E;
38773 + case LM_ST_DEFERRED:
38774 + cb = LM_CB_NEED_D;
38776 + case LM_ST_SHARED:
38777 + cb = LM_CB_NEED_S;
38780 + DLM_ASSERT(0, printk("unknown bast mode %u\n", lp->bast_mode););
38783 + dlm->fscb(dlm->fsdata, cb, &lp->lockname);
38787 + * process_complete - processing of completion callback for a lock request
38792 +static void process_complete(dlm_lock_t *lp)
38794 + dlm_t *dlm = lp->dlm;
38795 + struct lm_async_cb acb;
38796 + int16_t prev_mode = lp->cur;
38798 + memset(&acb, 0, sizeof(acb));
38801 + * This is an AST for an unlock.
38804 + if (test_and_clear_bit(LFL_DLM_UNLOCK, &lp->flags)) {
38806 + /* FIXME: Add an assertion to catch NOFAIL promotions from
38807 + * non-NL modes? */
38809 + if (lp->lksb.sb_status == -DLM_ECANCEL) {
38811 + /* lp->cur remains the same, is there anything to clear
38812 + * or reset to put this lp into an "ordinary" state? */
38814 + printk("lock_dlm: -DLM_ECANCEL num=%x,%"PRIx64"\n",
38815 + lp->lockname.ln_type, lp->lockname.ln_number);
38817 + DLM_ASSERT(lp->lksb.sb_status == -DLM_EUNLOCK,
38818 + printk("num=%x,%"PRIx64" status=%d\n",
38819 + lp->lockname.ln_type,
38820 + lp->lockname.ln_number,
38821 + lp->lksb.sb_status););
38822 + lp->cur = DLM_LOCK_IV;
38825 + complete(&lp->uast_wait);
38830 + * A canceled lock request. The lock was just taken off the delayed
38831 + * list and was never even submitted to dlm.
38834 + if (test_and_clear_bit(LFL_CANCEL, &lp->flags)) {
38835 + lp->req = lp->cur;
38836 + acb.lc_ret |= LM_OUT_CANCELED;
38841 + * An error occurred.
38844 + if (lp->lksb.sb_status) {
38845 + lp->req = lp->cur;
38846 + if (lp->cur == DLM_LOCK_IV)
38847 + lp->lksb.sb_lkid = 0;
38849 + if ((lp->lksb.sb_status == -EAGAIN) &&
38850 + (lp->lkf & DLM_LKF_NOQUEUE)) {
38851 + /* a "normal" error */
38853 + printk("lock_dlm: process_complete error id=%x "
38854 + "status=%d\n", lp->lksb.sb_lkid,
38855 + lp->lksb.sb_status);
38860 + * This is an AST for an EX->EX conversion for sync_lvb from GFS.
38863 + if (test_and_clear_bit(LFL_SYNC_LVB, &lp->flags)) {
38864 + complete(&lp->uast_wait);
38869 + * A lock has been demoted to NL because it initially completed during
38870 + * BLOCK_LOCKS. Now it must be requested in the originally requested
38874 + if (test_and_clear_bit(LFL_REREQUEST, &lp->flags)) {
38876 + DLM_ASSERT(lp->req == DLM_LOCK_NL,);
38877 + DLM_ASSERT(lp->prev_req > DLM_LOCK_NL,);
38879 + lp->cur = DLM_LOCK_NL;
38880 + lp->req = lp->prev_req;
38881 + lp->prev_req = DLM_LOCK_IV;
38882 + lp->lkf &= ~DLM_LKF_CONVDEADLK;
38883 + lp->lkf |= DLM_LKF_QUECVT;
38885 + set_bit(LFL_NOCACHE, &lp->flags);
38887 + if (test_bit(DFL_BLOCK_LOCKS, &dlm->flags) &&
38888 + !test_bit(LFL_NOBLOCK, &lp->flags))
38889 + queue_delayed(lp, QUEUE_LOCKS_BLOCKED);
38891 + queue_submit(lp);
38896 + * A request is granted during dlm recovery. It may be granted
38897 + * because the locks of a failed node were cleared. In that case,
38898 + * there may be inconsistent data beneath this lock and we must wait
38899 + * for recovery to complete to use it. When gfs recovery is done this
38900 + * granted lock will be converted to NL and then reacquired in this
38904 + if (test_bit(DFL_BLOCK_LOCKS, &dlm->flags) &&
38905 + !test_bit(LFL_NOBLOCK, &lp->flags) &&
38906 + lp->req != DLM_LOCK_NL) {
38908 + lp->cur = lp->req;
38909 + lp->prev_req = lp->req;
38910 + lp->req = DLM_LOCK_NL;
38911 + lp->lkf |= DLM_LKF_CONVERT;
38912 + lp->lkf &= ~DLM_LKF_CONVDEADLK;
38913 + lp->lkf &= ~DLM_LKF_QUECVT;
38915 + set_bit(LFL_REREQUEST, &lp->flags);
38916 + queue_submit(lp);
38921 + * DLM demoted the lock to NL before it was granted so GFS must be
38922 + * told it cannot cache data for this lock.
38925 + if (lp->lksb.sb_flags == DLM_SBF_DEMOTED)
38926 + set_bit(LFL_NOCACHE, &lp->flags);
38931 + * This is an internal lock_dlm lock used for managing JIDs.
38934 + if (test_bit(LFL_IDLOCK, &lp->flags)) {
38935 + clear_bit(LFL_NOBLOCK, &lp->flags);
38936 + lp->cur = lp->req;
38937 + complete(&lp->uast_wait);
38942 + * Normal completion of a lock request. Tell GFS it now has the lock.
38945 + clear_bit(LFL_NOBLOCK, &lp->flags);
38946 + lp->cur = lp->req;
38948 + acb.lc_name = lp->lockname;
38949 + acb.lc_ret |= make_lmstate(lp->cur);
38951 + if (!test_and_clear_bit(LFL_NOCACHE, &lp->flags) &&
38952 + (lp->cur > DLM_LOCK_NL) && (prev_mode > DLM_LOCK_NL))
38953 + acb.lc_ret |= LM_OUT_CACHEABLE;
38955 + dlm->fscb(dlm->fsdata, LM_CB_ASYNC, &acb);
38959 + * no_work - determine if there's work for the dlm_async thread
38962 + * Returns: 1 if no work, 0 otherwise
38965 +static __inline__ int no_work(dlm_t * dlm)
38969 + spin_lock(&dlm->async_lock);
38971 + ret = list_empty(&dlm->complete) &&
38972 + list_empty(&dlm->blocking) &&
38973 + list_empty(&dlm->submit) &&
38974 + list_empty(&dlm->starts) && !test_bit(DFL_MG_FINISH, &dlm->flags);
38976 + spin_unlock(&dlm->async_lock);
38982 + * dlm_async - thread for a variety of asynchronous processing
38985 + * Returns: 0 on success, -EXXX on failure
38988 +static int dlm_async(void *data)
38990 + dlm_t *dlm = (dlm_t *) data;
38991 + dlm_lock_t *lp = NULL;
38992 + dlm_start_t *ds = NULL;
38993 + uint8_t complete, blocking, submit, start, finish;
38994 + DECLARE_WAITQUEUE(wait, current);
38996 + daemonize("lock_dlm");
38997 + atomic_inc(&dlm->threads);
39000 + current->state = TASK_INTERRUPTIBLE;
39001 + add_wait_queue(&dlm->wait, &wait);
39002 + if (no_work(dlm))
39004 + remove_wait_queue(&dlm->wait, &wait);
39005 + current->state = TASK_RUNNING;
39007 + complete = blocking = submit = start = finish = 0;
39009 + spin_lock(&dlm->async_lock);
39011 + if (!list_empty(&dlm->complete)) {
39012 + lp = list_entry(dlm->complete.next, dlm_lock_t, clist);
39013 + list_del(&lp->clist);
39014 + clear_bit(LFL_CLIST, &lp->flags);
39016 + } else if (!list_empty(&dlm->blocking)) {
39017 + lp = list_entry(dlm->blocking.next, dlm_lock_t, blist);
39018 + list_del(&lp->blist);
39019 + clear_bit(LFL_BLIST, &lp->flags);
39020 + blocking = lp->bast_mode;
39021 + lp->bast_mode = 0;
39022 + } else if (!list_empty(&dlm->submit)) {
39023 + lp = list_entry(dlm->submit.next, dlm_lock_t, slist);
39024 + list_del(&lp->slist);
39025 + clear_bit(LFL_SLIST, &lp->flags);
39027 + } else if (!list_empty(&dlm->starts)) {
39028 + ds = list_entry(dlm->starts.next, dlm_start_t, list);
39029 + list_del(&ds->list);
39031 + } else if (test_and_clear_bit(DFL_MG_FINISH, &dlm->flags)) {
39035 + spin_unlock(&dlm->async_lock);
39038 + process_complete(lp);
39040 + else if (blocking)
39041 + process_blocking(lp, blocking);
39044 + process_submit(lp);
39047 + process_start(dlm, ds);
39050 + process_finish(dlm);
39054 + while (!test_bit(DFL_THREAD_STOP, &dlm->flags));
39056 + atomic_dec(&dlm->threads);
39061 + * init_async_thread
39064 + * Returns: 0 on success, -EXXX on failure
39067 +int init_async_thread(dlm_t * dlm)
39071 + clear_bit(DFL_THREAD_STOP, &dlm->flags);
39072 + atomic_set(&dlm->threads, 0);
39074 + error = kernel_thread(dlm_async, dlm, 0);
39078 + error = kernel_thread(dlm_async, dlm, 0);
39080 + release_async_thread(dlm);
39084 + while (atomic_read(&dlm->threads) != 2)
39090 + printk("lock_dlm: can't start async thread %d\n", error);
39095 + * release_async_thread
39100 +void release_async_thread(dlm_t * dlm)
39102 + set_bit(DFL_THREAD_STOP, &dlm->flags);
39103 + while (atomic_read(&dlm->threads)) {
39104 + wake_up(&dlm->wait);
39108 diff -urN linux-orig/fs/gfs_locking/lock_gulm/gio_wiretypes.h linux-patched/fs/gfs_locking/lock_gulm/gio_wiretypes.h
39109 --- linux-orig/fs/gfs_locking/lock_gulm/gio_wiretypes.h 1969-12-31 18:00:00.000000000 -0600
39110 +++ linux-patched/fs/gfs_locking/lock_gulm/gio_wiretypes.h 2004-06-16 12:03:21.956895230 -0500
39112 +/******************************************************************************
39113 +*******************************************************************************
39115 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
39116 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
39118 +** This copyrighted material is made available to anyone wishing to use,
39119 +** modify, copy, or redistribute it subject to the terms and conditions
39120 +** of the GNU General Public License v.2.
39122 +*******************************************************************************
39123 +******************************************************************************/
39124 +#ifndef __gio_wiretypes_h__
39125 +#define __gio_wiretypes_h__
39127 +/* an attempt to do something about tracking changes to the protocol over
39129 + * If I was really cute, this would be effectively a checksum of this file.
39131 +#define GIO_WIREPROT_VERS (0x67000010)
39133 +/*****************Error codes.
39134 + * everyone uses these same error codes.
39136 +#define gio_Err_Ok (0)
39137 +#define gio_Err_BadLogin (1001)
39138 +#define gio_Err_BadCluster (1003)
39139 +#define gio_Err_BadConfig (1004)
39140 +#define gio_Err_BadGeneration (1005)
39141 +#define gio_Err_BadWireProto (1019)
39143 +#define gio_Err_NotAllowed (1006)
39144 +#define gio_Err_Unknown_Cs (1007)
39145 +#define gio_Err_BadStateChg (1008)
39146 +#define gio_Err_MemoryIssues (1009)
39148 +#define gio_Err_PushQu (1010) /* client should never see this one */
39149 +#define gio_Err_TryFailed (1011)
39150 +#define gio_Err_AlreadyPend (1013)
39151 +#define gio_Err_Canceled (1015)
39153 +#define gio_Err_NoSuchFS (1016)
39154 +#define gio_Err_NoSuchJID (1017)
39155 +#define gio_Err_NoSuchName (1018)
39157 +/* next free error code: 1002 1012 1014 1020 */
39160 + * Error: just sort of a generic error code thing.
39162 + * uint32: opcode that this is in reply to. (can be zeros)
39163 + * uint32: error code
39165 +#define gulm_err_reply (0x67455252) /* gERR */
39167 +#define gulm_nop (0x674e4f50) /* gNOP */
39169 +/********************* Core *****************/
39173 + * uint32: proto version
39174 + * string: cluster ID
39175 + * string: My Name
39176 + * uint64: generation number
39177 + * uint32: config CRC
39181 + * uint64: generation number
39182 + * uint32: error code
39185 + * If I am the Master or Arbitrating and there are no errors, A
39186 + * serialization of the current nodelist follows. And a client or slave
39187 + * is connecting (not resources).
39189 + * logout request:
39191 + * string: node name
39192 + * uint8: S/P/A/M/R
39193 + * logout reply: Don't seem to use this....
39195 + * uint32: error code
39197 + * resource login request:
39199 + * uint32: proto version
39200 + * string: cluster ID
39201 + * string: resource name
39202 + * uint32: options
39203 + * login reply (gCL1) is sent in return.
39207 + * string: My Name
39210 + * uint32: error code
39212 + * Membership Request
39214 + * string: node name
39216 + * Membership update
39218 + * string: node name
39220 + * uint8: Current State
39222 + * Membership list request info.
39225 + * Membership list info.
39227 + * list_start_marker
39228 + * string: node name
39231 + * uint8: laststate
39232 + * uint8: mode (S/P/A/M/C)
39233 + * uint32: missed beats
39234 + * uint64: last beat
39235 + * uint64: delay avg
39236 + * uint64: max delay
39237 + * list_stop_marker
39239 + * Request Resource info
39242 + * Resource list info
39244 + * list_start_marker
39246 + * list_stop_marker
39248 + * Force node into Expired:
39250 + * string: node name
39252 + * Core state request:
39255 + * Core state changes:
39257 + * uint8: state (slave, pending, arbitrating, master)
39258 + * If state == Slave, then the next two will follow.
39260 + * string: MasterName
39262 + * Core shutdown req:
39265 + * Switch core from current state into Pending:
39269 +#define gulm_core_login_req (0x67434c00) /* gCL0 */
39270 +#define gulm_core_login_rpl (0x67434c01) /* gCL1 */
39271 +#define gulm_core_logout_req (0x67434c02) /* gCL2 */
39272 +#define gulm_core_logout_rpl (0x67434c03) /* gCL3 */
39273 +#define gulm_core_reslgn_req (0x67434c04) /* gCL4 */
39274 +#define gulm_core_beat_req (0x67434200) /* gCB0 */
39275 +#define gulm_core_beat_rpl (0x67434201) /* gCB1 */
39276 +#define gulm_core_mbr_req (0x67434d41) /* gCMA */
39277 +#define gulm_core_mbr_updt (0x67434d55) /* gCMU */
39278 +#define gulm_core_mbr_lstreq (0x67434d6c) /* gCMl */
39279 +#define gulm_core_mbr_lstrpl (0x67434d4c) /* gCML */
39280 +#define gulm_core_mbr_force (0x67434645) /* gCFE */
39281 +#define gulm_core_res_req (0x67435200) /* gCR0 */
39282 +#define gulm_core_res_list (0x67435201) /* gCR1 */
39283 +#define gulm_core_state_req (0x67435352) /* gCSR */
39284 +#define gulm_core_state_chgs (0x67435343) /* gCSC */
39285 +#define gulm_core_shutdown (0x67435344) /* gCSD */
39286 +#define gulm_core_forcepend (0x67435350) /* gCSP */
39288 +/* in the st field */
39289 +#define gio_Mbr_Logged_in (0x05)
39290 +#define gio_Mbr_Logged_out (0x06)
39291 +#define gio_Mbr_Expired (0x07)
39292 +#define gio_Mbr_Killed (0x08)
39293 +#define gio_Mbr_OM_lgin (0x09)
39295 +/* in the ama field */
39296 +#define gio_Mbr_ama_Slave (0x01)
39297 +#define gio_Mbr_ama_Master (0x02)
39298 +#define gio_Mbr_ama_Pending (0x03)
39299 +#define gio_Mbr_ama_Arbitrating (0x04)
39300 +#define gio_Mbr_ama_Resource (0x05)
39301 +#define gio_Mbr_ama_Client (0x06)
39302 +/* the Client entry is ONLY for mode tracking.
39303 + * nodelist reply is the only place it is used.
39306 +/* options that affect behaviors on services. (resources) */
39307 +#define gulm_svc_opt_important (0x00000001)
39309 +/********************* Info Traffic *****************
39311 + * Note that for many of these, they can be sent to all of the servers and
39312 + * will get sane replies. Some of these can only be sent to specific
39324 + * The stats reply is a set of string pairs. This way the server can send
39325 + * whatever things it wants, and the same client code will work for
39330 + * string: verb flags (with -/+) to [un]set
39332 + * We don't bother with a reply for this. If the server got it, it works.
39333 + * If it didn't, it cannot send an error back anyways.
39338 + * Tells the server to close this connection cleanly. We're done with
39339 + * it. This is *not* the same as logging out. You must login before you
39340 + * can logout. And many commands sent from gulm_tool happen without
39341 + * logging in. These commands would be useful for clients in many cases,
39342 + * so I don't want to put a close at the end of them, but if I don't,
39343 + * there will be error messages printed on the console when gulm_tool
39345 + * So we need a way to close a connection cleanly that has not been
39348 + * request slave list:
39350 + * slave list reply:
39354 + * uint32: poller idx
39357 +#define gulm_info_stats_req (0x67495300) /* gIS0 */
39358 +#define gulm_info_stats_rpl (0x67495301) /* gIS1 */
39359 +#define gulm_info_set_verbosity (0x67495600) /* gIV0 */
39360 +#define gulm_socket_close (0x67534300) /* gSC0 */
39361 +#define gulm_info_slave_list_req (0x67494c00) /* gIL0 */
39362 +#define gulm_info_slave_list_rpl (0x67494c01) /* gIL1 */
39364 +/********************* Lock Traffic *****************
39365 + * All lock traffic.
39369 + * uint32: proto version
39370 + * string: node name
39371 + * uint8: Client/Slave
39374 + * uint32: error code
39375 + * uint8: Slave/Master
39376 + * xdr of current lock state if no errors and master sending reply
39377 + * and you're a slave.
39384 + * select lockspace:
39386 + * raw: usually just four bytes for lockspace name.
39387 + * but can be most anything.
39394 + * raw: lvb -- Only exists if hasLVB flag is true.
39400 + * uint32: error code
39401 + * raw: lvb -- Only exists if hasLVB flag is true.
39403 + * lock state update:
39405 + * string: node name
39409 + * raw: lvb -- Only exists if hasLVB flag is true.
39415 + * raw: lvb -- Only exists if action is SyncLVB
39420 + * uint32: error code
39424 + * string: node name
39427 + * raw: lvb -- Only exists if action is SyncLVB
39429 + * Slave Update Rply: -- for both actions and requests.
39433 + * Drop lock Callback:
39438 + * Drop all locks callback: This is the highwater locks thing
39441 + * Drop expired locks:
39443 + * string: node name if NULL, then drop all exp for mask.
39444 + * raw: keymask if keymask & key == key, then dropexp on this lock.
39450 + * list start mark
39451 + * uint8: key length
39454 + * uint8: lvb length
39455 + * if lvb length > 0, raw: LVB
39456 + * uint32: Holder count
39457 + * list start mark
39458 + * string: holders
39460 + * uint32: LVB holder count
39461 + * list start mark
39462 + * string: LVB Holders
39464 + * uint32: Expired holder count
39465 + * list start mark
39466 + * string: ExpHolders
39471 +#define gulm_lock_login_req (0x674C4C00) /* gLL0 */
39472 +#define gulm_lock_login_rpl (0x674C4C01) /* gLL1 */
39473 +#define gulm_lock_logout_req (0x674C4C02) /* gLL2 */
39474 +#define gulm_lock_logout_rpl (0x674C4C03) /* gLL3 */
39475 +#define gulm_lock_sel_lckspc (0x674C5300) /* gLS0 */
39476 +#define gulm_lock_state_req (0x674C5200) /* gLR0 */
39477 +#define gulm_lock_state_rpl (0x674C5201) /* gLR1 */
39478 +#define gulm_lock_state_updt (0x674C5255) /* gLRU */
39479 +#define gulm_lock_action_req (0x674C4100) /* gLA0 */
39480 +#define gulm_lock_action_rpl (0x674C4101) /* gLA1 */
39481 +#define gulm_lock_action_updt (0x674C4155) /* gLAU */
39482 +#define gulm_lock_update_rpl (0x674c5552) /* gLUR */
39483 +#define gulm_lock_cb_state (0x674C4300) /* gLC0 */
39484 +#define gulm_lock_cb_dropall (0x674C4302) /* gLC2 */
39485 +#define gulm_lock_drop_exp (0x674C454F) /* gLEO */
39486 +#define gulm_lock_dump_req (0x674c4400) /* gLD0 */
39487 +#define gulm_lock_dump_rpl (0x674c4401) /* gLD1 */
39488 +#define gulm_lock_rerunqueues (0x674c5152) /* gLQR */
39490 +/* marks for the login */
39491 +#define gio_lck_st_Slave (0x00)
39492 +#define gio_lck_st_Client (0x01)
39494 +/* state change requests */
39495 +#define gio_lck_st_Unlock (0x00)
39496 +#define gio_lck_st_Exclusive (0x01)
39497 +#define gio_lck_st_Deferred (0x02)
39498 +#define gio_lck_st_Shared (0x03)
39500 +#define gio_lck_st_Cancel (0x09)
39501 +#define gio_lck_st_HoldLVB (0x0b)
39502 +#define gio_lck_st_UnHoldLVB (0x0c)
39503 +#define gio_lck_st_SyncLVB (0x0d)
39506 +#define gio_lck_fg_Do_CB (0x00000001)
39507 +#define gio_lck_fg_Try (0x00000002)
39508 +#define gio_lck_fg_Any (0x00000004)
39509 +#define gio_lck_fg_NoExp (0x00000008)
39510 +#define gio_lck_fg_hasLVB (0x00000010)
39511 +#define gio_lck_fg_Cachable (0x00000020)
39512 +#define gio_lck_fg_Piority (0x00000040)
39514 +#endif /*__gio_wiretypes_h__*/
39515 +/* vim: set ai cin et sw=3 ts=3 : */
39516 diff -urN linux-orig/fs/gfs_locking/lock_gulm/gulm.h linux-patched/fs/gfs_locking/lock_gulm/gulm.h
39517 --- linux-orig/fs/gfs_locking/lock_gulm/gulm.h 1969-12-31 18:00:00.000000000 -0600
39518 +++ linux-patched/fs/gfs_locking/lock_gulm/gulm.h 2004-06-16 12:03:21.957894998 -0500
39520 +/******************************************************************************
39521 +*******************************************************************************
39523 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
39524 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
39526 +** This copyrighted material is made available to anyone wishing to use,
39527 +** modify, copy, or redistribute it subject to the terms and conditions
39528 +** of the GNU General Public License v.2.
39530 +*******************************************************************************
39531 +******************************************************************************/
39533 +#ifndef GULM_DOT_H
39534 +#define GULM_DOT_H
39536 +#define GULM_RELEASE_NAME "v6.0.0"
39538 +#ifdef MODVERSIONS
39539 +#include <linux/modversions.h>
39540 +#endif /* MODVERSIONS */
39541 +#include <linux/module.h>
39542 +#include <linux/slab.h>
39543 +#include <linux/vmalloc.h>
39544 +#include <asm/uaccess.h>
39545 +#include <linux/spinlock.h>
39546 +#include <asm/atomic.h>
39547 +#include <linux/config.h>
39548 +#include <linux/version.h>
39549 +#include <linux/smp_lock.h>
39550 +#include <linux/ctype.h>
39551 +#include <linux/string.h>
39561 +#if (BITS_PER_LONG == 64)
39562 +#define PRIu64 "lu"
39563 +#define PRId64 "ld"
39564 +#define PRIo64 "lo"
39565 +#define PRIx64 "lx"
39566 +#define PRIX64 "lX"
39567 +#define SCNu64 "lu"
39568 +#define SCNd64 "ld"
39569 +#define SCNo64 "lo"
39570 +#define SCNx64 "lx"
39571 +#define SCNX64 "lX"
39573 +#define PRIu64 "Lu"
39574 +#define PRId64 "Ld"
39575 +#define PRIo64 "Lo"
39576 +#define PRIx64 "Lx"
39577 +#define PRIX64 "LX"
39578 +#define SCNu64 "Lu"
39579 +#define SCNd64 "Ld"
39580 +#define SCNo64 "Lo"
39581 +#define SCNx64 "Lx"
39582 +#define SCNX64 "LX"
39585 +#include <linux/list.h>
39588 +#define MAX(a,b) ((a>b)?a:b)
39591 +#define MIN(a,b) ((a<b)?a:b)
39593 +/* Extern Macro */
39596 +#define EXTERN extern
39601 +#define INIT(X) =X
39604 +/* Static Macro */
39605 +#ifndef DEBUG_SYMBOLS
39606 +#define STATIC static
39611 +/* Divide x by y. Round up if there is a remainder. */
39612 +#define DIV_RU(x, y) (((x) + (y) - 1) / (y))
39614 +#include <linux/lm_interface.h>
39616 +#include "gulm_prints.h"
39618 +#include "libgulm.h"
39620 +#include "handler.h"
39622 +/* Some fixed length constants.
39623 + * Some of these should be made dynamic in size in the future.
39625 +#define GIO_KEY_SIZE (46)
39626 +#define GIO_LVB_SIZE (32)
39627 +#define GIO_NAME_SIZE (32)
39628 +#define GIO_NAME_LEN (GIO_NAME_SIZE-1)
39630 +/* What we know about this filesystem */
39631 +struct gulm_fs_s {
39632 + struct list_head fs_list;
39633 + char fs_name[GIO_NAME_SIZE]; /* lock table name */
39635 + lm_callback_t cb; /* file system callback function */
39636 + lm_fsdata_t *fsdata; /* private file system data */
39638 + callback_qu_t cq;
39641 + uint32_t lvb_size;
39643 + struct semaphore get_lock; /* I am not 100% sure this is needed.
39644 + * But it only hurts performance,
39645 + * not correctness if it is
39646 + * useless. Sometime post52, need
39647 + * to investigate.
39650 + /* Stuff for the first mounter lock and state */
39651 + int firstmounting;
39652 + /* the recovery done func needs to behave slightly differently when we are
39653 + * the first node in an fs.
39656 + void *mountlock; /* this lock holds the Firstmounter state of the FS */
39657 + /* this is because all lock traffic is async, and really at this point
39658 + * in time we want a sync behavior, so I'm left with doing something to
39661 + * this works, but it is crufty, but I don't want to build a huge
39662 + * queuing system for one lock that we touch twice at the beginning and
39663 + * once on the end.
39665 + * I should change the firstmounter lock to work like the journal locks
39666 + * and the node locks do. Things are a lot cleaner now with the libgulm
39667 + * interface than before. (when the firstmounter lock code was written)
39669 + struct completion sleep;
39671 + /* Stuff for JID mapping locks */
39672 + uint32_t JIDcount; /* how many JID locks are there. */
39674 +typedef struct gulm_fs_s gulm_fs_t;
39676 +/* What we know about each locktable.
39677 + * only one now-a-days. (the LTPX)
39679 +typedef struct lock_table_s {
39680 + uint32_t magic_one;
39683 + struct task_struct *recver_task;
39684 + struct completion startup;
39685 + struct semaphore sender;
39687 + struct task_struct *sender_task;
39688 + wait_queue_head_t send_wchan;
39689 + spinlock_t queue_sender;
39690 + struct list_head to_be_sent;
39693 + spinlock_t *hshlk;
39694 + struct list_head *lkhsh;
39697 + * it may be wise to make some of these into atomic numbers.
39698 + * or something. or not.
39700 + uint32_t locks_total;
39701 + uint32_t locks_unl;
39702 + uint32_t locks_exl;
39703 + uint32_t locks_shd;
39704 + uint32_t locks_dfr;
39705 + uint32_t locks_lvbs;
39706 + atomic_t locks_pending;
39707 + /* cannot count expired here. clients don't know this */
39709 + uint32_t lops; /* just incr on each op */
39713 +typedef struct gulm_cm_s {
39714 + uint8_t myName[64];
39715 + uint8_t clusterID[256]; /* doesn't need to be 256. */
39716 + uint8_t loaded; /* True|False whether we grabbed the config data */
39719 + uint32_t handler_threads; /* how many to have */
39720 + uint32_t verbosity;
39722 + uint64_t GenerationID;
39724 + lock_table_t ltpx;
39726 + gulm_interface_p hookup;
39730 +/* things about each lock. */
39731 +typedef struct gulm_lock_s {
39732 + struct list_head gl_list;
39735 + uint32_t magic_one;
39736 + gulm_fs_t *fs; /* which filesystem we belong to. */
39737 + uint8_t key[GIO_KEY_SIZE];
39739 + uint8_t last_suc_state; /* last state we successfully got. */
39742 + /* this is true when there is a lock request sent out for this lock.
39743 + * All it really means is that if we've lost the master, and reconnect
39744 + * to another, this lock needs to have its request resent.
39746 + * This now has two stages. Since a lock could be pending, but still in
39747 + * the send queue. So we don't want to resend requests that haven't
39750 + * we don't handle the master losses here any more. LTPX does that for
39751 + * us. Should consider removing the duplicated code then.
39753 + int actuallypending; /* may need to be atomic */
39754 + int in_to_be_sent;
39756 + enum { glck_nothing, glck_action, glck_state } req_type;
39757 + /* these three for the lock req. We save them here so we can rebuild
39758 + * the lock request if there was a server failover. (?still needed?)
39760 + unsigned int cur_state;
39761 + unsigned int req_state;
39762 + unsigned int flags;
39764 + /* these three for actions. First is the action, next is result, last is
39765 + * what threads wait on for the reply.
39768 + int result; /* ok, both are using this. */
39769 + struct completion actsleep;
39773 +/*****************************************************************************/
39774 +/* cross pollenate prototypes */
39776 +/* from gulm_lt.c */
39777 +void lt_logout (void);
39778 +int lt_login (void);
39779 +int get_mount_lock (gulm_fs_t * fs, int *first);
39780 +int downgrade_mount_lock (gulm_fs_t * fs);
39781 +int drop_mount_lock (gulm_fs_t * fs);
39782 +int send_drop_all_exp (lock_table_t * lt);
39783 +int send_drop_exp (gulm_fs_t * fs, lock_table_t * lt, char *name);
39785 +/*from gulm_core.c */
39786 +void cm_logout (void);
39787 +int cm_login (void);
39788 +void delete_ipnames (struct list_head *namelist);
39790 +/* from gulm_fs.c */
39791 +void init_gulm_fs (void);
39792 +void request_journal_replay (uint8_t * name);
39793 +void passup_droplocks (void);
39794 +gulm_fs_t *get_fs_by_name (uint8_t * name);
39795 +void dump_internal_lists (void);
39796 +void gulm_recovery_done (lm_lockspace_t * lockspace,
39797 + unsigned int jid, unsigned int message);
39798 +void gulm_unmount (lm_lockspace_t * lockspace);
39799 +void gulm_others_may_mount (lm_lockspace_t * lockspace);
39800 +int gulm_mount (char *table_name, char *host_data,
39801 + lm_callback_t cb, lm_fsdata_t * fsdata,
39802 + unsigned int min_lvb_size, struct lm_lockstruct *lockstruct);
39804 +extern struct lm_lockops gulm_ops;
39806 +#endif /* GULM_DOT_H */
39807 +/* vim: set ai cin noet sw=8 ts=8 : */
39808 diff -urN linux-orig/fs/gfs_locking/lock_gulm/gulm_core.c linux-patched/fs/gfs_locking/lock_gulm/gulm_core.c
39809 --- linux-orig/fs/gfs_locking/lock_gulm/gulm_core.c 1969-12-31 18:00:00.000000000 -0600
39810 +++ linux-patched/fs/gfs_locking/lock_gulm/gulm_core.c 2004-06-16 12:03:21.957894998 -0500
39812 +/******************************************************************************
39813 +*******************************************************************************
39815 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
39816 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
39818 +** This copyrighted material is made available to anyone wishing to use,
39819 +** modify, copy, or redistribute it subject to the terms and conditions
39820 +** of the GNU General Public License v.2.
39822 +*******************************************************************************
39823 +******************************************************************************/
39827 +#include <linux/kernel.h>
39828 +#include <linux/fs.h>
39829 +#include <linux/slab.h>
39830 +#include <linux/file.h>
39831 +#define __KERNEL_SYSCALLS__
39832 +#include <linux/unistd.h>
39835 +#include "utils_tostr.h"
39837 +extern gulm_cm_t gulm_cm;
39839 +/* private vars. */
39840 +int cm_thd_running;
39841 +struct completion cm_thd_startup;
39842 +struct task_struct *cm_thd_task;
39847 +gulm_core_login_reply (void *misc, uint64_t gen, uint32_t error,
39848 + uint32_t rank, uint8_t corestate)
39850 + if (error != 0) {
39851 + log_err ("Core returned error %d:%s.\n", error,
39852 + gio_Err_to_str (error));
39853 + cm_thd_running = FALSE;
39857 + if( gulm_cm.GenerationID != 0 ) {
39858 + GULM_ASSERT(gulm_cm.GenerationID == gen,
39859 + printk("us: %"PRIu64" them: %"PRIu64"\n",
39860 + gulm_cm.GenerationID,gen);
39863 + gulm_cm.GenerationID = gen;
39865 + error = lt_login ();
39866 + if (error != 0) {
39867 + log_err ("lt_login failed. %d\n", error);
39868 + lg_core_logout (gulm_cm.hookup); /* XXX is this safe? */
39872 + log_msg (lgm_Network2, "Logged into local core.\n");
39878 + * gulm_core_logout_reply -
39885 +gulm_core_logout_reply (void *misc)
39887 + log_msg (lgm_Network2, "Logged out of local core.\n");
39894 +gulm_core_nodechange (void *misc, char *nodename,
39895 + struct in6_addr *nodeip, uint8_t nodestate)
39897 + if (nodestate == lg_core_Fenced) {
39898 + request_journal_replay (nodename);
39900 + /* if me and state is logout, Need to close out things if we can.
39902 + if (gulm_cm.starts && nodestate == lg_core_Logged_out &&
39903 + strcmp(gulm_cm.myName, nodename) == 0 ) {
39905 + cm_thd_running = FALSE;
39906 + lg_core_logout (gulm_cm.hookup);
39912 +int gulm_core_statechange (void *misc, uint8_t corestate,
39913 + struct in6_addr *masterip, char *mastername)
39915 + int *cst = (int *)misc;
39916 + if( misc != NULL ) {
39917 + if( corestate != lg_core_Slave &&
39918 + corestate != lg_core_Master ) {
39930 +gulm_core_error (void *misc, uint32_t err)
39932 + log_err ("Got error code %d %#x back fome some reason!\n", err, err);
39936 +static lg_core_callbacks_t core_cb = {
39937 + login_reply:gulm_core_login_reply,
39938 + logout_reply:gulm_core_logout_reply,
39939 + nodechange:gulm_core_nodechange,
39940 + statechange:gulm_core_statechange,
39941 + error:gulm_core_error
39945 + * cm_io_recving_thread -
39952 +cm_io_recving_thread (void *data)
39956 + daemonize ("gulm_res_recvd");
39957 + cm_thd_task = current;
39958 + complete (&cm_thd_startup);
39960 + while (cm_thd_running) {
39961 + err = lg_core_handle_messages (gulm_cm.hookup, &core_cb, NULL);
39964 + ("Got an error in gulm_res_recvd err: %d\n", err);
39965 + if (!cm_thd_running)
39968 + * Pause a bit, then try to log back into the local
39969 + * lock_gulmd. Keep doing this until an outside force
39970 + * stops us. (which I don't think there is any at this
39971 + * point. forceunmount would be one, if we ever do
39974 + * If we are still in the gulm_mount() function, we
39975 + * should not retry. We should just exit.
39977 + current->state = TASK_INTERRUPTIBLE;
39978 + schedule_timeout (3 * HZ);
39981 + lg_core_login (gulm_cm.hookup, TRUE)) != 0) {
39983 + ("Got a %d trying to login to lock_gulmd. Is it running?\n",
39985 + current->state = TASK_INTERRUPTIBLE;
39986 + schedule_timeout (3 * HZ);
39989 + } /* while( gulm_cm.cm_thd_running ) */
39991 + complete (&cm_thd_startup);
40002 + if (cm_thd_running) {
40003 + cm_thd_running = FALSE;
40004 + lg_core_logout (gulm_cm.hookup);
40006 + /* wait for thread to finish */
40007 + wait_for_completion (&cm_thd_startup);
40023 + cm_thd_running = FALSE;
40024 + init_completion (&cm_thd_startup);
40026 + err = lg_core_login (gulm_cm.hookup, TRUE);
40029 + ("Got a %d trying to login to lock_gulmd. Is it running?\n",
40033 + /* handle login reply. which will start the lt thread. */
40034 + err = lg_core_handle_messages (gulm_cm.hookup, &core_cb, NULL);
40039 + /* do not pass go until Slave(client) or Master */
40041 + lg_core_corestate(gulm_cm.hookup);
40042 + err = lg_core_handle_messages (gulm_cm.hookup, &core_cb, &cst);
40047 + current->state = TASK_INTERRUPTIBLE;
40048 + schedule_timeout (3 * HZ);
40049 + /* if interrupted, exit */
40053 + /* start recver thread. */
40054 + cm_thd_running = TRUE;
40055 + err = kernel_thread (cm_io_recving_thread, NULL, 0);
40057 + log_err ("Failed to start gulm_res_recvd. (%d)\n", err);
40060 + wait_for_completion (&cm_thd_startup);
40066 +/* vim: set ai cin noet sw=8 ts=8 : */
40067 diff -urN linux-orig/fs/gfs_locking/lock_gulm/gulm_fs.c linux-patched/fs/gfs_locking/lock_gulm/gulm_fs.c
40068 --- linux-orig/fs/gfs_locking/lock_gulm/gulm_fs.c 1969-12-31 18:00:00.000000000 -0600
40069 +++ linux-patched/fs/gfs_locking/lock_gulm/gulm_fs.c 2004-06-16 12:03:21.957894998 -0500
40071 +/******************************************************************************
40072 +*******************************************************************************
40074 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
40075 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
40077 +** This copyrighted material is made available to anyone wishing to use,
40078 +** modify, copy, or redistribute it subject to the terms and conditions
40079 +** of the GNU General Public License v.2.
40081 +*******************************************************************************
40082 +******************************************************************************/
40086 +#include <linux/kernel.h>
40087 +#include <linux/fs.h>
40088 +#include <linux/slab.h>
40089 +#include <linux/file.h>
40090 +#define __KERNEL_SYSCALLS__
40091 +#include <linux/unistd.h>
40094 +#include "load_info.h"
40095 +#include "handler.h"
40096 +#include "gulm_procinfo.h"
40097 +#include "gulm_jid.h"
40099 +/* things about myself */
40100 +extern gulm_cm_t gulm_cm;
40102 +/* globals for this file.*/
40103 +uint32_t filesystems_count = 0;
40104 +LIST_HEAD (filesystems_list);
40105 +struct semaphore filesystem_lck; /* we use a sema instead of a spin here because
40106 + * all of the interruptible things we do inside
40108 + * If i stop doing nasty things within this it doesn't need
40111 +struct semaphore start_stop_lock;
40112 +atomic_t start_stop_cnt;
40118 +init_gulm_fs (void)
40120 + init_MUTEX (&filesystem_lck);
40121 + init_MUTEX (&start_stop_lock);
40122 + atomic_set (&start_stop_cnt, 0);
40125 +/*****************************************************************************/
40132 +request_journal_replay_per_fs (void *d)
40134 + struct rjrpf_s *rf = (struct rjrpf_s *) d;
40136 + unsigned int ujid;
40138 + /* lookup jid <=> name mapping */
40139 + if (find_jid_by_name_and_mark_replay (rf->fs, rf->name, &jid) != 0) {
40140 + log_msg (lgm_JIDMap,
40141 + "In fs (%s), no jid for name (%s) was found.\n",
40142 + rf->fs->fs_name, rf->name);
40144 + log_msg (lgm_JIDMap,
40145 + "In fs (%s), jid %d was found for name (%s).\n",
40146 + rf->fs->fs_name, jid, rf->name);
40148 + /* all that the replay journal call back into gfs does is malloc
40149 + * some memory and add it to a list. So we really don't need to
40150 + * queue that action. Since that is what gfs is doing.
40152 + * This will need to change if gfs changes.
40154 + * Basically, we assume that the callback is non-blocking.
40157 + rf->fs->cb (rf->fs->fsdata, LM_CB_NEED_RECOVERY, &ujid);
40160 + kfree (rf->name);
40166 + * request_journal_replay - give a journal replay request to mounted filesystems
40167 + * @name: < the name of the node that died.
40173 +request_journal_replay (uint8_t * name)
40175 + struct list_head *tmp;
40177 + struct rjrpf_s *rf;
40179 + log_msg (lgm_Always, "Checking for journals for node \"%s\"\n",
40182 + down (&filesystem_lck);
40184 + list_for_each (tmp, &filesystems_list) {
40185 + fs = list_entry (tmp, gulm_fs_t, fs_list);
40187 + /* we don't want to process replay requests when we are
40188 + * still in the first mounter state. All the journals are
40189 + * getting replayed anyways, and there could be some issue
40190 + * with stuff happening twice.
40192 + if (fs->firstmounting)
40195 + /* due to the way the new jid mapping code works, we had to
40196 + * move it out of here.
40199 + rf = kmalloc (sizeof (struct rjrpf_s), GFP_KERNEL);
40200 + GULM_ASSERT (rf != NULL,);
40203 + rf->name = kmalloc (strlen (name) + 1, GFP_KERNEL);
40204 + GULM_ASSERT (rf->name != NULL,);
40205 + memcpy (rf->name, name, strlen (name) + 1);
40207 + qu_function_call (&fs->cq, request_journal_replay_per_fs, rf);
40210 + up (&filesystem_lck);
40214 + * passup_droplocks -
40217 +passup_droplocks (void)
40219 + struct list_head *tmp;
40221 + down (&filesystem_lck);
40222 + list_for_each (tmp, &filesystems_list) {
40223 + fs = list_entry (tmp, gulm_fs_t, fs_list);
40224 + qu_drop_req (&fs->cq, fs->cb, fs->fsdata, LM_CB_DROPLOCKS, 0,
40226 + /* If this decides to block someday, we need to change this function.
40229 + up (&filesystem_lck);
40233 + * dump_internal_lists -
40237 +dump_internal_lists (void)
40239 + struct list_head *tmp;
40241 + down (&filesystem_lck);
40242 + list_for_each (tmp, &filesystems_list) {
40243 + fs = list_entry (tmp, gulm_fs_t, fs_list);
40244 + log_msg (lgm_Always, "Handler queue for %s\n", fs->fs_name);
40245 + display_handler_queue (&fs->cq);
40246 + /* other lists? */
40248 + up (&filesystem_lck);
40252 + * get_fs_by_name -
40256 + * Returns: gulm_fs_t
40259 +get_fs_by_name (uint8_t * name)
40261 + struct list_head *tmp;
40262 + gulm_fs_t *fs = NULL;
40263 + down (&filesystem_lck);
40264 + list_for_each (tmp, &filesystems_list) {
40265 + fs = list_entry (tmp, gulm_fs_t, fs_list);
40266 + if (strcmp (name, fs->fs_name) == 0) {
40267 + up (&filesystem_lck);
40271 + up (&filesystem_lck);
40275 +/*****************************************************************************/
40280 + * quick check to see if there was leaking
40281 + * should I panic on these? or just complain?
40286 +clear_locks (void)
40289 + lock_table_t *lt = &gulm_cm.ltpx;
40291 + for (i = 0; i < lt->hashbuckets; i++) {
40292 + struct list_head *lcktmp, *lckfoo;
40293 + spin_lock (<->hshlk[i]);
40294 + list_for_each_safe (lcktmp, lckfoo, <->lkhsh[i]) {
40295 + gulm_lock_t *lck = NULL;
40296 + lck = list_entry (lcktmp, gulm_lock_t, gl_list);
40297 + /* need to release it. umm, should any even exist? */
40298 + log_err ("AH! Rogue lock buffer! refcount:%d\n",
40299 + atomic_read (&lck->count));
40302 + log_err ("AH! Rogue lock buffer with LVB!\n");
40303 + kfree (lck->lvb);
40306 + list_del (lcktmp);
40310 + spin_unlock (<->hshlk[i]);
40312 + kfree (lt->hshlk);
40313 + lt->hshlk = NULL;
40314 + kfree (lt->lkhsh);
40315 + lt->lkhsh = NULL;
40318 +/*****************************************************************************/
40320 + * start_gulm_threads -
40327 +start_gulm_threads (char *csnm, char *host_data)
40331 + down (&start_stop_lock);
40332 + atomic_inc (&start_stop_cnt);
40333 + if (atomic_read (&start_stop_cnt) == 1) {
40334 + /* first one. get stuff going */
40335 + strncpy (gulm_cm.clusterID, csnm, 255);
40336 + gulm_cm.clusterID[255] = '\0';
40338 + error = lg_initialize (&gulm_cm.hookup, gulm_cm.clusterID,
40339 + "GFS Kernel Interface");
40340 + if (error != 0) {
40341 + log_err ("lg_initialize failed, %d\n", error);
40344 + gulm_cm.starts = TRUE;
40346 + error = load_info (host_data);
40347 + if (error != 0) {
40348 + log_err ("load_info failed. %d\n", error);
40354 + error = cm_login ();
40355 + if (error != 0) {
40356 + log_err ("cm_login failed. %d\n", error);
40360 + /* lt_login() is called after the success packet for cm_login()
40365 + up (&start_stop_lock);
40370 + * stop_gulm_threads -
40373 +stop_gulm_threads (void)
40375 + down (&start_stop_lock);
40376 + atomic_dec (&start_stop_cnt);
40377 + if (atomic_read (&start_stop_cnt) == 0) {
40378 + /* last one, put it all away. */
40382 + lg_release (gulm_cm.hookup);
40383 + gulm_cm.hookup = NULL;
40384 + gulm_cm.loaded = FALSE;
40385 + gulm_cm.GenerationID = 0;
40387 + up (&start_stop_lock);
40390 +/*****************************************************************************/
40394 + * @table_name: clusterID:FS_Name
40396 + * @cb: GFS callback function
40397 + * @fsdata: opaque GFS handle
40398 + * @lockstruct: the structure of crap to fill in
40400 + * Returns: 0 on success, -EXXX on failure
40403 +gulm_mount (char *table_name, char *host_data,
40404 + lm_callback_t cb, lm_fsdata_t * fsdata,
40405 + unsigned int min_lvb_size, struct lm_lockstruct *lockstruct)
40408 + char work[256], *tbln;
40411 + struct list_head *lltmp;
40413 + strncpy (work, table_name, 256);
40415 + tbln = strstr (work, ":");
40416 + if (tbln == NULL) {
40418 + ("Malformed table name. Couldn't find separator ':' between "
40419 + "clusterID and lockspace name.\n");
40425 + /* make sure that the cluster name exists. */
40426 + if (strlen (work) <= 0) {
40427 + log_err ("Cluster name \"%s\" is too short.\n", work);
40431 + if (strlen (work) > 16) {
40432 + log_err ("Cluster name \"%s\" is too long.\n", work);
40437 + /* the second one is an artifact of the way I use the name.
40438 + * A better fix to this will happen when I actually get dynamic key
40439 + * lengths working.
40441 + if (strlen (tbln) > MIN (GIO_NAME_LEN, (GIO_KEY_SIZE - 13))) {
40443 + ("Warning! lockspace name (%s) is longer than %d chars!\n",
40444 + tbln, MIN (GIO_NAME_LEN, (GIO_KEY_SIZE - 13)));
40448 + if (strlen (tbln) <= 0) {
40449 + log_err ("Table name \"%s\" is too short.\n", tbln);
40454 + /* Check to make sure this lock table isn't already being used */
40455 + down (&filesystem_lck);
40456 + list_for_each (lltmp, &filesystems_list) {
40457 + gulm = list_entry (lltmp, gulm_fs_t, fs_list);
40458 + if (!strncmp (gulm->fs_name, tbln, GIO_NAME_LEN)) {
40459 + log_err ("\"%s\" is already in use\n", tbln);
40461 + up (&filesystem_lck);
40465 + up (&filesystem_lck);
40467 + /* Set up our main structure */
40469 + gulm = kmalloc (sizeof (gulm_fs_t), GFP_KERNEL);
40471 + log_err ("out of memory\n");
40475 + memset (gulm, 0, sizeof (gulm_fs_t));
40477 + INIT_LIST_HEAD (&gulm->fs_list);
40479 + strncpy (gulm->fs_name, tbln, GIO_NAME_LEN);
40481 + gulm->fsdata = fsdata;
40482 + gulm->lvb_size = min_lvb_size;
40483 + init_completion (&gulm->sleep);
40484 + init_MUTEX (&gulm->get_lock);
40486 + if ((error = start_gulm_threads (work, host_data)) != 0) {
40487 + log_err ("Got a %d trying to start the threads.\n", error);
40488 + goto fail_free_gulm;
40492 + start_callback_qu (&gulm->cq, gulm_cm.handler_threads)) < 0) {
40493 + log_err ("fsid=%s: Failed to start the callback handler.\n",
40495 + goto fail_free_gulm;
40498 + /* the mount lock HAS to be the first thing done in the LTs for this fs. */
40499 + error = get_mount_lock (gulm, &first);
40500 + if (error != 0) {
40502 + ("fsid=%s: Error %d while trying to get the mount lock\n",
40503 + gulm->fs_name, error);
40504 + goto fail_callback;
40507 + jid_lockstate_reserve (gulm, first);
40508 + jid_fs_init (gulm);
40509 + get_journalID (gulm);
40511 + /* things act a bit different until the first mounter is finished.
40514 + gulm->firstmounting = TRUE;
40517 + down (&filesystem_lck);
40518 + list_add (&gulm->fs_list, &filesystems_list);
40519 + filesystems_count++;
40520 + up (&filesystem_lck);
40522 + log_msg (lgm_JIDMap, "fsid=%s: We will be using jid %d\n",
40523 + gulm->fs_name, gulm->fsJID);
40525 + if (add_to_proc (gulm) != 0) {
40526 + /* ignored for now */
40529 + lockstruct->ls_jid = gulm->fsJID;
40530 + lockstruct->ls_first = first;
40531 + lockstruct->ls_lvb_size = gulm->lvb_size;
40532 + lockstruct->ls_lockspace = gulm;
40533 + lockstruct->ls_ops = &gulm_ops;
40534 +#ifdef USE_SYNC_LOCKING
40535 + lockstruct->ls_flags = 0;
40537 + log_msg (lgm_Network2, "Done: %s, sync mode\n", table_name);
40539 + lockstruct->ls_flags = LM_LSFLAG_ASYNC;
40541 + log_msg (lgm_Network2, "Done: %s, async mode\n", table_name);
40544 + gulm_cm.starts = FALSE;
40548 + stop_callback_qu (&gulm->cq);
40552 + stop_gulm_threads ();
40556 + gulm_cm.starts = FALSE;
40557 + log_msg (lgm_Always, "fsid=%s: Exiting gulm_mount with errors %d\n",
40558 + table_name, error);
40563 + * gulm_others_may_mount
40564 + * @lockspace: handle to specific lock space
40566 + * GFS calls this function if it was the first mounter after it's done
40567 + * checking all the journals.
40571 +gulm_others_may_mount (lm_lockspace_t * lockspace)
40573 + gulm_fs_t *fs = (gulm_fs_t *) lockspace;
40575 + lock_table_t *lt = &gulm_cm.ltpx;
40577 + /* first send the drop all exp message.
40579 + err = send_drop_exp (fs, lt, NULL);
40582 + ("fsid=%s: Problems sending DropExp request to LTPX: %d\n",
40583 + fs->fs_name, err);
40585 + /* then move the FirstMountLock to shared so others can mount. */
40586 + err = downgrade_mount_lock (fs);
40589 + log_err ("fsid=%s: error sending Fs_FinMount_Req.(%d)\n",
40590 + fs->fs_name, err);
40593 + /* first mounter is all done. let the gulm_recovery_done function
40594 + * behave as normal now.
40596 + fs->firstmounting = FALSE;
40601 + * @lockspace: handle to specific lock space
40605 +gulm_unmount (lm_lockspace_t * lockspace)
40607 + gulm_fs_t *gulm_fs = (gulm_fs_t *) lockspace;
40609 + down (&filesystem_lck);
40610 + list_del (&gulm_fs->fs_list);
40611 + --filesystems_count;
40612 + up (&filesystem_lck);
40614 + /* close and release stuff */
40615 + drop_mount_lock (gulm_fs);
40616 + put_journalID (gulm_fs);
40617 + jid_fs_release (gulm_fs);
40618 + jid_lockstate_release (gulm_fs);
40620 + stop_callback_qu (&gulm_fs->cq);
40622 + remove_from_proc (gulm_fs);
40626 + stop_gulm_threads ();
40631 + * gulm_recovery_done -
40638 +gulm_recovery_done (lm_lockspace_t * lockspace, unsigned int jid,
40639 + unsigned int message)
40641 + gulm_fs_t *fs = (gulm_fs_t *) lockspace;
40643 + uint8_t name[256];
40645 + if (message != LM_RD_SUCCESS) {
40646 + /* Need to start thinking about how I want to use this... */
40650 + if (jid == fs->fsJID) { /* this may be drifting crud through. */
40651 + /* hey! its me! */
40652 + strncpy (name, gulm_cm.myName, 256);
40653 + } else if (lookup_name_by_jid (fs, jid, name) != 0) {
40654 + log_msg (lgm_JIDMap,
40655 + "fsid=%s: Could not find a client for jid %d\n",
40656 + fs->fs_name, jid);
40659 + if (strlen (name) == 0) {
40660 + log_msg (lgm_JIDMap, "fsid=%s: No one mapped to jid %d\n",
40661 + fs->fs_name, jid);
40664 + log_msg (lgm_JIDMap, "fsid=%s: Found %s for jid %d\n",
40665 + fs->fs_name, name, jid);
40667 + err = send_drop_exp (fs, &gulm_cm.ltpx, name);
40669 + if (jid != fs->fsJID) {
40670 + /* rather dumb to do this to ourselves right after we mount... */
40671 + log_msg (lgm_JIDMap,
40672 + "fsid=%s: Clearing JID %d for use by others\n",
40673 + fs->fs_name, jid);
40674 + release_JID (fs, jid, FALSE);
40677 + /* If someone died while replaying someoneelse's journal, there will be
40678 + * stale expired jids.
40680 + check_for_stale_expires (fs);
40683 +/* vim: set ai cin noet sw=8 ts=8 : */
40684 diff -urN linux-orig/fs/gfs_locking/lock_gulm/gulm_jid.c linux-patched/fs/gfs_locking/lock_gulm/gulm_jid.c
40685 --- linux-orig/fs/gfs_locking/lock_gulm/gulm_jid.c 1969-12-31 18:00:00.000000000 -0600
40686 +++ linux-patched/fs/gfs_locking/lock_gulm/gulm_jid.c 2004-06-16 12:03:21.957894998 -0500
40688 +/******************************************************************************
40689 +*******************************************************************************
40691 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
40692 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
40694 +** This copyrighted material is made available to anyone wishing to use,
40695 +** modify, copy, or redistribute it subject to the terms and conditions
40696 +** of the GNU General Public License v.2.
40698 +*******************************************************************************
40699 +******************************************************************************/
40703 +#include <linux/kernel.h>
40704 +#include <linux/fs.h>
40705 +#include <linux/slab.h>
40706 +#include <linux/file.h>
40707 +#define __KERNEL_SYSCALLS__
40708 +#include <linux/unistd.h>
40712 +extern gulm_cm_t gulm_cm;
40714 +/****************************************************************************/
40718 + * Header lock: "JHeader" + \0\0\0 + fsname
40719 + * lvb: <uint32> :number of JIDs
40720 + * Mappinglock: "JM" + <uint32> + \0\0\0\0 + fsname
40721 + * lvb: [012] + <node name>
40723 + * 1: replaying journal
40725 + * list lock : "JL" + "listlock" + fsname
40726 + * Node Locks : "JN" + <nodename[8]> + fsname
40729 +#define jid_header_lvb_size (8)
40731 +struct jid_lookup_item_s {
40732 + struct list_head jp_list;
40737 + struct completion waitforit;
40739 +typedef struct jid_lookup_item_s jid_lookup_item_t;
40741 +LIST_HEAD (jid_pending_locks);
40742 +spinlock_t jid_pending;
40743 +struct semaphore jid_listlock;
40751 + spin_lock_init (&jid_pending);
40752 + init_MUTEX (&jid_listlock);
40756 + * jid_get_header_name -
40761 + * key is buffer to write to, keylen is size of buffer on input, and real
40762 + * length on output.
40767 +jid_get_header_name (uint8_t * fsname, uint8_t * key, uint16_t * keylen)
40770 + len = strlen (fsname);
40771 + if ((len + 11) > *keylen)
40773 + memcpy (key, "JHeader\0\0\0", 10);
40774 + memcpy (&key[10], fsname, len + 1);
40775 + *keylen = len + 11;
40780 +jid_get_listlock_name (uint8_t * fsname, uint8_t * key, uint16_t * keylen)
40783 + len = strlen (fsname);
40784 + if ((len + 11) > *keylen)
40786 + memcpy (key, "JLlistlock", 10);
40787 + memcpy (&key[10], fsname, len + 1);
40788 + *keylen = len + 11;
40793 + * jid_get_lock_name -
40799 + * key is buffer to write to, keylen is size of buffer on input, and real
40800 + * length on output.
40805 +jid_get_lock_name (uint8_t * fsname, uint32_t jid, uint8_t * key,
40806 + uint16_t * keylen)
40809 + len = strlen (fsname);
40810 + if ((len + 11) > *keylen)
40814 + key[5] = (jid >> 24) & 0xff;
40815 + key[4] = (jid >> 16) & 0xff;
40816 + key[3] = (jid >> 8) & 0xff;
40817 + key[2] = (jid >> 0) & 0xff;
40822 + memcpy (&key[10], fsname, len + 1);
40823 + *keylen = len + 11;
40835 +jid_hold_lvb (uint8_t * key, uint16_t keylen)
40837 + jid_lookup_item_t jp;
40838 + GULM_ASSERT (keylen > 6,);
40840 + jp.keylen = keylen;
40843 + INIT_LIST_HEAD (&jp.jp_list);
40844 + init_completion (&jp.waitforit);
40846 + spin_lock (&jid_pending);
40847 + list_add (&jp.jp_list, &jid_pending_locks);
40848 + spin_unlock (&jid_pending);
40850 + lg_lock_action_req (gulm_cm.hookup, key, keylen, lg_lock_act_HoldLVB,
40853 + wait_for_completion (&jp.waitforit);
40857 +jid_unhold_lvb (uint8_t * key, uint16_t keylen)
40859 + jid_lookup_item_t jp;
40860 + GULM_ASSERT (keylen > 6,);
40862 + jp.keylen = keylen;
40865 + INIT_LIST_HEAD (&jp.jp_list);
40866 + init_completion (&jp.waitforit);
40868 + spin_lock (&jid_pending);
40869 + list_add (&jp.jp_list, &jid_pending_locks);
40870 + spin_unlock (&jid_pending);
40872 + lg_lock_action_req (gulm_cm.hookup, key, keylen, lg_lock_act_UnHoldLVB,
40875 + wait_for_completion (&jp.waitforit);
40879 +jid_sync_lvb (uint8_t * key, uint16_t keylen, uint8_t * lvb, uint16_t lvblen)
40881 + jid_lookup_item_t jp;
40882 + GULM_ASSERT (keylen > 6,);
40884 + jp.keylen = keylen;
40887 + INIT_LIST_HEAD (&jp.jp_list);
40888 + init_completion (&jp.waitforit);
40890 + spin_lock (&jid_pending);
40891 + list_add (&jp.jp_list, &jid_pending_locks);
40892 + spin_unlock (&jid_pending);
40894 + lg_lock_action_req (gulm_cm.hookup, key, keylen, lg_lock_act_SyncLVB,
40897 + wait_for_completion (&jp.waitforit);
40901 + * jid_action_reply -
40905 + * called from the lock handler callback.
40910 +jid_action_reply (uint8_t * key, uint16_t keylen)
40912 + struct list_head *tmp, *nxt;
40913 + jid_lookup_item_t *jp, *fnd = NULL;
40914 + spin_lock (&jid_pending);
40915 + list_for_each_safe (tmp, nxt, &jid_pending_locks) {
40916 + jp = list_entry (tmp, jid_lookup_item_t, jp_list);
40917 + if (memcmp (key, jp->key, MIN (keylen, jp->keylen)) == 0) {
40923 + spin_unlock (&jid_pending);
40926 + complete (&fnd->waitforit);
40930 + * jid_get_lock_state_inr -
40941 +jid_get_lock_state_inr (uint8_t * key, uint16_t keylen, uint8_t state,
40942 + uint32_t flags, uint8_t * lvb, uint16_t lvblen)
40944 + jid_lookup_item_t jp;
40945 + GULM_ASSERT (keylen > 6,);
40947 + jp.keylen = keylen;
40949 + jp.lvblen = lvblen;
40950 + INIT_LIST_HEAD (&jp.jp_list);
40951 + init_completion (&jp.waitforit);
40953 + spin_lock (&jid_pending);
40954 + list_add (&jp.jp_list, &jid_pending_locks);
40955 + spin_unlock (&jid_pending);
40957 + lg_lock_state_req (gulm_cm.hookup, key, keylen, state, flags, lvb, lvblen);
40959 + wait_for_completion (&jp.waitforit);
40963 + * jid_get_lock_state_lvb -
40973 +jid_get_lock_state_lvb (uint8_t * key, uint16_t keylen, uint8_t state,
40974 + uint8_t * lvb, uint16_t lvblen)
40976 + jid_get_lock_state_inr (key, keylen, state, 0, lvb, lvblen);
40979 + * jid_get_lock_state -
40987 +jid_get_lock_state (uint8_t * key, uint16_t keylen, uint8_t state)
40989 + jid_get_lock_state_inr (key, keylen, state, 0, NULL, 0);
40993 + * jid_state_reply -
41002 +jid_state_reply (uint8_t * key, uint16_t keylen, uint8_t * lvb, uint16_t lvblen)
41004 + struct list_head *tmp, *nxt;
41005 + jid_lookup_item_t *jp, *fnd = NULL;
41006 + spin_lock (&jid_pending);
41007 + list_for_each_safe (tmp, nxt, &jid_pending_locks) {
41008 + jp = list_entry (tmp, jid_lookup_item_t, jp_list);
41009 + if (memcmp (key, jp->key, MIN (keylen, jp->keylen)) == 0) {
41015 + spin_unlock (&jid_pending);
41017 + if (fnd != NULL) {
41018 + if (lvb != NULL && fnd->lvb != NULL)
41019 + memcpy (fnd->lvb, lvb, MIN (fnd->lvblen, lvblen));
41020 + complete (&fnd->waitforit);
41024 +/****************************************************************************/
41027 + * jid_hold_list_lock -
41030 + * only make one call to this per node.
41035 +jid_hold_list_lock (gulm_fs_t * fs)
41037 + uint8_t key[GIO_KEY_SIZE];
41040 + down (&jid_listlock);
41042 + keylen = sizeof (key);
41043 + jid_get_listlock_name (fs->fs_name, key, &keylen);
41044 + jid_get_lock_state (key, keylen, lg_lock_state_Exclusive);
41049 + * jid_release_list_lock -
41056 +jid_release_list_lock (gulm_fs_t * fs)
41058 + uint8_t key[GIO_KEY_SIZE];
41061 + keylen = sizeof (key);
41062 + jid_get_listlock_name (fs->fs_name, key, &keylen);
41063 + jid_get_lock_state (key, keylen, lg_lock_state_Unlock);
41065 + up (&jid_listlock);
41069 + * jid_rehold_lvbs -
41075 +jid_rehold_lvbs (gulm_fs_t * fs)
41078 + uint32_t oldjcnt;
41079 + uint8_t key[GIO_KEY_SIZE], lvb[jid_header_lvb_size];
41080 + uint16_t keylen = GIO_KEY_SIZE;
41082 + oldjcnt = fs->JIDcount;
41084 + jid_get_header_name (fs->fs_name, key, &keylen);
41085 + jid_get_lock_state_lvb (key, keylen, lg_lock_state_Shared, lvb,
41086 + jid_header_lvb_size);
41087 + fs->JIDcount = (uint32_t) (lvb[0]) << 0;
41088 + fs->JIDcount |= (uint32_t) (lvb[1]) << 8;
41089 + fs->JIDcount |= (uint32_t) (lvb[2]) << 16;
41090 + fs->JIDcount |= (uint32_t) (lvb[3]) << 24;
41092 + for (i = oldjcnt; i < fs->JIDcount; i++) {
41093 + keylen = sizeof (key);
41094 + jid_get_lock_name (fs->fs_name, i, key, &keylen);
41095 + jid_hold_lvb (key, keylen);
41101 +jid_grow_space (gulm_fs_t * fs)
41103 + uint8_t key[GIO_KEY_SIZE], lvb[jid_header_lvb_size];
41104 + uint16_t keylen = GIO_KEY_SIZE;
41107 + keylen = sizeof (key);
41108 + jid_get_header_name (fs->fs_name, key, &keylen);
41109 + jid_get_lock_state_lvb (key, keylen, lg_lock_state_Exclusive, lvb,
41110 + jid_header_lvb_size);
41111 + jidc = (uint32_t) (lvb[0]) << 0;
41112 + jidc |= (uint32_t) (lvb[1]) << 8;
41113 + jidc |= (uint32_t) (lvb[2]) << 16;
41114 + jidc |= (uint32_t) (lvb[3]) << 24;
41116 + lvb[3] = (jidc >> 24) & 0xff;
41117 + lvb[2] = (jidc >> 16) & 0xff;
41118 + lvb[1] = (jidc >> 8) & 0xff;
41119 + lvb[0] = (jidc >> 0) & 0xff;
41120 + jid_sync_lvb (key, keylen, lvb, jid_header_lvb_size);
41121 + jid_get_lock_state (key, keylen, lg_lock_state_Unlock);
41122 + /* do an unlock here, so that when rehold grabs it shared, there is no
41126 + jid_rehold_lvbs (fs);
41130 + * lookup_name_by_jid -
41139 +lookup_name_by_jid (gulm_fs_t * fs, uint32_t jid, uint8_t * name)
41141 + uint8_t key[GIO_KEY_SIZE], lvb[64];
41142 + uint16_t keylen = 64;
41145 + if (jid >= fs->JIDcount) {
41150 + jid_hold_list_lock (fs);
41152 + jid_get_lock_name (fs->fs_name, jid, key, &keylen);
41153 + jid_get_lock_state_lvb (key, keylen, lg_lock_state_Shared, lvb, 64);
41155 + if (lvb[0] != 0) {
41156 + memcpy (name, &lvb[1], strlen (&lvb[1]) + 1);
41161 + jid_get_lock_state (key, keylen, lg_lock_state_Unlock);
41163 + jid_release_list_lock (fs);
41174 + * actually may only need to set first byte to zero
41179 +release_JID (gulm_fs_t * fs, uint32_t jid, int nop)
41181 + uint8_t key[GIO_KEY_SIZE], lvb[64];
41182 + uint16_t keylen = 64;
41184 + /* there is no such, so this becomes a nop. */
41185 + if (jid >= fs->JIDcount)
41188 + jid_hold_list_lock (fs);
41190 + jid_get_lock_name (fs->fs_name, jid, key, &keylen);
41191 + jid_get_lock_state_lvb (key, keylen, lg_lock_state_Exclusive, lvb, 64);
41193 + jid_sync_lvb (key, keylen, lvb, strlen (&lvb[1]) + 2);
41194 + jid_get_lock_state (key, keylen, lg_lock_state_Unlock);
41196 + jid_release_list_lock (fs);
41203 +put_journalID (gulm_fs_t * fs)
41205 + release_JID (fs, fs->fsJID, TRUE);
41209 + * get_journalID -
41213 + * This is broken.
41218 +get_journalID (gulm_fs_t * fs)
41221 + uint8_t key[GIO_KEY_SIZE], lvb[64];
41223 + int first_clear = -1;
41226 + jid_hold_list_lock (fs);
41228 + /* find an empty space, or ourselves again */
41229 + for (i = 0; i < fs->JIDcount; i++) {
41230 + keylen = sizeof (key);
41231 + jid_get_lock_name (fs->fs_name, i, key, &keylen);
41232 + jid_get_lock_state_lvb (key, keylen, lg_lock_state_Exclusive,
41234 + jid_get_lock_state (key, keylen, lg_lock_state_Unlock);
41235 + if (first_clear == -1 && lvb[0] == 0 ) {
41237 + } else if (strcmp (gulm_cm.myName, &lvb[1]) == 0) {
41242 + if (first_clear >= 0) {
41243 + /* take the jid we have found */
41244 + keylen = sizeof (key);
41245 + jid_get_lock_name (fs->fs_name, first_clear, key, &keylen);
41246 + jid_get_lock_state_lvb (key, keylen, lg_lock_state_Exclusive,
41249 + memcpy (&lvb[1], gulm_cm.myName, strlen (gulm_cm.myName) + 1);
41250 + jid_sync_lvb (key, keylen, lvb, strlen (gulm_cm.myName) + 2);
41251 + jid_get_lock_state (key, keylen, lg_lock_state_Unlock);
41253 + fs->fsJID = first_clear;
41256 + /* unlock the header lock */
41257 + jid_release_list_lock (fs);
41259 + if (first_clear < 0) {
41260 + /* nothing found, grow and try again. */
41261 + jid_grow_space (fs);
41268 + * find_jid_by_name_and_mark_replay -
41277 +find_jid_by_name_and_mark_replay (gulm_fs_t * fs, uint8_t * name,
41280 + uint32_t i, found = -1;
41281 + uint8_t key[GIO_KEY_SIZE], lvb[64];
41284 + /* grab list lock */
41285 + jid_hold_list_lock (fs);
41287 + for (i = 0; i < fs->JIDcount; i++) {
41288 + keylen = sizeof (key);
41289 + jid_get_lock_name (fs->fs_name, i, key, &keylen);
41290 + jid_get_lock_state_lvb (key, keylen, lg_lock_state_Exclusive,
41292 + if (strcmp (name, &lvb[1]) == 0) {
41296 + jid_sync_lvb (key, keylen, lvb, strlen (&lvb[1]) + 2);
41297 + jid_get_lock_state (key, keylen, lg_lock_state_Unlock);
41300 + jid_get_lock_state (key, keylen, lg_lock_state_Unlock);
41303 + /* unlock the list lock */
41304 + jid_release_list_lock (fs);
41310 + * check_for_stale_expires -
41317 +check_for_stale_expires (gulm_fs_t * fs)
41320 + uint8_t key[GIO_KEY_SIZE], lvb[64];
41322 + unsigned int ujid;
41324 + /* grab list lock */
41325 + jid_hold_list_lock (fs);
41327 + for (i = 0; i < fs->JIDcount; i++) {
41328 + keylen = sizeof (key);
41329 + jid_get_lock_name (fs->fs_name, i, key, &keylen);
41330 + jid_get_lock_state_lvb (key, keylen, lg_lock_state_Shared, lvb,
41332 + jid_get_lock_state (key, keylen, lg_lock_state_Unlock);
41334 + if (lvb[0] == 1) {
41335 + log_msg (lgm_JIDMap,
41336 + "fsid=%s: stale JID %d found\n",
41339 + fs->cb (fs->fsdata, LM_CB_NEED_RECOVERY, &ujid);
41343 + /* unlock the list lock */
41344 + jid_release_list_lock (fs);
41353 +jid_fs_init (gulm_fs_t * fs)
41355 + uint8_t key[GIO_KEY_SIZE];
41356 + uint16_t keylen = GIO_KEY_SIZE;
41358 + fs->JIDcount = 0;
41360 + jid_get_header_name (fs->fs_name, key, &keylen);
41361 + jid_hold_lvb (key, keylen);
41362 + jid_rehold_lvbs (fs);
41366 + * jid_fs_release -
41371 +jid_fs_release (gulm_fs_t * fs)
41374 + uint8_t key[GIO_KEY_SIZE];
41376 + for (i = 0; i < fs->JIDcount; i++) {
41377 + keylen = sizeof (key);
41378 + jid_get_lock_name (fs->fs_name, i, key, &keylen);
41379 + jid_unhold_lvb (key, keylen);
41381 + keylen = sizeof (key);
41382 + jid_get_header_name (fs->fs_name, key, &keylen);
41383 + jid_unhold_lvb (key, keylen);
41384 + jid_get_lock_state (key, keylen, lg_lock_state_Unlock);
41388 + * jid_unlock_callback -
41391 + * *MUST* be called from a Handler thread.
41396 +jid_unlock_callback (void *d)
41398 + gulm_fs_t *fs = (gulm_fs_t *) d;
41399 + jid_rehold_lvbs (fs);
41403 + * jid_header_lock_drop -
41410 +jid_header_lock_drop (uint8_t * key, uint16_t keylen)
41413 + /* make sure this is the header lock.... */
41414 + if (key[1] == 'H' && (fs = get_fs_by_name (&key[10])) != NULL) {
41415 + qu_function_call (&fs->cq, jid_unlock_callback, fs);
41419 +/****************************************************************************/
41421 + * jid_get_lsresv_name -
41430 +jid_get_lsresv_name (char *fsname, uint8_t * key, uint16_t * keylen)
41436 + len = strlen (gulm_cm.myName) + 1;
41437 + memset (&key[2], 0, 8);
41438 + memcpy ((&key[2]), gulm_cm.myName, MIN (len, 8));
41439 + /* fsname starts at byte 10 so the dropexp pattern will find it. */
41440 + memcpy ((&key[10]), fsname, strlen (fsname) + 1);
41442 + *keylen = 10 + strlen (fsname) + 1;
41448 + * jid_lockstate_reserve -
41455 +jid_lockstate_reserve (gulm_fs_t * fs, int first)
41457 + uint8_t key[GIO_KEY_SIZE];
41460 + jid_get_lsresv_name (fs->fs_name, key, &keylen);
41462 + /* if we are expired, this will block until someone else has cleaned our
41465 + * We may very well need to put in some kind of timeout, otherwise this
41466 + * may do a forever lockup much like the FirstMounter lock had.
41468 + jid_get_lock_state_inr (key, keylen, lg_lock_state_Exclusive,
41469 + first?lg_lock_flag_IgnoreExp:0, NULL, 0);
41474 + * jid_lockstate_release -
41481 +jid_lockstate_release (gulm_fs_t * fs)
41483 + uint8_t key[GIO_KEY_SIZE];
41486 + jid_get_lsresv_name (fs->fs_name, key, &keylen);
41488 + jid_get_lock_state (key, keylen, lg_lock_state_Unlock);
41493 +/* vim: set ai cin noet sw=8 ts=8 : */
41494 diff -urN linux-orig/fs/gfs_locking/lock_gulm/gulm_jid.h linux-patched/fs/gfs_locking/lock_gulm/gulm_jid.h
41495 --- linux-orig/fs/gfs_locking/lock_gulm/gulm_jid.h 1969-12-31 18:00:00.000000000 -0600
41496 +++ linux-patched/fs/gfs_locking/lock_gulm/gulm_jid.h 2004-06-16 12:03:21.957894998 -0500
41498 +/******************************************************************************
41499 +*******************************************************************************
41501 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
41502 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
41504 +** This copyrighted material is made available to anyone wishing to use,
41505 +** modify, copy, or redistribute it subject to the terms and conditions
41506 +** of the GNU General Public License v.2.
41508 +*******************************************************************************
41509 +******************************************************************************/
41511 +#ifndef __GULM_JID_H__
41512 +#define __GULM_JID_H__
41514 +void jid_init (void);
41515 +void jid_fs_init (gulm_fs_t * fs);
41516 +void jid_fs_release (gulm_fs_t * fs);
41517 +int get_journalID (gulm_fs_t * fs);
41518 +int lookup_jid_by_name (gulm_fs_t * fs, uint8_t * name, uint32_t * injid);
41519 +int lookup_name_by_jid (gulm_fs_t * fs, uint32_t jid, uint8_t * name);
41520 +void release_JID (gulm_fs_t * fs, uint32_t jid, int owner);
41521 +void put_journalID (gulm_fs_t * fs);
41522 +void check_for_stale_expires (gulm_fs_t * fs);
41525 + find_jid_by_name_and_mark_replay (gulm_fs_t * fs, uint8_t * name, uint32_t * jid);
41527 +void jid_start_journal_reply (gulm_fs_t * fs, uint32_t jid);
41528 +void jid_finish_journal_reply (gulm_fs_t * fs, uint32_t jid);
41530 +void jid_lockstate_reserve (gulm_fs_t * fs, int first);
41531 +void jid_lockstate_release (gulm_fs_t * fs);
41533 +/* to be called from the lg_lock callbacks. */
41534 +void jid_state_reply (uint8_t * key, uint16_t keylen, uint8_t * lvb,
41535 + uint16_t lvblen);
41536 +void jid_action_reply (uint8_t * key, uint16_t keylen);
41537 +void jid_header_lock_drop (uint8_t * key, uint16_t keylen);
41538 +#endif /*__GULM_JID_H__*/
41539 diff -urN linux-orig/fs/gfs_locking/lock_gulm/gulm_log_msg_bits.h linux-patched/fs/gfs_locking/lock_gulm/gulm_log_msg_bits.h
41540 --- linux-orig/fs/gfs_locking/lock_gulm/gulm_log_msg_bits.h 1969-12-31 18:00:00.000000000 -0600
41541 +++ linux-patched/fs/gfs_locking/lock_gulm/gulm_log_msg_bits.h 2004-06-16 12:03:21.957894998 -0500
41543 +/******************************************************************************
41544 +*******************************************************************************
41546 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
41547 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
41549 +** This copyrighted material is made available to anyone wishing to use,
41550 +** modify, copy, or redistribute it subject to the terms and conditions
41551 +** of the GNU General Public License v.2.
41553 +*******************************************************************************
41554 +******************************************************************************/
41556 +#ifndef __gulm_log_msg_bits_h__
41557 +#define __gulm_log_msg_bits_h__
41558 +/* log_msg bit flags
41559 + * These got their own file so I can easily include them in both user and
41562 +#define lgm_Always (0x00000000) /*Print Message no matter what */
41563 +#define lgm_Network (0x00000001)
41564 +#define lgm_Network2 (0x00000002)
41565 +#define lgm_Stomith (0x00000004)
41566 +#define lgm_Heartbeat (0x00000008)
41567 +#define lgm_locking (0x00000010)
41568 +#define lgm_FuncDebug (0x00000020)
41569 +#define lgm_Forking (0x00000040)
41570 +#define lgm_JIDMap (0x00000080)
41571 +#define lgm_Subscribers (0x00000100)
41572 +#define lgm_LockUpdates (0x00000200)
41573 +#define lgm_LoginLoops (0x00000400)
41574 +#define lgm_Network3 (0x00000800)
41575 +#define lgm_JIDUpdates (0x00001000)
41576 +#define lgm_ServerState (0x00002000)
41578 +#define lgm_ReallyAll (0xffffffff)
41580 +#define lgm_BitFieldSize (32)
41582 +#endif /*__gulm_log_msg_bits_h__*/
41583 diff -urN linux-orig/fs/gfs_locking/lock_gulm/gulm_lt.c linux-patched/fs/gfs_locking/lock_gulm/gulm_lt.c
41584 --- linux-orig/fs/gfs_locking/lock_gulm/gulm_lt.c 1969-12-31 18:00:00.000000000 -0600
41585 +++ linux-patched/fs/gfs_locking/lock_gulm/gulm_lt.c 2004-06-16 12:03:21.957894998 -0500
41587 +/******************************************************************************
41588 +*******************************************************************************
41590 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
41591 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
41593 +** This copyrighted material is made available to anyone wishing to use,
41594 +** modify, copy, or redistribute it subject to the terms and conditions
41595 +** of the GNU General Public License v.2.
41597 +*******************************************************************************
41598 +******************************************************************************/
41602 +#include <linux/kernel.h>
41603 +#include <linux/fs.h>
41604 +#include <linux/slab.h>
41605 +#include <linux/file.h>
41606 +#define __KERNEL_SYSCALLS__
41607 +#include <linux/unistd.h>
41610 +#include "handler.h"
41611 +#include "utils_tostr.h"
41612 +#include "gulm_jid.h"
41614 +extern gulm_cm_t gulm_cm;
41616 +/****************************************************************************/
41617 +/* A bunch of prints that hopefully contain more information that is also
41620 + * these are a mess.
41624 + * lck_key_to_hex -
41627 + * @workspace: <> place to put string. !! better be 2x len !!
41633 +lck_key_to_hex (uint8_t * key, uint16_t len, char *workspace)
41636 + for (i = 0; i < len; i++)
41637 + sprintf (&workspace[i * 2], "%02x", (key[i] & 0xff));
41638 + return workspace;
41641 +static void __inline__
41642 +db_lck_entered (gulm_lock_t * lck)
41644 + char bb[GIO_KEY_SIZE * 2 + 3];
41645 + lck_key_to_hex (lck->key, lck->keylen, bb);
41646 + printk ("Started lock 0x%s cur:%#x req:%#x flags:%#x\n", bb,
41647 + lck->cur_state, lck->req_state, lck->flags);
41649 +static void __inline__
41650 +db_lck_exited (gulm_lock_t * lck)
41652 + char bb[GIO_KEY_SIZE * 2 + 3];
41653 + lck_key_to_hex (lck->key, lck->keylen, bb);
41654 + printk ("Finished lock 0x%s result:%#x\n", bb, lck->result);
41657 +static void __inline__
41658 +dump_gulm_lock_t (gulm_lock_t * lck)
41660 + char bb[GIO_KEY_SIZE * 2 + 3];
41662 + lck_key_to_hex (lck->key, lck->keylen, bb);
41663 + log_msg (lgm_Always, " key = 0x%s\n", bb);
41664 + log_msg (lgm_Always, " req_type = %#x\n", lck->req_type);
41665 + log_msg (lgm_Always, " last_suc_state = %#x\n", lck->last_suc_state);
41666 + log_msg (lgm_Always, " actuallypending = %d\n", lck->actuallypending);
41667 + log_msg (lgm_Always, " in_to_be_sent = %d\n", lck->in_to_be_sent);
41668 + log_msg (lgm_Always, " cur_state = %d\n", lck->cur_state);
41669 + log_msg (lgm_Always, " req_state = %d\n", lck->req_state);
41670 + log_msg (lgm_Always, " flags = %#x\n", lck->flags);
41671 + log_msg (lgm_Always, " action = %d\n", lck->action);
41672 + log_msg (lgm_Always, " result = %d\n", lck->result);
41675 +/* DEBUG_BY_LOCK is gone. I may later add something back if needed.
41677 + * I love the idea of being able to log only certain locks, I just cannot
41678 + * think of an easy way to do it. The best I can come up with is some
41679 + * pattern (or set of) that are used to decide which locks get logged. But
41680 + * that could be expensive if the pattern is checked every time, and won't
41681 + * behave as expected if only applied in get_lock.
41684 +/* The old log functions.
41685 + * These need their own sort of clean up someday as well.
41687 +#define log_msg_lk(key, keylen, fmt, args...) {\
41688 + uint8_t bb[GIO_KEY_SIZE*2 +3]; \
41689 + lck_key_to_hex( key, keylen, bb); \
41690 + printk(PROTO_NAME ": On lock 0x%s " fmt , bb , ## args ); \
41693 +#define log_err_lk(key, keylen, fmt, args...) {\
41694 + uint8_t bb[GIO_KEY_SIZE*2 +3]; \
41695 + lck_key_to_hex( key, keylen, bb); \
41696 + printk(KERN_ERR PROTO_NAME ": ERROR On lock 0x%s " fmt , bb , ## args ); \
41699 +#define log_msg_lck(lck, fmt, args...) {\
41700 + uint8_t bb[GIO_KEY_SIZE*2 +3]; \
41701 + lck_key_to_hex( (lck)->key, (lck)->keylen, bb); \
41702 + printk(PROTO_NAME ": On lock 0x%s " fmt , bb , ## args ); \
41705 +#define log_err_lck(lck, fmt, args...) {\
41706 + uint8_t bb[GIO_KEY_SIZE*2 +3]; \
41707 + lck_key_to_hex( (lck)->key, (lck)->keylen, bb); \
41708 + printk(KERN_ERR PROTO_NAME ": ERROR On lock 0x%s " fmt , bb , ## args ); \
41712 +static void __inline__
41713 +print_lk_lvb (uint8_t * key, uint8_t * lvb, uint8_t st, uint8_t * dir)
41715 + uint8_t bk[GIO_KEY_SIZE * 2 + 3];
41716 + uint8_t bl[GIO_LVB_SIZE * 2 + 3];
41718 + for (i = 0; i < GIO_KEY_SIZE; i++)
41719 + sprintf (&bk[(i * 2)], "%02x", (key[i]) & 0xff);
41720 + for (i = 0; i < GIO_LVB_SIZE; i++)
41721 + sprintf (&bl[(i * 2)], "%02x", (lvb[i]) & 0xff);
41722 + printk (PROTO_NAME ": On lock 0x%s with state %d\n\t%s LVB 0x%s\n",
41723 + bk, st, dir, bl);
41726 +#define lvb_log_msg_lk(k, fmt, args...) log_msg_lk( k , fmt , ## args )
41727 +#define lvb_log_msg(fmt, args...) log_msg(lgm_Always , fmt , ## args )
41728 +#else /*DEBUG_LVB */
41729 +#define print_lk_lvb(k,l,s,d)
41730 +#define lvb_log_msg_lk(k, fmt, args...)
41731 +#define lvb_log_msg(fmt, args...)
41732 +#endif /*DEBUG_LVB */
41734 +/****************************************************************************/
41736 + * find_and_mark_lock -
41741 + * looks for a lock struct of key. If found, marks it.
41743 + * Returns: TRUE or FALSE
41746 +find_and_mark_lock (uint8_t * key, uint8_t keylen, gulm_lock_t ** lockp)
41748 + int found = FALSE;
41750 + gulm_lock_t *lck = NULL;
41751 + struct list_head *tmp;
41753 + /* now find the lock */
41754 + bkt = hash_lock_key (key, keylen);
41755 + bkt %= gulm_cm.ltpx.hashbuckets;
41757 + spin_lock (&gulm_cm.ltpx.hshlk[bkt]);
41758 + list_for_each (tmp, &gulm_cm.ltpx.lkhsh[bkt]) {
41759 + lck = list_entry (tmp, gulm_lock_t, gl_list);
41760 + if (memcmp (lck->key, key, keylen) == 0) {
41762 + atomic_inc (&lck->count);
41766 + spin_unlock (&gulm_cm.ltpx.hshlk[bkt]);
41778 + * like above, but since we have the lock, don't search for it.
41783 +mark_lock (gulm_lock_t * lck)
41785 + atomic_inc (&lck->count);
41789 + * unmark_and_release_lock -
41792 + * decrement the counter on a lock, freeing it if it reaches 0.
41793 + * (also removes it from the hash table)
41795 + * TRUE if lock was freed.
41797 + * Returns: TRUE or FALSE
41800 +unmark_and_release_lock (gulm_lock_t * lck)
41803 + int deld = FALSE;
41805 + bkt = hash_lock_key (lck->key, lck->keylen);
41806 + bkt %= gulm_cm.ltpx.hashbuckets;
41807 + spin_lock (&gulm_cm.ltpx.hshlk[bkt]);
41808 + if (atomic_dec_and_test (&lck->count)) {
41809 + list_del (&lck->gl_list);
41812 + spin_unlock (&gulm_cm.ltpx.hshlk[bkt]);
41814 + gulm_cm.ltpx.locks_total--;
41815 + gulm_cm.ltpx.locks_unl--;
41816 + if (lck->lvb != NULL) {
41817 + kfree (lck->lvb);
41825 +/****************************************************************************/
41828 +gulm_key_to_lm_lockname (uint8_t * key, struct lm_lockname *lockname)
41830 + (*lockname).ln_number = (u64) (key[9]) << 0;
41831 + (*lockname).ln_number |= (u64) (key[8]) << 8;
41832 + (*lockname).ln_number |= (u64) (key[7]) << 16;
41833 + (*lockname).ln_number |= (u64) (key[6]) << 24;
41834 + (*lockname).ln_number |= (u64) (key[5]) << 32;
41835 + (*lockname).ln_number |= (u64) (key[4]) << 40;
41836 + (*lockname).ln_number |= (u64) (key[3]) << 48;
41837 + (*lockname).ln_number |= (u64) (key[2]) << 56;
41838 + (*lockname).ln_type = key[1];
41842 +do_drop_lock_req (gulm_fs_t * fs, uint8_t state, uint8_t key[GIO_KEY_SIZE])
41844 + unsigned int type;
41845 + struct lm_lockname lockname;
41846 + /* i might want to shove most of this function into the new lockcallback
41851 + /* don't do callbacks on the gulm mount lock.
41852 + * I need to someday come up with a cleaner way of separating the
41853 + * firstmounter lock and the rest of gfs's locks.
41854 + * i duno, this first byte is pretty clean.
41856 + if (key[0] != 'G') {
41861 + case lg_lock_state_Unlock:
41862 + type = LM_CB_DROPLOCKS;
41864 + case lg_lock_state_Exclusive:
41865 + type = LM_CB_NEED_E;
41867 + case lg_lock_state_Shared:
41868 + type = LM_CB_NEED_S;
41870 + case lg_lock_state_Deferred:
41871 + type = LM_CB_NEED_D;
41874 + type = LM_CB_DROPLOCKS;
41877 + gulm_key_to_lm_lockname (key, &lockname);
41879 + qu_drop_req (&fs->cq, fs->cb, fs->fsdata, type,
41880 + lockname.ln_type, lockname.ln_number);
41884 + * send_async_reply -
41891 +send_async_reply (gulm_lock_t * lck)
41893 + gulm_fs_t *fs = lck->fs;
41894 + struct lm_lockname lockname;
41896 + if (lck->key[0] == 'F') {
41897 + /* whee! it is the first mounter lock. two things:
41898 + * A: gfs could care less about this.
41899 + * B: we need to up the sleeper in the fs. (hack)
41901 + complete (&fs->sleep);
41905 + gulm_key_to_lm_lockname (lck->key, &lockname);
41907 + qu_async_rpl (&fs->cq, fs->cb, fs->fsdata, &lockname, lck->result);
41911 + * send_drop_exp_inter -
41919 +send_drop_exp_inter (gulm_fs_t * fs, lock_table_t * lt, char *name)
41922 + uint8_t mask[GIO_KEY_SIZE];
41924 + memset (mask, 0, GIO_KEY_SIZE);
41925 + /* pack key mask */
41926 + mask[0] = 0xff; /* minor lock type. 'G', 'F', 'J'. */
41927 + mask[1] = 0xff; /* GFS lock type. */
41928 + mask[2] = 0xff; /* next 8 are lock number */
41936 + /* Now stick the fsname into the remaining space. */
41937 + len = strlen (fs->fs_name);
41938 + strncpy (&mask[10], fs->fs_name, GIO_KEY_SIZE - 16);
41939 + len += 11; /* 10 for the encoded buf, 1 for the '\0' after the fs name */
41941 + err = lg_lock_drop_exp (gulm_cm.hookup, name, mask, len);
41947 + * send_lock_action -
41954 +send_lock_action (gulm_lock_t * lck, uint8_t action)
41958 + GULM_ASSERT (lck->req_type == glck_action, dump_gulm_lock_t (lck););
41960 + err = lg_lock_action_req (gulm_cm.hookup, lck->key, lck->keylen, action,
41961 + lck->lvb, lck->fs->lvb_size);
41963 + log_err ("Issues sending action request. %d\n", err);
41969 + * send_lock_req -
41976 +send_lock_req (gulm_lock_t * lck)
41978 + gulm_fs_t *fs = lck->fs;
41980 + uint32_t flags = 0;
41983 + GULM_ASSERT (lck->req_type == glck_state, dump_gulm_lock_t (lck););
41985 + switch (lck->req_state) {
41986 + case LM_ST_EXCLUSIVE:
41987 + state = lg_lock_state_Exclusive;
41989 + case LM_ST_DEFERRED:
41990 + state = lg_lock_state_Deferred;
41992 + case LM_ST_SHARED:
41993 + state = lg_lock_state_Shared;
41995 + case LM_ST_UNLOCKED:
41996 + state = lg_lock_state_Unlock;
41999 + GULM_ASSERT (0, log_err ("fsid=%s: Anit no lock state %d.\n",
42000 + fs->fs_name, lck->req_state););
42003 + if (lck->flags & LM_FLAG_TRY) {
42004 + flags |= lg_lock_flag_Try;
42006 + if (lck->flags & LM_FLAG_TRY_1CB) {
42007 + flags |= lg_lock_flag_Try | lg_lock_flag_DoCB;
42009 + if (lck->flags & LM_FLAG_NOEXP) {
42010 + flags |= lg_lock_flag_IgnoreExp;
42012 + if (lck->flags & LM_FLAG_ANY) {
42013 + flags |= lg_lock_flag_Any;
42015 + if (lck->flags & LM_FLAG_PRIORITY) {
42016 + flags |= lg_lock_flag_Piority;
42018 + if (lck->lvb != NULL) {
42019 + print_lk_lvb (lck->key, lck->lvb, lck->req_state, "Sending");
42022 + err = lg_lock_state_req (gulm_cm.hookup, lck->key, lck->keylen,
42023 + state, flags, lck->lvb, lck->fs->lvb_size);
42025 + log_err ("Issues sending state request. %d\n", err);
42031 + * toggle_lock_counters -
42033 + * called after a succesful request to change lock state. Decrements
42034 + * counts for what the lock was, and increments for what it is now.
42037 +toggle_lock_counters (lock_table_t * lt, int old, int new)
42039 + /* what we had it in */
42041 + case LM_ST_EXCLUSIVE:
42044 + case LM_ST_DEFERRED:
42047 + case LM_ST_SHARED:
42050 + case LM_ST_UNLOCKED:
42054 + /* what we have it in */
42056 + case LM_ST_EXCLUSIVE:
42059 + case LM_ST_DEFERRED:
42062 + case LM_ST_SHARED:
42065 + case LM_ST_UNLOCKED:
42072 + * calc_lock_result -
42078 + * This calculates the correct result to return for gfs lock requests.
42083 +calc_lock_result (gulm_lock_t * lck,
42084 + uint8_t state, uint32_t error, uint32_t flags)
42086 + gulm_fs_t *fs = lck->fs;
42087 + lock_table_t *lt = &gulm_cm.ltpx;
42088 + int result = -69;
42090 + /* adjust result based on success status. */
42093 + /* set result to current lock state. */
42094 + if (!(lck->flags & LM_FLAG_ANY)) {
42095 + /* simple case, we got what we asked for. */
42096 + result = lck->req_state;
42098 + /* complex case, we got something else, but we said that was ok */
42100 + case lg_lock_state_Shared:
42101 + result = LM_ST_SHARED;
42103 + case lg_lock_state_Deferred:
42104 + result = LM_ST_DEFERRED;
42107 + case lg_lock_state_Exclusive:
42108 + case lg_lock_state_Unlock:
42110 + dump_gulm_lock_t (lck);
42112 + ("fsid=%s: lock state %d is invalid on "
42113 + "ANY flag return\n", fs->fs_name,
42120 + dump_gulm_lock_t (lck);
42121 + log_err_lck (lck,
42122 + "fsid=%s: Anit no lock state %d.\n",
42123 + fs->fs_name, state);
42129 + /* toggle counters.
42130 + * due to ANY flag, new state may not be req_state.
42132 + toggle_lock_counters (lt, lck->cur_state, result);
42134 + /* if no internal unlocks, it is cachable. */
42135 + if (result != LM_ST_UNLOCKED && (flags & lg_lock_flag_Cachable))
42136 + result |= LM_OUT_CACHEABLE;
42138 + /* record and move on
42140 + lck->last_suc_state = result & LM_OUT_ST_MASK;
42142 + case lg_err_Canceled:
42143 + result = LM_OUT_CANCELED | lck->cur_state;
42145 + case lg_err_TryFailed:
42146 + result = lck->cur_state; /* if we didn't get it. */
42164 +my_strdup (char *s)
42168 + len = strlen (s) + 1;
42169 + tmp = kmalloc (len, GFP_KERNEL);
42172 + memcpy (tmp, s, len);
42176 +/* Instead of directly calling the send function below, the functions will
42177 + * create one of these.
42178 + * Which exist only because I cannot stick the lock_t onto two lists
42181 + * this could use some clean up.
42183 +typedef struct send_req_s {
42184 + struct list_head sr_list;
42185 + enum { sr_lock, sr_act, sr_cancel, sr_drop } type;
42186 + gulm_lock_t *who;
42188 + lock_table_t *lt;
42193 + * alloc_send_req -
42197 + * Returns: send_req_t
42200 +alloc_send_req (void)
42203 + tmp = kmalloc (sizeof (send_req_t), GFP_KERNEL);
42204 + GULM_ASSERT (tmp != NULL,); /* so evil.... */
42209 + * send_drop_exp -
42218 +send_drop_exp (gulm_fs_t * fs, lock_table_t * lt, char *name)
42222 + sr = alloc_send_req ();
42223 + INIT_LIST_HEAD (&sr->sr_list);
42224 + sr->type = sr_drop;
42228 + if (name != NULL) {
42229 + sr->name = my_strdup (name);
42234 + spin_lock (<->queue_sender);
42235 + list_add (&sr->sr_list, <->to_be_sent);
42236 + spin_unlock (<->queue_sender);
42238 + wake_up (<->send_wchan);
42243 + * add_lock_to_send_req_queue -
42251 +add_lock_to_send_req_queue (lock_table_t * lt, gulm_lock_t * lck, int type)
42255 + sr = alloc_send_req ();
42256 + INIT_LIST_HEAD (&sr->sr_list);
42262 + if (type != sr_cancel)
42263 + lck->in_to_be_sent = TRUE;
42267 + spin_lock (<->queue_sender);
42268 + list_add (&sr->sr_list, <->to_be_sent);
42269 + spin_unlock (<->queue_sender);
42271 + wake_up (<->send_wchan);
42281 +static __inline__ int
42282 +queue_empty (lock_table_t * lt)
42285 + spin_lock (<->queue_sender);
42286 + ret = list_empty (<->to_be_sent);
42287 + spin_unlock (<->queue_sender);
42292 + * lt_io_sender_thread -
42295 + * Right now, only gfs lock requests should go through this thread.
42296 + * Must look, May not even need this.
42297 + * well, it is nice to get the socket io off of what ever process the user
42298 + * is running that is going through gfs into here. ?is it?
42304 +lt_io_sender_thread (void *data)
42306 + lock_table_t *lt = (lock_table_t *) data;
42307 + struct list_head *tmp;
42308 + send_req_t *sr = NULL;
42311 + daemonize ("gulm_LT_sender");
42312 + lt->sender_task = current;
42313 + complete (<->startup);
42315 + while (lt->running) {
42317 + DECLARE_WAITQUEUE (__wait_chan, current);
42318 + current->state = TASK_INTERRUPTIBLE;
42319 + add_wait_queue (<->send_wchan, &__wait_chan);
42320 + if (queue_empty (lt))
42322 + remove_wait_queue (<->send_wchan, &__wait_chan);
42323 + current->state = TASK_RUNNING;
42325 + if (!lt->running)
42328 + /* check to make sure socket is ok. */
42329 + down (<->sender);
42331 + /* pop next item to be sent
42332 + * (it will get pushed back if there was problems.)
42334 + spin_lock (<->queue_sender);
42335 + if (list_empty (<->to_be_sent)) {
42336 + spin_unlock (<->queue_sender);
42337 + up (<->sender);
42340 + tmp = (<->to_be_sent)->prev;
42342 + spin_unlock (<->queue_sender);
42343 + sr = list_entry (tmp, send_req_t, sr_list);
42346 + if (sr->type == sr_lock) {
42347 + err = send_lock_req (sr->who);
42349 + sr->who->in_to_be_sent = FALSE;
42350 + unmark_and_release_lock (sr->who);
42352 + } else if (sr->type == sr_act) {
42353 + err = send_lock_action (sr->who, sr->who->action);
42355 + sr->who->in_to_be_sent = FALSE;
42356 + unmark_and_release_lock (sr->who);
42358 + } else if (sr->type == sr_cancel) {
42360 + lg_lock_cancel_req (gulm_cm.hookup, sr->who->key,
42361 + sr->who->keylen);
42363 + unmark_and_release_lock (sr->who);
42364 + } else if (sr->type == sr_drop) {
42365 + /* XXX sr->lt isn't really needed.
42366 + * just lt should be fine.
42367 + * look into it someday.
42369 + err = send_drop_exp_inter (sr->fs, sr->lt, sr->name);
42371 + log_err ("Unknown send_req type! %d\n", sr->type);
42373 + up (<->sender);
42375 + /* if no errors, remove from queue. */
42377 + if (sr->type == sr_drop && sr->name != NULL)
42378 + kfree (sr->name);
42382 + /* if errors, re-queue.
42383 + * the send_* funcs already reported the error, so we won't
42386 + spin_lock (<->queue_sender);
42387 + /* reset the pointers. otherwise things get weird. */
42388 + INIT_LIST_HEAD (&sr->sr_list);
42389 + list_add_tail (&sr->sr_list, <->to_be_sent);
42390 + spin_unlock (<->queue_sender);
42392 + current->state = TASK_INTERRUPTIBLE;
42393 + schedule_timeout (3 * HZ);
42395 + /* gotta break shit up.
42396 + * else this loops hard and fast.
42399 + } /* while( lt->running ) */
42401 + complete (<->startup);
42406 + * cancel_pending_sender -
42409 + * want to cancel a lock request that we haven't sent to the server yet.
42411 + * this must skip over unlock requests. (never cancel unlocks)
42416 +cancel_pending_sender (gulm_lock_t * lck)
42418 + lock_table_t *lt = &gulm_cm.ltpx;
42419 + struct list_head *tmp, *nxt;
42421 + int found = FALSE;
42423 + spin_lock (<->queue_sender);
42425 + list_for_each_safe (tmp, nxt, <->to_be_sent) {
42426 + sr = list_entry (tmp, send_req_t, sr_list);
42427 + if (sr->who == lck) { /* good enough? */
42428 + if (lck->req_type == sr_cancel)
42430 + if (lck->req_state == LM_ST_UNLOCKED)
42431 + continue; /*donot cancel unlocks */
42435 + lck->in_to_be_sent = FALSE;
42437 + /* Now we need to tell the waiting lock req that it got canceled.
42438 + * basically, we need to fake a lg_err_Canceled return....
42440 + lck->result = LM_OUT_CANCELED | lck->cur_state;
42441 + lck->actuallypending = FALSE;
42442 + lck->req_type = glck_nothing;
42443 + atomic_dec (<->locks_pending);
42444 +#ifndef USE_SYNC_LOCKING
42445 + send_async_reply (lck);
42447 + complete (&lck->actsleep);
42449 + unmark_and_release_lock (lck);
42454 + spin_unlock (<->queue_sender);
42459 + * gulm_lt_login_reply -
42468 +gulm_lt_login_reply (void *misc, uint32_t error, uint8_t which)
42470 + if (error != 0) {
42471 + gulm_cm.ltpx.running = FALSE;
42472 + log_err ("LTPX: Got a %d from the login request.\n", error);
42474 + log_msg (lgm_Network2, "Logged into local LTPX.\n");
42480 + * gulm_lt_logout_reply -
42487 +gulm_lt_logout_reply (void *misc)
42489 + gulm_cm.ltpx.running = FALSE;
42490 + log_msg (lgm_Network2, "Logged out of local LTPX.\n");
42495 + * gulm_lt_lock_state -
42509 +gulm_lt_lock_state (void *misc, uint8_t * key, uint16_t keylen,
42510 + uint8_t state, uint32_t flags, uint32_t error,
42511 + uint8_t * LVB, uint16_t LVBlen)
42513 + gulm_lock_t *lck;
42515 + if (key[0] == 'J') {
42516 + jid_state_reply (key, keylen, LVB, LVBlen);
42520 + if (!find_and_mark_lock (key, keylen, &lck)) {
42521 + log_err_lk (key, keylen, "Got a lock state reply for a lock "
42522 + "that we don't know of. state:%#x flags:%#x error:%#x\n",
42523 + state, flags, error);
42527 + lck->result = calc_lock_result (lck, state, error, flags);
42529 + if ((lck->result & LM_OUT_ST_MASK) != LM_ST_UNLOCKED &&
42530 + lck->lvb != NULL) {
42531 + memcpy (lck->lvb, LVB, MIN (lck->fs->lvb_size, LVBlen));
42534 + lck->actuallypending = FALSE;
42535 + lck->req_type = glck_nothing;
42536 + atomic_dec (&gulm_cm.ltpx.locks_pending);
42537 +#ifndef USE_SYNC_LOCKING
42538 + send_async_reply (lck);
42540 + complete (&lck->actsleep);
42543 + if (error != 0 && error != lg_err_TryFailed && error != lg_err_Canceled)
42544 + log_msg_lck (lck, "Error: %d:%s (req:%#x rpl:%#x lss:%#x)\n",
42545 + error, gio_Err_to_str (error),
42546 + lck->req_state, state, lck->last_suc_state);
42548 + unmark_and_release_lock (lck);
42553 + * gulm_lt_lock_action -
42564 +gulm_lt_lock_action (void *misc, uint8_t * key, uint16_t keylen,
42565 + uint8_t action, uint32_t error)
42567 + gulm_lock_t *lck;
42569 + if (key[0] == 'J') {
42570 + jid_action_reply (key, keylen);
42574 + if (!find_and_mark_lock (key, keylen, &lck)) {
42575 + log_err_lk (key, keylen, "Got a lock action reply for a lock "
42576 + "that we don't know of. action:%#x error:%#x\n",
42581 + if (action == lg_lock_act_HoldLVB ||
42582 + action == lg_lock_act_UnHoldLVB || action == lg_lock_act_SyncLVB) {
42584 + lck->result = error;
42585 + if (error != lg_err_Ok) {
42586 + log_err ("on action reply act:%d err:%d\n", action,
42589 + lck->req_type = glck_nothing;
42590 + lck->actuallypending = FALSE;
42591 + complete (&lck->actsleep);
42593 + log_err_lck (lck, "Got strange Action %#x\n", action);
42595 + unmark_and_release_lock (lck);
42600 + * gulm_lt_drop_lock_req -
42610 +gulm_lt_drop_lock_req (void *misc, uint8_t * key, uint16_t keylen,
42613 + gulm_lock_t *lck;
42615 + if (key[0] == 'J') {
42616 + jid_header_lock_drop (key, keylen);
42620 + if (!find_and_mark_lock (key, keylen, &lck)) {
42621 + log_err_lk (key, keylen, "Got a drop lcok request for a lock "
42622 + "that we don't know of. state:%#x\n", state);
42626 + do_drop_lock_req (lck->fs, state, key);
42628 + unmark_and_release_lock (lck);
42633 + * gulm_lt_drop_all -
42640 +gulm_lt_drop_all (void *misc)
42642 + passup_droplocks ();
42647 + * gulm_lt_error -
42655 +gulm_lt_error (void *misc, uint32_t err)
42657 + log_err ("LTPX: RANDOM ERROR %d\n", err);
42661 +static lg_lockspace_callbacks_t lock_cb = {
42662 + login_reply:gulm_lt_login_reply,
42663 + logout_reply:gulm_lt_logout_reply,
42664 + lock_state:gulm_lt_lock_state,
42665 + lock_action:gulm_lt_lock_action,
42666 + drop_lock_req:gulm_lt_drop_lock_req,
42667 + drop_all:gulm_lt_drop_all,
42668 + error:gulm_lt_error
42672 + * lt_io_recving_thread -
42679 +lt_io_recving_thread (void *data)
42681 + lock_table_t *lt = &gulm_cm.ltpx;
42684 + daemonize ("gulm_LT_recver");
42685 + lt->recver_task = current;
42686 + complete (&lt->startup);
42688 + while (lt->running) {
42689 + err = lg_lock_handle_messages (gulm_cm.hookup, &lock_cb, NULL);
42691 + log_err ("gulm_LT_recver err %d\n", err);
42692 + lt->running = FALSE; /* should stop the sender thread. */
42693 + wake_up (&lt->send_wchan);
42696 + } /* while( lt->running ) */
42698 + complete (&lt->startup);
42703 + * lt_logout - log out of all of the lock tables
42708 + lock_table_t *lt = &gulm_cm.ltpx;
42711 + if (lt->running) {
42712 + lt->running = FALSE;
42714 + /* stop sender thread */
42715 + wake_up (&lt->send_wchan);
42716 + wait_for_completion (&lt->startup);
42718 + /* stop recver thread */
42719 + down (&lt->sender);
42720 + err = lg_lock_logout (gulm_cm.hookup);
42721 + up (&lt->sender);
42723 + /* wait for thread to finish */
42724 + wait_for_completion (&lt->startup);
42730 + * lt_login - login to lock tables.
42738 + lock_table_t *lt = &gulm_cm.ltpx;
42742 + ("Trying to log into LTPX when it appears to be logged in!\n");
42744 + err = lg_lock_login (gulm_cm.hookup, "GFS ");
42746 + log_err ("Failed to send login request. %d\n", err);
42750 + /* start recver thread. */
42751 + lt->running = TRUE;
42752 + err = kernel_thread (lt_io_recving_thread, lt, 0);
42754 + log_err ("Failed to start gulm_lt_IOd. (%d)\n", err);
42757 + wait_for_completion (&lt->startup);
42759 + /* start sender thread */
42760 + err = kernel_thread (lt_io_sender_thread, lt, 0);
42762 + log_err ("Failed to start gulm_LT_sender. (%d)\n", err);
42765 + wait_for_completion (&lt->startup);
42770 + log_msg (lgm_Always, "Exiting lt_login. err:%d\n", err);
42774 +/****************************************************************************/
42777 + * internal_gulm_get_lock -
42784 + * Returns: 0 on success, -EXXX on failure
42787 +internal_gulm_get_lock (gulm_fs_t * fs, uint8_t * key, uint8_t keylen,
42788 + gulm_lock_t ** lockp)
42790 + int found = FALSE;
42792 + gulm_lock_t *lck = NULL;
42794 + found = find_and_mark_lock (key, keylen, &lck);
42796 + /* malloc space */
42798 + GULM_ASSERT (lck->magic_one == 0xAAAAAAAA,);
42800 + lck = kmalloc (sizeof (gulm_lock_t), GFP_KERNEL);
42801 + if (lck == NULL) {
42803 + ("fsid=%s: Out of memory for lock struct in get_lock!\n",
42807 + memset (lck, 0, sizeof (gulm_lock_t));
42808 + INIT_LIST_HEAD (&lck->gl_list);
42809 + atomic_set (&lck->count, 1);
42810 + lck->magic_one = 0xAAAAAAAA;
42812 + memcpy (lck->key, key, keylen);
42813 + lck->keylen = keylen;
42815 + init_completion (&lck->actsleep);
42816 + lck->actuallypending = FALSE;
42817 + lck->in_to_be_sent = FALSE;
42819 + lck->action = -1;
42820 + lck->req_type = glck_nothing;
42821 + lck->last_suc_state = LM_ST_UNLOCKED;
42823 + gulm_cm.ltpx.locks_total++;
42824 + gulm_cm.ltpx.locks_unl++;
42826 + bkt = hash_lock_key (key, keylen);
42827 + bkt %= gulm_cm.ltpx.hashbuckets;
42829 + spin_lock (&gulm_cm.ltpx.hshlk[bkt]);
42830 + list_add (&lck->gl_list, &gulm_cm.ltpx.lkhsh[bkt]);
42831 + spin_unlock (&gulm_cm.ltpx.hshlk[bkt]);
42840 + * gulm_get_lock -
42845 + * Returns: 0 on success, -EXXX on failure
42848 +gulm_get_lock (lm_lockspace_t * lockspace, struct lm_lockname *name,
42849 + lm_lock_t ** lockp)
42852 + gulm_fs_t *fs = (gulm_fs_t *) lockspace;
42853 + uint8_t key[GIO_KEY_SIZE];
42855 + /* i could add a per fs lock to force only one gulm_get_lock at a time.
42857 + down (&fs->get_lock);
42859 + memset (key, 0, GIO_KEY_SIZE);
42860 + /* pack lockname */
42861 + key[0] = 'G'; /* G: fs lock, F: First mounter, J: JID mapping lock */
42862 + key[1] = name->ln_type & 0xff;
42863 + key[2] = (name->ln_number >> 56) & 0xff;
42864 + key[3] = (name->ln_number >> 48) & 0xff;
42865 + key[4] = (name->ln_number >> 40) & 0xff;
42866 + key[5] = (name->ln_number >> 32) & 0xff;
42867 + key[6] = (name->ln_number >> 24) & 0xff;
42868 + key[7] = (name->ln_number >> 16) & 0xff;
42869 + key[8] = (name->ln_number >> 8) & 0xff;
42870 + key[9] = (name->ln_number >> 0) & 0xff;
42872 + /* Now stick the fsname into the remaining space. */
42873 + len = strlen (fs->fs_name);
42874 + strncpy (&key[10], fs->fs_name, GIO_KEY_SIZE - 16);
42876 + len = MIN (len, GIO_KEY_SIZE - 16);
42877 + len += 11; /* 10 for the encoded buf, 1 for the '\0' after the fs name */
42878 + err = internal_gulm_get_lock (fs, key, len, (gulm_lock_t **) lockp);
42880 + up (&fs->get_lock);
42886 + * gulm_put_lock -
42893 +gulm_put_lock (lm_lock_t * lock)
42895 + gulm_lock_t *lck = (gulm_lock_t *) lock;
42896 + lock_table_t *lt = &gulm_cm.ltpx;
42897 + gulm_fs_t *fs = lck->fs;
42899 + down (&fs->get_lock);
42901 + GULM_ASSERT (lt != NULL,);
42903 + if (lck->last_suc_state != LM_ST_UNLOCKED) {
42904 + log_err_lck (lck,
42905 + "fsid=%s: gulm_put_lock called on a lock that is not unlocked!"
42906 + " Current state:%#x\n", lck->fs->fs_name,
42907 + lck->last_suc_state);
42908 + /* I'm still not sure about this one. We should never see it, so I
42909 + * don't think it is that big of a deal, but i duno.
42911 + * Maybe should just make it an assertion.
42913 + * with the mark/unmark code, is it even a concern?
42917 + unmark_and_release_lock (lck);
42918 + /* lck = NULL; */
42920 + up (&fs->get_lock);
42925 +valid_trasition (unsigned int cur, unsigned int req)
42927 + int lock_state_changes[16] = { /* unl exl def shr */
42928 + FALSE, TRUE, TRUE, TRUE, /* unl */
42929 + TRUE, FALSE, TRUE, TRUE, /* exl */
42930 + TRUE, TRUE, FALSE, TRUE, /* def */
42931 + TRUE, TRUE, TRUE, FALSE /* shr */
42933 + GULM_ASSERT (cur < 4
42934 + && req < 4, log_err ("cur:%d req:%d\n", cur, req););
42936 + return (lock_state_changes[4 * cur + req]);
42940 + * verify_gulm_lock_t -
42943 + * wonder if I should add some other checks.
42948 +verify_gulm_lock_t (gulm_lock_t * lck)
42950 + if (lck == NULL) {
42951 + log_err ("Lock pointer was NULL!\n");
42954 + if (lck->fs == NULL) {
42955 + log_err ("This lock has no filesystem!!!\n");
42972 +gulm_lock (lm_lock_t * lock, unsigned int cur_state,
42973 + unsigned int req_state, unsigned int flags)
42975 + gulm_lock_t *lck = NULL;
42977 + lock_table_t *lt;
42979 + /* verify vars. */
42980 + lck = (gulm_lock_t *) lock;
42981 + if (verify_gulm_lock_t (lck) != 0) {
42984 + lt = &gulm_cm.ltpx;
42987 + GULM_ASSERT (valid_trasition (cur_state, req_state),
42988 + log_err_lck (lck, "want %d with %s thinks:%d\n", req_state,
42989 + (LM_FLAG_TRY & flags) ? "try" : (LM_FLAG_NOEXP
42991 + "noexp" : "no flags", cur_state);
42994 + GULM_ASSERT (lck->actuallypending == FALSE, dump_gulm_lock_t (lck););
42996 + /* save the details of this request. */
42997 + lck->req_type = glck_state;
42999 + lck->cur_state = cur_state;
43000 + lck->req_state = req_state;
43001 + lck->flags = flags;
43003 + /* moving these here fixes a race on the s390 that ben found.
43004 + * basically, the request was sent to the server, the server receives
43005 + * it, the server processes, the server sends a reply, the client
43006 + * receives the reply, and the client tries to processe the reply before
43007 + * this thread could mark it as actuallypending.
43009 + lck->actuallypending = TRUE;
43010 + atomic_inc (&lt->locks_pending);
43011 + add_lock_to_send_req_queue (lt, lck, sr_lock);
43014 +#ifdef USE_SYNC_LOCKING
43015 + wait_for_completion (&lck->actsleep);
43018 +#ifdef USE_SYNC_LOCKING
43019 + return lck->result;
43021 + return LM_OUT_ASYNC;
43034 +gulm_unlock (lm_lock_t * lock, unsigned int cur_state)
43037 + e = gulm_lock (lock, cur_state, LM_ST_UNLOCKED, 0);
43047 +gulm_cancel (lm_lock_t * lock)
43049 + gulm_lock_t *lck;
43051 + lock_table_t *lt;
43053 + /* verify vars. */
43054 + lck = (gulm_lock_t *) lock;
43055 + if (verify_gulm_lock_t (lck) != 0) {
43058 + lt = &gulm_cm.ltpx;
43061 + if (lck->actuallypending) {
43062 + if (lck->in_to_be_sent) {
43063 + /* this should pull the req out of the send queue and have it
43064 + * return with a cancel code without going to the server.
43066 + cancel_pending_sender (lck);
43068 + add_lock_to_send_req_queue (lt, lck, sr_cancel);
43071 + log_msg_lck (lck, "Cancel called with no pending request.\n");
43077 + * gulm_hold_lvb -
43082 + * Returns: 0 on success, -EXXX on failure
43085 +gulm_hold_lvb (lm_lock_t * lock, char **lvbp)
43087 + gulm_lock_t *lck;
43089 + lock_table_t *lt;
43092 + /* verify vars. */
43093 + lck = (gulm_lock_t *) lock;
43094 + if (verify_gulm_lock_t (lck) != 0) {
43097 + lt = &gulm_cm.ltpx;
43100 + /* what where these for? */
43101 + GULM_ASSERT (lck->magic_one == 0xAAAAAAAA,
43102 + log_msg_lck (lck, "Bad gulm_lock magic.\n"););
43103 + GULM_ASSERT (lt->magic_one == 0xAAAAAAAA,
43104 + log_msg_lck (lck, "Bad lock_table magic.\n"););
43106 + lvb_log_msg_lk (lck->key, "Entering gulm_hold_lvb\n");
43108 + GULM_ASSERT (lck->lvb == NULL,
43109 + log_msg_lck (lck,
43110 + "fsid=%s: Lvb data wasn't null! must be held "
43111 + "already.\n", fs->fs_name);
43114 + GULM_ASSERT (lck->actuallypending == FALSE, dump_gulm_lock_t (lck););
43116 + lck->lvb = kmalloc (fs->lvb_size, GFP_KERNEL);
43117 + if (lck->lvb == NULL) {
43121 + memset (lck->lvb, 0, fs->lvb_size);
43123 + lck->req_type = glck_action;
43124 + lck->action = lg_lock_act_HoldLVB;
43126 + lck->actuallypending = TRUE;
43127 + add_lock_to_send_req_queue (lt, lck, sr_act);
43129 + wait_for_completion (&lck->actsleep);
43131 + if (lck->result != lg_err_Ok) {
43132 + log_err ("fsid=%s: Got error %d on hold lvb request.\n",
43133 + fs->fs_name, lck->result);
43134 + kfree (lck->lvb);
43139 + lt->locks_lvbs++;
43141 + *lvbp = lck->lvb;
43143 + lvb_log_msg_lk (lck->key, "fsid=%s: Exiting gulm_hold_lvb\n",
43148 + log_msg (lgm_Always,
43149 + "fsid=%s: Exiting gulm_hold_lvb with errors (%d)\n",
43150 + fs->fs_name, err);
43155 + * gulm_unhold_lvb -
43163 +gulm_unhold_lvb (lm_lock_t * lock, char *lvb)
43165 + gulm_lock_t *lck = NULL;
43167 + lock_table_t *lt;
43169 + /* verify vars. */
43170 + lck = (gulm_lock_t *) lock;
43171 + if (verify_gulm_lock_t (lck) != 0) {
43174 + lt = &gulm_cm.ltpx;
43177 + GULM_ASSERT (lck->actuallypending == FALSE, dump_gulm_lock_t (lck););
43179 + if (lck->lvb != lvb) {
43180 + log_err ("fsid=%s: AH! LVB pointer missmatch! %p != %p\n",
43181 + fs->fs_name, lck->lvb, lvb);
43185 + lvb_log_msg_lk (lck->key, "Entering gulm_unhold_lvb\n");
43187 + lck->req_type = glck_action;
43188 + lck->action = lg_lock_act_UnHoldLVB;
43190 + lck->actuallypending = TRUE;
43191 + add_lock_to_send_req_queue (lt, lck, sr_act);
43193 + wait_for_completion (&lck->actsleep);
43195 + /* XXX ummm, is it sane to not free the memory if the command fails?
43196 + * gfs will still think that the lvb was dropped sucessfully....
43197 + * (it assumes it is always sucessful)
43198 + * Maybe I should retry the drop request then?
43200 + if (lck->result != lg_err_Ok) {
43201 + log_err ("fsid=%s: Got error %d on unhold LVB request.\n",
43202 + lck->fs->fs_name, lck->result);
43204 + if (lck->lvb != NULL)
43205 + kfree (lck->lvb);
43207 + lt->locks_lvbs--;
43210 + lvb_log_msg ("Exiting gulm_unhold_lvb\n");
43214 + * gulm_sync_lvb -
43218 + * umm, is this even used anymore? yes.
43223 +gulm_sync_lvb (lm_lock_t * lock, char *lvb)
43225 + gulm_lock_t *lck = NULL;
43227 + lock_table_t *lt;
43229 + /* verify vars. */
43230 + lck = (gulm_lock_t *) lock;
43231 + if (verify_gulm_lock_t (lck) != 0) {
43234 + lt = &gulm_cm.ltpx;
43237 + GULM_ASSERT (lck->actuallypending == FALSE, dump_gulm_lock_t (lck););
43239 + /* this check is also in the server, so it isn't really needed here. */
43240 + if (lck->last_suc_state != LM_ST_EXCLUSIVE) {
43241 + log_err ("sync_lvb: You must hold the lock Exclusive first.\n");
43242 + goto exit; /*cannot do anything */
43244 + if (lck->lvb == NULL) {
43245 + log_err ("sync_lvb: You forgot to call hold lvb first.\n");
43248 + if (lck->lvb != lvb) {
43249 + log_err ("fsid=%s: AH! LVB pointer missmatch! %p != %p\n",
43250 + fs->fs_name, lck->lvb, lvb);
43254 + lvb_log_msg_lk (lck->key, "Entering gulm_sync_lvb\n");
43256 + lck->req_type = glck_action;
43257 + lck->action = lg_lock_act_SyncLVB;
43259 + lck->actuallypending = TRUE;
43260 + add_lock_to_send_req_queue (lt, lck, sr_act);
43262 + wait_for_completion (&lck->actsleep);
43264 + /* XXX? retry if I get an error? */
43265 + if (lck->result != lg_err_Ok) {
43266 + log_err_lck (lck,
43267 + "fsid=%s: Got error %d:%s on Sync LVB request.\n",
43268 + fs->fs_name, lck->result,
43269 + gio_Err_to_str (lck->result));
43272 + lvb_log_msg ("Exiting gulm_sync_lvb\n");
43275 +/*****************************************************************************/
43277 +gulm_plock_get (lm_lockspace_t * lockspace,
43278 + struct lm_lockname *name, unsigned long owner,
43279 + uint64_t * start, uint64_t * end, int *exclusive,
43280 + unsigned long *rowner)
43286 +gulm_plock (lm_lockspace_t * lockspace,
43287 + struct lm_lockname *name, unsigned long owner,
43288 + int wait, int exclusive, uint64_t start, uint64_t end)
43294 +gulm_punlock (lm_lockspace_t * lockspace,
43295 + struct lm_lockname *name, unsigned long owner,
43296 + uint64_t start, uint64_t end)
43301 +/****************************************************************************/
43302 +/****************************************************************************/
43303 +/****************************************************************************/
43304 +/* should move the firstmounter lock stuff into its own file perhaps? */
43306 + * get_special_lock -
43307 + * @fs: <> filesystem we're getting special lock for
43309 + * Returns: gulm_lock_t
43311 +STATIC gulm_lock_t *
43312 +get_special_lock (gulm_fs_t * fs)
43315 + gulm_lock_t *lck = NULL;
43316 + uint8_t key[GIO_KEY_SIZE];
43318 + /* pack lockname */
43319 + memset (key, 0, GIO_KEY_SIZE);
43320 + /* The F at the beginning doesn't mash with the G that prefixes every fs
43323 + memcpy (key, "FirstMount", 10);
43324 + len = strlen (fs->fs_name);
43325 + strncpy (&key[10], fs->fs_name, GIO_KEY_SIZE - 21);
43326 + len = MIN (len, GIO_KEY_SIZE - 21);
43329 + err = internal_gulm_get_lock (fs, key, len, &lck);
43331 + /* return pointer */
43336 + * do_lock_time_out -
43339 + * after timeout, set cancel request on the handler queue. (since we cannot
43340 + * call it from within the timer code.
43344 +do_lock_time_out (unsigned long d)
43346 + gulm_lock_t *lck = (gulm_lock_t *) d;
43347 + qu_function_call (&lck->fs->cq, gulm_cancel, lck);
43351 + * get_mount_lock -
43355 + * Get the Firstmount lock.
43356 + * We try to grab it Exl. IF we get that, then we are the first client
43357 + * mounting this fs. Otherwise we grab it shared to show that there are
43358 + * clients using this fs.
43363 +get_mount_lock (gulm_fs_t * fs, int *first)
43366 + struct timer_list locktimeout;
43367 + gulm_lock_t *lck = NULL;
43369 + * first we need to get the lock into the hash.
43370 + * then we can try to get it Exl with try and noexp.
43371 + * if the try fails, grab it shared.
43374 + lck = get_special_lock (fs); /* there is only a mount lock. */
43375 + if (lck == NULL) {
43380 + fs->mountlock = lck;
43382 + *first = FALSE; /* assume we're not first */
43384 + err = gulm_lock (lck, LM_ST_UNLOCKED, LM_ST_EXCLUSIVE,
43385 + LM_FLAG_TRY | LM_FLAG_NOEXP);
43386 +#ifndef USE_SYNC_LOCKING
43387 + wait_for_completion (&fs->sleep);
43390 + if ((lck->result & LM_OUT_ST_MASK) == LM_ST_EXCLUSIVE) {
43391 + /* we got the lock, we're the first mounter. */
43393 + log_msg (lgm_locking, "fsid=%s: Got mount lock Exclusive.\n",
43396 + } else if ((lck->result & LM_OUT_ST_MASK) == LM_ST_UNLOCKED) {
43397 + log_msg (lgm_locking,
43398 + "fsid=%s: Didn't get mount lock Exl, someone else "
43399 + "was first, trying for shared.\n", fs->fs_name);
43401 + /* the try failed, pick it up shared. */
43402 + /* There was a case (bug #220) where we could hang here.
43404 + * To handle this, we put up a timer for a couple of
43405 + * minutes. That if it trips, it cancels our shared
43406 + * request. Which we then see, so we go back and try the
43407 + * EXL again. If the Firstmounter is fine and is just
43408 + * taking a damn long time to do its work, this just ends
43409 + * back here, no worse for the wear.
43411 + * Another way to do this, is to wait for a killed message
43412 + * for the master. When we get that, && we're pending
43413 + * shared here, send the gulm_canel for the mounter lock.
43414 + * (too bad we are not in the fs list yet at this point.
43415 + * (well, maybe that *isn't* a bad thing))
43417 + init_timer (&locktimeout);
43418 + locktimeout.function = do_lock_time_out;
43419 + locktimeout.data = (unsigned long) lck;
43420 + mod_timer (&locktimeout, jiffies + (120 * HZ));
43421 + err = gulm_lock (lck, LM_ST_UNLOCKED, LM_ST_SHARED, 0);
43422 +#ifndef USE_SYNC_LOCKING
43423 + wait_for_completion (&fs->sleep);
43425 + del_timer (&locktimeout);
43427 + if ((lck->result & LM_OUT_ST_MASK) == LM_ST_SHARED) {
43428 + /* kewl we got it. */
43429 + log_msg (lgm_locking,
43430 + "fsid=%s: Got mount lock shared.\n",
43435 + log_msg (lgm_locking,
43436 + "fsid=%s: Shared req timed out, trying Exl again.\n",
43438 + goto try_it_again;
43441 + log_err ("Exit get_mount_lock err=%d\n", err);
43446 + * downgrade_mount_lock -
43449 + * drop the Firstmount lock down to shared. This lets other mount.
43454 +downgrade_mount_lock (gulm_fs_t * fs)
43457 + gulm_lock_t *lck = (gulm_lock_t *) fs->mountlock;
43458 + /* we were first, so we have it exl.
43459 + * shift it to shared so others may mount.
43461 + err = gulm_lock (lck, LM_ST_EXCLUSIVE, LM_ST_SHARED, LM_FLAG_NOEXP);
43462 +#ifndef USE_SYNC_LOCKING
43463 + wait_for_completion (&fs->sleep);
43466 + if ((lck->result & LM_OUT_ST_MASK) != LM_ST_SHARED) {
43468 + ("fsid=%s: Couldn't downgrade mount lock to shared!!!!!\n",
43475 + * drop_mount_lock - drop our hold on the firstmount lock.
43476 + * @fs: <> the filesystem pointer.
43481 +drop_mount_lock (gulm_fs_t * fs)
43484 + gulm_lock_t *lck = (gulm_lock_t *) fs->mountlock;
43486 + if (fs->mountlock == NULL) {
43487 + log_err ("fsid=%s: There's no Mount lock!!!!!\n", fs->fs_name);
43490 + err = gulm_unlock (lck, LM_ST_SHARED);
43491 +#ifndef USE_SYNC_LOCKING
43492 + wait_for_completion (&fs->sleep);
43495 + if (lck->result != LM_ST_UNLOCKED)
43496 + log_err ("fsid=%s: Couldn't unlock mount lock!!!!!!\n",
43498 + gulm_put_lock (fs->mountlock);
43499 + fs->mountlock = NULL;
43503 +/*****************************************************************************/
43504 +struct lm_lockops gulm_ops = {
43505 + lm_proto_name:PROTO_NAME,
43506 + lm_mount:gulm_mount,
43507 + lm_others_may_mount:gulm_others_may_mount,
43508 + lm_unmount:gulm_unmount,
43509 + lm_get_lock:gulm_get_lock,
43510 + lm_put_lock:gulm_put_lock,
43511 + lm_lock:gulm_lock,
43512 + lm_unlock:gulm_unlock,
43513 + lm_cancel:gulm_cancel,
43514 + lm_hold_lvb:gulm_hold_lvb,
43515 + lm_unhold_lvb:gulm_unhold_lvb,
43516 + lm_sync_lvb:gulm_sync_lvb,
43517 + lm_plock_get:gulm_plock_get,
43518 + lm_plock:gulm_plock,
43519 + lm_punlock:gulm_punlock,
43520 + lm_recovery_done:gulm_recovery_done,
43521 + lm_owner:THIS_MODULE,
43523 +/* vim: set ai cin noet sw=8 ts=8 : */
43524 diff -urN linux-orig/fs/gfs_locking/lock_gulm/gulm_prints.h linux-patched/fs/gfs_locking/lock_gulm/gulm_prints.h
43525 --- linux-orig/fs/gfs_locking/lock_gulm/gulm_prints.h 1969-12-31 18:00:00.000000000 -0600
43526 +++ linux-patched/fs/gfs_locking/lock_gulm/gulm_prints.h 2004-06-16 12:03:21.957894998 -0500
43528 +/******************************************************************************
43529 +*******************************************************************************
43531 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
43532 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
43534 +** This copyrighted material is made available to anyone wishing to use,
43535 +** modify, copy, or redistribute it subject to the terms and conditions
43536 +** of the GNU General Public License v.2.
43538 +*******************************************************************************
43539 +******************************************************************************/
43541 +#ifndef __gulm_prints_h__
43542 +#define __gulm_prints_h__
43543 +#include "gulm_log_msg_bits.h"
43545 +#define PROTO_NAME "lock_gulm"
43547 +#ifdef GULM_ASSERT
43548 +#undef GULM_ASSERT
43550 +#define GULM_ASSERT(x, do) \
43554 + printk("\n"PROTO_NAME": Assertion failed on line %d of file %s\n" \
43555 + PROTO_NAME": assertion: \"%s\"\n", \
43556 + __LINE__, __FILE__, #x ); \
43558 + panic("\n"PROTO_NAME": Record message above and reboot.\n"); \
43562 +#define log_msg(v, fmt, args...) if(((v)&gulm_cm.verbosity)==(v)||(v)==lgm_Always) {\
43563 + printk(PROTO_NAME ": " fmt, ## args); \
43565 +#define log_err(fmt, args...) {\
43566 + printk(KERN_ERR PROTO_NAME ": ERROR " fmt, ## args); \
43569 +#define log_nop(fmt, args...)
43570 +#define TICK printk("TICK==>" PROTO_NAME ": [%s:%d] pid:%ld\n",__FILE__,__LINE__,osi_pid())
43572 +#endif /*__gulm_prints_h__*/
43573 diff -urN linux-orig/fs/gfs_locking/lock_gulm/gulm_procinfo.c linux-patched/fs/gfs_locking/lock_gulm/gulm_procinfo.c
43574 --- linux-orig/fs/gfs_locking/lock_gulm/gulm_procinfo.c 1969-12-31 18:00:00.000000000 -0600
43575 +++ linux-patched/fs/gfs_locking/lock_gulm/gulm_procinfo.c 2004-06-16 12:03:21.957894998 -0500
43577 +/******************************************************************************
43578 +*******************************************************************************
43580 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
43581 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
43583 +** This copyrighted material is made available to anyone wishing to use,
43584 +** modify, copy, or redistribute it subject to the terms and conditions
43585 +** of the GNU General Public License v.2.
43587 +*******************************************************************************
43588 +******************************************************************************/
43591 +#include <linux/kernel.h>
43592 +#include <linux/proc_fs.h>
43595 +extern gulm_cm_t gulm_cm;
43597 +struct proc_dir_entry *gulm_proc_dir;
43598 +struct proc_dir_entry *gulm_fs_proc_dir;
43600 +/* the read operating function. */
43602 +gulm_fs_proc_read (char *buf, char **start, off_t off, int count, int *eof,
43605 + gulm_fs_t *fs = (gulm_fs_t *) data;
43606 + count = 0; /* ignore how much it wants */
43608 + count += sprintf (buf + count, "Filesystem: %s\nJID: %d\n"
43609 + "handler_queue_cur: %d\n"
43610 + "handler_queue_max: %d\n",
43611 + fs->fs_name, fs->fsJID,
43612 + fs->cq.task_count, fs->cq.task_max);
43615 + if (off >= count)
43617 + *start = buf + off;
43618 + return (count - off);
43621 +/* read the stuff for all */
43623 +gulm_core_proc_read (char *buf, char **start, off_t off, int count,
43624 + int *eof, void *data)
43626 + count = 0; /* ignore how much it wants */
43628 + count = sprintf (buf,
43629 + "cluster id: %s\n"
43630 + "my name: %s\n", gulm_cm.clusterID, gulm_cm.myName);
43633 + if (off >= count)
43635 + *start = buf + off;
43636 + return (count - off);
43640 +gulm_lt_proc_read (char *buf, char **start, off_t off, int count,
43641 + int *eof, void *data)
43643 + lock_table_t *lt = (lock_table_t *) data;
43644 + count = 0; /* ignore how much it wants */
43646 + count += sprintf (buf + count, "\n"
43661 + atomic_read (&lt->locks_pending),
43662 + lt->locks_lvbs, lt->lops);
43665 + if (off >= count)
43667 + *start = buf + off;
43668 + return (count - off);
43671 +/* add entry to our proc folder
43672 + * call this on mount.
43675 +add_to_proc (gulm_fs_t * fs)
43677 + if (!(create_proc_read_entry (fs->fs_name, S_IFREG | S_IRUGO,
43678 + gulm_fs_proc_dir, gulm_fs_proc_read,
43680 + log_err ("couldn't register proc entry for %s\n", fs->fs_name);
43687 + * this on umount.
43690 +remove_from_proc (gulm_fs_t * fs)
43692 + remove_proc_entry (fs->fs_name, gulm_fs_proc_dir);
43695 + /* create our own root dir.
43699 +init_proc_dir (void)
43701 + if ((gulm_proc_dir = proc_mkdir ("gulm", &proc_root)) == NULL) {
43702 + log_err ("cannot create the gulm directory in /proc\n");
43705 + if (!(create_proc_read_entry ("core", S_IFREG | S_IRUGO, gulm_proc_dir,
43706 + gulm_core_proc_read, NULL))) {
43707 + log_err ("couldn't register proc entry for core\n");
43708 + remove_proc_entry ("gulm", &proc_root);
43711 + if ((gulm_fs_proc_dir =
43712 + proc_mkdir ("filesystems", gulm_proc_dir)) == NULL) {
43714 + ("cannot create the filesystems directory in /proc/gulm\n");
43715 + remove_proc_entry ("core", gulm_proc_dir);
43716 + remove_proc_entry ("gulm", &proc_root);
43719 + if (!(create_proc_read_entry ("lockspace", S_IFREG | S_IRUGO,
43720 + gulm_proc_dir, gulm_lt_proc_read,
43721 + (void *) &gulm_cm.ltpx))) {
43722 + remove_proc_entry ("filesystems", gulm_proc_dir);
43723 + remove_proc_entry ("core", gulm_proc_dir);
43724 + remove_proc_entry ("gulm", &proc_root);
43735 +remove_proc_dir (void)
43737 + remove_proc_entry ("lockspace", gulm_proc_dir);
43738 + remove_proc_entry ("filesystems", gulm_proc_dir);
43739 + remove_proc_entry ("core", gulm_proc_dir);
43740 + remove_proc_entry ("gulm", &proc_root);
43742 diff -urN linux-orig/fs/gfs_locking/lock_gulm/gulm_procinfo.h linux-patched/fs/gfs_locking/lock_gulm/gulm_procinfo.h
43743 --- linux-orig/fs/gfs_locking/lock_gulm/gulm_procinfo.h 1969-12-31 18:00:00.000000000 -0600
43744 +++ linux-patched/fs/gfs_locking/lock_gulm/gulm_procinfo.h 2004-06-16 12:03:21.958894765 -0500
43746 +/******************************************************************************
43747 +*******************************************************************************
43749 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
43750 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
43752 +** This copyrighted material is made available to anyone wishing to use,
43753 +** modify, copy, or redistribute it subject to the terms and conditions
43754 +** of the GNU General Public License v.2.
43756 +*******************************************************************************
43757 +******************************************************************************/
43759 +#ifndef __procinfo_h__
43760 +#define __procinfo_h__
43761 +int add_to_proc (gulm_fs_t * fs);
43762 +void remove_from_proc (gulm_fs_t * fs);
43763 +void remove_locktables_from_proc (void);
43764 +void add_locktables_to_proc (void);
43765 +int init_proc_dir (void);
43766 +void remove_proc_dir (void);
43767 +#endif /*__procinfo_h__*/
43768 diff -urN linux-orig/fs/gfs_locking/lock_gulm/handler.c linux-patched/fs/gfs_locking/lock_gulm/handler.c
43769 --- linux-orig/fs/gfs_locking/lock_gulm/handler.c 1969-12-31 18:00:00.000000000 -0600
43770 +++ linux-patched/fs/gfs_locking/lock_gulm/handler.c 2004-06-16 12:03:21.958894765 -0500
43772 +/******************************************************************************
43773 +*******************************************************************************
43775 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
43776 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
43778 +** This copyrighted material is made available to anyone wishing to use,
43779 +** modify, copy, or redistribute it subject to the terms and conditions
43780 +** of the GNU General Public License v.2.
43782 +*******************************************************************************
43783 +******************************************************************************/
43787 +#include <linux/kernel.h>
43788 +#include <linux/slab.h>
43789 +#include <linux/fs.h>
43790 +#include <linux/smp_lock.h>
43791 +#define __KERNEL_SYSCALLS__
43792 +#include <linux/unistd.h>
43794 +#include "handler.h"
43796 +/* things about myself
43797 + * mostly just for verbosity here.
43799 +extern gulm_cm_t gulm_cm;
43801 +/* the task struct */
43802 +typedef struct runtask_s {
43803 + struct list_head rt_list;
43806 + lm_callback_t cb;
43807 + lm_fsdata_t *fsdata;
43810 + unsigned int lmtype;
43815 +#define LM_CB_GULM_FN 169
43816 +#if LM_CB_GULM_FN == LM_CB_NEED_E || \
43817 + LM_CB_GULM_FN == LM_CB_NEED_D || \
43818 + LM_CB_GULM_FN == LM_CB_NEED_S || \
43819 + LM_CB_GULM_FN == LM_CB_NEED_RECOVERY || \
43820 + LM_CB_GULM_FN == LM_CB_DROPLOCKS || \
43821 + LM_CB_GULM_FN == LM_CB_ASYNC
43822 +#error "LM_CB_GULM_FN collision with other LM_CB_*"
43825 +static __inline__ int
43826 +queue_empty (callback_qu_t * cq)
43829 + spin_lock (&cq->list_lock);
43830 + ret = list_empty (&cq->run_tasks);
43831 + spin_unlock (&cq->list_lock);
43845 + callback_qu_t *cq = (callback_qu_t *) d;
43847 + struct list_head *tmp;
43848 + struct lm_lockname lockname;
43849 + struct lm_async_cb acb;
43851 + daemonize ("gulm_Cb_Handler");
43852 + atomic_inc (&cq->num_threads);
43853 + complete (&cq->startup);
43855 + while (cq->running) {
43857 + DECLARE_WAITQUEUE (__wait_chan, current);
43858 + current->state = TASK_INTERRUPTIBLE;
43859 + add_wait_queue (&cq->waiter, &__wait_chan);
43860 + if (queue_empty (cq))
43862 + remove_wait_queue (&cq->waiter, &__wait_chan);
43863 + current->state = TASK_RUNNING;
43866 + if (!cq->running)
43868 + /* remove item from list */
43869 + spin_lock (&cq->list_lock);
43870 + if (list_empty (&cq->run_tasks)) {
43871 + spin_unlock (&cq->list_lock);
43872 + continue; /* nothing here. move on */
43874 + /* take items off the end of the list, since we add them to the
43877 + tmp = (&cq->run_tasks)->prev;
43879 + cq->task_count--;
43880 + spin_unlock (&cq->list_lock);
43882 + rt = list_entry (tmp, runtask_t, rt_list);
43884 + if (rt->type == LM_CB_ASYNC) {
43885 + acb.lc_name.ln_number = rt->lmnum;
43886 + acb.lc_name.ln_type = rt->lmtype;
43887 + acb.lc_ret = rt->result;
43888 + rt->cb (rt->fsdata, rt->type, &acb);
43889 + } else if (rt->type == LM_CB_GULM_FN) {
43890 + rt->fn (rt->fsdata);
43892 + lockname.ln_number = rt->lmnum;
43893 + lockname.ln_type = rt->lmtype;
43894 + rt->cb (rt->fsdata, rt->type, &lockname);
43899 + } /*while(running) */
43901 + atomic_dec (&cq->num_threads);
43902 + complete (&cq->startup);
43907 + * display_handler_queue -
43910 + * remember, items are added to the head, and removed from the tail.
43911 + * So the last item listed, is the next item to be handled.
43915 +display_handler_queue (callback_qu_t * cq)
43917 + struct list_head *lltmp;
43920 + log_msg (lgm_Always, "Dumping Handler queue with %d items, max %d\n",
43921 + cq->task_count, cq->task_max);
43922 + spin_lock (&cq->list_lock);
43923 + list_for_each (lltmp, &cq->run_tasks) {
43924 + rt = list_entry (lltmp, runtask_t, rt_list);
43925 + if (rt->type == LM_CB_ASYNC) {
43926 + log_msg (lgm_Always,
43927 + "%4d ASYNC (%" PRIu64 ", %u) result:%#x\n",
43928 + i, rt->lmnum, rt->lmtype, rt->result);
43929 + } else if (rt->type == LM_CB_GULM_FN) {
43930 + log_msg (lgm_Always, "%4d GULM FN func:%p data:%p\n",
43931 + i, rt->fn, rt->fsdata);
43932 + } else { /* callback. */
43933 + log_msg (lgm_Always,
43934 + "%4d CALLBACK req:%u (%" PRIu64 ", %u)\n", i,
43935 + rt->type, rt->lmnum, rt->lmtype);
43939 + spin_unlock (&cq->list_lock);
43943 + * alloc_runtask -
43944 + * Returns: runtask_t
43947 +alloc_runtask (void)
43950 + rt = kmalloc (sizeof (runtask_t), GFP_KERNEL);
43955 + * qu_function_call -
43960 + * Generic function execing on the handler thread. Mostly so I can add
43961 + * single things quick without having to build all the details into the
43962 + * handler queues.
43967 +qu_function_call (callback_qu_t * cq, gulm_fn fn, void *data)
43970 + rt = alloc_runtask ();
43975 + rt->fsdata = data;
43976 + rt->type = LM_CB_GULM_FN;
43980 + INIT_LIST_HEAD (&rt->rt_list);
43981 + spin_lock (&cq->list_lock);
43982 + list_add (&rt->rt_list, &cq->run_tasks);
43983 + cq->task_count++;
43984 + if (cq->task_count > cq->task_max)
43985 + cq->task_max = cq->task_count;
43986 + spin_unlock (&cq->list_lock);
43987 + wake_up (&cq->waiter);
44003 +qu_async_rpl (callback_qu_t * cq, lm_callback_t cb, lm_fsdata_t * fsdata,
44004 + struct lm_lockname *lockname, int result)
44007 + rt = alloc_runtask ();
44011 + rt->fsdata = fsdata;
44012 + rt->type = LM_CB_ASYNC;
44013 + rt->lmtype = lockname->ln_type;
44014 + rt->lmnum = lockname->ln_number;
44015 + rt->result = result;
44016 + INIT_LIST_HEAD (&rt->rt_list);
44017 + spin_lock (&cq->list_lock);
44018 + list_add (&rt->rt_list, &cq->run_tasks);
44019 + cq->task_count++;
44020 + if (cq->task_count > cq->task_max)
44021 + cq->task_max = cq->task_count;
44022 + spin_unlock (&cq->list_lock);
44023 + wake_up (&cq->waiter);
44030 + * Returns: <0:Error; =0:Ok
44033 +qu_drop_req (callback_qu_t * cq, lm_callback_t cb, lm_fsdata_t * fsdata,
44034 + int type, uint8_t lmtype, uint64_t lmnum)
44037 + rt = alloc_runtask ();
44041 + rt->fsdata = fsdata;
44043 + rt->lmtype = lmtype;
44044 + rt->lmnum = lmnum;
44046 + INIT_LIST_HEAD (&rt->rt_list);
44047 + spin_lock (&cq->list_lock);
44048 + list_add (&rt->rt_list, &cq->run_tasks);
44049 + cq->task_count++;
44050 + if (cq->task_count > cq->task_max)
44051 + cq->task_max = cq->task_count;
44052 + spin_unlock (&cq->list_lock);
44053 + wake_up (&cq->waiter);
44058 + * stop_callback_qu - stop the handler thread
44061 +stop_callback_qu (callback_qu_t * cq)
44063 + struct list_head *lltmp, *tmp;
44066 + if (cq->running) {
44067 + cq->running = FALSE;
44068 + /* make sure all threads stop.
44070 + while (atomic_read (&cq->num_threads) > 0) {
44071 + wake_up (&cq->waiter);
44072 + wait_for_completion (&cq->startup);
44074 + /* clear out any left overs. */
44075 + list_for_each_safe (tmp, lltmp, &cq->run_tasks) {
44076 + rt = list_entry (tmp, runtask_t, rt_list);
44084 + * start_callback_qu -
44086 + * Returns: <0:Error, >=0:Ok
44089 +start_callback_qu (callback_qu_t * cq, int cnt)
44092 + INIT_LIST_HEAD (&cq->run_tasks);
44093 + spin_lock_init (&cq->list_lock);
44094 + init_completion (&cq->startup);
44095 + init_waitqueue_head (&cq->waiter);
44096 + atomic_set (&cq->num_threads, 0);
44097 + cq->running = TRUE;
44098 + cq->task_count = 0;
44099 + cq->task_max = 0;
44102 + for (; cnt > 0; cnt--) {
44103 + err = kernel_thread (handler, cq, 0); /* XXX linux part */
44105 + stop_callback_qu (cq);
44106 + /* calling stop here might not behave correctly in all error
44111 + wait_for_completion (&cq->startup);
44115 diff -urN linux-orig/fs/gfs_locking/lock_gulm/handler.h linux-patched/fs/gfs_locking/lock_gulm/handler.h
44116 --- linux-orig/fs/gfs_locking/lock_gulm/handler.h 1969-12-31 18:00:00.000000000 -0600
44117 +++ linux-patched/fs/gfs_locking/lock_gulm/handler.h 2004-06-16 12:03:21.958894765 -0500
44119 +/******************************************************************************
44120 +*******************************************************************************
44122 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
44123 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
44125 +** This copyrighted material is made available to anyone wishing to use,
44126 +** modify, copy, or redistribute it subject to the terms and conditions
44127 +** of the GNU General Public License v.2.
44129 +*******************************************************************************
44130 +******************************************************************************/
44132 +#ifndef __handler_c__
44133 +#define __handler_c__
44134 +#include <linux/lm_interface.h>
44136 +struct callback_qu_s {
44137 + struct completion startup;
44141 + struct list_head run_tasks;
44142 + spinlock_t list_lock;
44143 + wait_queue_head_t waiter;
44144 + atomic_t num_threads;
44146 +typedef struct callback_qu_s callback_qu_t;
44148 +/* kinda an excess overloading */
44149 +typedef void (*gulm_fn) (void *);
44150 +int qu_function_call (callback_qu_t * cq, gulm_fn fn, void *data);
44152 +int qu_async_rpl (callback_qu_t * cq, lm_callback_t cb, lm_fsdata_t * fsdata,
44153 + struct lm_lockname *lockname, int result);
44154 +int qu_drop_req (callback_qu_t * cq, lm_callback_t cb, lm_fsdata_t * fsdata,
44155 + int type, uint8_t lmtype, uint64_t lmnum);
44156 +int start_callback_qu (callback_qu_t * cq, int cnt);
44157 +void stop_callback_qu (callback_qu_t * cq);
44158 +void display_handler_queue (callback_qu_t * cq);
44160 +#endif /*__handler_c__*/
44161 diff -urN linux-orig/fs/gfs_locking/lock_gulm/lg_core.c linux-patched/fs/gfs_locking/lock_gulm/lg_core.c
44162 --- linux-orig/fs/gfs_locking/lock_gulm/lg_core.c 1969-12-31 18:00:00.000000000 -0600
44163 +++ linux-patched/fs/gfs_locking/lock_gulm/lg_core.c 2004-06-16 12:03:21.958894765 -0500
44165 +/******************************************************************************
44166 +*******************************************************************************
44168 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
44169 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
44171 +** This copyrighted material is made available to anyone wishing to use,
44172 +** modify, copy, or redistribute it subject to the terms and conditions
44173 +** of the GNU General Public License v.2.
44175 +*******************************************************************************
44176 +******************************************************************************/
44178 +/* All of the core related functions for services are here. */
44180 +#include "lg_priv.h"
44183 + * lg_core_selector -
44184 + * @ulm_interface_p:
44190 +lg_core_selector (gulm_interface_p lgp)
44192 + gulm_interface_t *lg = (gulm_interface_t *) lgp;
44193 + /* make sure it is a gulm_interface_p. */
44194 + if (lg == NULL || lg->first_magic != LGMAGIC
44195 + || lg->last_magic != LGMAGIC)
44202 + return lg->core_fd;
44206 + * lg_core_handle_messages -
44207 + * @ulm_interface_p:
44208 + * @lg_core_callbacks_t:
44214 +lg_core_handle_messages (gulm_interface_p lgp, lg_core_callbacks_t * ccbp,
44217 + gulm_interface_t *lg = (gulm_interface_t *) lgp;
44221 + uint32_t x_code, x_error, x_rank;
44222 + struct in6_addr x_ip;
44223 + uint8_t x_state, x_mode;
44225 + /* make sure it is a gulm_interface_p. */
44228 + if (lg->first_magic != LGMAGIC || lg->last_magic != LGMAGIC)
44231 + if (lg->core_enc == NULL || lg->core_dec == NULL)
44234 + down (&lg->core_recver);
44235 + if (lg->in_core_hm)
44237 + lg->in_core_hm = TRUE;
44238 + up (&lg->core_recver);
44240 + dec = lg->core_dec;
44242 + err = xdr_dec_uint32 (dec, &x_code);
44246 + if (gulm_core_login_rpl == x_code) {
44248 + if ((err = xdr_dec_uint64 (dec, &x_gen)) < 0)
44250 + if ((err = xdr_dec_uint32 (dec, &x_error)) < 0)
44252 + if ((err = xdr_dec_uint32 (dec, &x_rank)) < 0)
44254 + if ((err = xdr_dec_uint8 (dec, &x_state)) < 0)
44259 + if (ccbp->login_reply == NULL) {
44263 + err = ccbp->login_reply (misc, x_gen, x_error, x_rank, x_state);
44265 + } else if (gulm_core_logout_rpl == x_code) {
44266 + if ((err = xdr_dec_uint32 (dec, &x_error)) != 0)
44268 + if (ccbp->logout_reply != NULL) {
44269 + err = ccbp->logout_reply (misc);
44272 + xdr_close (&lg->core_fd);
44273 + xdr_enc_release (lg->core_enc);
44274 + lg->core_enc = NULL;
44275 + xdr_dec_release (lg->core_dec);
44276 + lg->core_dec = NULL;
44279 + } else if (gulm_core_mbr_lstrpl == x_code) {
44280 + if (ccbp->nodelist != NULL) {
44281 + err = ccbp->nodelist (misc, lglcb_start, NULL, 0, 0);
44286 + if ((err = xdr_dec_list_start (dec)) != 0)
44288 + while (xdr_dec_list_stop (dec) != 0) {
44290 + xdr_dec_string_ag (dec, &lg->cfba,
44291 + &lg->cfba_len)) != 0)
44293 + if ((err = xdr_dec_ipv6 (dec, &x_ip)) != 0)
44295 + if ((err = xdr_dec_uint8 (dec, &x_state)) != 0)
44297 + if ((err = xdr_dec_uint8 (dec, &x_mode)) != 0)
44299 + if ((err = xdr_dec_uint8 (dec, &x_mode)) != 0)
44301 + if ((err = xdr_dec_uint32 (dec, &x_rank)) != 0)
44303 + if ((err = xdr_dec_uint64 (dec, &x_gen)) != 0)
44305 + if ((err = xdr_dec_uint64 (dec, &x_gen)) != 0)
44307 + if ((err = xdr_dec_uint64 (dec, &x_gen)) != 0)
44310 + if (ccbp->nodelist != NULL) {
44312 + ccbp->nodelist (misc, lglcb_item,
44324 + if (ccbp->nodelist == NULL) {
44328 + err = ccbp->nodelist (misc, lglcb_stop, NULL, 0, 0);
44330 + } else if (gulm_core_state_chgs == x_code) {
44332 + if ((err = xdr_dec_uint8 (dec, &x_state)) != 0)
44334 + if (x_state == gio_Mbr_ama_Slave) {
44335 + if ((err = xdr_dec_ipv6 (dec, &x_ip)) != 0)
44338 + xdr_dec_string_ag (dec, &lg->cfba,
44339 + &lg->cfba_len)) != 0)
44346 + if (ccbp->statechange == NULL) {
44350 + err = ccbp->statechange (misc, x_state, &x_ip, lg->cfba);
44352 + } else if (gulm_core_mbr_updt == x_code) {
44355 + xdr_dec_string_ag (dec, &lg->cfba,
44356 + &lg->cfba_len)) != 0)
44358 + if ((err = xdr_dec_ipv6 (dec, &x_ip)) != 0)
44360 + if ((err = xdr_dec_uint8 (dec, &x_state)) != 0)
44366 + if (ccbp->nodechange == NULL) {
44370 + err = ccbp->nodechange (misc, lg->cfba, &x_ip, x_state);
44372 + } else if (gulm_core_res_list == x_code) {
44373 + if (ccbp->service_list != NULL) {
44375 + ccbp->service_list (misc, lglcb_start, NULL)) != 0)
44379 + if ((err = xdr_dec_list_start (dec)) != 0)
44381 + while (xdr_dec_list_stop (dec)) {
44383 + xdr_dec_string_ag (dec, &lg->cfba,
44384 + &lg->cfba_len)) != 0)
44386 + if (ccbp->service_list != NULL) {
44388 + ccbp->service_list (misc,
44400 + if (ccbp->service_list == NULL) {
44404 + err = ccbp->service_list (misc, lglcb_stop, NULL);
44406 + } else if (gulm_info_stats_rpl == x_code) {
44407 + if (ccbp->status != NULL) {
44409 + ccbp->status (misc, lglcb_start, NULL, NULL)) != 0)
44413 + if ((err = xdr_dec_list_start (dec)) != 0)
44415 + while (xdr_dec_list_stop (dec) != 0) {
44417 + xdr_dec_string_ag (dec, &lg->cfba,
44418 + &lg->cfba_len)) != 0)
44421 + xdr_dec_string_ag (dec, &lg->cfbb,
44422 + &lg->cfbb_len)) != 0)
44424 + if (ccbp->status != NULL) {
44426 + ccbp->status (misc, lglcb_item,
44428 + lg->cfbb)) != 0) {
44437 + if (ccbp->status == NULL) {
44441 + err = ccbp->status (misc, lglcb_stop, NULL, NULL);
44443 + } else if (gulm_err_reply == x_code) {
44444 + if ((err = xdr_dec_uint32 (dec, &x_code)) != 0)
44446 + if ((err = xdr_dec_uint32 (dec, &x_error)) != 0)
44448 + if (ccbp->error == NULL) {
44452 + err = ccbp->error (misc, x_error);
44455 + /* unknown code. what to do? */
44461 + lg->in_core_hm = FALSE;
44466 + * lg_core_login -
44470 + * On any error, things are closed and released to the state of things
44471 + * before you called login.
44476 +lg_core_login (gulm_interface_p lgp, int important)
44478 + gulm_interface_t *lg = (gulm_interface_t *) lgp;
44479 + struct sockaddr_in6 adr;
44485 + /* make sure it is a gulm_interface_p. */
44488 + if (lg->first_magic != LGMAGIC || lg->last_magic != LGMAGIC)
44491 + adr.sin6_family = AF_INET6;
44492 + adr.sin6_addr = in6addr_loopback;
44493 + adr.sin6_port = htons (lg->core_port);
44495 + if ((err = xdr_open (&cfd)) < 0) {
44499 + if ((err = xdr_connect (&adr, cfd)) < 0) {
44500 + xdr_close (&cfd);
44504 + enc = xdr_enc_init (cfd, 128);
44505 + if (enc == NULL) {
44506 + xdr_close (&cfd);
44510 + dec = xdr_dec_init (cfd, 128);
44511 + if (enc == NULL) {
44512 + xdr_enc_release (enc);
44513 + xdr_close (&cfd);
44518 + if ((err = xdr_enc_uint32 (enc, gulm_core_reslgn_req)) < 0)
44520 + if ((err = xdr_enc_uint32 (enc, GIO_WIREPROT_VERS)) < 0)
44522 + if ((err = xdr_enc_string (enc, lg->clusterID)) < 0)
44524 + if ((err = xdr_enc_string (enc, lg->service_name)) < 0)
44527 + xdr_enc_uint32 (enc,
44528 + important ? gulm_svc_opt_important : 0)) !=
44531 + if ((err = xdr_enc_flush (enc)) < 0)
44535 + xdr_dec_release (dec);
44536 + xdr_enc_release (enc);
44537 + xdr_close (&cfd);
44541 + down (&lg->core_sender);
44542 + lg->core_fd = cfd;
44543 + lg->core_enc = enc;
44544 + lg->core_dec = dec;
44545 + up (&lg->core_sender);
44551 + * lg_core_logout -
44558 +lg_core_logout (gulm_interface_p lgp)
44560 + gulm_interface_t *lg = (gulm_interface_t *) lgp;
44564 + /* make sure it is a gulm_interface_p. */
44567 + if (lg->first_magic != LGMAGIC || lg->last_magic != LGMAGIC)
44570 + if (lg->core_fd < 0 || lg->core_enc == NULL || lg->core_dec == NULL)
44573 + enc = lg->core_enc;
44575 + down (&lg->core_sender);
44577 + if ((err = xdr_enc_uint32 (enc, gulm_core_logout_req)) != 0)
44579 + if ((err = xdr_enc_string (enc, lg->service_name)) != 0)
44581 + if ((err = xdr_enc_uint8 (enc, gio_Mbr_ama_Resource)) != 0)
44583 + if ((err = xdr_enc_flush (enc)) != 0)
44586 + up (&lg->core_sender);
44591 + * lg_core_nodeinfo -
44599 +lg_core_nodeinfo (gulm_interface_p lgp, char *nodename)
44601 + gulm_interface_t *lg = (gulm_interface_t *) lgp;
44605 + /* make sure it is a gulm_interface_p. */
44608 + if (lg->first_magic != LGMAGIC || lg->last_magic != LGMAGIC)
44611 + if (lg->core_fd < 0 || lg->core_enc == NULL || lg->core_dec == NULL)
44614 + if (nodename == NULL)
44617 + enc = lg->core_enc;
44619 + down (&lg->core_sender);
44621 + if ((err = xdr_enc_uint32 (enc, gulm_core_mbr_req)) != 0)
44623 + if ((err = xdr_enc_string (enc, nodename)) != 0)
44625 + if ((err = xdr_enc_flush (enc)) != 0)
44628 + up (&lg->core_sender);
44633 + * lg_core_nodelist -
44640 +lg_core_nodelist (gulm_interface_p lgp)
44642 + gulm_interface_t *lg = (gulm_interface_t *) lgp;
44646 + /* make sure it is a gulm_interface_p. */
44649 + if (lg->first_magic != LGMAGIC || lg->last_magic != LGMAGIC)
44652 + if (lg->core_fd < 0 || lg->core_enc == NULL || lg->core_dec == NULL)
44655 + enc = lg->core_enc;
44657 + down (&lg->core_sender);
44659 + if ((err = xdr_enc_uint32 (enc, gulm_core_mbr_lstreq)) != 0)
44661 + if ((err = xdr_enc_flush (enc)) != 0)
44664 + up (&lg->core_sender);
44669 + * lg_core_servicelist -
44676 +lg_core_servicelist (gulm_interface_p lgp)
44678 + gulm_interface_t *lg = (gulm_interface_t *) lgp;
44682 + /* make sure it is a gulm_interface_p. */
44685 + if (lg->first_magic != LGMAGIC || lg->last_magic != LGMAGIC)
44688 + if (lg->core_fd < 0 || lg->core_enc == NULL || lg->core_dec == NULL)
44691 + enc = lg->core_enc;
44693 + down (&lg->core_sender);
44695 + if ((err = xdr_enc_uint32 (enc, gulm_core_res_req)) != 0)
44697 + if ((err = xdr_enc_flush (enc)) != 0)
44700 + up (&lg->core_sender);
44705 + * lg_core_corestate -
44712 +lg_core_corestate (gulm_interface_p lgp)
44714 + gulm_interface_t *lg = (gulm_interface_t *) lgp;
44718 + /* make sure it is a gulm_interface_p. */
44721 + if (lg->first_magic != LGMAGIC || lg->last_magic != LGMAGIC)
44724 + if (lg->core_fd < 0 || lg->core_enc == NULL || lg->core_dec == NULL)
44727 + enc = lg->core_enc;
44729 + down (&lg->core_sender);
44731 + if ((err = xdr_enc_uint32 (enc, gulm_core_state_req)) != 0)
44733 + if ((err = xdr_enc_flush (enc)) != 0)
44736 + up (&lg->core_sender);
44741 + * lg_core_shutdown -
44748 +lg_core_shutdown (gulm_interface_p lgp)
44750 + gulm_interface_t *lg = (gulm_interface_t *) lgp;
44754 + /* make sure it is a gulm_interface_p. */
44757 + if (lg->first_magic != LGMAGIC || lg->last_magic != LGMAGIC)
44760 + if (lg->core_fd < 0 || lg->core_enc == NULL || lg->core_dec == NULL)
44763 + enc = lg->core_enc;
44765 + down (&lg->core_sender);
44767 + if ((err = xdr_enc_uint32 (enc, gulm_core_shutdown)) != 0)
44769 + if ((err = xdr_enc_flush (enc)) != 0)
44772 + up (&lg->core_sender);
44777 + * lg_core_forceexpire -
44785 +lg_core_forceexpire (gulm_interface_p lgp, char *nodename)
44787 + gulm_interface_t *lg = (gulm_interface_t *) lgp;
44791 + /* make sure it is a gulm_interface_p. */
44794 + if (lg->first_magic != LGMAGIC || lg->last_magic != LGMAGIC)
44797 + if (lg->core_fd < 0 || lg->core_enc == NULL || lg->core_dec == NULL)
44800 + if (nodename == NULL)
44803 + enc = lg->core_enc;
44805 + down (&lg->core_sender);
44807 + if ((err = xdr_enc_uint32 (enc, gulm_core_mbr_force)) != 0)
44809 + if ((err = xdr_enc_string (enc, nodename)) != 0)
44811 + if ((err = xdr_enc_flush (enc)) != 0)
44814 + up (&lg->core_sender);
44819 + * lg_core_forcepending -
44826 +lg_core_forcepending (gulm_interface_p lgp)
44828 + gulm_interface_t *lg = (gulm_interface_t *) lgp;
44832 + /* make sure it is a gulm_interface_p. */
44835 + if (lg->first_magic != LGMAGIC || lg->last_magic != LGMAGIC)
44838 + if (lg->core_fd < 0 || lg->core_enc == NULL || lg->core_dec == NULL)
44841 + enc = lg->core_enc;
44843 + down (&lg->core_sender);
44845 + if ((err = xdr_enc_uint32 (enc, gulm_core_forcepend)) != 0)
44847 + if ((err = xdr_enc_flush (enc)) != 0)
44850 + up (&lg->core_sender);
44855 + * lg_core_status -
44862 +lg_core_status (gulm_interface_p lgp)
44864 + gulm_interface_t *lg = (gulm_interface_t *) lgp;
44868 + /* make sure it is a gulm_interface_p. */
44871 + if (lg->first_magic != LGMAGIC || lg->last_magic != LGMAGIC)
44874 + if (lg->core_fd < 0 || lg->core_enc == NULL || lg->core_dec == NULL)
44877 + enc = lg->core_enc;
44879 + down (&lg->core_sender);
44881 + if ((err = xdr_enc_uint32 (enc, gulm_info_stats_req)) != 0)
44883 + if ((err = xdr_enc_flush (enc)) != 0)
44886 + up (&lg->core_sender);
44889 diff -urN linux-orig/fs/gfs_locking/lock_gulm/lg_lock.c linux-patched/fs/gfs_locking/lock_gulm/lg_lock.c
44890 --- linux-orig/fs/gfs_locking/lock_gulm/lg_lock.c 1969-12-31 18:00:00.000000000 -0600
44891 +++ linux-patched/fs/gfs_locking/lock_gulm/lg_lock.c 2004-06-16 12:03:21.958894765 -0500
44893 +/******************************************************************************
44894 +*******************************************************************************
44896 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
44897 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
44899 +** This copyrighted material is made available to anyone wishing to use,
44900 +** modify, copy, or redistribute it subject to the terms and conditions
44901 +** of the GNU General Public License v.2.
44903 +*******************************************************************************
44904 +******************************************************************************/
44906 +/* All of the lock related functions are here. */
44907 +#include "lg_priv.h"
44910 + * lg_lock_selector -
44911 + * @ulm_interface_p:
44917 +lg_lock_selector (gulm_interface_p lgp)
44919 + gulm_interface_t *lg = (gulm_interface_t *) lgp;
44920 + /* make sure it is a gulm_interface_p. */
44921 + if (lg == NULL || lg->first_magic != LGMAGIC
44922 + || lg->last_magic != LGMAGIC)
44929 + return lg->lock_fd;
44933 + * lg_lock_handle_messages -
44934 + * @ulm_interface_p:
44935 + * @lg_lockspace_callbacks_t:
44940 +lg_lock_handle_messages (gulm_interface_p lgp, lg_lockspace_callbacks_t * cbp,
44943 + gulm_interface_t *lg = (gulm_interface_t *) lgp;
44946 + uint32_t x_code, x_error, x_flags;
44947 + uint16_t x_keylen, x_lvblen = 0;
44950 + /* make sure it is a gulm_interface_p. */
44953 + if (lg->first_magic != LGMAGIC || lg->last_magic != LGMAGIC)
44956 + if (lg->core_enc == NULL || lg->core_dec == NULL)
44959 + down (&lg->lock_recver);
44960 + if (lg->in_lock_hm)
44962 + lg->in_lock_hm = TRUE;
44963 + up (&lg->lock_recver);
44965 + dec = lg->lock_dec;
44967 + err = xdr_dec_uint32 (dec, &x_code);
44971 + if (gulm_lock_login_rpl == x_code) {
44973 + if ((err = xdr_dec_uint32 (dec, &x_error)) != 0)
44975 + if ((err = xdr_dec_uint8 (dec, &x_state)) != 0)
44980 + if (cbp->login_reply == NULL) {
44984 + err = cbp->login_reply (misc, x_error, x_state);
44986 + } else if (gulm_lock_logout_rpl == x_code) {
44987 + if (cbp->logout_reply != NULL) {
44988 + err = cbp->logout_reply (misc);
44991 + xdr_close (&lg->lock_fd);
44992 + xdr_enc_release (lg->lock_enc);
44993 + lg->lock_enc = NULL;
44994 + xdr_dec_release (lg->lock_dec);
44995 + lg->lock_dec = NULL;
44998 + } else if (gulm_lock_state_rpl == x_code) {
45001 + xdr_dec_raw_ag (dec, (void **) &lg->lfba,
45002 + &lg->lfba_len, &x_keylen)) != 0)
45004 + if ((err = xdr_dec_uint8 (dec, &x_state)) != 0)
45006 + if ((err = xdr_dec_uint32 (dec, &x_flags)) != 0)
45008 + if ((err = xdr_dec_uint32 (dec, &x_error)) != 0)
45010 + if (x_flags & gio_lck_fg_hasLVB) {
45012 + xdr_dec_raw_ag (dec, (void **) &lg->lfbb,
45014 + &x_lvblen)) != 0)
45021 + if (x_keylen <= 4) {
45022 + err = -EPROTO; /* or something */
45025 + if (cbp->lock_state == NULL) {
45029 + err = cbp->lock_state (misc, &lg->lfba[4], x_keylen - 4,
45030 + x_state, x_flags, x_error,
45031 + lg->lfbb, x_lvblen);
45033 + } else if (gulm_lock_action_rpl == x_code) {
45036 + xdr_dec_raw_ag (dec, (void **) &lg->lfba,
45037 + &lg->lfba_len, &x_keylen)) != 0)
45039 + if ((err = xdr_dec_uint8 (dec, &x_state)) != 0)
45041 + if ((err = xdr_dec_uint32 (dec, &x_error)) != 0)
45047 + if (x_keylen <= 4) {
45048 + err = -EPROTO; /* or something */
45051 + if (cbp->lock_action == NULL) {
45056 + cbp->lock_action (misc, &lg->lfba[4], x_keylen - 4, x_state,
45059 + } else if (gulm_lock_cb_state == x_code) {
45062 + xdr_dec_raw_ag (dec, (void **) &lg->lfba,
45063 + &lg->lfba_len, &x_keylen)) != 0)
45065 + if ((err = xdr_dec_uint8 (dec, &x_state)) != 0)
45071 + if (cbp->drop_lock_req == NULL) {
45076 + cbp->drop_lock_req (misc, &lg->lfba[4], x_keylen - 4,
45079 + } else if (gulm_lock_cb_dropall == x_code) {
45080 + if (cbp->drop_all == NULL) {
45084 + err = cbp->drop_all (misc);
45086 + } else if (gulm_info_stats_rpl == x_code) {
45087 + if (cbp->status != NULL) {
45089 + cbp->status (misc, lglcb_start, NULL, NULL)) != 0)
45093 + if ((err = xdr_dec_list_start (dec)) != 0)
45095 + while (xdr_dec_list_stop (dec) != 0) {
45097 + xdr_dec_string_ag (dec, &lg->lfba,
45098 + &lg->lfba_len)) != 0)
45101 + xdr_dec_string_ag (dec, &lg->lfbb,
45102 + &lg->lfbb_len)) != 0)
45104 + if (cbp->status != NULL) {
45106 + cbp->status (misc, lglcb_item,
45108 + lg->lfbb)) != 0) {
45117 + if (cbp->status == NULL) {
45121 + err = cbp->status (misc, lglcb_stop, NULL, NULL);
45123 + } else if (gulm_err_reply == x_code) {
45125 + if ((err = xdr_dec_uint32 (dec, &x_code)) != 0)
45127 + if ((err = xdr_dec_uint32 (dec, &x_error)) != 0)
45132 + if (cbp->error == NULL) {
45136 + err = cbp->error (misc, x_error);
45144 + lg->in_lock_hm = FALSE;
45149 + * lg_lock_login -
45150 + * @ulm_interface_p:
45157 +lg_lock_login (gulm_interface_p lgp, uint8_t lockspace[4])
45159 + gulm_interface_t *lg = (gulm_interface_t *) lgp;
45160 + struct sockaddr_in6 adr;
45166 + /* make sure it is a gulm_interface_p. */
45169 + if (lg->first_magic != LGMAGIC || lg->last_magic != LGMAGIC)
45172 + adr.sin6_family = AF_INET6;
45173 + adr.sin6_addr = in6addr_loopback;
45174 + adr.sin6_port = htons (lg->lock_port);
45176 + if ((err = xdr_open (&cfd)) < 0) {
45180 + if ((err = xdr_connect (&adr, cfd)) < 0) {
45181 + xdr_close (&cfd);
45185 + enc = xdr_enc_init (cfd, 512);
45186 + if (enc == NULL) {
45187 + xdr_close (&cfd);
45191 + dec = xdr_dec_init (cfd, 512);
45192 + if (enc == NULL) {
45193 + xdr_enc_release (enc);
45194 + xdr_close (&cfd);
45199 + if ((err = xdr_enc_uint32 (enc, gulm_lock_login_req)) < 0)
45201 + if ((err = xdr_enc_uint32 (enc, GIO_WIREPROT_VERS)) < 0)
45203 + if ((err = xdr_enc_string (enc, lg->service_name)) < 0)
45205 + if ((err = xdr_enc_uint8 (enc, gio_lck_st_Client)) < 0)
45207 + if ((err = xdr_enc_flush (enc)) < 0)
45210 + if ((err = xdr_enc_uint32 (enc, gulm_lock_sel_lckspc)) < 0)
45212 + if ((err = xdr_enc_raw (enc, lockspace, 4)) < 0)
45214 + /* don't flush here.
45215 + * dumb programmer stunt. This way, the lockspace selection won't
45216 + * happen until the next thing the user of this lib sends. Which
45217 + * means it will be after we have received the login reply.
45219 + * Is there really a good reason not to flush here?
45223 + xdr_dec_release (dec);
45224 + xdr_enc_release (enc);
45225 + xdr_close (&cfd);
45229 + down (&lg->lock_sender);
45230 + lg->lock_fd = cfd;
45231 + lg->lock_enc = enc;
45232 + lg->lock_dec = dec;
45234 + memcpy (lg->lockspace, lockspace, 4);
45235 + up (&lg->lock_sender);
45241 + * lg_lock_logout -
45242 + * @ulm_interface_p:
45248 +lg_lock_logout (gulm_interface_p lgp)
45250 + gulm_interface_t *lg = (gulm_interface_t *) lgp;
45254 + /* make sure it is a gulm_interface_p. */
45257 + if (lg->first_magic != LGMAGIC || lg->last_magic != LGMAGIC)
45260 + if (lg->lock_fd < 0 || lg->lock_enc == NULL || lg->lock_dec == NULL)
45263 + enc = lg->lock_enc;
45265 + down (&lg->lock_sender);
45267 + if ((err = xdr_enc_uint32 (enc, gulm_lock_logout_req)) != 0)
45269 + if ((err = xdr_enc_flush (enc)) != 0)
45272 + up (&lg->lock_sender);
45277 + * lg_lock_state_req -
45290 +lg_lock_state_req (gulm_interface_p lgp, uint8_t * key, uint16_t keylen,
45291 + uint8_t state, uint32_t flags, uint8_t * LVB,
45294 + gulm_interface_t *lg = (gulm_interface_t *) lgp;
45295 + struct iovec iov[2];
45297 + uint32_t iflgs = 0;
45300 + /* make sure it is a gulm_interface_p. */
45303 + if (lg->first_magic != LGMAGIC || lg->last_magic != LGMAGIC)
45306 + if (lg->lock_fd < 0 || lg->lock_enc == NULL || lg->lock_dec == NULL)
45309 + if (state != lg_lock_state_Unlock &&
45310 + state != lg_lock_state_Exclusive &&
45311 + state != lg_lock_state_Deferred && state != lg_lock_state_Shared)
45314 + /* make sure only the accepted flags get passed through. */
45315 + if ((flags & lg_lock_flag_DoCB) == lg_lock_flag_DoCB)
45316 + iflgs |= lg_lock_flag_DoCB;
45317 + if ((flags & lg_lock_flag_Try) == lg_lock_flag_Try)
45318 + iflgs |= lg_lock_flag_Try;
45319 + if ((flags & lg_lock_flag_Any) == lg_lock_flag_Any)
45320 + iflgs |= lg_lock_flag_Any;
45321 + if ((flags & lg_lock_flag_IgnoreExp) == lg_lock_flag_IgnoreExp)
45322 + iflgs |= lg_lock_flag_IgnoreExp;
45323 + if ((flags & lg_lock_flag_Piority) == lg_lock_flag_Piority)
45324 + iflgs |= lg_lock_flag_Piority;
45326 + enc = lg->lock_enc;
45328 + if (LVB != NULL && LVBlen > 0)
45329 + iflgs |= gio_lck_fg_hasLVB;
45331 + iov[0].iov_base = lg->lockspace;
45332 + iov[0].iov_len = 4;
45333 + iov[1].iov_base = key;
45334 + iov[1].iov_len = keylen;
45336 + down (&lg->lock_sender);
45338 + if ((err = xdr_enc_uint32 (enc, gulm_lock_state_req)) != 0)
45340 + if ((err = xdr_enc_raw_iov (enc, 2, iov)) != 0)
45342 + if ((err = xdr_enc_uint8 (enc, state)) != 0)
45344 + if ((err = xdr_enc_uint32 (enc, iflgs)) != 0)
45346 + if (iflgs & gio_lck_fg_hasLVB)
45347 + if ((err = xdr_enc_raw (enc, LVB, LVBlen)) != 0)
45349 + if ((err = xdr_enc_flush (enc)) != 0)
45352 + up (&lg->lock_sender);
45357 + * lg_lock_cancel_req -
45366 +lg_lock_cancel_req (gulm_interface_p lgp, uint8_t * key, uint16_t keylen)
45368 + gulm_interface_t *lg = (gulm_interface_t *) lgp;
45369 + struct iovec iov[2];
45373 + /* make sure it is a gulm_interface_p. */
45376 + if (lg->first_magic != LGMAGIC || lg->last_magic != LGMAGIC)
45379 + if (lg->lock_fd < 0 || lg->lock_enc == NULL || lg->lock_dec == NULL)
45382 + enc = lg->lock_enc;
45384 + iov[0].iov_base = lg->lockspace;
45385 + iov[0].iov_len = 4;
45386 + iov[1].iov_base = key;
45387 + iov[1].iov_len = keylen;
45389 + down (&lg->lock_sender);
45391 + if ((err = xdr_enc_uint32 (enc, gulm_lock_action_req)) != 0)
45393 + if ((err = xdr_enc_raw_iov (enc, 2, iov)) != 0)
45395 + if ((err = xdr_enc_uint8 (enc, gio_lck_st_Cancel)) != 0)
45397 + if ((err = xdr_enc_flush (enc)) != 0)
45400 + up (&lg->lock_sender);
45405 + * lg_lock_action_req -
45414 + * I wonder if I should actually break this into three separate calls for
45415 + * the lvb stuff. Does it really matter?
45420 +lg_lock_action_req (gulm_interface_p lgp, uint8_t * key, uint16_t keylen,
45421 + uint8_t action, uint8_t * LVB, uint16_t LVBlen)
45423 + gulm_interface_t *lg = (gulm_interface_t *) lgp;
45424 + struct iovec iov[2];
45428 + /* make sure it is a gulm_interface_p. */
45431 + if (lg->first_magic != LGMAGIC || lg->last_magic != LGMAGIC)
45434 + if (lg->lock_fd < 0 || lg->lock_enc == NULL || lg->lock_dec == NULL)
45437 + if (action != lg_lock_act_HoldLVB &&
45438 + action != lg_lock_act_UnHoldLVB && action != lg_lock_act_SyncLVB)
45441 + enc = lg->lock_enc;
45443 + iov[0].iov_base = lg->lockspace;
45444 + iov[0].iov_len = 4;
45445 + iov[1].iov_base = key;
45446 + iov[1].iov_len = keylen;
45448 + down (&lg->lock_sender);
45450 + if ((err = xdr_enc_uint32 (enc, gulm_lock_action_req)) != 0)
45452 + if ((err = xdr_enc_raw_iov (enc, 2, iov)) != 0)
45454 + if ((err = xdr_enc_uint8 (enc, action)) != 0)
45456 + if (action == gio_lck_st_SyncLVB)
45457 + if ((err = xdr_enc_raw (enc, LVB, LVBlen)) != 0)
45459 + if ((err = xdr_enc_flush (enc)) != 0)
45462 + up (&lg->lock_sender);
45467 + * lg_lock_drop_exp -
45468 + * @ulm_interface_p:
45473 + * holder is the node name of the expired holder that you want to clear.
45474 + * Only locks matching the keymask will be looked at. (most of the time you
45475 + * will just set key to a bunch of 0xff to match all) The keymask lets you
45476 + * basically subdivide your lockspace into smaller separate parts.
45477 + * (example, there is one gfs lockspace, but each filesystem gets its own
45478 + * subpart of that larger space)
45480 + * If holder is NULL, all expired holders in your lockspace will get
45486 +lg_lock_drop_exp (gulm_interface_p lgp, uint8_t * holder, uint8_t * key,
45489 + gulm_interface_t *lg = (gulm_interface_t *) lgp;
45490 + struct iovec iov[2];
45494 + /* make sure it is a gulm_interface_p. */
45497 + if (lg->first_magic != LGMAGIC || lg->last_magic != LGMAGIC)
45500 + if (lg->lock_fd < 0 || lg->lock_enc == NULL || lg->lock_dec == NULL)
45503 + enc = lg->lock_enc;
45505 + iov[0].iov_base = lg->lockspace;
45506 + iov[0].iov_len = 4;
45507 + iov[1].iov_base = key;
45508 + iov[1].iov_len = (key != NULL) ? keylen : 0;
45510 + down (&lg->lock_sender);
45512 + if ((err = xdr_enc_uint32 (enc, gulm_lock_drop_exp)) != 0)
45514 + if ((err = xdr_enc_string (enc, holder)) != 0)
45516 + if ((err = xdr_enc_raw_iov (enc, 2, iov)) != 0)
45518 + if ((err = xdr_enc_flush (enc)) != 0)
45521 + up (&lg->lock_sender);
45526 + * lg_lock_status -
45533 +lg_lock_status (gulm_interface_p lgp)
45535 + gulm_interface_t *lg = (gulm_interface_t *) lgp;
45539 + /* make sure it is a gulm_interface_p. */
45542 + if (lg->first_magic != LGMAGIC || lg->last_magic != LGMAGIC)
45545 + if (lg->lock_fd < 0 || lg->lock_enc == NULL || lg->lock_dec == NULL)
45548 + enc = lg->lock_enc;
45550 + down (&lg->lock_sender);
45552 + if ((err = xdr_enc_uint32 (enc, gulm_info_stats_req)) != 0)
45554 + if ((err = xdr_enc_flush (enc)) != 0)
45557 + up (&lg->lock_sender);
45560 diff -urN linux-orig/fs/gfs_locking/lock_gulm/lg_main.c linux-patched/fs/gfs_locking/lock_gulm/lg_main.c
45561 --- linux-orig/fs/gfs_locking/lock_gulm/lg_main.c 1969-12-31 18:00:00.000000000 -0600
45562 +++ linux-patched/fs/gfs_locking/lock_gulm/lg_main.c 2004-06-16 12:03:21.958894765 -0500
45564 +/******************************************************************************
45565 +*******************************************************************************
45567 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
45568 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
45570 +** This copyrighted material is made available to anyone wishing to use,
45571 +** modify, copy, or redistribute it subject to the terms and conditions
45572 +** of the GNU General Public License v.2.
45574 +*******************************************************************************
45575 +******************************************************************************/
45577 +/* This is where all of the library specific functions exist.
45578 + * Not many, but keeps things clean.
45581 +#include "lg_priv.h"
45583 +extern gulm_cm_t gulm_cm;
45586 + * lg_initialize -
45587 + * @gulm_interface_p:
45591 + * if returning an error, nothing was done to the value of gulm_interface_p
45593 + * Returns: gulm_interface_p
45596 +lg_initialize (gulm_interface_p * ret, char *cluster_name, char *service_name)
45598 + gulm_interface_t *lg;
45601 + lg = kmalloc (sizeof (gulm_interface_t), GFP_KERNEL);
45605 + memset (lg, 0, sizeof (gulm_interface_t));
45606 + lg->first_magic = LGMAGIC;
45607 + lg->last_magic = LGMAGIC;
45609 + if (cluster_name == NULL)
45610 + cluster_name = "cluster";
45611 + len = strlen (cluster_name) + 1;
45612 + lg->clusterID = kmalloc (len, GFP_KERNEL);
45613 + if (lg->clusterID == NULL) {
45617 + memcpy (lg->clusterID, cluster_name, len);
45619 + len = strlen (service_name) + 1;
45620 + lg->service_name = kmalloc (len, GFP_KERNEL);
45621 + if (lg->service_name == NULL) {
45625 + memcpy (lg->service_name, service_name, len);
45627 + /* set up flutter bufs. */
45628 + lg->cfba_len = 64;
45629 + lg->cfba = kmalloc (lg->cfba_len, GFP_KERNEL);
45630 + if (lg->cfba == NULL) {
45635 + lg->cfbb_len = 64;
45636 + lg->cfbb = kmalloc (lg->cfbb_len, GFP_KERNEL);
45637 + if (lg->cfbb == NULL) {
45642 + lg->lfba_len = 128;
45643 + lg->lfba = kmalloc (lg->lfba_len, GFP_KERNEL);
45644 + if (lg->lfba == NULL) {
45649 + lg->lfbb_len = 128;
45650 + lg->lfbb = kmalloc (lg->lfbb_len, GFP_KERNEL);
45651 + if (lg->lfbb == NULL) {
45656 + /* setup mutexes */
45657 + init_MUTEX (&lg->core_sender);
45658 + init_MUTEX (&lg->core_recver);
45659 + init_MUTEX (&lg->lock_sender);
45660 + init_MUTEX (&lg->lock_recver);
45662 + lg->core_port = 40040;
45663 + lg->lock_port = 40042;
45668 + if (lg->clusterID != NULL)
45669 + kfree (lg->clusterID);
45670 + if (lg->service_name != NULL)
45671 + kfree (lg->service_name);
45672 + if (lg->cfba != NULL)
45673 + kfree (lg->cfba);
45674 + if (lg->cfbb != NULL)
45675 + kfree (lg->cfbb);
45676 + if (lg->lfba != NULL)
45677 + kfree (lg->lfba);
45678 + if (lg->lfbb != NULL)
45679 + kfree (lg->lfbb);
45690 +lg_release (gulm_interface_p lgp)
45692 + gulm_interface_t *lg = (gulm_interface_t *) lgp;
45695 + /* make sure it is a gulm_interface_p. */
45696 + if (lg->first_magic != LGMAGIC || lg->last_magic != LGMAGIC)
45699 + if (lg->service_name != NULL)
45700 + kfree (lg->service_name);
45701 + if (lg->clusterID != NULL)
45702 + kfree (lg->clusterID);
45704 + /* wonder if I should send a logout packet? */
45705 + if (lg->core_enc != NULL)
45706 + xdr_enc_release (lg->core_enc);
45707 + if (lg->core_dec != NULL)
45708 + xdr_dec_release (lg->core_dec);
45709 + xdr_close (&lg->core_fd);
45711 + if (lg->lock_enc != NULL)
45712 + xdr_enc_release (lg->lock_enc);
45713 + if (lg->lock_dec != NULL)
45714 + xdr_dec_release (lg->lock_dec);
45715 + xdr_close (&lg->lock_fd);
45717 + if (lg->cfba != NULL)
45718 + kfree (lg->cfba);
45719 + if (lg->cfbb != NULL)
45720 + kfree (lg->cfbb);
45721 + if (lg->lfba != NULL)
45722 + kfree (lg->lfba);
45723 + if (lg->lfbb != NULL)
45724 + kfree (lg->lfbb);
45730 + * lg_set_core_port -
45738 +lg_set_core_port (gulm_interface_p lgp, uint16_t new)
45740 + gulm_interface_t *lg = (gulm_interface_t *) lgp;
45743 + /* make sure it is a gulm_interface_p. */
45744 + if (lg->first_magic != LGMAGIC || lg->last_magic != LGMAGIC)
45747 + lg->core_port = new;
45752 + * lg_set_ltpx_port -
45760 +lg_set_lock_port (gulm_interface_p lgp, uint16_t new)
45762 + gulm_interface_t *lg = (gulm_interface_t *) lgp;
45765 + /* make sure it is a gulm_interface_p. */
45766 + if (lg->first_magic != LGMAGIC || lg->last_magic != LGMAGIC)
45769 + lg->lock_port = new;
45773 diff -urN linux-orig/fs/gfs_locking/lock_gulm/lg_priv.h linux-patched/fs/gfs_locking/lock_gulm/lg_priv.h
45774 --- linux-orig/fs/gfs_locking/lock_gulm/lg_priv.h 1969-12-31 18:00:00.000000000 -0600
45775 +++ linux-patched/fs/gfs_locking/lock_gulm/lg_priv.h 2004-06-16 12:03:21.958894765 -0500
45777 +/******************************************************************************
45778 +*******************************************************************************
45780 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
45781 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
45783 +** This copyrighted material is made available to anyone wishing to use,
45784 +** modify, copy, or redistribute it subject to the terms and conditions
45785 +** of the GNU General Public License v.2.
45787 +*******************************************************************************
45788 +******************************************************************************/
45790 +#ifndef __lg_priv_h__
45791 +#define __lg_priv_h__
45792 +/* private details that we don't want to give the users of this lib access
45797 +#include <linux/kernel.h>
45798 +#include <linux/sched.h>
45799 +#define __KERNEL_SYSCALLS__
45800 +#include <linux/unistd.h>
45801 +#endif /*__linux__*/
45804 +#include "gio_wiretypes.h"
45805 +#include "libgulm.h"
45807 +#define LGMAGIC (0x474d4354)
45809 +struct gulm_interface_s {
45810 + /* since we've masked this to a void* to the users, it is a nice safety
45811 + * net to put a little magic in here so we know things stay good.
45813 + uint32_t first_magic;
45815 + /* WHAT IS YOUR NAME?!? */
45816 + char *service_name;
45820 + uint16_t core_port;
45821 + xdr_socket core_fd;
45822 + xdr_enc_t *core_enc;
45823 + xdr_dec_t *core_dec;
45824 + struct semaphore core_sender;
45825 + struct semaphore core_recver;
45828 + uint16_t lock_port;
45829 + xdr_socket lock_fd;
45830 + xdr_enc_t *lock_enc;
45831 + xdr_dec_t *lock_dec;
45832 + struct semaphore lock_sender;
45833 + struct semaphore lock_recver;
45835 + uint8_t lockspace[4];
45837 + /* in the message recver func, we read data into these buffers and pass
45838 + * them to the callback function. This way we avoid doing mallocs and
45839 + * frees on every callback.
45841 + uint16_t cfba_len;
45843 + uint16_t cfbb_len;
45845 + uint16_t lfba_len;
45847 + uint16_t lfbb_len;
45850 + uint32_t last_magic;
45852 +typedef struct gulm_interface_s gulm_interface_t;
45862 +#endif /*__lg_priv_h__*/
45863 diff -urN linux-orig/fs/gfs_locking/lock_gulm/libgulm.h linux-patched/fs/gfs_locking/lock_gulm/libgulm.h
45864 --- linux-orig/fs/gfs_locking/lock_gulm/libgulm.h 1969-12-31 18:00:00.000000000 -0600
45865 +++ linux-patched/fs/gfs_locking/lock_gulm/libgulm.h 2004-06-16 12:03:21.958894765 -0500
45867 +/******************************************************************************
45868 +*******************************************************************************
45870 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
45871 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
45873 +** This copyrighted material is made available to anyone wishing to use,
45874 +** modify, copy, or redistribute it subject to the terms and conditions
45875 +** of the GNU General Public License v.2.
45877 +*******************************************************************************
45878 +******************************************************************************/
45880 +#ifndef __libgulm_h__
45881 +#define __libgulm_h__
45883 +/* bit messy, but we need this to be rather seamless in both kernel and
45884 + * userspace. and this seems the easiest way to do it.
45888 +#include <linux/in6.h>
45889 +typedef struct socket *lg_socket;
45890 +#endif /*__linux__*/
45892 +typedef void *gulm_interface_p;
45894 +/* mallocs the interface structure.
45896 +int lg_initialize (gulm_interface_p *, char *cluster_name, char *service_name);
45899 +void lg_release (gulm_interface_p);
45901 +/* Determines where we are with an itemlist callback */
45902 +typedef enum { lglcb_start, lglcb_item, lglcb_stop } lglcb_t;
45904 +/****** Core specifics ******/
45906 +/* leaving a callback pointer as NULL, will cause that message type to
45908 +typedef struct lg_core_callbacks_s {
45909 + int (*login_reply) (void *misc, uint64_t gen, uint32_t error,
45910 + uint32_t rank, uint8_t corestate);
45911 + int (*logout_reply) (void *misc);
45912 + int (*nodelist) (void *misc, lglcb_t type, char *name,
45913 + struct in6_addr * ip, uint8_t state);
45914 + int (*statechange) (void *misc, uint8_t corestate,
45915 + struct in6_addr * masterip, char *mastername);
45916 + int (*nodechange) (void *misc, char *nodename,
45917 + struct in6_addr * nodeip, uint8_t nodestate);
45918 + int (*service_list) (void *misc, lglcb_t type, char *service);
45919 + int (*status) (void *misc, lglcb_t type, char *key, char *value);
45920 + int (*error) (void *misc, uint32_t err);
45921 +} lg_core_callbacks_t;
45923 +/* this will trigger a callback from gulm_core_callbacks_t
45924 + * handles one message! Either stick this inside of a thread,
45925 + * or in a poll()/select() loop using the function below.
45926 + * This will block until there is a message sent from core.
45928 +int lg_core_handle_messages (gulm_interface_p, lg_core_callbacks_t *,
45931 +/* this returns the filedescriptor that the library is using to
45932 + * communicate with the core. This is only for using in a poll()
45933 + * or select() call to avoid having the gulm_core_handle_messages()
45936 +lg_socket lg_core_selector (gulm_interface_p);
45938 +/* Queue requests. */
45939 +int lg_core_login (gulm_interface_p, int important);
45940 +int lg_core_logout (gulm_interface_p);
45941 +int lg_core_nodeinfo (gulm_interface_p, char *nodename);
45942 +int lg_core_nodelist (gulm_interface_p);
45943 +int lg_core_servicelist (gulm_interface_p);
45944 +int lg_core_corestate (gulm_interface_p);
45946 +/* for completeness mostly. */
45947 +int lg_core_shutdown (gulm_interface_p);
45948 +int lg_core_forceexpire (gulm_interface_p, char *node_name);
45949 +int lg_core_forcepending (gulm_interface_p);
45951 +int lg_core_status (gulm_interface_p);
45954 + * First three are actual states, as well as changes. Last is only a node
45955 + * change message.
45957 +#define lg_core_Logged_in (0x05)
45958 +#define lg_core_Logged_out (0x06)
45959 +#define lg_core_Expired (0x07)
45960 +#define lg_core_Fenced (0x08)
45962 +#define lg_core_Slave (0x01)
45963 +#define lg_core_Master (0x02)
45964 +#define lg_core_Pending (0x03)
45965 +#define lg_core_Arbitrating (0x04)
45966 +#define lg_core_Client (0x06)
45968 +/****** lock space specifics *****/
45969 +/* note that this library masks out the lock table separation.
45972 +typedef struct lg_lockspace_callbacks_s {
45973 + int (*login_reply) (void *misc, uint32_t error, uint8_t which);
45974 + int (*logout_reply) (void *misc);
45975 + int (*lock_state) (void *misc, uint8_t * key, uint16_t keylen,
45976 + uint8_t state, uint32_t flags, uint32_t error,
45977 + uint8_t * LVB, uint16_t LVBlen);
45978 + int (*lock_action) (void *misc, uint8_t * key, uint16_t keylen,
45979 + uint8_t action, uint32_t error);
45980 + int (*cancel_reply) (void *misc, uint8_t * key, uint16_t keylen,
45982 + int (*drop_lock_req) (void *misc, uint8_t * key, uint16_t keylen,
45984 + int (*drop_all) (void *misc);
45985 + int (*status) (void *misc, lglcb_t type, char *key, char *value);
45986 + int (*error) (void *misc, uint32_t err);
45987 +} lg_lockspace_callbacks_t;
45989 +/* Like the core handle messages function, but for the lockspace.
45990 + * Handles one message, blocks.
45993 +int lg_lock_handle_messages (gulm_interface_p, lg_lockspace_callbacks_t *,
45996 +/* this returns the filedescriptor that the library is using to
45997 + * communicate with the ltpx. This is only for using in a poll()
45998 + * or select() call to avoid having the gulm_lock_handle_messages()
46001 +lg_socket lg_lock_selector (gulm_interface_p);
46003 +/* Lockspace request calls */
46004 +int lg_lock_login (gulm_interface_p, uint8_t lockspace[4]);
46005 +int lg_lock_logout (gulm_interface_p);
46006 +int lg_lock_state_req (gulm_interface_p, uint8_t * key, uint16_t keylen,
46007 + uint8_t state, uint32_t flags, uint8_t * LVB,
46008 + uint16_t LVBlen);
46009 +int lg_lock_cancel_req (gulm_interface_p, uint8_t * key, uint16_t keylen);
46010 +int lg_lock_action_req (gulm_interface_p, uint8_t * key,
46011 + uint16_t keylen, uint8_t action,
46012 + uint8_t * LVB, uint16_t LVBlen);
46013 +int lg_lock_drop_exp (gulm_interface_p, uint8_t * holder,
46014 + uint8_t * keymask, uint16_t kmlen);
46015 +int lg_lock_status (gulm_interface_p);
46017 +/* state requests */
46018 +#define lg_lock_state_Unlock (0x00)
46019 +#define lg_lock_state_Exclusive (0x01)
46020 +#define lg_lock_state_Deferred (0x02)
46021 +#define lg_lock_state_Shared (0x03)
46024 +#define lg_lock_act_HoldLVB (0x0b)
46025 +#define lg_lock_act_UnHoldLVB (0x0c)
46026 +#define lg_lock_act_SyncLVB (0x0d)
46029 +#define lg_lock_flag_DoCB (0x00000001)
46030 +#define lg_lock_flag_Try (0x00000002)
46031 +#define lg_lock_flag_Any (0x00000004)
46032 +#define lg_lock_flag_IgnoreExp (0x00000008)
46033 +#define lg_lock_flag_Cachable (0x00000020)
46034 +#define lg_lock_flag_Piority (0x00000040)
46036 +/* These are the possible values that can be in the error fields. */
46037 +#define lg_err_Ok (0)
46038 +#define lg_err_BadLogin (1001)
46039 +#define lg_err_BadCluster (1003)
46040 +#define lg_err_BadConfig (1004)
46041 +#define lg_err_BadGeneration (1005)
46042 +#define lg_err_BadWireProto (1019)
46044 +#define lg_err_NotAllowed (1006)
46045 +#define lg_err_Unknown_Cs (1007)
46046 +#define lg_err_BadStateChg (1008)
46047 +#define lg_err_MemoryIssues (1009)
46049 +#define lg_err_TryFailed (1011)
46050 +#define lg_err_AlreadyPend (1013)
46051 +#define lg_err_Canceled (1015)
46053 +#define lg_err_NoSuchFS (1016)
46054 +#define lg_err_NoSuchJID (1017)
46055 +#define lg_err_NoSuchName (1018)
46057 +#endif /*__libgulm_h__*/
46058 diff -urN linux-orig/fs/gfs_locking/lock_gulm/linux_gulm_main.c linux-patched/fs/gfs_locking/lock_gulm/linux_gulm_main.c
46059 --- linux-orig/fs/gfs_locking/lock_gulm/linux_gulm_main.c 1969-12-31 18:00:00.000000000 -0600
46060 +++ linux-patched/fs/gfs_locking/lock_gulm/linux_gulm_main.c 2004-06-16 12:03:21.958894765 -0500
46062 +/******************************************************************************
46063 +*******************************************************************************
46065 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
46066 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
46068 +** This copyrighted material is made available to anyone wishing to use,
46069 +** modify, copy, or redistribute it subject to the terms and conditions
46070 +** of the GNU General Public License v.2.
46072 +*******************************************************************************
46073 +******************************************************************************/
46075 +#define EXPORT_SYMTAB
46076 +#define WANT_DEBUG_NAMES
46077 +#define WANT_GMALLOC_NAMES
46081 +#include <linux/init.h>
46084 +#include "gulm_procinfo.h"
46086 +MODULE_DESCRIPTION ("Grand Unified Locking Module " GULM_RELEASE_NAME);
46087 +MODULE_AUTHOR ("Red Hat, Inc.");
46088 +MODULE_LICENSE ("GPL");
46090 +extern gulm_cm_t gulm_cm;
46093 + * init_gulm - Initialize the gulm module
46095 + * Returns: 0 on success, -EXXX on failure
46102 + memset (&gulm_cm, 0, sizeof (gulm_cm_t));
46103 + gulm_cm.loaded = FALSE;
46104 + gulm_cm.hookup = NULL;
46106 + /* register with the lm layers. */
46107 + error = lm_register_proto (&gulm_ops);
46111 + error = init_proc_dir ();
46112 + if (error != 0) {
46118 + printk ("Gulm %s (built %s %s) installed\n",
46119 + GULM_RELEASE_NAME, __DATE__, __TIME__);
46124 + lm_unregister_proto (&gulm_ops);
46131 + * exit_gulm - cleanup the gulm module
46138 + remove_proc_dir ();
46139 + lm_unregister_proto (&gulm_ops);
46142 +module_init (init_gulm);
46143 +module_exit (exit_gulm);
46145 +/* the libgulm.h interface. */
46146 +EXPORT_SYMBOL (lg_initialize);
46147 +EXPORT_SYMBOL (lg_release);
46149 +EXPORT_SYMBOL (lg_core_handle_messages);
46150 +EXPORT_SYMBOL (lg_core_selector);
46151 +EXPORT_SYMBOL (lg_core_login);
46152 +EXPORT_SYMBOL (lg_core_logout);
46153 +EXPORT_SYMBOL (lg_core_nodeinfo);
46154 +EXPORT_SYMBOL (lg_core_nodelist);
46155 +EXPORT_SYMBOL (lg_core_servicelist);
46156 +EXPORT_SYMBOL (lg_core_corestate);
46157 +EXPORT_SYMBOL (lg_core_shutdown);
46158 +EXPORT_SYMBOL (lg_core_forceexpire);
46159 +EXPORT_SYMBOL (lg_core_forcepending);
46160 +EXPORT_SYMBOL (lg_core_status);
46162 +EXPORT_SYMBOL (lg_lock_handle_messages);
46163 +EXPORT_SYMBOL (lg_lock_selector);
46164 +EXPORT_SYMBOL (lg_lock_login);
46165 +EXPORT_SYMBOL (lg_lock_logout);
46166 +EXPORT_SYMBOL (lg_lock_state_req);
46167 +EXPORT_SYMBOL (lg_lock_cancel_req);
46168 +EXPORT_SYMBOL (lg_lock_action_req);
46169 +EXPORT_SYMBOL (lg_lock_drop_exp);
46170 +EXPORT_SYMBOL (lg_lock_status);
46171 diff -urN linux-orig/fs/gfs_locking/lock_gulm/load_info.c linux-patched/fs/gfs_locking/lock_gulm/load_info.c
46172 --- linux-orig/fs/gfs_locking/lock_gulm/load_info.c 1969-12-31 18:00:00.000000000 -0600
46173 +++ linux-patched/fs/gfs_locking/lock_gulm/load_info.c 2004-06-16 12:03:21.958894765 -0500
46175 +/******************************************************************************
46176 +*******************************************************************************
46178 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
46179 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
46181 +** This copyrighted material is made available to anyone wishing to use,
46182 +** modify, copy, or redistribute it subject to the terms and conditions
46183 +** of the GNU General Public License v.2.
46185 +*******************************************************************************
46186 +******************************************************************************/
46190 +#include <linux/kernel.h>
46191 +#include <linux/sched.h>
46192 +#define __KERNEL_SYSCALLS__
46193 +#include <linux/unistd.h>
46195 +#include <linux/utsname.h> /* for extern system_utsname */
46198 +#include "utils_verb_flags.h"
46200 +gulm_cm_t gulm_cm;
46209 + lock_table_t *lt = &gulm_cm.ltpx;
46211 + INIT_LIST_HEAD (<->to_be_sent);
46212 + spin_lock_init (<->queue_sender);
46213 + init_waitqueue_head (<->send_wchan);
46214 + lt->magic_one = 0xAAAAAAAA;
46215 + init_MUTEX (<->sender);
46216 + init_completion (<->startup);
46217 + atomic_set (<->locks_pending, 0);
46218 + lt->hashbuckets = 8191;
46219 + lt->hshlk = kmalloc (sizeof (spinlock_t) * lt->hashbuckets, GFP_KERNEL);
46220 + if (lt->hshlk == NULL)
46223 + kmalloc (sizeof (struct list_head) * lt->hashbuckets, GFP_KERNEL);
46224 + if (lt->lkhsh == NULL) {
46225 + kfree (lt->hshlk);
46228 + for (j = 0; j < lt->hashbuckets; j++) {
46229 + spin_lock_init (<->hshlk[j]);
46230 + INIT_LIST_HEAD (<->lkhsh[j]);
46237 + * @hostdata: < optionally override the name of this node.
46242 +load_info (char *hostdata)
46246 + if (gulm_cm.loaded)
46249 + gulm_cm.verbosity = 0;
46250 + if (hostdata != NULL && strlen (hostdata) > 0) {
46251 + strncpy (gulm_cm.myName, hostdata, 64);
46253 + strncpy (gulm_cm.myName, system_utsname.nodename, 64);
46255 + gulm_cm.myName[63] = '\0';
46257 + /* breaking away from ccs. just hardcoding defaults here.
46258 + * Noone really used these anyways and if ppl want them badly, we'll
46259 + * find another way to set them. (modprobe options for example.)
46261 + gulm_cm.handler_threads = 2;
46262 + set_verbosity ("Default", &gulm_cm.verbosity);
46266 + gulm_cm.loaded = TRUE;
46270 +/* vim: set ai cin noet sw=8 ts=8 : */
46271 diff -urN linux-orig/fs/gfs_locking/lock_gulm/load_info.h linux-patched/fs/gfs_locking/lock_gulm/load_info.h
46272 --- linux-orig/fs/gfs_locking/lock_gulm/load_info.h 1969-12-31 18:00:00.000000000 -0600
46273 +++ linux-patched/fs/gfs_locking/lock_gulm/load_info.h 2004-06-16 12:03:21.958894765 -0500
46275 +/******************************************************************************
46276 +*******************************************************************************
46278 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
46279 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
46281 +** This copyrighted material is made available to anyone wishing to use,
46282 +** modify, copy, or redistribute it subject to the terms and conditions
46283 +** of the GNU General Public License v.2.
46285 +*******************************************************************************
46286 +******************************************************************************/
46288 +#ifndef __load_info_h__
46289 +#define __load_info_h__
46290 +int load_info (char *);
46291 +#endif /*__load_info_h__*/
46292 diff -urN linux-orig/fs/gfs_locking/lock_gulm/util.c linux-patched/fs/gfs_locking/lock_gulm/util.c
46293 --- linux-orig/fs/gfs_locking/lock_gulm/util.c 1969-12-31 18:00:00.000000000 -0600
46294 +++ linux-patched/fs/gfs_locking/lock_gulm/util.c 2004-06-16 12:03:21.958894765 -0500
46296 +/******************************************************************************
46297 +*******************************************************************************
46299 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
46300 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
46302 +** This copyrighted material is made available to anyone wishing to use,
46303 +** modify, copy, or redistribute it subject to the terms and conditions
46304 +** of the GNU General Public License v.2.
46306 +*******************************************************************************
46307 +******************************************************************************/
46309 +#include <linux/kernel.h>
46310 +#include <linux/types.h>
46311 +#include <linux/string.h>
46312 +#include "utils_crc.h"
46326 + while ('0' <= *c && *c <= '9') {
46327 + x = x * 10 + (*c - '0');
46343 +inet_aton (char *ascii, uint32_t * ip)
46350 + for (x = 0; x < 4; x++) {
46351 + value = atoi (ascii);
46355 + *ip = (*ip << 8) | value;
46358 + for (; *ascii != '.' && *ascii != '\0'; ascii++) {
46359 + if (*ascii < '0' || *ascii > '9') {
46360 + /* not a number. stop */
46364 + if (*ascii == '\0')
46382 +inet_ntoa (uint32_t ip, char *buf)
46389 + for (i = 3; i >= 0; i--) {
46390 + p += sprintf (p, "%d", (ip >> (8 * i)) & 0xFF);
46397 +/* public functions */
46398 +#define hash_init_val 0x6d696b65
46400 +uint32_t __inline__
46401 +hash_lock_key (uint8_t * in, uint8_t len)
46402 +{ /* other hash function was to variable */
46403 + return crc32 (in, len, hash_init_val);
46405 diff -urN linux-orig/fs/gfs_locking/lock_gulm/util.h linux-patched/fs/gfs_locking/lock_gulm/util.h
46406 --- linux-orig/fs/gfs_locking/lock_gulm/util.h 1969-12-31 18:00:00.000000000 -0600
46407 +++ linux-patched/fs/gfs_locking/lock_gulm/util.h 2004-06-16 12:03:21.959894533 -0500
46409 +/******************************************************************************
46410 +*******************************************************************************
46412 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
46413 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
46415 +** This copyrighted material is made available to anyone wishing to use,
46416 +** modify, copy, or redistribute it subject to the terms and conditions
46417 +** of the GNU General Public License v.2.
46419 +*******************************************************************************
46420 +******************************************************************************/
46422 +#ifndef __UTIL_DOT_H__
46423 +#define __UTIL_DOT_H__
46425 +int atoi (char *c);
46426 +int inet_aton (char *ascii, uint32_t * ip);
46427 +void inet_ntoa (uint32_t ip, char *buf);
46428 +void dump_buffer (void *buf, int len);
46430 +uint32_t __inline__ hash_lock_key (uint8_t * in, uint8_t len);
46431 +uint8_t __inline__ fourtoone (uint32_t);
46433 +__inline__ int testbit (uint16_t bit, uint8_t * set);
46434 +__inline__ void setbit (uint16_t bit, uint8_t * set);
46435 +__inline__ void clearbit (uint16_t bit, uint8_t * set);
46437 +#endif /* __UTIL_DOT_H__ */
46438 diff -urN linux-orig/fs/gfs_locking/lock_gulm/utils_crc.c linux-patched/fs/gfs_locking/lock_gulm/utils_crc.c
46439 --- linux-orig/fs/gfs_locking/lock_gulm/utils_crc.c 1969-12-31 18:00:00.000000000 -0600
46440 +++ linux-patched/fs/gfs_locking/lock_gulm/utils_crc.c 2004-06-16 12:03:21.959894533 -0500
46442 +/******************************************************************************
46443 +*******************************************************************************
46445 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
46446 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
46448 +** This copyrighted material is made available to anyone wishing to use,
46449 +** modify, copy, or redistribute it subject to the terms and conditions
46450 +** of the GNU General Public License v.2.
46452 +*******************************************************************************
46453 +******************************************************************************/
46455 +#include <linux/types.h>
46457 +static const uint32_t crc_32_tab[] = {
46458 + 0x00000000, 0x77073096, 0xee0e612c, 0x990951ba, 0x076dc419, 0x706af48f,
46459 + 0xe963a535, 0x9e6495a3, 0x0edb8832, 0x79dcb8a4, 0xe0d5e91e, 0x97d2d988,
46460 + 0x09b64c2b, 0x7eb17cbd, 0xe7b82d07, 0x90bf1d91, 0x1db71064, 0x6ab020f2,
46461 + 0xf3b97148, 0x84be41de, 0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7,
46462 + 0x136c9856, 0x646ba8c0, 0xfd62f97a, 0x8a65c9ec, 0x14015c4f, 0x63066cd9,
46463 + 0xfa0f3d63, 0x8d080df5, 0x3b6e20c8, 0x4c69105e, 0xd56041e4, 0xa2677172,
46464 + 0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b, 0x35b5a8fa, 0x42b2986c,
46465 + 0xdbbbc9d6, 0xacbcf940, 0x32d86ce3, 0x45df5c75, 0xdcd60dcf, 0xabd13d59,
46466 + 0x26d930ac, 0x51de003a, 0xc8d75180, 0xbfd06116, 0x21b4f4b5, 0x56b3c423,
46467 + 0xcfba9599, 0xb8bda50f, 0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924,
46468 + 0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d, 0x76dc4190, 0x01db7106,
46469 + 0x98d220bc, 0xefd5102a, 0x71b18589, 0x06b6b51f, 0x9fbfe4a5, 0xe8b8d433,
46470 + 0x7807c9a2, 0x0f00f934, 0x9609a88e, 0xe10e9818, 0x7f6a0dbb, 0x086d3d2d,
46471 + 0x91646c97, 0xe6635c01, 0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e,
46472 + 0x6c0695ed, 0x1b01a57b, 0x8208f4c1, 0xf50fc457, 0x65b0d9c6, 0x12b7e950,
46473 + 0x8bbeb8ea, 0xfcb9887c, 0x62dd1ddf, 0x15da2d49, 0x8cd37cf3, 0xfbd44c65,
46474 + 0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2, 0x4adfa541, 0x3dd895d7,
46475 + 0xa4d1c46d, 0xd3d6f4fb, 0x4369e96a, 0x346ed9fc, 0xad678846, 0xda60b8d0,
46476 + 0x44042d73, 0x33031de5, 0xaa0a4c5f, 0xdd0d7cc9, 0x5005713c, 0x270241aa,
46477 + 0xbe0b1010, 0xc90c2086, 0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f,
46478 + 0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4, 0x59b33d17, 0x2eb40d81,
46479 + 0xb7bd5c3b, 0xc0ba6cad, 0xedb88320, 0x9abfb3b6, 0x03b6e20c, 0x74b1d29a,
46480 + 0xead54739, 0x9dd277af, 0x04db2615, 0x73dc1683, 0xe3630b12, 0x94643b84,
46481 + 0x0d6d6a3e, 0x7a6a5aa8, 0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1,
46482 + 0xf00f9344, 0x8708a3d2, 0x1e01f268, 0x6906c2fe, 0xf762575d, 0x806567cb,
46483 + 0x196c3671, 0x6e6b06e7, 0xfed41b76, 0x89d32be0, 0x10da7a5a, 0x67dd4acc,
46484 + 0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5, 0xd6d6a3e8, 0xa1d1937e,
46485 + 0x38d8c2c4, 0x4fdff252, 0xd1bb67f1, 0xa6bc5767, 0x3fb506dd, 0x48b2364b,
46486 + 0xd80d2bda, 0xaf0a1b4c, 0x36034af6, 0x41047a60, 0xdf60efc3, 0xa867df55,
46487 + 0x316e8eef, 0x4669be79, 0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236,
46488 + 0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f, 0xc5ba3bbe, 0xb2bd0b28,
46489 + 0x2bb45a92, 0x5cb36a04, 0xc2d7ffa7, 0xb5d0cf31, 0x2cd99e8b, 0x5bdeae1d,
46490 + 0x9b64c2b0, 0xec63f226, 0x756aa39c, 0x026d930a, 0x9c0906a9, 0xeb0e363f,
46491 + 0x72076785, 0x05005713, 0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38,
46492 + 0x92d28e9b, 0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21, 0x86d3d2d4, 0xf1d4e242,
46493 + 0x68ddb3f8, 0x1fda836e, 0x81be16cd, 0xf6b9265b, 0x6fb077e1, 0x18b74777,
46494 + 0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c, 0x8f659eff, 0xf862ae69,
46495 + 0x616bffd3, 0x166ccf45, 0xa00ae278, 0xd70dd2ee, 0x4e048354, 0x3903b3c2,
46496 + 0xa7672661, 0xd06016f7, 0x4969474d, 0x3e6e77db, 0xaed16a4a, 0xd9d65adc,
46497 + 0x40df0b66, 0x37d83bf0, 0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9,
46498 + 0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6, 0xbad03605, 0xcdd70693,
46499 + 0x54de5729, 0x23d967bf, 0xb3667a2e, 0xc4614ab8, 0x5d681b02, 0x2a6f2b94,
46500 + 0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, 0x2d02ef8d
46504 + * crc32 - hash an array of data
46505 + * @data: the data to be hashed
46506 + * @len: the length of data to be hashed
46508 + * completely copied from GFS/src/fs.c
46510 + * Take some data and convert it to a 32-bit hash.
46512 + * The hash function is a 32-bit CRC of the data. The algorithm uses
46513 + * the crc_32_tab table above.
46515 + * This may not be the fastest hash function, but it does a fair bit better
46516 + * at providing uniform results than the others I've looked at. That's
46517 + * really important for efficient directories.
46519 + * Returns: the hash
46523 +crc32 (const char *data, int len, uint32_t init)
46525 + uint32_t hash = init;
46527 + for (; len--; data++)
46528 + hash = crc_32_tab[(hash ^ *data) & 0xFF] ^ (hash >> 8);
46534 diff -urN linux-orig/fs/gfs_locking/lock_gulm/utils_crc.h linux-patched/fs/gfs_locking/lock_gulm/utils_crc.h
46535 --- linux-orig/fs/gfs_locking/lock_gulm/utils_crc.h 1969-12-31 18:00:00.000000000 -0600
46536 +++ linux-patched/fs/gfs_locking/lock_gulm/utils_crc.h 2004-06-16 12:03:21.959894533 -0500
46538 +/******************************************************************************
46539 +*******************************************************************************
46541 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
46542 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
46544 +** This copyrighted material is made available to anyone wishing to use,
46545 +** modify, copy, or redistribute it subject to the terms and conditions
46546 +** of the GNU General Public License v.2.
46548 +*******************************************************************************
46549 +******************************************************************************/
46551 +#ifndef __utils_crc_h__
46552 +#define __utils_crc_h__
46553 +uint32_t crc32 (const char *data, int len, uint32_t init);
46554 +#endif /*__utils_crc_h__*/
46555 diff -urN linux-orig/fs/gfs_locking/lock_gulm/utils_tostr.c linux-patched/fs/gfs_locking/lock_gulm/utils_tostr.c
46556 --- linux-orig/fs/gfs_locking/lock_gulm/utils_tostr.c 1969-12-31 18:00:00.000000000 -0600
46557 +++ linux-patched/fs/gfs_locking/lock_gulm/utils_tostr.c 2004-06-16 12:03:21.959894533 -0500
46559 +/******************************************************************************
46560 +*******************************************************************************
46562 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
46563 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
46565 +** This copyrighted material is made available to anyone wishing to use,
46566 +** modify, copy, or redistribute it subject to the terms and conditions
46567 +** of the GNU General Public License v.2.
46569 +*******************************************************************************
46570 +******************************************************************************/
46572 +#include "gio_wiretypes.h"
46575 +gio_Err_to_str (int x)
46577 + char *t = "Unknown GULM Err";
46583 + case gio_Err_BadLogin:
46586 + case gio_Err_BadCluster:
46587 + t = "Bad Cluster ID";
46589 + case gio_Err_BadConfig:
46590 + t = "Incompatible configurations";
46592 + case gio_Err_BadGeneration:
46593 + t = "Bad Generation ID";
46595 + case gio_Err_BadWireProto:
46596 + t = "Bad Wire Protocol Version";
46599 + case gio_Err_NotAllowed:
46600 + t = "Not Allowed";
46602 + case gio_Err_Unknown_Cs:
46603 + t = "Uknown Client";
46605 + case gio_Err_BadStateChg:
46606 + t = "Bad State Change";
46608 + case gio_Err_MemoryIssues:
46609 + t = "Memory Problems";
46612 + case gio_Err_PushQu:
46613 + t = "Push Queue";
46615 + case gio_Err_TryFailed:
46616 + t = "Try Failed";
46618 + case gio_Err_AlreadyPend:
46619 + t = "Request Already Pending";
46621 + case gio_Err_Canceled:
46622 + t = "Request Canceled";
46625 + case gio_Err_NoSuchFS:
46626 + t = "No Such Filesystem";
46628 + case gio_Err_NoSuchJID:
46629 + t = "No Such JID";
46631 + case gio_Err_NoSuchName:
46632 + t = "No Such Node";
46639 +gio_mbrupdate_to_str (int x)
46641 + char *t = "Unknown Membership Update";
46643 + case gio_Mbr_Logged_in:
46646 + case gio_Mbr_Logged_out:
46647 + t = "Logged out";
46649 + case gio_Mbr_Expired:
46652 + case gio_Mbr_Killed:
46655 + case gio_Mbr_OM_lgin:
46656 + t = "Was Logged in";
46663 +gio_I_am_to_str (int x)
46666 + case gio_Mbr_ama_Slave:
46669 + case gio_Mbr_ama_Pending:
46670 + return "Pending";
46672 + case gio_Mbr_ama_Arbitrating:
46673 + return "Arbitrating";
46675 + case gio_Mbr_ama_Master:
46678 + case gio_Mbr_ama_Resource:
46679 + return "Service";
46681 + case gio_Mbr_ama_Client:
46685 + return "Unknown I_am state";
46691 +gio_license_states (int x)
46698 + return "expired";
46701 + return "invalid";
46704 + return "unknown";
46710 +gio_opcodes (int x)
46713 +#define CP(x) case (x): return #x ; break
46714 + CP (gulm_err_reply);
46716 + CP (gulm_core_login_req);
46717 + CP (gulm_core_login_rpl);
46718 + CP (gulm_core_logout_req);
46719 + CP (gulm_core_logout_rpl);
46720 + CP (gulm_core_reslgn_req);
46721 + CP (gulm_core_beat_req);
46722 + CP (gulm_core_beat_rpl);
46723 + CP (gulm_core_mbr_req);
46724 + CP (gulm_core_mbr_updt);
46725 + CP (gulm_core_mbr_lstreq);
46726 + CP (gulm_core_mbr_lstrpl);
46727 + CP (gulm_core_mbr_force);
46728 + CP (gulm_core_res_req);
46729 + CP (gulm_core_res_list);
46730 + CP (gulm_core_state_req);
46731 + CP (gulm_core_state_chgs);
46732 + CP (gulm_core_shutdown);
46733 + CP (gulm_core_forcepend);
46735 + CP (gulm_info_stats_req);
46736 + CP (gulm_info_stats_rpl);
46737 + CP (gulm_info_set_verbosity);
46738 + CP (gulm_socket_close);
46739 + CP (gulm_info_slave_list_req);
46740 + CP (gulm_info_slave_list_rpl);
46742 + CP (gulm_lock_login_req);
46743 + CP (gulm_lock_login_rpl);
46744 + CP (gulm_lock_logout_req);
46745 + CP (gulm_lock_logout_rpl);
46746 + CP (gulm_lock_state_req);
46747 + CP (gulm_lock_state_rpl);
46748 + CP (gulm_lock_state_updt);
46749 + CP (gulm_lock_action_req);
46750 + CP (gulm_lock_action_rpl);
46751 + CP (gulm_lock_action_updt);
46752 + CP (gulm_lock_update_rpl);
46753 + CP (gulm_lock_cb_state);
46754 + CP (gulm_lock_cb_dropall);
46755 + CP (gulm_lock_drop_exp);
46756 + CP (gulm_lock_dump_req);
46757 + CP (gulm_lock_dump_rpl);
46758 + CP (gulm_lock_rerunqueues);
46762 + return "Unknown Op Code";
46766 diff -urN linux-orig/fs/gfs_locking/lock_gulm/utils_tostr.h linux-patched/fs/gfs_locking/lock_gulm/utils_tostr.h
46767 --- linux-orig/fs/gfs_locking/lock_gulm/utils_tostr.h 1969-12-31 18:00:00.000000000 -0600
46768 +++ linux-patched/fs/gfs_locking/lock_gulm/utils_tostr.h 2004-06-16 12:03:21.959894533 -0500
46770 +/******************************************************************************
46771 +*******************************************************************************
46773 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
46774 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
46776 +** This copyrighted material is made available to anyone wishing to use,
46777 +** modify, copy, or redistribute it subject to the terms and conditions
46778 +** of the GNU General Public License v.2.
46780 +*******************************************************************************
46781 +******************************************************************************/
46783 +#ifndef __utils_tostr_h__
46784 +#define __utils_tostr_h__
46785 +char *gio_Err_to_str (int x);
46786 +char *gio_mbrupdate_to_str (int x);
46787 +char *gio_mbrama_to_str (int x);
46788 +char *gio_I_am_to_str (int x);
46789 +char *gio_license_states (int x);
46790 +char *gio_opcodes (int x);
46791 +#endif /*__utils_tostr_h__*/
46792 diff -urN linux-orig/fs/gfs_locking/lock_gulm/utils_verb_flags.c linux-patched/fs/gfs_locking/lock_gulm/utils_verb_flags.c
46793 --- linux-orig/fs/gfs_locking/lock_gulm/utils_verb_flags.c 1969-12-31 18:00:00.000000000 -0600
46794 +++ linux-patched/fs/gfs_locking/lock_gulm/utils_verb_flags.c 2004-06-16 12:03:21.959894533 -0500
46796 +/******************************************************************************
46797 +*******************************************************************************
46799 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
46800 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
46802 +** This copyrighted material is made available to anyone wishing to use,
46803 +** modify, copy, or redistribute it subject to the terms and conditions
46804 +** of the GNU General Public License v.2.
46806 +*******************************************************************************
46807 +******************************************************************************/
46810 +#include <linux/kernel.h>
46811 +#include <linux/sched.h>
46812 +#define __KERNEL_SYSCALLS__
46813 +#include <linux/unistd.h>
46814 +#endif /*__linux__*/
46816 +#include "gulm_log_msg_bits.h"
46818 +static __inline__ int
46819 +strncasecmp (const char *s1, const char *s2, size_t l)
46821 + char c1 = '\0', c2 = '\0';
46823 + while (*s1 && *s2 && l-- > 0) {
46827 + if (c1 >= 'A' && c1 <= 'Z')
46830 + if (c2 >= 'A' && c2 <= 'Z')
46836 + return (c1 - c2);
46839 +static int bit_array[16] = { 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4 };
46841 +#define BITCOUNT(x) (bit_array[x & 0x000F] + \
46842 + bit_array[(x >> 4) & 0x000F] + \
46843 + bit_array[(x >> 8) & 0x000F] + \
46844 + bit_array[(x >> 12) & 0x000F] + \
46845 + bit_array[(x >> 16) & 0x000F] + \
46846 + bit_array[(x >> 20) & 0x000F] + \
46847 + bit_array[(x >> 24) & 0x000F] + \
46848 + bit_array[(x >> 28) & 0x000F])
46853 +} verbose_flags[] = {
46855 + "Network", lgm_Network,}, {
46856 + "Network2", lgm_Network2,}, {
46857 + "Network3", lgm_Network3,}, {
46858 + "Fencing", lgm_Stomith,}, {
46859 + "Heartbeat", lgm_Heartbeat,}, {
46860 + "Locking", lgm_locking,}, {
46861 + "Forking", lgm_Forking,}, {
46862 + "JIDMap", lgm_JIDMap,}, {
46863 + "JIDUpdates", lgm_JIDUpdates,}, {
46864 + "Subscribers", lgm_Subscribers,}, {
46865 + "LockUpdates", lgm_LockUpdates,}, {
46866 + "LoginLoops", lgm_LoginLoops,}, {
46867 + "ServerState", lgm_ServerState,}, {
46868 + "Default", lgm_Network | lgm_Stomith | lgm_Forking,},
46869 +/* Since I really don't want people really doing *all* flags with all,
46870 + * there is AlmostAll, which users really get, and ReallyAll, which is all
46872 + * This is mostly due to Network3, which dumps messages on nearly
46873 + * every packet. (should actually be every packet.)
46874 + * Also drop the slave updates, since that is on every packet as well.
46879 + ~(lgm_Network3 | lgm_JIDUpdates |
46880 + lgm_LockUpdates)),}, {
46882 + lgm_ReallyAll & ~(lgm_Network3 | lgm_JIDUpdates |
46883 + lgm_LockUpdates),}, {
46884 + "ReallyAll", lgm_ReallyAll,}
46888 +add_string (char *name, size_t * cur, char *str, size_t slen)
46892 + nl = strlen (name);
46893 + if (*cur + nl > slen) {
46894 + memcpy (str + *cur, "...", 3);
46896 + str[*cur] = '\0';
46899 + memcpy (str + *cur, name, nl);
46908 + * get_verbosity_string -
46916 +get_verbosity_string (char *str, size_t slen, uint32_t verb)
46918 + int i, vlen = sizeof (verbose_flags) / sizeof (verbose_flags[0]);
46920 + int combo_match = -1, error = 0;
46922 + memset (str, 0, slen);
46923 + slen -= 4; /* leave room for dots and null */
46926 + error = add_string ("Quiet", &cur, str, slen);
46930 + /* Combo verb flag phase */
46931 + for (i = 0; i < vlen; i++) {
46932 + if (BITCOUNT (verbose_flags[i].val) > 1) {
46933 + /* check to see if this flag matches exclusively */
46934 + if ((verbose_flags[i].val ^ verb) == 0) {
46936 + add_string (verbose_flags[i].name, &cur,
46941 + if ((verbose_flags[i].val & verb) ==
46942 + verbose_flags[i].val) {
46943 + if (combo_match < 0) {
46946 + /* Compare this combo with the one in combo_match */
46947 + if (BITCOUNT (verbose_flags[i].val) >
46948 + BITCOUNT (verbose_flags
46949 + [combo_match].val)) {
46957 + /* Add the best combo to the string */
46958 + if (combo_match > -1) {
46960 + (verbose_flags[combo_match].name, &cur, str, slen) == -1) {
46966 + /* Single verb flag phase */
46967 + for (i = 0; i < vlen; i++) {
46968 + if (BITCOUNT (verbose_flags[i].val) == 1) {
46969 + if (combo_match > -1) {
46970 + if ((verbose_flags[combo_match].
46971 + val & verbose_flags[i].val) ==
46972 + verbose_flags[i].val) {
46977 + if ((verbose_flags[i].val & verb) ==
46978 + verbose_flags[i].val) {
46980 + (verbose_flags[i].name, &cur, str,
46989 + /* Clear trailing ',' */
46990 + if (str[cur - 1] == ',') {
46991 + str[cur - 1] = '\0';
46997 + * set_verbosity -
47001 + * toggle bits according to the `rules' in the str.
47002 + * str is a list of verb flags. can be prefexed with '+' or '-'
47003 + * No prefix is the same as '+' prefix
47005 + * '-' unsets bits.
47006 + * special 'clear' unsets all.
47009 +set_verbosity (char *str, uint32_t * verb)
47011 + char *token, *next;
47012 + int i, wl, tl, len = sizeof (verbose_flags) / sizeof (verbose_flags[0]);
47017 + wl = strlen (str);
47020 + for (token = str, tl = 0; tl < wl &&
47021 + token[tl] != ',' &&
47022 + token[tl] != ' ' && token[tl] != '|' && token[tl] != '\0'; tl++) ;
47023 + next = token + tl + 1;
47026 + if (token[0] == '-') {
47028 + for (i = 0; i < len; i++) {
47030 + (token, verbose_flags[i].name, tl) == 0) {
47031 + (*verb) &= ~(verbose_flags[i].val);
47034 + } else if (token[0] == '+') {
47036 + for (i = 0; i < len; i++) {
47038 + (token, verbose_flags[i].name, tl) == 0) {
47039 + (*verb) |= verbose_flags[i].val;
47043 + if (strncasecmp (token, "clear", tl) == 0) {
47046 + for (i = 0; i < len; i++) {
47048 + (token, verbose_flags[i].name,
47050 + (*verb) |= verbose_flags[i].val;
47056 + if (next >= str + wl)
47058 + for (token = next, tl = 0;
47060 + token[tl] != ',' &&
47061 + token[tl] != ' ' &&
47062 + token[tl] != '|' && token[tl] != '\0'; tl++) ;
47063 + next = token + tl + 1;
47067 diff -urN linux-orig/fs/gfs_locking/lock_gulm/utils_verb_flags.h linux-patched/fs/gfs_locking/lock_gulm/utils_verb_flags.h
47068 --- linux-orig/fs/gfs_locking/lock_gulm/utils_verb_flags.h 1969-12-31 18:00:00.000000000 -0600
47069 +++ linux-patched/fs/gfs_locking/lock_gulm/utils_verb_flags.h 2004-06-16 12:03:21.959894533 -0500
47071 +/******************************************************************************
47072 +*******************************************************************************
47074 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
47075 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
47077 +** This copyrighted material is made available to anyone wishing to use,
47078 +** modify, copy, or redistribute it subject to the terms and conditions
47079 +** of the GNU General Public License v.2.
47081 +*******************************************************************************
47082 +******************************************************************************/
47084 +#ifndef __utils_verb_flags_h__
47085 +#define __utils_verb_flags_h__
47086 +int get_verbosity_string (char *str, size_t slen, uint32_t verb);
47087 +void set_verbosity (char *str, uint32_t * verb);
47088 +#endif /*__utils_verb_flags_h__*/
47089 diff -urN linux-orig/fs/gfs_locking/lock_gulm/xdr.h linux-patched/fs/gfs_locking/lock_gulm/xdr.h
47090 --- linux-orig/fs/gfs_locking/lock_gulm/xdr.h 1969-12-31 18:00:00.000000000 -0600
47091 +++ linux-patched/fs/gfs_locking/lock_gulm/xdr.h 2004-06-16 12:03:21.959894533 -0500
47093 +/******************************************************************************
47094 +*******************************************************************************
47096 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
47097 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
47099 +** This copyrighted material is made available to anyone wishing to use,
47100 +** modify, copy, or redistribute it subject to the terms and conditions
47101 +** of the GNU General Public License v.2.
47103 +*******************************************************************************
47104 +******************************************************************************/
47106 +#ifndef __gulm_xdr_h__
47107 +#define __gulm_xdr_h__
47108 +typedef struct xdr_enc_s xdr_enc_t;
47109 +typedef struct xdr_dec_s xdr_dec_t;
47111 +/* sockets in kernel space are done a bit different than socket in
47112 + * userspace. But we need to have them appear to be the same.
47117 +#include <linux/net.h>
47118 +#include <linux/in.h>
47119 +#include <linux/in6.h>
47120 +#include <linux/socket.h>
47121 +#include <net/sock.h>
47123 +typedef struct socket *xdr_socket;
47124 +#endif /*__linux__*/
47125 +#else /*__KERNEL__*/
47126 +#include <sys/types.h>
47127 +#include <sys/uio.h>
47128 +#include <sys/socket.h>
47129 +#include <netinet/in.h>
47130 +#include <netinet/tcp.h>
47131 +#include <unistd.h>
47132 +#include <errno.h>
47133 +typedef int xdr_socket;
47134 +#endif /*__KERNEL__*/
47136 +/* start things up */
47137 +int xdr_open (xdr_socket * sk);
47138 +int xdr_connect (struct sockaddr_in6 *adr, xdr_socket sk);
47139 +void xdr_close (xdr_socket * sk);
47141 +/* deep, basic io */
47144 +size_t xdr_send (struct socket *sock, void *buf, size_t size);
47145 +size_t xdr_recv (struct socket *sock, void *buf, size_t size);
47146 +#endif /*__linux__*/
47147 +#else /*__KERNEL__*/
47148 +ssize_t xdr_recv (int fd, void *buf, size_t len);
47149 +ssize_t xdr_send (int fd, void *buf, size_t len);
47150 +#endif /*__KERNEL__*/
47152 +xdr_enc_t *xdr_enc_init (xdr_socket sk, int buffer_size);
47153 +xdr_dec_t *xdr_dec_init (xdr_socket sk, int buffer_size);
47154 +int xdr_enc_flush (xdr_enc_t * xdr);
47155 +int xdr_enc_release (xdr_enc_t * xdr); /* calls xdr_enc_flush() */
47156 +void xdr_enc_force_release (xdr_enc_t * xdr); /* doesn't call xdr_enc_flush() */
47157 +void xdr_dec_release (xdr_dec_t * xdr);
47158 +/* xdr_enc_force_release() is for when you get and error sending and you
47159 + * want to free that stuff up right away. If you use the regular release
47160 + * for enc, it will fail if it cannot send data over the filedesciptor.
47163 +/* encoders add to a stream */
47164 +int __inline__ xdr_enc_uint64 (xdr_enc_t * xdr, uint64_t i);
47165 +int __inline__ xdr_enc_uint32 (xdr_enc_t * xdr, uint32_t i);
47166 +int __inline__ xdr_enc_uint16 (xdr_enc_t * xdr, uint16_t i);
47167 +int __inline__ xdr_enc_uint8 (xdr_enc_t * xdr, uint8_t i);
47168 +int __inline__ xdr_enc_ipv6 (xdr_enc_t * enc, struct in6_addr *ip);
47169 +int xdr_enc_raw (xdr_enc_t * xdr, void *pointer, uint16_t len);
47170 +int xdr_enc_raw_iov (xdr_enc_t * xdr, int count, struct iovec *iov);
47171 +int xdr_enc_string (xdr_enc_t * xdr, uint8_t * s);
47172 +int xdr_enc_list_start (xdr_enc_t * xdr);
47173 +int xdr_enc_list_stop (xdr_enc_t * xdr);
47175 +/* decoders remove from stream */
47176 +int xdr_dec_uint64 (xdr_dec_t * xdr, uint64_t * i);
47177 +int xdr_dec_uint32 (xdr_dec_t * xdr, uint32_t * i);
47178 +int xdr_dec_uint16 (xdr_dec_t * xdr, uint16_t * i);
47179 +int xdr_dec_uint8 (xdr_dec_t * xdr, uint8_t * i);
47180 +int xdr_dec_ipv6 (xdr_dec_t * xdr, struct in6_addr *ip);
47181 +int xdr_dec_raw (xdr_dec_t * xdr, void *p, uint16_t * l); /* no malloc */
47182 +int xdr_dec_raw_m (xdr_dec_t * xdr, void **p, uint16_t * l); /* mallocs p */
47183 +int xdr_dec_raw_ag (xdr_dec_t * xdr, void **p, uint16_t * bl, uint16_t * rl);
47184 +int xdr_dec_string (xdr_dec_t * xdr, uint8_t ** strp); /* mallocs s */
47185 +int xdr_dec_string_nm (xdr_dec_t * xdr, uint8_t * strp, size_t l); /* no malloc */
47186 +int xdr_dec_string_ag (xdr_dec_t * xdr, uint8_t ** s, uint16_t * bl);
47187 +int xdr_dec_list_start (xdr_dec_t * xdr);
47188 +int xdr_dec_list_stop (xdr_dec_t * xdr);
47190 +#endif /*__gulm_xdr_h__*/
47191 diff -urN linux-orig/fs/gfs_locking/lock_gulm/xdr_base.c linux-patched/fs/gfs_locking/lock_gulm/xdr_base.c
47192 --- linux-orig/fs/gfs_locking/lock_gulm/xdr_base.c 1969-12-31 18:00:00.000000000 -0600
47193 +++ linux-patched/fs/gfs_locking/lock_gulm/xdr_base.c 2004-06-16 12:03:21.959894533 -0500
47195 +/******************************************************************************
47196 +*******************************************************************************
47198 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
47199 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
47201 +** This copyrighted material is made available to anyone wishing to use,
47202 +** modify, copy, or redistribute it subject to the terms and conditions
47203 +** of the GNU General Public License v.2.
47205 +*******************************************************************************
47206 +******************************************************************************/
47209 + * This is a bit of an abstraction layer to get this working in both kernel
47214 +#define MIN(a,b) ((a<b)?a:b)
47217 +#include <linux/kernel.h>
47218 +#include <linux/sched.h>
47219 +#include <linux/slab.h>
47220 +#include <linux/vmalloc.h>
47221 +#define __KERNEL_SYSCALLS__
47222 +#include <linux/unistd.h>
47223 +#endif /*__linux__*/
47228 + * xdr_realloc - a realloc for kernel space.
47229 + * @a: < pointer to realloc
47230 + * @nl: < desired new size
47231 + * @ol: < current old size
47233 + * Not as good as the real realloc, since it always moves memory. But good
47234 + * enough for as little as it will get used here.
47236 + * XXX this is broken.
47241 +xdr_realloc (void *a, size_t nl, size_t ol)
47245 + } else if (nl == 0) {
47248 + } else if (a == NULL && nl > 0) {
47249 + return kmalloc (nl, GFP_KERNEL);
47252 + tmp = kmalloc (nl, GFP_KERNEL);
47255 + memcpy (tmp, a, MIN (nl, ol));
47261 +typedef enum { xdr_enc, xdr_dec } xdr_type;
47263 +/* encoders have this sorta non-blocking, growing buffering stunt.
47264 + * makes them a bit different from the decoders now.
47266 +struct xdr_enc_s {
47267 + size_t default_buf_size;
47275 +/* decoders only pull a single item off of the socket at a time.
47276 + * so this is all they need.
47278 +struct xdr_dec_s {
47279 + size_t length; /* total byte length of the stream */
47280 + size_t curloc; /* current byte offset from start */
47281 + uint8_t *stream; /* start of the encoded stream. */
47286 +/* the types of data we support. */
47288 +#define XDR_NULL 0x00 /* NOT A VALID TAG!!! used in dec code. */
47289 +#define XDR_LIST_START 0x01
47290 +#define XDR_LIST_STOP 0x02
47291 +/* list is a variable length device. It is a start tag, some number of
47292 + * xdr_enc_*, then an stop tag. It's main purpose is to provide a method
47293 + * of encasing data.
47295 +#define XDR_STRING 0x04
47296 +/* string tag is followed by a uint16 which is the byte length */
47297 +#define XDR_RAW 0x05
47298 +/* raw tag is followed by a uint16 which is the byte length
47299 + * if 65535 bytes isn't enough, split your data and put multiples of these
47300 + * back to back. (idea of xdr is to avoid this twit.)
47303 +/* note, if the size of these should variate, I'm screwed. Should consider
47304 + * changing this all to the bit shift and array access to be more concrete.
47307 +#define XDR_UINT64 0x06
47308 +#define XDR_UINT32 0x07
47309 +#define XDR_UINT16 0x08
47310 +#define XDR_UINT8 0x09
47311 +/* should add signed ints */
47313 +#define XDR_IPv6 0x0a /* 16 bytes, IPv6 address */
47315 +/* any other base types?
47318 +#define XDR_DEFAULT_BUFFER_SIZE 4096
47319 +/*****************************************************************************/
47327 + * Returns: xdr_enc_t*
47330 +xdr_enc_init (xdr_socket fd, int buffer_size)
47334 + if (buffer_size <= 0)
47335 + buffer_size = XDR_DEFAULT_BUFFER_SIZE;
47337 + xdr = kmalloc (sizeof (xdr_enc_t), GFP_KERNEL);
47340 + xdr->stream = kmalloc (buffer_size, GFP_KERNEL);
47341 + if (xdr->stream == NULL) {
47346 + xdr->type = xdr_enc;
47347 + xdr->default_buf_size = buffer_size;
47348 + xdr->length = buffer_size;
47360 + * Returns: xdr_dec_t*
47363 +xdr_dec_init (xdr_socket fd, int buffer_size)
47367 + if (buffer_size <= 0)
47368 + buffer_size = XDR_DEFAULT_BUFFER_SIZE;
47370 + xdr = kmalloc (sizeof (xdr_dec_t), GFP_KERNEL);
47373 + xdr->length = buffer_size;
47375 + xdr->stream = kmalloc (buffer_size, GFP_KERNEL);
47377 + xdr->type = xdr_dec;
47378 + if (xdr->stream == NULL) {
47382 + *(xdr->stream) = XDR_NULL; /* so the first dec_call will call get_next */
47386 +/*****************************************************************************/
47388 + * xdr_enc_flush -
47394 +xdr_enc_flush (xdr_enc_t * xdr)
47399 + if (xdr->type != xdr_enc)
47401 + if (xdr->curloc == 0)
47404 + err = xdr_send (xdr->fd, xdr->stream, xdr->curloc);
47408 + return -EPROTO; /* why? */
47418 + * Free the memory, losing whatever may be there.
47421 +xdr_dec_release (xdr_dec_t * xdr)
47425 + kfree (xdr->stream);
47430 + * xdr_enc_force_release -
47433 + * Free the memory, losing whatever may be there.
47436 +xdr_enc_force_release (xdr_enc_t * xdr)
47440 + if (xdr->stream != NULL)
47441 + kfree (xdr->stream);
47446 + * xdr_enc_release -
47449 + * Free things up, trying to send any possible leftover data first.
47454 +xdr_enc_release (xdr_enc_t * xdr)
47459 + if ((e = xdr_enc_flush (xdr)) != 0)
47461 + xdr_enc_force_release (xdr);
47465 +/*****************************************************************************/
47471 + * each single encoded call needs to fit within a buffer. So we make sure
47472 + * the buffer is big enough.
47474 + * If the buffer is big enough, but just doesn't have room, we send the
47475 + * data in the buffer, emptying it, first.
47480 +grow_stream (xdr_enc_t * enc, size_t len)
47485 + /* buffer must be big enough for one type entry. */
47486 + if (len > enc->length) {
47487 + c = xdr_realloc (enc->stream, len, enc->length);
47491 + enc->length = len;
47494 + /* if there isn't room on the end of this chunk,
47495 + * try sending what we've got.
47497 + if (enc->curloc + len > enc->length) {
47498 + err = xdr_enc_flush (enc);
47500 + /* error, better pass this up. */
47519 +append_bytes (xdr_enc_t * xdr, uint8_t xdr_type, void *bytes, size_t len)
47524 + if (xdr->type != xdr_enc)
47527 + /* len + 1; need the one byte for the type code. */
47528 + if ((e = grow_stream (xdr, len + 1)) != 0)
47530 + *(xdr->stream + xdr->curloc) = xdr_type;
47531 + xdr->curloc += 1;
47532 + memcpy ((xdr->stream + xdr->curloc), bytes, len);
47533 + xdr->curloc += len;
47539 +xdr_enc_uint64 (xdr_enc_t * xdr, uint64_t i)
47541 + uint64_t b = cpu_to_be64 (i);
47542 + return append_bytes (xdr, XDR_UINT64, &b, sizeof (uint64_t));
47546 +xdr_enc_uint32 (xdr_enc_t * xdr, uint32_t i)
47548 + uint32_t b = cpu_to_be32 (i);
47549 + return append_bytes (xdr, XDR_UINT32, &b, sizeof (uint32_t));
47553 +xdr_enc_uint16 (xdr_enc_t * xdr, uint16_t i)
47555 + uint16_t b = cpu_to_be16 (i);
47556 + return append_bytes (xdr, XDR_UINT16, &b, sizeof (uint16_t));
47560 +xdr_enc_uint8 (xdr_enc_t * xdr, uint8_t i)
47562 + return append_bytes (xdr, XDR_UINT8, &i, sizeof (uint8_t));
47566 +xdr_enc_ipv6 (xdr_enc_t * xdr, struct in6_addr *ip)
47567 +{ /* bytes should already be in the right order. */
47568 + return append_bytes (xdr, XDR_IPv6, ip->s6_addr, 16);
47572 +xdr_enc_raw (xdr_enc_t * xdr, void *p, uint16_t len)
47577 + if ((e = grow_stream (xdr, len + 3)) != 0)
47579 + *(xdr->stream + xdr->curloc) = XDR_RAW;
47580 + xdr->curloc += 1;
47581 + (uint16_t) * ((uint16_t *) (xdr->stream + xdr->curloc)) =
47582 + cpu_to_be16 (len);
47583 + xdr->curloc += 2;
47584 + memcpy ((xdr->stream + xdr->curloc), p, len);
47585 + xdr->curloc += len;
47590 +xdr_enc_raw_iov (xdr_enc_t * xdr, int count, struct iovec *iov)
47592 + size_t total = 0;
47594 + if (xdr == NULL || count < 1 || iov == NULL)
47596 + for (i = 0; i < count; i++)
47597 + total += iov[i].iov_len;
47598 + /* make sure it fits in a uint16_t */
47599 + if (total > 0xffff)
47601 + /* grow to fit */
47602 + if ((err = grow_stream (xdr, total + 3)) != 0)
47604 + /* copy in header and size */
47605 + *(xdr->stream + xdr->curloc) = XDR_RAW;
47606 + xdr->curloc += 1;
47607 + (uint16_t) * ((uint16_t *) (xdr->stream + xdr->curloc)) =
47608 + cpu_to_be16 (total);
47609 + xdr->curloc += 2;
47610 + /* copy in all iovbufs */
47611 + for (i = 0; i < count; i++) {
47612 + if (iov[i].iov_base == NULL)
47614 + memcpy ((xdr->stream + xdr->curloc), iov[i].iov_base,
47616 + xdr->curloc += iov[i].iov_len;
47622 +xdr_enc_string (xdr_enc_t * xdr, uint8_t * s)
47630 + len = strlen (s);
47631 + if ((e = grow_stream (xdr, len + 3)) != 0)
47633 + *(xdr->stream + xdr->curloc) = XDR_STRING;
47634 + xdr->curloc += 1;
47635 + (uint16_t) * ((uint16_t *) (xdr->stream + xdr->curloc)) =
47636 + cpu_to_be16 (len);
47637 + xdr->curloc += 2;
47639 + memcpy ((xdr->stream + xdr->curloc), s, len);
47640 + xdr->curloc += len;
47646 +xdr_enc_list_start (xdr_enc_t * xdr)
47651 + if ((e = grow_stream (xdr, 1)) != 0)
47653 + *(xdr->stream + xdr->curloc) = XDR_LIST_START;
47654 + xdr->curloc += 1;
47659 +xdr_enc_list_stop (xdr_enc_t * xdr)
47664 + if ((e = grow_stream (xdr, 1)) != 0)
47666 + *(xdr->stream + xdr->curloc) = XDR_LIST_STOP;
47667 + xdr->curloc += 1;
47671 +/*****************************************************************************/
47677 + * get what ever may be next, and put it into the buffer.
47682 +get_next (xdr_dec_t * xdr)
47686 + if ((err = xdr_recv (xdr->fd, xdr->stream, 1)) < 0)
47691 + if (*(xdr->stream) == XDR_UINT64) {
47692 + len = sizeof (uint64_t);
47693 + } else if (*(xdr->stream) == XDR_UINT32) {
47694 + len = sizeof (uint32_t);
47695 + } else if (*(xdr->stream) == XDR_UINT16) {
47696 + len = sizeof (uint16_t);
47697 + } else if (*(xdr->stream) == XDR_UINT8) {
47698 + len = sizeof (uint8_t);
47699 + } else if (*(xdr->stream) == XDR_IPv6) {
47701 + } else if (*(xdr->stream) == XDR_STRING) {
47702 + if ((err = xdr_recv (xdr->fd, (xdr->stream + 1), 2)) < 0)
47706 + len = be16_to_cpu (*((uint16_t *) (xdr->stream + xdr->curloc)));
47707 + xdr->curloc += 2;
47708 + } else if (*(xdr->stream) == XDR_RAW) {
47709 + if ((err = xdr_recv (xdr->fd, (xdr->stream + 1), 2)) < 0)
47713 + len = be16_to_cpu (*((uint16_t *) (xdr->stream + xdr->curloc)));
47714 + xdr->curloc += 2;
47715 + } else if (*(xdr->stream) == XDR_LIST_START) {
47718 + } else if (*(xdr->stream) == XDR_LIST_STOP) {
47725 + /* grow buffer if need be. */
47726 + if (xdr->curloc + len > xdr->length) {
47728 + c = xdr_realloc (xdr->stream, xdr->curloc + len, xdr->length);
47732 + xdr->length = xdr->curloc + len;
47737 + xdr_recv (xdr->fd, (xdr->stream + xdr->curloc), len)) < 0)
47747 +xdr_dec_uint64 (xdr_dec_t * xdr, uint64_t * i)
47750 + if (xdr == NULL || i == NULL)
47752 + if (*(xdr->stream) == XDR_NULL) {
47753 + if ((err = get_next (xdr)) != 0)
47756 + if (*(xdr->stream) != XDR_UINT64)
47758 + *i = be64_to_cpu (*((uint64_t *) (xdr->stream + 1)));
47759 + /* read the item out, mark that */
47760 + *(xdr->stream) = XDR_NULL;
47765 +xdr_dec_uint32 (xdr_dec_t * xdr, uint32_t * i)
47768 + if (xdr == NULL || i == NULL)
47770 + if (*(xdr->stream) == XDR_NULL) {
47771 + if ((err = get_next (xdr)) != 0)
47774 + if (*(xdr->stream) != XDR_UINT32)
47776 + *i = be32_to_cpu (*((uint32_t *) (xdr->stream + 1)));
47777 + /* read the item out, mark that */
47778 + *(xdr->stream) = XDR_NULL;
47783 +xdr_dec_uint16 (xdr_dec_t * xdr, uint16_t * i)
47786 + if (xdr == NULL || i == NULL)
47788 + if (*(xdr->stream) == XDR_NULL) {
47789 + if ((err = get_next (xdr)) != 0)
47792 + if (*(xdr->stream) != XDR_UINT16)
47794 + *i = be16_to_cpu (*((uint16_t *) (xdr->stream + 1)));
47795 + /* read the item out, mark that */
47796 + *(xdr->stream) = XDR_NULL;
47801 +xdr_dec_uint8 (xdr_dec_t * xdr, uint8_t * i)
47804 + if (xdr == NULL || i == NULL)
47807 + if (*(xdr->stream) == XDR_NULL) {
47808 + if ((err = get_next (xdr)) != 0)
47811 + if (*(xdr->stream) != XDR_UINT8)
47813 + *i = *((uint8_t *) (xdr->stream + 1));
47814 + /* read the item out, mark that */
47815 + *(xdr->stream) = XDR_NULL;
47820 +xdr_dec_ipv6 (xdr_dec_t * xdr, struct in6_addr *ip)
47823 + if (xdr == NULL || ip == NULL)
47825 + if (*(xdr->stream) == XDR_NULL) {
47826 + if ((err = get_next (xdr)) != 0)
47829 + if (*(xdr->stream) != XDR_IPv6)
47831 + memcpy (ip, xdr->stream + 1, 16);
47832 + /* read the item out, mark that */
47833 + *(xdr->stream) = XDR_NULL;
47837 +/* mallocing version */
47839 +xdr_dec_raw_m (xdr_dec_t * xdr, void **p, uint16_t * l)
47845 + if (xdr == NULL || p == NULL || l == NULL)
47847 + if (*(xdr->stream) == XDR_NULL) {
47848 + if ((err = get_next (xdr)) != 0)
47851 + if (*(xdr->stream) != XDR_RAW)
47855 + len = be16_to_cpu (*((uint16_t *) (xdr->stream + xdr->curloc)));
47856 + xdr->curloc += 2;
47858 + str = kmalloc (len, GFP_KERNEL);
47861 + memcpy (str, (xdr->stream + xdr->curloc), len);
47862 + xdr->curloc += len;
47866 + /* read the item out, mark that */
47867 + *(xdr->stream) = XDR_NULL;
47871 +/* non-mallocing version */
47873 +xdr_dec_raw (xdr_dec_t * xdr, void *p, uint16_t * l)
47878 + if (xdr == NULL || p == NULL || l == NULL)
47880 + if (*(xdr->stream) == XDR_NULL) {
47881 + if ((err = get_next (xdr)) != 0)
47884 + if (*(xdr->stream) != XDR_RAW)
47888 + len = be16_to_cpu (*((uint16_t *) (xdr->stream + xdr->curloc)));
47889 + xdr->curloc += 2;
47894 + memcpy (p, (xdr->stream + xdr->curloc), len);
47895 + xdr->curloc += len;
47899 + /* read the item out, mark that */
47900 + *(xdr->stream) = XDR_NULL;
47905 + * xdr_dec_raw_ag - auto-growing version
47907 + * @p: <> pointer to buffer
47908 + * @bl: <> size of the buffer
47909 + * @rl: > size of data read from stream
47911 + * This form of xdr_dec_raw will increase the size of a pre-malloced buffer
47912 + * to fit the data it is reading. It is kind of a merger of the
47913 + * non-mallocing and mallocing versions.
47918 +xdr_dec_raw_ag (xdr_dec_t * xdr, void **p, uint16_t * bl, uint16_t * rl)
47923 + if (xdr == NULL || p == NULL || bl == NULL || rl == NULL)
47925 + if (*(xdr->stream) == XDR_NULL) {
47926 + if ((err = get_next (xdr)) != 0)
47929 + if (*(xdr->stream) != XDR_RAW)
47933 + len = be16_to_cpu (*((uint16_t *) (xdr->stream + xdr->curloc)));
47934 + xdr->curloc += 2;
47936 + if (len > *bl) { /* grow p */
47938 + temp = xdr_realloc (*p, len, *bl);
47939 + if (temp == NULL)
47945 + memcpy (*p, (xdr->stream + xdr->curloc), len);
47946 + xdr->curloc += len;
47950 + *(xdr->stream) = XDR_NULL;
47954 +/* mallocing version */
47956 +xdr_dec_string (xdr_dec_t * xdr, uint8_t ** strp)
47961 + if (xdr == NULL || strp == NULL)
47963 + if (*(xdr->stream) == XDR_NULL) {
47964 + if ((err = get_next (xdr)) != 0)
47967 + if (*(xdr->stream) != XDR_STRING)
47971 + len = be16_to_cpu (*((uint16_t *) (xdr->stream + xdr->curloc)));
47972 + xdr->curloc += 2;
47975 + str = kmalloc (len + 1, GFP_KERNEL);
47979 + memcpy (str, (xdr->stream + xdr->curloc), len);
47980 + xdr->curloc += len;
47987 + /* read the item out, mark that */
47988 + *(xdr->stream) = XDR_NULL;
47992 +/* non-mallocing version */
47994 +xdr_dec_string_nm (xdr_dec_t * xdr, uint8_t * string, size_t l)
47998 + if (xdr == NULL || string == NULL)
48000 + if (*(xdr->stream) == XDR_NULL) {
48001 + if ((err = get_next (xdr)) != 0)
48004 + if (*(xdr->stream) != XDR_STRING)
48008 + len = be16_to_cpu (*((uint16_t *) (xdr->stream + xdr->curloc)));
48009 + xdr->curloc += 2;
48012 + memcpy (string, (xdr->stream + xdr->curloc), MIN (len, l));
48014 + string[len] = '\0';
48016 + string[l - 1] = '\0';
48018 + string[0] = '\0';
48021 + /* read the item out, mark that */
48022 + *(xdr->stream) = XDR_NULL;
48027 +xdr_dec_string_ag (xdr_dec_t * xdr, uint8_t ** s, uint16_t * bl)
48031 + if (xdr == NULL || s == NULL || bl == NULL)
48033 + if (*(xdr->stream) == XDR_NULL) {
48034 + if ((err = get_next (xdr)) != 0)
48037 + if (*(xdr->stream) != XDR_STRING)
48041 + len = be16_to_cpu (*((uint16_t *) (xdr->stream + xdr->curloc)));
48042 + xdr->curloc += 2;
48044 + if (len == 0) { /* empty string */
48046 + *(xdr->stream) = XDR_NULL;
48050 + if (len >= *bl) { /* grow s */
48052 + temp = xdr_realloc (*s, len + 1, *bl);
48053 + if (temp == NULL)
48059 + memcpy (*s, (xdr->stream + xdr->curloc), len);
48060 + (*s)[len] = '\0';
48062 + *(xdr->stream) = XDR_NULL;
48067 +xdr_dec_list_start (xdr_dec_t * xdr)
48072 + if (*(xdr->stream) == XDR_NULL) {
48073 + if ((err = get_next (xdr)) != 0)
48076 + if (*(xdr->stream) != XDR_LIST_START)
48078 + /* read the item out, mark that */
48079 + *(xdr->stream) = XDR_NULL;
48084 +xdr_dec_list_stop (xdr_dec_t * xdr)
48089 + if (*(xdr->stream) == XDR_NULL) {
48090 + if ((err = get_next (xdr)) != 0)
48093 + if (*(xdr->stream) != XDR_LIST_STOP)
48095 + /* read the item out, mark that */
48096 + *(xdr->stream) = XDR_NULL;
48099 diff -urN linux-orig/fs/gfs_locking/lock_gulm/xdr_io.c linux-patched/fs/gfs_locking/lock_gulm/xdr_io.c
48100 --- linux-orig/fs/gfs_locking/lock_gulm/xdr_io.c 1969-12-31 18:00:00.000000000 -0600
48101 +++ linux-patched/fs/gfs_locking/lock_gulm/xdr_io.c 2004-06-16 12:03:21.959894533 -0500
48103 +/******************************************************************************
48104 +*******************************************************************************
48106 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
48107 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
48109 +** This copyrighted material is made available to anyone wishing to use,
48110 +** modify, copy, or redistribute it subject to the terms and conditions
48111 +** of the GNU General Public License v.2.
48113 +*******************************************************************************
48114 +******************************************************************************/
48117 + * does the lowest level of reads and writes.
48118 + * In kernel and/or userspace.
48125 +#include <linux/net.h>
48126 +#include <linux/in.h>
48127 +#include <linux/socket.h>
48128 +#include <net/sock.h>
48129 +#include "asm/uaccess.h"
48132 + * do_tfer - transfers data over a socket
48133 + * @sock: < socket
48134 + * @iov: <> iovec of buffers
48135 + * @n: < how many iovecs
48136 + * @size: < total data size to send/recv
48137 + * @dir: < send or recv
48138 + * @timeout: < how many sec to wait. 0 == forever.
48140 + * Returns: <0: Error
48141 + * >=0: Bytes transfered
48144 +do_tfer (struct socket *sock, struct iovec *iov, int n, int size, int dir)
48146 + unsigned long flags;
48150 + int rv, moved = 0;
48153 + set_fs (get_ds ());
48155 + /* XXX do I still want the signal stuff? */
47956 + spin_lock_irqsave (&current->sighand->siglock, flags);
48157 + oldset = current->blocked;
47958 + siginitsetinv (&current->blocked,
47959 + sigmask (SIGKILL) | sigmask (SIGTERM));
48160 + recalc_sigpending ();
48161 + spin_unlock_irqrestore (&current->sighand->siglock, flags);
48163 + memset (&m, 0, sizeof (struct msghdr));
48166 + m.msg_iovlen = n;
48167 + m.msg_flags = MSG_NOSIGNAL;
48170 + rv = sock_sendmsg (sock, &m, size - moved);
48172 + rv = sock_recvmsg (sock, &m, size - moved, 0);
48178 + if (moved >= size)
48181 + /* adjust iov's for next transfer */
48182 + while (iov->iov_len == 0) {
48190 + spin_lock_irqsave (&current->sighand->siglock, flags);
48191 + current->blocked = oldset;
48192 + recalc_sigpending ();
48193 + spin_unlock_irqrestore (&current->sighand->siglock, flags);
48201 +xdr_send (struct socket * sock, void *buf, size_t size)
48203 + struct iovec iov;
48206 + iov.iov_base = buf;
48207 + iov.iov_len = size;
48209 + res = do_tfer (sock, &iov, 1, size, 1);
48215 +xdr_recv (struct socket * sock, void *buf, size_t size)
48217 + struct iovec iov;
48220 + iov.iov_base = buf;
48221 + iov.iov_len = size;
48223 + res = do_tfer (sock, &iov, 1, size, 0);
48228 +#endif /*__linux__*/
48229 +#else /*__KERNEL__*/
48231 +#include <errno.h>
48232 +#include <sys/types.h>
48233 +#include <sys/socket.h>
48236 +xdr_recv (int fd, void *buf, size_t len)
48240 + while (len > 0) {
48241 + cnt = recv (fd, buf, len, 0);
48254 +xdr_send (int fd, void *buf, size_t len)
48258 + while (len > 0) {
48259 + cnt = send (fd, buf, len, 0);
48271 +#endif /*__KERNEL__*/
48272 diff -urN linux-orig/fs/gfs_locking/lock_gulm/xdr_socket.c linux-patched/fs/gfs_locking/lock_gulm/xdr_socket.c
48273 --- linux-orig/fs/gfs_locking/lock_gulm/xdr_socket.c 1969-12-31 18:00:00.000000000 -0600
48274 +++ linux-patched/fs/gfs_locking/lock_gulm/xdr_socket.c 2004-06-16 12:03:21.959894533 -0500
48276 +/******************************************************************************
48277 +*******************************************************************************
48279 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
48280 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
48282 +** This copyrighted material is made available to anyone wishing to use,
48283 +** modify, copy, or redistribute it subject to the terms and conditions
48284 +** of the GNU General Public License v.2.
48286 +*******************************************************************************
48287 +******************************************************************************/
48290 + * This file opens and closes a socket.
48291 + * In kernel and/or userspace.
48300 +xdr_open (xdr_socket * xsk)
48302 + return sock_create (AF_INET6, SOCK_STREAM, 0, xsk);
48306 +xdr_connect (struct sockaddr_in6 *adr, xdr_socket xsk)
48308 + return xsk->ops->connect (xsk,
48309 + (struct sockaddr *) adr,
48310 + sizeof (struct sockaddr_in6), 0);
48314 +xdr_close (xdr_socket * xsk)
48316 + if (*xsk == NULL)
48318 + sock_release (*xsk);
48322 +#endif /*__linux__*/
48323 +#else /*__KERNEL__*/
48326 +xdr_open (xdr_socket * xsk)
48329 + sk = socket (AF_INET6, SOCK_STREAM, 0);
48337 +xdr_connect (struct sockaddr_in6 *adr, xdr_socket xsk)
48341 + connect (xsk, (struct sockaddr *) adr,
48342 + sizeof (struct sockaddr_in6));
48349 +xdr_close (xdr_socket * xsk)
48357 +#endif /*__KERNEL__*/
48358 diff -urN linux-orig/fs/gfs_locking/lock_harness/main.c linux-patched/fs/gfs_locking/lock_harness/main.c
48359 --- linux-orig/fs/gfs_locking/lock_harness/main.c 1969-12-31 18:00:00.000000000 -0600
48360 +++ linux-patched/fs/gfs_locking/lock_harness/main.c 2004-06-16 12:03:10.006671787 -0500
48362 +/******************************************************************************
48363 +*******************************************************************************
48365 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
48366 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
48368 +** This copyrighted material is made available to anyone wishing to use,
48369 +** modify, copy, or redistribute it subject to the terms and conditions
48370 +** of the GNU General Public License v.2.
48372 +*******************************************************************************
48373 +******************************************************************************/
48375 +#include <linux/module.h>
48376 +#include <linux/init.h>
48377 +#include <linux/string.h>
48378 +#include <linux/slab.h>
48379 +#include <linux/wait.h>
48380 +#include <linux/sched.h>
48381 +#include <linux/kmod.h>
48382 +#include <linux/lm_interface.h>
48384 +#define RELEASE_NAME "<CVS>"
48386 +struct lmh_wrapper {
48387 + struct list_head lw_list;
48388 + struct lm_lockops *lw_ops;
48391 +static struct semaphore lmh_lock;
48392 +static struct list_head lmh_list;
48395 + * lm_register_proto - Register a low-level locking protocol
48396 + * @proto: the protocol definition
48398 + * Returns: 0 on success, -EXXX on failure
48402 +lm_register_proto(struct lm_lockops *proto)
48404 + struct list_head *tmp, *head;
48405 + struct lmh_wrapper *lw;
48409 + for (head = &lmh_list, tmp = head->next; tmp != head; tmp = tmp->next) {
48410 + lw = list_entry(tmp, struct lmh_wrapper, lw_list);
48412 + if (strcmp(lw->lw_ops->lm_proto_name, proto->lm_proto_name) == 0) {
48414 + printk("lock_harness: protocol %s already exists\n",
48415 + proto->lm_proto_name);
48420 + lw = kmalloc(sizeof (struct lmh_wrapper), GFP_KERNEL);
48425 + memset(lw, 0, sizeof (struct lmh_wrapper));
48427 + lw->lw_ops = proto;
48428 + list_add(&lw->lw_list, &lmh_list);
48436 + * lm_unregister_proto - Unregister a low-level locking protocol
48437 + * @proto: the protocol definition
48442 +lm_unregister_proto(struct lm_lockops *proto)
48444 + struct list_head *tmp, *head;
48445 + struct lmh_wrapper *lw = NULL;
48449 + for (head = &lmh_list, tmp = head->next; tmp != head; tmp = tmp->next) {
48450 + lw = list_entry(tmp, struct lmh_wrapper, lw_list);
48452 + if (strcmp(lw->lw_ops->lm_proto_name, proto->lm_proto_name) == 0) {
48453 + list_del(&lw->lw_list);
48462 + printk("lock_harness: can't unregister lock protocol %s\n",
48463 + proto->lm_proto_name);
48467 + * lm_mount - Mount a lock protocol
48468 + * @proto_name - the name of the protocol
48469 + * @table_name - the name of the lock space
48470 + * @host_data - data specific to this host
48471 + * @cb - the callback to the code using the lock module
48472 + * @fsdata - data to pass back with the callback
48473 + * @min_lvb_size - the mininum LVB size that the caller can deal with
48474 + * @lockstruct - a structure returned describing the mount
48476 + * Returns: 0 on success, -EXXX on failure
48480 +lm_mount(char *proto_name, char *table_name, char *host_data,
48481 + lm_callback_t cb, lm_fsdata_t * fsdata,
48482 + unsigned int min_lvb_size, struct lm_lockstruct *lockstruct)
48484 + struct list_head *tmp;
48485 + struct lmh_wrapper *lw = NULL;
48492 + for (tmp = lmh_list.next; tmp != &lmh_list; tmp = tmp->next) {
48493 + lw = list_entry(tmp, struct lmh_wrapper, lw_list);
48495 + if (strcmp(lw->lw_ops->lm_proto_name, proto_name) == 0)
48502 + if (!try && capable(CAP_SYS_MODULE)) {
48505 + request_module(proto_name);
48508 + printk("lock_harness: can't find protocol %s\n", proto_name);
48513 + if (!try_module_get(lw->lw_ops->lm_owner)) {
48516 + current->state = TASK_UNINTERRUPTIBLE;
48517 + schedule_timeout(HZ);
48521 + error = lw->lw_ops->lm_mount(table_name, host_data,
48522 + cb, fsdata, min_lvb_size, lockstruct);
48524 + module_put(lw->lw_ops->lm_owner);
48533 + * lm_unmount - unmount a lock module
48534 + * @lockstruct: the lockstruct passed into mount
48539 +lm_unmount(struct lm_lockstruct *lockstruct)
48542 + lockstruct->ls_ops->lm_unmount(lockstruct->ls_lockspace);
48543 + if (lockstruct->ls_ops->lm_owner)
48544 + module_put(lockstruct->ls_ops->lm_owner);
48549 + * init_lmh - Initialize the lock module harness
48551 + * Returns: 0 on success, -EXXX on failure
48557 + init_MUTEX(&lmh_lock);
48558 + INIT_LIST_HEAD(&lmh_list);
48560 + printk("Lock_Harness %s (built %s %s) installed\n",
48561 + RELEASE_NAME, __DATE__, __TIME__);
48567 + * exit_lmh - cleanup the Lock Module Harness
48569 + * Returns: 0 on success, -EXXX on failure
48577 +module_init(init_lmh);
48578 +module_exit(exit_lmh);
48580 +MODULE_DESCRIPTION("GFS Lock Module Harness " RELEASE_NAME);
48581 +MODULE_AUTHOR("Red Hat, Inc.");
48582 +MODULE_LICENSE("GPL");
48584 +EXPORT_SYMBOL_GPL(lm_register_proto);
48585 +EXPORT_SYMBOL_GPL(lm_unregister_proto);
48586 +EXPORT_SYMBOL_GPL(lm_mount);
48587 +EXPORT_SYMBOL_GPL(lm_unmount);
48588 diff -urN linux-orig/include/linux/lm_interface.h linux-patched/include/linux/lm_interface.h
48589 --- linux-orig/include/linux/lm_interface.h 1969-12-31 18:00:00.000000000 -0600
48590 +++ linux-patched/include/linux/lm_interface.h 2004-06-16 12:03:10.005672019 -0500
48592 +/******************************************************************************
48593 +*******************************************************************************
48595 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
48596 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
48598 +** This copyrighted material is made available to anyone wishing to use,
48599 +** modify, copy, or redistribute it subject to the terms and conditions
48600 +** of the GNU General Public License v.2.
48602 +*******************************************************************************
48603 +******************************************************************************/
48607 + Sooner or later, I need to put all the documentation back into this file.
48608 + In the mean time, here are some notes.
48610 + - The lock module is now responsible for STOMITHing the an expired
48611 + client before calling the callback with type LM_CB_NEED_RECOVERY.
48613 + - If mount() operation returns first == TRUE, GFS will check all the
48614 + journals. GFS itself can't/shouldn't stomith the machines, so the lock module
48615 + needs to make sure that there are no zombie machines on any of the
48616 + journals. (i.e. this should probably be on the first mount of the lock
48617 + space where all mounts by other machines are blocked.) GFS will call
48618 + others_may_mount() when the filesystem is in a consistent state.
48620 + - GFS can issue multiple simultaneous get_lock()s for the same lockname.
48621 + The lock module needs to deal with it, either by 1) building a hash table
48622 + to lookup the structures and keeping a reference count so there is only
48623 + on lm_lock_t for a given lockname. or 2) just dealing with multiple
48624 + lm_lock_t structures for a given lockname.
48628 +#ifndef __LM_INTERFACE_DOT_H__
48629 +#define __LM_INTERFACE_DOT_H__
48631 +typedef void lm_lockspace_t;
48632 +typedef void lm_lock_t;
48633 +typedef void lm_fsdata_t;
48634 +typedef void (*lm_callback_t) (lm_fsdata_t *fsdata, unsigned int type,
48637 +/* Flags for the struct lm_lockstruct->ls_flags field */
48639 +#define LM_LSFLAG_LOCAL (0x00000001)
48640 +#define LM_LSFLAG_ASYNC (0x00000002)
48644 +#define LM_TYPE_RESERVED (0x00)
48645 +#define LM_TYPE_NONDISK (0x01)
48646 +#define LM_TYPE_INODE (0x02)
48647 +#define LM_TYPE_RGRP (0x03)
48648 +#define LM_TYPE_META (0x04)
48649 +#define LM_TYPE_IOPEN (0x05)
48650 +#define LM_TYPE_FLOCK (0x06)
48651 +#define LM_TYPE_PLOCK (0x07)
48652 +#define LM_TYPE_QUOTA (0x08)
48654 +/* States passed to lock() */
48656 +#define LM_ST_UNLOCKED (0)
48657 +#define LM_ST_EXCLUSIVE (1)
48658 +#define LM_ST_DEFERRED (2)
48659 +#define LM_ST_SHARED (3)
48661 +/* Flags passed to lock() */
48663 +#define LM_FLAG_TRY (0x00000001)
48664 +#define LM_FLAG_TRY_1CB (0x00000002)
48665 +#define LM_FLAG_NOEXP (0x00000004)
48666 +#define LM_FLAG_ANY (0x00000008)
48667 +#define LM_FLAG_PRIORITY (0x00000010)
48669 +/* Flags returned by lock() */
48671 +#define LM_OUT_ST_MASK (0x00000003)
48672 +#define LM_OUT_CACHEABLE (0x00000004)
48673 +#define LM_OUT_CANCELED (0x00000008)
48674 +#define LM_OUT_NEED_E (0x00000010)
48675 +#define LM_OUT_NEED_D (0x00000020)
48676 +#define LM_OUT_NEED_S (0x00000040)
48677 +#define LM_OUT_ASYNC (0x00000080)
48678 +#define LM_OUT_LVB_INVALID (0x00000100)
48680 +/* Callback types */
48682 +#define LM_CB_NEED_E (257)
48683 +#define LM_CB_NEED_D (258)
48684 +#define LM_CB_NEED_S (259)
48685 +#define LM_CB_NEED_RECOVERY (260)
48686 +#define LM_CB_DROPLOCKS (261)
48687 +#define LM_CB_ASYNC (262)
48689 +/* Reset_exp messages */
48691 +#define LM_RD_GAVEUP (308)
48692 +#define LM_RD_SUCCESS (309)
48694 +struct lm_lockname {
48695 + uint64_t ln_number;
48696 + unsigned int ln_type;
48699 +#define lm_name_equal(name1, name2) \
48700 +(((name1)->ln_number == (name2)->ln_number) && \
48701 + ((name1)->ln_type == (name2)->ln_type)) \
48703 +struct lm_async_cb {
48704 + struct lm_lockname lc_name;
48708 +struct lm_lockstruct;
48710 +struct lm_lockops {
48711 + char lm_proto_name[256];
48713 + /* Mount/Unmount */
48715 + int (*lm_mount) (char *table_name, char *host_data,
48716 + lm_callback_t cb, lm_fsdata_t *fsdata,
48717 + unsigned int min_lvb_size,
48718 + struct lm_lockstruct *lockstruct);
48719 + void (*lm_others_may_mount) (lm_lockspace_t *lockspace);
48720 + void (*lm_unmount) (lm_lockspace_t *lockspace);
48722 + /* Lock oriented operations */
48724 + int (*lm_get_lock) (lm_lockspace_t *lockspace,
48725 + struct lm_lockname *name, lm_lock_t **lockp);
48726 + void (*lm_put_lock) (lm_lock_t *lock);
48728 + unsigned int (*lm_lock) (lm_lock_t *lock, unsigned int cur_state,
48729 + unsigned int req_state, unsigned int flags);
48730 + unsigned int (*lm_unlock) (lm_lock_t *lock, unsigned int cur_state);
48732 + void (*lm_cancel) (lm_lock_t *lock);
48734 + int (*lm_hold_lvb) (lm_lock_t *lock, char **lvbp);
48735 + void (*lm_unhold_lvb) (lm_lock_t *lock, char *lvb);
48736 + void (*lm_sync_lvb) (lm_lock_t *lock, char *lvb);
48738 + /* Posix Lock oriented operations */
48740 + int (*lm_plock_get) (lm_lockspace_t *lockspace,
48741 + struct lm_lockname *name, unsigned long owner,
48742 + uint64_t *start, uint64_t *end, int *exclusive,
48743 + unsigned long *rowner);
48745 + int (*lm_plock) (lm_lockspace_t *lockspace,
48746 + struct lm_lockname *name, unsigned long owner,
48747 + int wait, int exclusive, uint64_t start,
48750 + int (*lm_punlock) (lm_lockspace_t *lockspace,
48751 + struct lm_lockname *name, unsigned long owner,
48752 + uint64_t start, uint64_t end);
48754 + /* Client oriented operations */
48756 + void (*lm_recovery_done) (lm_lockspace_t *lockspace, unsigned int jid,
48757 + unsigned int message);
48759 + struct module *lm_owner;
48762 +struct lm_lockstruct {
48763 + unsigned int ls_jid;
48764 + unsigned int ls_first;
48765 + unsigned int ls_lvb_size;
48766 + lm_lockspace_t *ls_lockspace;
48767 + struct lm_lockops *ls_ops;
48771 +/* Bottom interface */
48773 +int lm_register_proto(struct lm_lockops *proto);
48774 +void lm_unregister_proto(struct lm_lockops *proto);
48776 +/* Top interface */
48778 +int lm_mount(char *proto_name,
48779 + char *table_name, char *host_data,
48780 + lm_callback_t cb, lm_fsdata_t *fsdata,
48781 + unsigned int min_lvb_size, struct lm_lockstruct *lockstruct);
48782 +void lm_unmount(struct lm_lockstruct *lockstruct);
48784 +#endif /* __LM_INTERFACE_DOT_H__ */
48785 diff -urN linux-orig/fs/gfs_locking/lock_nolock/main.c linux-patched/fs/gfs_locking/lock_nolock/main.c
48786 --- linux-orig/fs/gfs_locking/lock_nolock/main.c 1969-12-31 18:00:00.000000000 -0600
48787 +++ linux-patched/fs/gfs_locking/lock_nolock/main.c 2004-06-16 12:03:13.918762838 -0500
48789 +/******************************************************************************
48790 +*******************************************************************************
48792 +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
48793 +** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
48795 +** This copyrighted material is made available to anyone wishing to use,
48796 +** modify, copy, or redistribute it subject to the terms and conditions
48797 +** of the GNU General Public License v.2.
48799 +*******************************************************************************
48800 +******************************************************************************/
48802 +#include <linux/module.h>
48803 +#include <linux/slab.h>
48804 +#include <linux/module.h>
48805 +#include <linux/init.h>
48806 +#include <linux/types.h>
48807 +#include <linux/lm_interface.h>
48809 +#define RELEASE_NAME "<CVS>"
48811 +struct nolock_lockspace {
48812 + unsigned int nl_lvb_size;
48815 +struct lm_lockops nolock_ops;
48818 + * nolock_mount - mount a nolock lockspace
48819 + * @table_name: the name of the space to mount
48820 + * @host_data: host specific data
48821 + * @cb: the callback
48822 + * @lockstruct: the structure of crap to fill in
48824 + * Returns: 0 on success, -EXXX on failure
48828 +nolock_mount(char *table_name, char *host_data,
48829 + lm_callback_t cb, lm_fsdata_t *fsdata,
48830 + unsigned int min_lvb_size, struct lm_lockstruct *lockstruct)
48833 + unsigned int jid;
48834 + struct nolock_lockspace *nl;
48836 + /* If there is a "jid=" in the hostdata, return that jid.
48837 + Otherwise, return zero. */
48839 + c = strstr(host_data, "jid=");
48844 + sscanf(c, "%u", &jid);
48847 + nl = kmalloc(sizeof(struct nolock_lockspace), GFP_KERNEL);
48851 + memset(nl, 0, sizeof(struct nolock_lockspace));
48852 + nl->nl_lvb_size = min_lvb_size;
48854 + lockstruct->ls_jid = jid;
48855 + lockstruct->ls_first = 1;
48856 + lockstruct->ls_lvb_size = min_lvb_size;
48857 + lockstruct->ls_lockspace = (lm_lockspace_t *)nl;
48858 + lockstruct->ls_ops = &nolock_ops;
48859 + lockstruct->ls_flags = LM_LSFLAG_LOCAL | LM_LSFLAG_ASYNC;
48865 + * nolock_others_may_mount - unmount a lock space
48866 + * @lockspace: the lockspace to unmount
48871 +nolock_others_may_mount(lm_lockspace_t *lockspace)
48876 + * nolock_unmount - unmount a lock space
48877 + * @lockspace: the lockspace to unmount
48882 +nolock_unmount(lm_lockspace_t *lockspace)
48884 + struct nolock_lockspace *nl = (struct nolock_lockspace *)lockspace;
48889 + * nolock_get_lock - get a lm_lock_t given a descripton of the lock
48890 + * @lockspace: the lockspace the lock lives in
48891 + * @name: the name of the lock
48892 + * @lockp: return the lm_lock_t here
48894 + * Returns: 0 on success, -EXXX on failure
48898 +nolock_get_lock(lm_lockspace_t *lockspace, struct lm_lockname *name,
48899 + lm_lock_t ** lockp)
48901 + *lockp = (lm_lock_t *)lockspace;
48906 + * nolock_put_lock - get rid of a lock structure
48907 + * @lock: the lock to throw away
48912 +nolock_put_lock(lm_lock_t *lock)
48917 + * nolock_lock - acquire a lock
48918 + * @lock: the lock to manipulate
48919 + * @cur_state: the current state
48920 + * @req_state: the requested state
48921 + * @flags: modifier flags
48923 + * Returns: A bitmap of LM_OUT_*
48926 +static unsigned int
48927 +nolock_lock(lm_lock_t *lock, unsigned int cur_state, unsigned int req_state,
48928 + unsigned int flags)
48930 + return req_state | LM_OUT_CACHEABLE;
48934 + * nolock_unlock - unlock a lock
48935 + * @lock: the lock to manipulate
48936 + * @cur_state: the current state
48941 +static unsigned int
48942 +nolock_unlock(lm_lock_t *lock, unsigned int cur_state)
48948 + * nolock_cancel - cancel a request on a lock
48949 + * @lock: the lock to cancel request for
48954 +nolock_cancel(lm_lock_t *lock)
48959 + * nolock_hold_lvb - hold on to a lock value block
48960 + * @lock: the lock the LVB is associated with
48961 + * @lvbp: return the lm_lvb_t here
48963 + * Returns: 0 on success, -EXXX on failure
48967 +nolock_hold_lvb(lm_lock_t *lock, char **lvbp)
48969 + struct nolock_lockspace *nl = (struct nolock_lockspace *)lock;
48972 + *lvbp = kmalloc(nl->nl_lvb_size, GFP_KERNEL);
48974 + memset(*lvbp, 0, nl->nl_lvb_size);
48982 + * nolock_unhold_lvb - release a LVB
48983 + * @lock: the lock the LVB is associated with
48984 + * @lvb: the lock value block
48989 +nolock_unhold_lvb(lm_lock_t *lock, char *lvb)
48995 + * nolock_sync_lvb - sync out the value of a lvb
48996 + * @lock: the lock the LVB is associated with
48997 + * @lvb: the lock value block
49002 +nolock_sync_lvb(lm_lock_t *lock, char *lvb)
49007 + * nolock_plock_get -
49008 + * @lockspace: the lockspace
49019 +nolock_plock_get(lm_lockspace_t *lockspace,
49020 + struct lm_lockname *name, unsigned long owner,
49021 + uint64_t *start, uint64_t *end, int *exclusive,
49022 + unsigned long *rowner)
49029 + * @lockspace: the lockspace
49040 +nolock_plock(lm_lockspace_t *lockspace,
49041 + struct lm_lockname *name, unsigned long owner,
49042 + int wait, int exclusive, uint64_t start,
49049 + * nolock_punlock -
49050 + * @lockspace: the lockspace
49059 +nolock_punlock(lm_lockspace_t *lockspace,
49060 + struct lm_lockname *name, unsigned long owner,
49061 + uint64_t start, uint64_t end)
49067 + * nolock_recovery_done - reset the expired locks for a given jid
49068 + * @lockspace: the lockspace
49074 +nolock_recovery_done(lm_lockspace_t *lockspace, unsigned int jid,
49075 + unsigned int message)
49079 +struct lm_lockops nolock_ops = {
49080 + .lm_proto_name = "lock_nolock",
49081 + .lm_mount = nolock_mount,
49082 + .lm_others_may_mount = nolock_others_may_mount,
49083 + .lm_unmount = nolock_unmount,
49084 + .lm_get_lock = nolock_get_lock,
49085 + .lm_put_lock = nolock_put_lock,
49086 + .lm_lock = nolock_lock,
49087 + .lm_unlock = nolock_unlock,
49088 + .lm_cancel = nolock_cancel,
49089 + .lm_hold_lvb = nolock_hold_lvb,
49090 + .lm_unhold_lvb = nolock_unhold_lvb,
49091 + .lm_sync_lvb = nolock_sync_lvb,
49092 + .lm_plock_get = nolock_plock_get,
49093 + .lm_plock = nolock_plock,
49094 + .lm_punlock = nolock_punlock,
49095 + .lm_recovery_done = nolock_recovery_done,
49096 + .lm_owner = THIS_MODULE,
49100 + * init_nolock - Initialize the nolock module
49102 + * Returns: 0 on success, -EXXX on failure
49110 + error = lm_register_proto(&nolock_ops);
49112 + printk("lock_nolock: can't register protocol: %d\n", error);
49116 + printk("Lock_Nolock %s (built %s %s) installed\n",
49117 + RELEASE_NAME, __DATE__, __TIME__);
49123 + * exit_nolock - cleanup the nolock module
49130 + lm_unregister_proto(&nolock_ops);
49133 +module_init(init_nolock);
49134 +module_exit(exit_nolock);
49136 +MODULE_DESCRIPTION("GFS Nolock Locking Module " RELEASE_NAME);
49137 +MODULE_AUTHOR("Red Hat, Inc.");
49138 +MODULE_LICENSE("GPL");