-# Add CMAN to the build system
-diff -urN -p linux-2.6.8.1/Makefile linux/Makefile
---- linux-2.6.8.1/Makefile 2004-08-14 18:55:35.000000000 +0800
-+++ linux/Makefile 2004-08-24 13:17:22.000000000 +0800
-@@ -399,7 +399,7 @@ scripts_basic: include/linux/autoconf.h
-
- # Objects we will link into vmlinux / subdirs we need to visit
- init-y := init/
--drivers-y := drivers/ sound/
-+drivers-y := drivers/ sound/ cluster/
- net-y := net/
- libs-y := lib/
- core-y := usr/
-diff -urN -p linux-2.6.8.1/arch/alpha/Kconfig linux/arch/alpha/Kconfig
---- linux-2.6.8.1/arch/alpha/Kconfig 2004-08-14 18:55:48.000000000 +0800
-+++ linux/arch/alpha/Kconfig 2004-08-24 13:17:22.000000000 +0800
-@@ -698,2 +698,4 @@ source "crypto/Kconfig"
-
- source "lib/Kconfig"
-+
-+source "cluster/Kconfig"
-diff -urN -p linux-2.6.8.1/arch/arm/Kconfig linux/arch/arm/Kconfig
---- linux-2.6.8.1/arch/arm/Kconfig 2004-08-14 18:55:33.000000000 +0800
-+++ linux/arch/arm/Kconfig 2004-08-24 13:17:22.000000000 +0800
-@@ -827,2 +827,4 @@ source "crypto/Kconfig"
-
- source "lib/Kconfig"
-+
-+source "cluster/Kconfig"
-diff -urN -p linux-2.6.8.1/arch/arm26/Kconfig linux/arch/arm26/Kconfig
---- linux-2.6.8.1/arch/arm26/Kconfig 2004-08-14 18:55:33.000000000 +0800
-+++ linux/arch/arm26/Kconfig 2004-08-24 13:17:22.000000000 +0800
-@@ -333,2 +333,4 @@ source "crypto/Kconfig"
-
- source "lib/Kconfig"
-+
-+source "cluster/Kconfig"
-diff -urN -p linux-2.6.8.1/arch/cris/Kconfig linux/arch/cris/Kconfig
---- linux-2.6.8.1/arch/cris/Kconfig 2004-08-14 18:56:22.000000000 +0800
-+++ linux/arch/cris/Kconfig 2004-08-24 13:17:22.000000000 +0800
-@@ -216,2 +216,4 @@ source "crypto/Kconfig"
-
- source "lib/Kconfig"
-+
-+source "cluster/Kconfig"
-diff -urN -p linux-2.6.8.1/arch/i386/Kconfig linux/arch/i386/Kconfig
---- linux-2.6.8.1/arch/i386/Kconfig 2004-08-14 18:54:50.000000000 +0800
-+++ linux/arch/i386/Kconfig 2004-08-24 13:17:22.000000000 +0800
-@@ -1316,6 +1316,8 @@ source "crypto/Kconfig"
-
- source "lib/Kconfig"
-
-+source "cluster/Kconfig"
-+
- config X86_SMP
- bool
- depends on SMP && !X86_VOYAGER
-diff -urN -p linux-2.6.8.1/arch/ia64/Kconfig linux/arch/ia64/Kconfig
---- linux-2.6.8.1/arch/ia64/Kconfig 2004-08-14 18:56:22.000000000 +0800
-+++ linux/arch/ia64/Kconfig 2004-08-24 13:17:22.000000000 +0800
-@@ -489,3 +489,5 @@ endmenu
- source "security/Kconfig"
-
- source "crypto/Kconfig"
-+
-+source "cluster/Kconfig"
-diff -urN -p linux-2.6.8.1/arch/m68k/Kconfig linux/arch/m68k/Kconfig
---- linux-2.6.8.1/arch/m68k/Kconfig 2004-08-14 18:55:34.000000000 +0800
-+++ linux/arch/m68k/Kconfig 2004-08-24 13:17:22.000000000 +0800
-@@ -696,2 +696,4 @@ source "crypto/Kconfig"
-
- source "lib/Kconfig"
-+
-+source "cluster/Kconfig"
-diff -urN -p linux-2.6.8.1/arch/mips/Kconfig linux/arch/mips/Kconfig
---- linux-2.6.8.1/arch/mips/Kconfig 2004-08-14 18:55:32.000000000 +0800
-+++ linux/arch/mips/Kconfig 2004-08-24 13:17:22.000000000 +0800
-@@ -1691,3 +1691,5 @@ source "security/Kconfig"
- source "crypto/Kconfig"
-
- source "lib/Kconfig"
-+
-+source "cluster/Kconfig"
-diff -urN -p linux-2.6.8.1/arch/parisc/Kconfig linux/arch/parisc/Kconfig
---- linux-2.6.8.1/arch/parisc/Kconfig 2004-08-14 18:55:34.000000000 +0800
-+++ linux/arch/parisc/Kconfig 2004-08-24 13:17:22.000000000 +0800
-@@ -261,2 +261,4 @@ source "crypto/Kconfig"
-
- source "lib/Kconfig"
-+
-+source "cluster/Kconfig"
-diff -urN -p linux-2.6.8.1/arch/ppc/Kconfig linux/arch/ppc/Kconfig
---- linux-2.6.8.1/arch/ppc/Kconfig 2004-08-14 18:56:23.000000000 +0800
-+++ linux/arch/ppc/Kconfig 2004-08-24 13:17:22.000000000 +0800
-@@ -1354,3 +1354,5 @@ endmenu
- source "security/Kconfig"
-
- source "crypto/Kconfig"
-+
-+source "cluster/Kconfig"
-diff -urN -p linux-2.6.8.1/arch/ppc64/Kconfig linux/arch/ppc64/Kconfig
---- linux-2.6.8.1/arch/ppc64/Kconfig 2004-08-14 18:55:33.000000000 +0800
-+++ linux/arch/ppc64/Kconfig 2004-08-24 13:17:22.000000000 +0800
-@@ -442,2 +442,4 @@ source "crypto/Kconfig"
-
- source "lib/Kconfig"
-+
-+source "cluster/Kconfig"
-diff -urN -p linux-2.6.8.1/arch/s390/Kconfig linux/arch/s390/Kconfig
---- linux-2.6.8.1/arch/s390/Kconfig 2004-08-14 18:55:19.000000000 +0800
-+++ linux/arch/s390/Kconfig 2004-08-24 13:17:22.000000000 +0800
-@@ -449,2 +449,4 @@ source "crypto/Kconfig"
-
- source "lib/Kconfig"
-+
-+source "cluster/Kconfig"
-diff -urN -p linux-2.6.8.1/arch/sh/Kconfig linux/arch/sh/Kconfig
---- linux-2.6.8.1/arch/sh/Kconfig 2004-08-14 18:56:23.000000000 +0800
-+++ linux/arch/sh/Kconfig 2004-08-24 13:17:22.000000000 +0800
-@@ -900,2 +900,4 @@ source "crypto/Kconfig"
-
- source "lib/Kconfig"
-+
-+source "cluster/Kconfig"
-diff -urN -p linux-2.6.8.1/arch/sparc/Kconfig linux/arch/sparc/Kconfig
---- linux-2.6.8.1/arch/sparc/Kconfig 2004-08-14 18:54:46.000000000 +0800
-+++ linux/arch/sparc/Kconfig 2004-08-24 13:17:22.000000000 +0800
-@@ -458,2 +458,4 @@ source "crypto/Kconfig"
-
- source "lib/Kconfig"
-+
-+source "cluster/Kconfig"
-diff -urN -p linux-2.6.8.1/arch/sparc64/Kconfig linux/arch/sparc64/Kconfig
---- linux-2.6.8.1/arch/sparc64/Kconfig 2004-08-14 18:56:01.000000000 +0800
-+++ linux/arch/sparc64/Kconfig 2004-08-24 13:17:22.000000000 +0800
-@@ -714,2 +714,4 @@ source "crypto/Kconfig"
-
- source "lib/Kconfig"
-+
-+source "cluster/Kconfig"
-diff -urN -p linux-2.6.8.1/arch/um/Kconfig linux/arch/um/Kconfig
---- linux-2.6.8.1/arch/um/Kconfig 2004-08-14 18:55:32.000000000 +0800
-+++ linux/arch/um/Kconfig 2004-08-24 13:17:22.000000000 +0800
-@@ -194,6 +194,8 @@ source "crypto/Kconfig"
-
- source "lib/Kconfig"
-
-+source "cluster/Kconfig"
-+
- menu "SCSI support"
-
- config SCSI
-diff -urN -p linux-2.6.8.1/arch/x86_64/Kconfig linux/arch/x86_64/Kconfig
---- linux-2.6.8.1/arch/x86_64/Kconfig 2004-08-14 18:55:59.000000000 +0800
-+++ linux/arch/x86_64/Kconfig 2004-08-24 13:17:22.000000000 +0800
-@@ -505,2 +505,4 @@ source "crypto/Kconfig"
-
- source "lib/Kconfig"
-+
-+source "cluster/Kconfig"
-diff -urN -p linux-2.6.8.1/cluster/Kconfig linux/cluster/Kconfig
---- linux-2.6.8.1/cluster/Kconfig 1970-01-01 07:30:00.000000000 +0730
-+++ linux/cluster/Kconfig 2004-08-24 13:17:22.000000000 +0800
-@@ -0,0 +1,13 @@
-+menu "Cluster Support"
-+
-+config CLUSTER
-+ tristate "Cluster support"
-+ ---help---
-+ Enable clustering support. This is not the high-performance clustering
-+ made famous by beowulf. It is a high-availability cluster often using
-+ shared storage.
-+ The cluster manager is the heart(beat) of the cluster system. It is
-+ needed by all the other components. It provides membership services
-+ for those other subsystems.
-+
-+endmenu
-diff -urN -p linux-2.6.8.1/cluster/Makefile linux/cluster/Makefile
---- linux-2.6.8.1/cluster/Makefile 1970-01-01 07:30:00.000000000 +0730
-+++ linux/cluster/Makefile 2004-08-24 13:17:22.000000000 +0800
-@@ -0,0 +1,3 @@
-+obj-y := nocluster.o
-+
-+obj-$(CONFIG_CLUSTER) += cman/
-diff -urN -p linux-2.6.8.1/cluster/cman/Makefile linux/cluster/cman/Makefile
---- linux-2.6.8.1/cluster/cman/Makefile 1970-01-01 07:30:00.000000000 +0730
-+++ linux/cluster/cman/Makefile 2004-08-24 13:17:22.000000000 +0800
-@@ -0,0 +1,6 @@
-+cman-objs := cnxman.o config.o membership.o proc.o\
-+ sm_barrier.o sm_control.o sm_daemon.o sm_joinleave.o\
-+ sm_membership.o sm_message.o sm_misc.o sm_recover.o sm_services.o \
-+ sm_user.o
-+
-+obj-$(CONFIG_CLUSTER) := cman.o
-diff -urN -p linux-2.6.8.1/cluster/nocluster.c linux/cluster/nocluster.c
---- linux-2.6.8.1/cluster/nocluster.c 1970-01-01 07:30:00.000000000 +0730
-+++ linux/cluster/nocluster.c 2004-08-24 13:17:22.000000000 +0800
-@@ -0,0 +1,20 @@
-+/*
-+ * cluster/nocluster.c
-+ *
-+ * Copy from net/nonet.c
-+ * Dummy functions to allow us to configure cluster support entirely
-+ * out of the kernel.
-+ *
-+ * Distributed under the terms of the GNU GPL version 2.
-+ * Copyright (c) Matthew Wilcox 2003
-+ */
-+
-+#include <linux/module.h>
-+#include <linux/errno.h>
-+#include <linux/fs.h>
-+#include <linux/init.h>
-+#include <linux/kernel.h>
-+
-+void __init nocluster_init(void)
-+{
-+}
-diff -urN linux-orig/cluster/cman/cnxman-private.h linux-patched/cluster/cman/cnxman-private.h
---- linux-orig/cluster/cman/cnxman-private.h 1970-01-01 07:30:00.000000000 +0730
-+++ linux-patched/cluster/cman/cnxman-private.h 2004-09-03 18:13:05.000000000 +0800
-@@ -0,0 +1,430 @@
+diff -urN linux-orig/cluster/cman/cnxman.c linux-patched/cluster/cman/cnxman.c
+--- linux-orig/cluster/cman/cnxman.c 1969-12-31 18:00:00.000000000 -0600
++++ linux-patched/cluster/cman/cnxman.c 2004-10-22 11:04:52.611551982 -0500
+@@ -0,0 +1,4233 @@
+/******************************************************************************
+*******************************************************************************
+**
+*******************************************************************************
+******************************************************************************/
+
-+#ifndef __CNXMAN_PRIVATE_H
-+#define __CNXMAN_PRIVATE_H
++#define EXPORT_SYMTAB
++#include <linux/init.h>
++#include <linux/socket.h>
++#include <linux/kernel.h>
++#include <linux/sched.h>
++#include <linux/file.h>
++#include <linux/utsname.h>
++#include <net/sock.h>
++#include <linux/proc_fs.h>
++#include <linux/poll.h>
++#include <linux/module.h>
++#include <linux/list.h>
++#include <linux/uio.h>
++#include <cluster/cnxman.h>
++#include <cluster/service.h>
+
-+/* Version triplet */
-+#define CNXMAN_MAJOR_VERSION 2
-+#define CNXMAN_MINOR_VERSION 0
-+#define CNXMAN_PATCH_VERSION 1
++#include "cnxman-private.h"
++#include "sm_control.h"
++#include "sm_user.h"
++#include "config.h"
+
-+#define MAX_RETRIES 3 /* Maximum number of send retries */
-+#define CAP_CLUSTER CAP_SYS_ADMIN /* Capability needed to manage the
-+ * cluster */
-+#ifdef __KERNEL__
++#define CMAN_RELEASE_NAME "<CVS>"
+
-+/* How we announce ourself in console events */
-+#define CMAN_NAME "CMAN"
++static void process_incoming_packet(struct cl_comms_socket *csock, struct msghdr *msg, int len);
++static int cl_sendack(struct cl_comms_socket *sock, unsigned short seq,
++ int addr_len, char *addr, unsigned char remport,
++ unsigned char flag);
++static void send_listen_request(int nodeid, unsigned char port);
++static void send_listen_response(struct cl_comms_socket *csock, int nodeid,
++ unsigned char port, unsigned short tag);
++static void resend_last_message(void);
++static void start_ack_timer(void);
++static int send_queued_message(struct queued_message *qmsg);
++static void send_port_close_oob(unsigned char port);
++static void post_close_oob(unsigned char port, int nodeid);
++static void process_barrier_msg(struct cl_barriermsg *msg,
++ struct cluster_node *node);
++static struct cl_barrier *find_barrier(char *name);
++static void node_shutdown(void);
++static void node_cleanup(void);
++static int send_or_queue_message(struct socket *sock, void *buf, int len, struct sockaddr_cl *caddr,
++ unsigned int flags);
++static struct cl_comms_socket *get_next_interface(struct cl_comms_socket *cur);
++static void check_for_unacked_nodes(void);
++static void free_cluster_sockets(void);
++static uint16_t generate_cluster_id(char *name);
++static int is_valid_temp_nodeid(int nodeid);
+
-+/* One of these per AF_CLUSTER socket */
-+struct cluster_sock {
-+ /* WARNING: sk has to be the first member */
-+ struct sock sk;
++extern int start_membership_services(pid_t);
++extern int kcl_leave_cluster(int remove);
++extern int send_kill(int nodeid);
+
-+ unsigned char port; /* Bound port or zero */
-+ int (*kernel_callback) (char *, int, char *, int, unsigned int);
-+ void *service_data;
-+};
++static struct proto_ops cl_proto_ops;
++static struct sock *master_sock;
++static kmem_cache_t *cluster_sk_cachep;
+
-+#define cluster_sk(__sk) ((struct cluster_sock *)__sk)
++/* Pointer to the pseudo node that maintains quorum in a 2node system */
++struct cluster_node *quorum_device = NULL;
+
-+/* We have one of these for each socket we use for communications */
-+struct cl_comms_socket {
-+ struct socket *sock;
-+ int broadcast; /* This is a broadcast socket */
-+ int recv_only; /* This is the unicast receive end of a
-+ * multicast socket */
-+ struct sockaddr_in6 saddr; /* Socket address, contains the sockaddr for
-+ * the remote end(s) */
-+ int addr_len; /* Length of above */
-+ int number; /* Internal socket number, used to cycle around
-+ * sockets in case of network errors */
-+ struct file *file; /* file pointer for user-passed in sockets */
++/* Array of "ports" allocated. This is just a list of pointers to the sock that
++ * has this port bound. Speed is a major issue here so 1-2K of allocated
++ * storage is worth sacrificing. Port 0 is reserved for protocol messages */
++static struct sock *port_array[256];
++static struct semaphore port_array_lock;
+
-+ wait_queue_t wait;
++/* Our cluster name & number */
++unsigned short cluster_id;
++char cluster_name[MAX_CLUSTER_NAME_LEN+1];
+
-+ /* The socket list */
-+ struct list_head list;
++/* Two-node mode: causes cluster to remain quorate if one of two nodes fails.
++ * No more than two nodes are permitted to join the cluster. */
++unsigned short two_node;
+
-+ /* On here when it has something to say */
-+ struct list_head active_list;
-+ unsigned long active;
-+};
++/* Cluster configuration version that must be the same among members. */
++unsigned int config_version;
+
-+/* A client socket. We keep a list of these so we can notify clients of cluster
-+ * events */
-+struct cl_client_socket {
-+ struct socket *sock;
-+ struct list_head list;
-+};
++/* Reference counting for cluster applications */
++atomic_t use_count;
+
-+/* This structure is tacked onto the start of a cluster message packet for our
-+ * own nefarious purposes. */
-+struct cl_protheader {
-+ unsigned char port;
-+ unsigned char flags;
-+ unsigned short cluster; /* Our cluster number, little-endian */
-+ unsigned short seq; /* Packet sequence number, little-endian */
-+ unsigned short ack; /* inline ACK */
-+ int srcid; /* Node ID of the sender */
-+ int tgtid; /* Node ID of the target or 0 for multicast
-+ * messages */
-+};
++/* Length of sockaddr address for our comms protocol */
++unsigned int address_length;
+
-+/* A cluster internal protocol message - port number 0 */
-+struct cl_protmsg {
-+ struct cl_protheader header;
-+ unsigned char cmd;
-+};
++/* Message sending */
++static unsigned short cur_seq; /* Last message sent */
++static unsigned int ack_count; /* Number of acks received for message
++ * 'cur_seq' */
++static unsigned int acks_expected; /* Number of acks we expect to receive */
++static struct semaphore send_lock;
++static struct timer_list ack_timer;
+
-+/* A Cluster ACK message */
-+struct cl_ackmsg {
-+ struct cl_protheader header;
-+ unsigned char cmd; /* Always CLUSTER_CMD_ACK */
-+ unsigned char remport; /* Remote port number the original message was
-+ * for */
-+ unsigned char aflags; /* ACK flags 0=OK, 1=No listener */
-+ unsigned char pad;
-+};
++/* Saved packet information in case we need to resend it */
++static char saved_msg_buffer[MAX_CLUSTER_MESSAGE];
++static int saved_msg_len;
++static int retry_count;
+
-+/* A Cluster LISTENREQ/LISTENRESP message */
-+struct cl_listenmsg {
-+ unsigned char cmd; /* CLUSTER_CMD_LISTENRESP/REQ */
-+ unsigned char target_port; /* Port to probe */
-+ unsigned char listening; /* Always 0 for LISTENREQ */
-+ unsigned char pad;
-+ unsigned short tag; /* PID of remote waiting process */
-+};
++/* Task variables */
++static pid_t kcluster_pid;
++static pid_t membership_pid;
++extern struct task_struct *membership_task;
++extern int quit_threads;
+
-+/* A Cluster PORTCLOSED message */
-+struct cl_closemsg {
-+ unsigned char cmd; /* CLUSTER_CMD_PORTCLOSED */
-+ unsigned char port;
-+};
++wait_queue_head_t cnxman_waitq;
+
-+/* Structure of a newly dead node, passed from cnxman to kmembershipd */
-+struct cl_new_dead_node {
-+ struct list_head list;
-+ struct cluster_node *node;
-+};
++/* Variables owned by membership services */
++extern int cluster_members;
++extern struct list_head cluster_members_list;
++extern struct semaphore cluster_members_lock;
++extern int we_are_a_cluster_member;
++extern int cluster_is_quorate;
++extern struct cluster_node *us;
++extern struct list_head new_dead_node_list;
++extern struct semaphore new_dead_node_lock;
++extern char nodename[];
++extern int wanted_nodeid;
+
-+/* Subcommands for BARRIER message */
-+#define BARRIER_REGISTER 1
-+#define BARRIER_CHANGE 2
-+#define BARRIER_WAIT 4
-+#define BARRIER_COMPLETE 5
++/* A list of processes listening for membership events */
++static struct list_head event_listener_list;
++static struct semaphore event_listener_lock;
+
-+/* A Cluster BARRIER message */
-+struct cl_barriermsg {
-+ unsigned char cmd; /* CLUSTER_CMD_BARRIER */
-+ unsigned char subcmd; /* BARRIER sub command */
-+ unsigned short pad;
-+ unsigned int flags;
-+ unsigned int nodes;
-+ char name[MAX_BARRIER_NAME_LEN];
-+};
++/* A list of kernel callbacks listening for membership events */
++static struct list_head kernel_listener_list;
++static struct semaphore kernel_listener_lock;
+
-+/* Membership services messages, the cl_protheader is added transparently */
-+struct cl_mem_hello_msg {
-+ unsigned char cmd;
-+ unsigned char flags;
-+ unsigned short members; /* Number of nodes in the cluster,
-+ * little-endian */
-+ unsigned int generation; /* Current cluster generation number */
-+};
++/* A list of sockets we are listening on (and can transmit on...later) */
++static struct list_head socket_list;
+
-+struct cl_mem_endtrans_msg {
-+ unsigned char cmd;
-+ unsigned char pad1;
-+ unsigned short pad2;
-+ unsigned int quorum;
-+ unsigned int total_votes;
-+ unsigned int generation; /* Current cluster generation number */
-+ unsigned int new_node_id; /* If reason is a new node joining */
-+};
++/* A list of all open cluster client sockets */
++static struct list_head client_socket_list;
++static struct semaphore client_socket_lock;
+
-+/* ACK types for JOINACK message */
-+#define JOINACK_TYPE_OK 1 /* You can join */
-+#define JOINACK_TYPE_NAK 2 /* You can NOT join */
-+#define JOINACK_TYPE_WAIT 3 /* Wait a bit longer - cluster is in transition
-+ * already */
++/* A list of all current barriers */
++static struct list_head barrier_list;
++static struct semaphore barrier_list_lock;
+
-+struct cl_mem_joinack_msg {
-+ unsigned char cmd;
-+ unsigned char acktype;
-+};
++/* When a socket is ready for reading it goes on this queue */
++static spinlock_t active_socket_lock;
++static struct list_head active_socket_list;
+
-+/* This is used by JOINREQ message */
-+struct cl_mem_join_msg {
-+ unsigned char cmd;
-+ unsigned char votes;
-+ unsigned short num_addr; /* Number of addresses for this node */
-+ unsigned int expected_votes;
-+ unsigned int members; /* Number of nodes in the cluster,
-+ * little-endian */
-+ unsigned int major_version; /* Not backwards compatible */
-+ unsigned int minor_version; /* Backwards compatible */
-+ unsigned int patch_version; /* Backwards/forwards compatible */
-+ unsigned int config_version;
-+ unsigned int addr_len; /* length of node addresses */
-+ char clustername[16];
-+ /* Followed by <num_addr> addresses of `address_length` bytes and a
-+ * NUL-terminated node name */
-+};
++/* If the cnxman process is running and available for work */
++atomic_t cnxman_running;
+
-+/* State transition start reasons: */
-+#define TRANS_NEWNODE 1 /* A new node is joining the cluster */
-+#define TRANS_REMNODE 2 /* a node has left the cluster */
-+#define TRANS_ANOTHERREMNODE 3 /* A node left the cluster while we were in
-+ * transition */
-+#define TRANS_NEWMASTER 4 /* We have had an election and I am the new
-+ * master */
-+#define TRANS_CHECK 5 /* A consistency check was called for */
-+#define TRANS_RESTART 6 /* Transition restarted because of a previous
-+ * timeout */
-+#define TRANS_DEADMASTER 7 /* The master died during transition and I have
-+ * taken over */
++/* Flags set by timers etc for the mainloop to detect and act upon */
++static unsigned long mainloop_flags;
+
-+/* This is used to start a state transition */
-+struct cl_mem_starttrans_msg {
-+ unsigned char cmd;
-+ unsigned char reason; /* Why a start transition was started - see
-+ * above */
-+ unsigned char flags;
-+ unsigned char votes;
-+ unsigned int expected_votes;
-+ unsigned int generation; /* Incremented for each STARTTRANS sent
-+ */
-+ int nodeid; /* Node to be removed */
-+ unsigned short num_addrs;
-+ /* If reason == TRANS_NEWNODE: Followed by <num_addr> addresses of
-+ * `address_length` bytes and a NUL-terminated node name */
-+};
++#define ACK_TIMEOUT 1
++#define RESEND_NEEDED 2
+
-+struct cl_mem_startack_msg {
-+ unsigned char cmd;
-+ unsigned char reason;
-+ unsigned short pad;
-+ unsigned int generation;
-+ unsigned int node_id; /* node_id we think new node should have */
-+ unsigned int highest_node_id; /* highest node_id on this system */
-+};
++/* A queue of messages waiting to be sent. If kcl_sendmsg is called outside of
++ * process context then the messages get put in here */
++static struct list_head messages_list;
++static struct semaphore messages_list_lock;
+
-+/* Reconfigure a cluster parameter */
-+struct cl_mem_reconfig_msg {
-+ unsigned char cmd;
-+ unsigned char param;
-+ unsigned short pad;
-+ unsigned int value;
-+};
++static struct semaphore start_thread_sem;
+
-+/* Structure containing information about an outstanding listen request */
-+struct cl_waiting_listen_request {
-+ wait_queue_head_t waitq;
-+ int result;
-+ int waiting;
-+ unsigned short tag;
-+ int nodeid;
-+ struct list_head list;
-+};
++/* List of outstanding ISLISTENING requests */
++static struct list_head listenreq_list;
++static struct semaphore listenreq_lock;
+
-+/* Messages from membership services */
-+#define CLUSTER_MEM_JOINCONF 1
-+#define CLUSTER_MEM_JOINREQ 2
-+#define CLUSTER_MEM_LEAVE 3
-+#define CLUSTER_MEM_HELLO 4
-+#define CLUSTER_MEM_KILL 5
-+#define CLUSTER_MEM_JOINACK 6
-+#define CLUSTER_MEM_ENDTRANS 7
-+#define CLUSTER_MEM_RECONFIG 8
-+#define CLUSTER_MEM_MASTERVIEW 9
-+#define CLUSTER_MEM_STARTTRANS 10
-+#define CLUSTER_MEM_JOINREJ 11
-+#define CLUSTER_MEM_VIEWACK 12
-+#define CLUSTER_MEM_STARTACK 13
-+#define CLUSTER_MEM_TRANSITION 14
-+#define CLUSTER_MEM_NEWCLUSTER 15
-+#define CLUSTER_MEM_CONFACK 16
-+#define CLUSTER_MEM_NOMINATE 17
++/* Any sending requests wait on this queue if necessary (eg inquorate, waiting
++ * ACK) */
++static DECLARE_WAIT_QUEUE_HEAD(socket_waitq);
+
-+/* Flags in the HELLO message */
-+#define HELLO_FLAG_MASTER 1
-+#define HELLO_FLAG_QUORATE 2
++/* Wait for thread to exit properly */
++struct completion cluster_thread_comp;
++struct completion member_thread_comp;
+
-+/* Parameters for RECONFIG command */
-+#define RECONFIG_PARAM_EXPECTED_VOTES 1
-+#define RECONFIG_PARAM_NODE_VOTES 2
-+#define RECONFIG_PARAM_CONFIG_VERSION 3
++/* The resend delay to use. We increase this geometrically each time a
++ * send is delayed, in deci-seconds */
++static int resend_delay = 1;
+
-+/* Data associated with an outgoing socket */
-+struct cl_socket {
-+ struct file *file; /* The real file */
-+ struct socket *socket; /* The real sock */
-+ int num_nodes; /* On this link */
-+ int retransmit_count;
-+};
++/* Highest numbered interface and the current default */
++static int num_interfaces;
++static struct cl_comms_socket *current_interface = NULL;
+
-+/* There's one of these for each node in the cluster */
-+struct cluster_node {
++struct temp_node
++{
++ int nodeid;
++ char addr[sizeof(struct sockaddr_in6)];
++ int addrlen;
+ struct list_head list;
-+ char *name; /* Node/host name of node */
-+ struct list_head addr_list;
-+ int us; /* This node is us */
-+ unsigned int node_id; /* Unique node ID */
-+ nodestate_t state;
-+ unsigned short last_seq_recv;
-+ unsigned short last_seq_acked;
-+ unsigned short last_seq_sent;
-+ unsigned int votes;
-+ unsigned int expected_votes;
-+ unsigned int leave_reason;
-+ unsigned int incarnation; /* Incremented each time a node joins
-+ * the cluster */
-+ unsigned long last_hello; /* Jiffies */
+};
++static struct list_head tempnode_list;
++static struct semaphore tempnode_lock;
+
-+/* This is how we keep a list of user processes that are listening for cluster
-+ * membership events */
-+struct notify_struct {
-+ struct list_head list;
-+ pid_t pid;
-+ int signal;
-+};
+
-+/* This is how we keep a list of kernel callbacks that are registered for
-+ * cluster membership events */
-+struct kernel_notify_struct {
-+ struct list_head list;
-+ void (*callback) (kcl_callback_reason, long arg);
++/* This is what's squirrelled away in skb->cb */
++struct cb_info
++{
++ int orig_nodeid;
++ char orig_port;
++ char oob;
+};
+
-+/* A message waiting to be sent */
-+struct queued_message {
-+ struct list_head list;
+
-+ struct socket *socket;
-+ struct sockaddr_cl addr;
-+ int addr_len;
-+ int msg_len;
-+ unsigned char port;
-+ unsigned int flags;
-+ char msg_buffer[MAX_CLUSTER_MESSAGE];
-+};
++/* Wake up any processes that are waiting to send. This is usually called when
++ * all the ACKs have been gathered up or when a node has left the cluster
++ * unexpectedly and we reckon there are no more acks to collect */
++static void unjam(void)
++{
++ wake_up_interruptible(&socket_waitq);
++ wake_up_interruptible(&cnxman_waitq);
++}
+
-+/* A barrier */
-+struct cl_barrier {
-+ struct list_head list;
++/* Used by the data_ready routine to locate a connection given the socket */
++static inline struct cl_comms_socket *find_comms_by_sock(struct sock *sk)
++{
++ struct list_head *conlist;
+
-+ char name[MAX_BARRIER_NAME_LEN];
-+ unsigned int flags;
-+ enum { BARRIER_STATE_WAITING, BARRIER_STATE_INACTIVE,
-+ BARRIER_STATE_COMPLETE } state;
-+ unsigned int expected_nodes;
-+ unsigned int registered_nodes;
-+ atomic_t got_nodes;
-+ atomic_t completed_nodes;
-+ unsigned int inuse;
-+ unsigned int waitsent;
-+ unsigned int phase; /* Completion phase */
-+ unsigned int endreason; /* Reason we were woken, usually 0 */
-+ unsigned long timeout; /* In seconds */
++ list_for_each(conlist, &socket_list) {
++ struct cl_comms_socket *clsock =
++ list_entry(conlist, struct cl_comms_socket, list);
++ if (clsock->sock->sk == sk) {
++ return clsock;
++ }
++ }
++ return NULL;
++}
+
-+ void (*callback) (char *name, int status);
-+ wait_queue_head_t waitq;
-+ struct semaphore lock; /* To synch with cnxman messages */
-+ spinlock_t phase2_spinlock; /* Need to synchronise with timer
-+ * interrupts */
-+ struct timer_list timer;
-+};
++/* Data available on socket */
++static void cnxman_data_ready(struct sock *sk, int count_unused)
++{
++ struct cl_comms_socket *clsock = find_comms_by_sock(sk);
+
-+/* Cluster protocol commands sent to port 0 */
-+#define CLUSTER_CMD_ACK 1
-+#define CLUSTER_CMD_LISTENREQ 2
-+#define CLUSTER_CMD_LISTENRESP 3
-+#define CLUSTER_CMD_PORTCLOSED 4
-+#define CLUSTER_CMD_BARRIER 5
++ if (clsock == NULL) /* ASSERT ?? */
++ return;
+
-+extern struct cluster_node *find_node_by_addr(unsigned char *addr,
-+ int addr_len);
-+extern struct cluster_node *find_node_by_nodeid(unsigned int id);
-+extern struct cluster_node *find_node_by_name(char *name);
-+extern void set_quorate(int);
-+extern void notify_kernel_listeners(kcl_callback_reason reason, long arg);
-+extern void notify_listeners(void);
-+extern void free_nodeid_array(void);
-+extern int send_reconfigure(int param, unsigned int value);
-+extern int calculate_quorum(int, int, int *);
-+extern void recalculate_quorum(int);
-+extern int send_leave(unsigned char);
-+extern int get_quorum(void);
-+extern void set_votes(int, int);
-+extern void kcl_wait_for_all_acks(void);
-+extern char *membership_state(char *, int);
-+extern void a_node_just_died(struct cluster_node *node);
-+extern void check_barrier_returns(void);
-+extern int in_transition(void);
-+extern void get_local_addresses(struct cluster_node *node);
-+extern int add_node_address(struct cluster_node *node, unsigned char *addr, int len);
-+extern void create_proc_entries(void);
-+extern void cleanup_proc_entries(void);
-+extern unsigned int get_highest_nodeid(void);
-+extern int allocate_nodeid_array(void);
-+extern void queue_oob_skb(struct socket *sock, int cmd);
-+extern int new_temp_nodeid(char *addr, int addrlen);
-+extern int get_addr_from_temp_nodeid(int nodeid, char *addr, int *addrlen);
-+extern void purge_temp_nodeids(void);
-+extern inline char *print_addr(unsigned char *addr, int len, char *buf)
-+{
-+ int i;
-+ int ptr = 0;
++ /* If we're already on the list then don't do it again */
++ if (test_and_set_bit(1, &clsock->active))
++ return;
+
-+ for (i = 0; i < len; i++)
-+ ptr += sprintf(buf + ptr, "%02x ", addr[i]);
++ spin_lock_irq(&active_socket_lock);
++ list_add(&clsock->active_list, &active_socket_list);
++ spin_unlock_irq(&active_socket_lock);
+
-+ return buf;
++ wake_up_interruptible(&cnxman_waitq);
+}
+
-+#define MAX_ADDR_PRINTED_LEN (address_length*3 + 1)
++static int receive_message(struct cl_comms_socket *csock, char *iobuf)
++{
++ struct msghdr msg;
++ struct iovec iov;
++ struct sockaddr_in6 sin;
++ int len;
++ mm_segment_t fs;
+
-+/* Debug enabling macros. Sorry about the C++ comments but they're easier to
-+ * get rid of than C ones... */
++ memset(&sin, 0, sizeof (sin));
+
-+// #define DEBUG_MEMB
-+// #define DEBUG_COMMS
-+// #define DEBUG_BARRIER
++ msg.msg_control = NULL;
++ msg.msg_controllen = 0;
++ msg.msg_iovlen = 1;
++ msg.msg_iov = &iov;
++ msg.msg_name = &sin;
++ msg.msg_namelen = sizeof (sin);
++ msg.msg_flags = 0;
+
-+/* Debug macros */
-+#ifdef DEBUG_COMMS
-+#define P_COMMS(fmt, args...) printk(KERN_DEBUG "cman comms: " fmt, ## args)
-+#else
-+#define P_COMMS(fmt, args...)
-+#endif
++ iov.iov_len = MAX_CLUSTER_MESSAGE;
++ iov.iov_base = iobuf;
+
-+#ifdef DEBUG_BARRIER
-+#define P_BARRIER(fmt, args...) printk(KERN_DEBUG "cman barrier: " fmt, ## args)
-+#else
-+#define P_BARRIER(fmt, args...)
-+#endif
++ fs = get_fs();
++ set_fs(get_ds());
+
-+#ifdef DEBUG_MEMB
-+#define P_MEMB(fmt, args...) printk(KERN_DEBUG "cman memb: " fmt, ## args)
-+#define C_MEMB(fmt, args...) printk(fmt, ## args)
-+#else
-+#define P_MEMB(fmt, args...)
-+#define C_MEMB(fmt, args...)
-+#endif
++ len = sock_recvmsg(csock->sock, &msg, MAX_CLUSTER_MESSAGE, MSG_DONTWAIT);
++ set_fs(fs);
+
-+#endif /* __KERNEL */
++ iov.iov_base = iobuf;
+
-+#endif
-diff -urN linux-orig/cluster/cman/cnxman.c linux-patched/cluster/cman/cnxman.c
---- linux-orig/cluster/cman/cnxman.c 1970-01-01 07:30:00.000000000 +0730
-+++ linux-patched/cluster/cman/cnxman.c 2004-09-03 18:13:05.000000000 +0800
-@@ -0,0 +1,4124 @@
-+/******************************************************************************
-+*******************************************************************************
-+**
-+** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
-+** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
-+**
-+** This copyrighted material is made available to anyone wishing to use,
-+** modify, copy, or redistribute it subject to the terms and conditions
-+** of the GNU General Public License v.2.
-+**
-+*******************************************************************************
-+******************************************************************************/
++ if (len > 0) {
++ if (len > MAX_CLUSTER_MESSAGE) {
++ printk(KERN_CRIT CMAN_NAME
++ ": %d byte message far too big\n", len);
++ return 0;
++ }
++ process_incoming_packet(csock, &msg, len);
++ }
++ else {
++ if (len != -EAGAIN)
++ printk(KERN_CRIT CMAN_NAME ": recvmsg failed: %d\n",
++ len);
++ }
++ return len;
++}
+
-+#define EXPORT_SYMTAB
-+#include <linux/init.h>
-+#include <linux/socket.h>
-+#include <linux/kernel.h>
-+#include <linux/sched.h>
-+#include <linux/file.h>
-+#include <linux/utsname.h>
-+#include <net/sock.h>
-+#include <linux/proc_fs.h>
-+#include <linux/poll.h>
-+#include <linux/module.h>
-+#include <linux/list.h>
-+#include <cluster/cnxman.h>
-+#include <cluster/service.h>
++static int cluster_kthread(void *unused)
++{
++ int len;
++ char *iobuf;
++ struct list_head *socklist;
++ struct cl_comms_socket *csock;
++ wait_queue_t cnxman_waitq_head;
++ sigset_t tmpsig;
+
-+#include "cnxman-private.h"
-+#include "sm_control.h"
-+#include "sm_user.h"
-+#include "config.h"
++ daemonize("cman_comms");
+
-+#define CMAN_RELEASE_NAME "<CVS>"
++ /* Block everything but SIGKILL/SIGSTOP/SIGTERM */
++ siginitset(&tmpsig, SIGKILL | SIGSTOP | SIGTERM);
++ sigprocmask(SIG_BLOCK, &tmpsig, NULL);
+
-+static void send_to_userport(struct cl_comms_socket *csock, char *data, int len,
-+ char *addr, int addrlen);
-+static int cl_sendack(struct cl_comms_socket *sock, unsigned short seq,
-+ int addr_len, char *addr, unsigned char remport,
-+ unsigned char flag);
-+static void send_listen_request(int nodeid, unsigned char port);
-+static void send_listen_response(struct cl_comms_socket *csock, int nodeid,
-+ unsigned char port, unsigned short tag);
-+static void resend_last_message(void);
-+static void start_ack_timer(void);
-+static int send_queued_message(struct queued_message *qmsg);
-+static void send_port_close_oob(unsigned char port);
-+static void post_close_oob(unsigned char port, int nodeid);
-+static void process_barrier_msg(struct cl_barriermsg *msg,
-+ struct cluster_node *node);
-+static struct cl_barrier *find_barrier(char *name);
-+static void node_shutdown(void);
-+static void node_cleanup(void);
-+static int send_or_queue_message(void *buf, int len, struct sockaddr_cl *caddr,
-+ unsigned int flags);
-+static struct cl_comms_socket *get_next_interface(struct cl_comms_socket *cur);
-+static void check_for_unacked_nodes(void);
-+static void free_cluster_sockets(void);
-+static uint16_t generate_cluster_id(char *name);
++ /* This is the waitq we can wake the process up with */
++ init_waitqueue_head(&cnxman_waitq);
++ init_waitqueue_entry(&cnxman_waitq_head, current);
++ add_wait_queue(&cnxman_waitq, &cnxman_waitq_head);
+
-+static int is_valid_temp_nodeid(int nodeid);
++ set_user_nice(current, -6);
+
-+extern int start_membership_services(pid_t);
-+extern int kcl_leave_cluster(int remove);
-+extern int send_kill(int nodeid);
-+
-+static struct proto_ops cl_proto_ops;
-+static struct sock *master_sock;
-+static kmem_cache_t *cluster_sk_cachep;
-+
-+/* Pointer to the pseudo node that maintains quorum in a 2node system */
-+struct cluster_node *quorum_device = NULL;
-+
-+/* Array of "ports" allocated. This is just a list of pointers to the sock that
-+ * has this port bound. Speed is a major issue here so 1-2K of allocated
-+ * storage is worth sacrificing. Port 0 is reserved for protocol messages */
-+static struct sock *port_array[256];
-+static struct semaphore port_array_lock;
-+
-+/* Our cluster name & number */
-+unsigned short cluster_id;
-+char cluster_name[MAX_CLUSTER_NAME_LEN+1];
-+
-+/* Two-node mode: causes cluster to remain quorate if one of two nodes fails.
-+ * No more than two nodes are permitted to join the cluster. */
-+unsigned short two_node;
-+
-+/* Cluster configuration version that must be the same among members. */
-+unsigned int config_version;
-+
-+/* Reference counting for cluster applications */
-+atomic_t use_count;
-+
-+/* Length of sockaddr address for our comms protocol */
-+unsigned int address_length;
-+
-+/* Message sending */
-+static unsigned short cur_seq; /* Last message sent */
-+static unsigned int ack_count; /* Number of acks received for message
-+ * 'cur_seq' */
-+static unsigned int acks_expected; /* Number of acks we expect to receive */
-+static struct semaphore send_lock;
-+static struct timer_list ack_timer;
-+
-+/* Saved packet information in case we need to resend it */
-+static char saved_msg_buffer[MAX_CLUSTER_MESSAGE];
-+static int saved_msg_len;
-+static int retry_count;
-+
-+/* Task variables */
-+static pid_t kcluster_pid;
-+static pid_t membership_pid;
-+extern int quit_threads;
-+
-+wait_queue_head_t cnxman_waitq;
-+
-+/* Variables owned by membership services */
-+extern int cluster_members;
-+extern struct list_head cluster_members_list;
-+extern struct semaphore cluster_members_lock;
-+extern int we_are_a_cluster_member;
-+extern int cluster_is_quorate;
-+extern struct cluster_node *us;
-+extern struct list_head new_dead_node_list;
-+extern struct semaphore new_dead_node_lock;
-+extern char nodename[];
-+
-+/* A list of processes listening for membership events */
-+static struct list_head event_listener_list;
-+static struct semaphore event_listener_lock;
-+
-+/* A list of kernel callbacks listening for membership events */
-+static struct list_head kernel_listener_list;
-+static struct semaphore kernel_listener_lock;
-+
-+/* A list of sockets we are listening on (and can transmit on...later) */
-+static struct list_head socket_list;
-+
-+/* A list of all open cluster client sockets */
-+static struct list_head client_socket_list;
-+static struct semaphore client_socket_lock;
-+
-+/* A list of all current barriers */
-+static struct list_head barrier_list;
-+static struct semaphore barrier_list_lock;
-+
-+/* When a socket is read for reading it goes on this queue */
-+static spinlock_t active_socket_lock;
-+static struct list_head active_socket_list;
-+
-+/* If the cnxman process is running and available for work */
-+atomic_t cnxman_running;
-+
-+/* Fkags set by timers etc for the mainloop to detect and act upon */
-+static unsigned long mainloop_flags;
-+
-+#define ACK_TIMEOUT 1
-+#define RESEND_NEEDED 2
-+
-+/* A queue of messages waiting to be sent. If kcl_sendmsg is called outside of
-+ * process context then the messages get put in here */
-+static struct list_head messages_list;
-+static struct semaphore messages_list_lock;
-+
-+static struct semaphore start_thread_sem;
-+
-+/* List of outstanding ISLISTENING requests */
-+static struct list_head listenreq_list;
-+static struct semaphore listenreq_lock;
-+
-+/* Any sending requests wait on this queue if necessary (eg inquorate, waiting
-+ * ACK) */
-+static DECLARE_WAIT_QUEUE_HEAD(socket_waitq);
-+
-+/* Wait for thread to exit properly */
-+struct completion cluster_thread_comp;
-+struct completion member_thread_comp;
-+
-+/* The resend delay to use, We increase this geometrically(word?) each time a
-+ * send is delayed. in deci-seconds */
-+static int resend_delay = 1;
-+
-+/* Highest numbered interface and the current default */
-+static int num_interfaces;
-+static struct cl_comms_socket *current_interface = NULL;
-+
-+struct temp_node
-+{
-+ int nodeid;
-+ char addr[sizeof(struct sockaddr_in6)];
-+ int addrlen;
-+ struct list_head list;
-+};
-+static struct list_head tempnode_list;
-+static struct semaphore tempnode_lock;
-+
-+/* Wake up any processes that are waiting to send. This is usually called when
-+ * all the ACKs have been gathered up or when a node has left the cluster
-+ * unexpectedly and we reckon there are no more acks to collect */
-+static void unjam(void)
-+{
-+ wake_up_interruptible(&socket_waitq);
-+ wake_up_interruptible(&cnxman_waitq);
-+}
-+
-+/* Used by the data_ready routine to locate a connection given the socket */
-+static inline struct cl_comms_socket *find_comms_by_sock(struct sock *sk)
-+{
-+ struct list_head *conlist;
-+
-+ list_for_each(conlist, &socket_list) {
-+ struct cl_comms_socket *clsock =
-+ list_entry(conlist, struct cl_comms_socket, list);
-+ if (clsock->sock->sk == sk) {
-+ return clsock;
-+ }
-+ }
-+ return NULL;
-+}
-+
-+/* Data available on socket */
-+static void cnxman_data_ready(struct sock *sk, int count_unused)
-+{
-+ struct cl_comms_socket *clsock = find_comms_by_sock(sk);
-+
-+ if (clsock == NULL) /* ASSERT ?? */
-+ return;
-+
-+ /* If we're already on the list then don't do it again */
-+ if (test_and_set_bit(1, &clsock->active))
-+ return;
-+
-+ spin_lock_irq(&active_socket_lock);
-+ list_add(&clsock->active_list, &active_socket_list);
-+ spin_unlock_irq(&active_socket_lock);
-+
-+ wake_up_interruptible(&cnxman_waitq);
-+}
-+
-+static int receive_message(struct cl_comms_socket *csock, char *iobuf)
-+{
-+ struct msghdr msg;
-+ struct iovec iov;
-+ struct sockaddr_in6 sin;
-+ int len;
-+ mm_segment_t fs;
-+
-+ memset(&sin, 0, sizeof (sin));
-+
-+ msg.msg_control = NULL;
-+ msg.msg_controllen = 0;
-+ msg.msg_iovlen = 1;
-+ msg.msg_iov = &iov;
-+ msg.msg_name = &sin;
-+ msg.msg_namelen = sizeof (sin);
-+ msg.msg_flags = 0;
-+
-+ iov.iov_len = MAX_CLUSTER_MESSAGE;
-+ iov.iov_base = iobuf;
-+
-+ fs = get_fs();
-+ set_fs(get_ds());
-+
-+ len = sock_recvmsg(csock->sock, &msg, MAX_CLUSTER_MESSAGE, MSG_DONTWAIT);
-+ set_fs(fs);
-+
-+ if (len > 0) {
-+ if (len > MAX_CLUSTER_MESSAGE) {
-+ printk(KERN_CRIT CMAN_NAME
-+ ": %d byte message far too big\n", len);
-+ return 0;
-+ }
-+ send_to_userport(csock, iobuf, len, msg.msg_name, msg.msg_namelen);
-+ }
-+ else {
-+ if (len != -EAGAIN)
-+ printk(KERN_CRIT CMAN_NAME ": recvmsg failed: %d\n",
-+ len);
-+ }
-+ return len;
-+}
-+
-+static int cluster_kthread(void *unused)
-+{
-+ int len;
-+ char *iobuf;
-+ struct list_head *socklist;
-+ struct cl_comms_socket *csock;
-+ wait_queue_t cnxman_waitq_head;
-+ sigset_t tmpsig;
-+
-+ daemonize("cman_comms");
-+
-+ /* Block everything but SIGKILL/SIGSTOP/SIGTERM */
-+ siginitset(&tmpsig, SIGKILL | SIGSTOP | SIGTERM);
-+ sigprocmask(SIG_BLOCK, &tmpsig, NULL);
-+
-+ /* This is the waitq we can wake the process up with */
-+ init_waitqueue_head(&cnxman_waitq);
-+ init_waitqueue_entry(&cnxman_waitq_head, current);
-+ add_wait_queue(&cnxman_waitq, &cnxman_waitq_head);
-+
-+ set_user_nice(current, -6);
-+
-+ /* Allow the sockets to start receiving */
-+ list_for_each(socklist, &socket_list) {
-+ csock = list_entry(socklist, struct cl_comms_socket, list);
++ /* Allow the sockets to start receiving */
++ list_for_each(socklist, &socket_list) {
++ csock = list_entry(socklist, struct cl_comms_socket, list);
+
+ clear_bit(1, &csock->active);
+ }
+ }
+ P_COMMS("closing down\n");
+
-+ if (we_are_a_cluster_member)
-+ send_leave(us->leave_reason);
-+
-+ kfree(iobuf);
+ quit_threads = 1; /* force other thread to die too */
++
++ /* Wait for membership thread to finish, that way any
++ LEAVE message will get sent. */
++ wake_up_process(membership_task);
++ wait_for_completion(&member_thread_comp);
++
+ node_shutdown();
+
+ if (timer_pending(&ack_timer))
+ del_timer(&ack_timer);
+
-+ /* Wait for membership thread to die */
-+ wait_for_completion(&member_thread_comp);
-+
+ node_cleanup();
++ kfree(iobuf);
+
+ complete(&cluster_thread_comp);
+ return 0;
+static void check_for_unacked_nodes()
+{
+ struct list_head *nodelist;
++ struct list_head *temp;
+ struct cluster_node *node;
+
+ clear_bit(RESEND_NEEDED, &mainloop_flags);
+ /* Node did not ACK a message after <n> tries, remove it from the
+ * cluster */
+ down(&cluster_members_lock);
-+ list_for_each(nodelist, &cluster_members_list) {
++ list_for_each_safe(nodelist, temp, &cluster_members_list) {
+ node = list_entry(nodelist, struct cluster_node, list);
+
-+ P_COMMS
-+ ("checking node %s: last_acked = %d, last_seq_sent = %d\n",
-+ node->name, node->last_seq_acked, node->last_seq_sent);
-+ if (node->state != NODESTATE_DEAD
-+ && node->last_seq_acked != node->last_seq_sent && !node->us) {
++ P_COMMS("checking node %s: last_acked = %d, last_seq_sent = %d\n",
++ node->name, node->last_seq_acked, node->last_seq_sent);
++ if (node->state != NODESTATE_DEAD &&
++ node->last_seq_acked != node->last_seq_sent && !node->us) {
+ printk(KERN_WARNING CMAN_NAME
+ ": node %s is not responding - removing from the cluster\n",
+ node->name);
+
-+ /* Start a state transition */
++ /* Drop this lock or we can deadlock with membership */
++ up(&cluster_members_lock);
++
++ /* Start a state transition */
+ a_node_just_died(node);
++ down(&cluster_members_lock);
+ }
+ }
+ up(&cluster_members_lock);
+ case CLUSTER_CMD_ACK:
+ ackmsg = (struct cl_ackmsg *) data;
+
-+ if (ackmsg->aflags & 1) {
++ if (rem_node && (ackmsg->aflags & 1)) {
+ if (net_ratelimit())
+ printk(KERN_INFO CMAN_NAME
+ ": WARNING no listener for port %d on node %s\n",
+ listenmsg =
+ (struct cl_listenmsg *) (data +
+ sizeof (struct cl_protheader));
-+ cl_sendack(csock, header->seq, addrlen, addr, header->port, 0);
++ cl_sendack(csock, header->seq, addrlen, addr, header->tgtport, 0);
+ send_listen_response(csock, le32_to_cpu(header->srcid),
+ listenmsg->target_port, listenmsg->tag);
+ break;
+ listenmsg =
+ (struct cl_listenmsg *) (data +
+ sizeof (struct cl_protheader));
-+ cl_sendack(csock, header->seq, addrlen, addr, header->port, 0);
++ cl_sendack(csock, header->seq, addrlen, addr, header->tgtport, 0);
+ down(&listenreq_lock);
+ listen_request = find_listen_request(listenmsg->tag);
+ if (listen_request) {
+ closemsg =
+ (struct cl_closemsg *) (data +
+ sizeof (struct cl_protheader));
-+ cl_sendack(csock, header->seq, addrlen, addr, header->port, 0);
++ cl_sendack(csock, header->seq, addrlen, addr, header->tgtport, 0);
+ post_close_oob(closemsg->port, le32_to_cpu(header->srcid));
+ break;
+
+ barriermsg =
+ (struct cl_barriermsg *) (data +
+ sizeof (struct cl_protheader));
-+ cl_sendack(csock, header->seq, addrlen, addr, header->port, 0);
-+ process_barrier_msg(barriermsg, rem_node);
++ cl_sendack(csock, header->seq, addrlen, addr, header->tgtport, 0);
++ if (rem_node)
++ process_barrier_msg(barriermsg, rem_node);
+ break;
+
+ default:
+ return 0; /* FALSE */
+}
+
-+static void send_to_userport(struct cl_comms_socket *csock, char *data, int len,
-+ char *addr, int addrlen)
++/* TODO use kvec */
++static void memcpy_fromkvec(void *data, struct iovec *iov, int len)
++{
++ while (len > 0) {
++ if (iov->iov_len) {
++ int copy = min_t(unsigned int, len, iov->iov_len);
++ memcpy(data, iov->iov_base, copy);
++ len -= copy;
++ data += copy;
++ iov->iov_base += copy;
++ iov->iov_len -= copy;
++ }
++ iov++;
++ }
++}
++
++static int send_to_user_port(struct cl_comms_socket *csock,
++ struct cl_protheader *header,
++ struct msghdr *msg, struct iovec *iov,
++ int len)
+{
++ struct sk_buff *skb;
++ struct cb_info *cbinfo;
+ int err;
++
++ /* Get the port number and look for a listener */
++ down(&port_array_lock);
++ if (port_array[header->tgtport]) {
++ struct cluster_sock *c = cluster_sk(port_array[header->tgtport]);
++
++ /* ACK it */
++ if (!(header->flags & MSG_NOACK) &&
++ !(header->flags & MSG_REPLYEXP)) {
++
++ cl_sendack(csock, header->seq, msg->msg_namelen,
++ msg->msg_name, header->tgtport, 0);
++ }
++
++ /* Call a callback if there is one */
++ if (c->kernel_callback) {
++ up(&port_array_lock);
++ if (msg->msg_iovlen == 1) {
++ c->kernel_callback(iov->iov_base,
++ iov->iov_len,
++ msg->msg_name, msg->msg_namelen,
++ le32_to_cpu(header->srcid));
++
++ }
++ else { /* Unroll iov, this Hardly ever Happens */
++ char *data;
++ data = kmalloc(len, GFP_KERNEL);
++ if (!data)
++ return -ENOMEM;
++
++ memcpy_fromkvec(data, iov, len);
++ c->kernel_callback(data, len,
++ msg->msg_name, msg->msg_namelen,
++ le32_to_cpu(header->srcid));
++ kfree(data);
++ }
++ return len;
++ }
++
++ /* Otherwise put it into an SKB and pass it onto the recvmsg
++ * mechanism */
++ skb = alloc_skb(len, GFP_KERNEL);
++ if (!skb) {
++ up(&port_array_lock);
++ printk(KERN_INFO CMAN_NAME
++ ": Failed to allocate skb\n");
++ return -ENOMEM;
++ }
++
++ skb_put(skb, len);
++ memcpy_fromkvec(skb->data, iov, len);
++
++ /* Put metadata into cb[] */
++ cbinfo = (struct cb_info *)skb->cb;
++ cbinfo->orig_nodeid = le32_to_cpu(header->srcid);
++ cbinfo->orig_port = header->srcport;
++ cbinfo->oob = 0;
++
++ if ((err =
++ sock_queue_rcv_skb(port_array[header->tgtport], skb)) < 0) {
++
++ printk(KERN_INFO CMAN_NAME
++ ": Error queueing request to port %d: %d\n",
++ header->tgtport, err);
++ kfree_skb(skb);
++
++ /* If the port was MEMBERSHIP then we have to die */
++ if (header->tgtport == CLUSTER_PORT_MEMBERSHIP) {
++ up(&port_array_lock);
++ send_leave(CLUSTER_LEAVEFLAG_PANIC);
++ panic("membership stopped responding");
++ }
++ }
++ up(&port_array_lock);
++
++ }
++ else {
++ /* ACK it, but set the flag bit so remote end knows no-one
++ * caught it */
++ if (!(header->flags & MSG_NOACK))
++ cl_sendack(csock, header->seq,
++ msg->msg_namelen, msg->msg_name,
++ header->tgtport, 1);
++
++ /* Nobody listening, drop it */
++ up(&port_array_lock);
++ }
++ return len;
++}
++
++/* NOTE: This routine knows (assumes!) that there is only one
++ iov element passed into it. */
++static void process_incoming_packet(struct cl_comms_socket *csock,
++ struct msghdr *msg, int len)
++{
++ char *data = msg->msg_iov->iov_base;
++ char *addr = msg->msg_name;
++ int addrlen = msg->msg_namelen;
+ struct cl_protheader *header = (struct cl_protheader *) data;
+ struct cluster_node *rem_node =
-+ find_node_by_nodeid(le32_to_cpu(header->srcid));
-+ struct sk_buff *skb = NULL;
++ find_node_by_nodeid(le32_to_cpu(header->srcid));
+
-+ P_COMMS
-+ ("seen message, from %d for %d, sequence num = %d, rem_node=%p, state=%d\n",
++ P_COMMS("seen message, from %d for %d, sequence num = %d, rem_node=%p, state=%d\n",
+ le32_to_cpu(header->srcid), le32_to_cpu(header->tgtid),
+ le16_to_cpu(header->seq), rem_node,
+ rem_node ? rem_node->state : -1);
+ if (le16_to_cpu(header->cluster) != cluster_id) {
+ P_COMMS("Dumping message - wrong cluster ID (us=%d, msg=%d)\n",
+ cluster_id, header->cluster);
-+ goto userport_finish;
++ goto incoming_finish;
+ }
+
+ /* If the message is from us then just dump it */
+ if (rem_node && rem_node->us)
-+ goto userport_finish;
++ goto incoming_finish;
+
+ /* If we can't find the nodeid then check for our own messages the hard
+ * way - this only happens during joining */
+ if (clsock->recv_only) {
+
+ if (memcmp(addr, &clsock->saddr, address_length) == 0) {
-+ goto userport_finish;
++ goto incoming_finish;
+ }
+ }
+ }
+ /* Ignore messages not for us */
+ if (le32_to_cpu(header->tgtid) > 0 && us
+ && le32_to_cpu(header->tgtid) != us->node_id) {
-+ goto userport_finish;
++ goto incoming_finish;
+ }
+
+ P_COMMS("got message, from %d for %d, sequence num = %d\n",
+
+ /* Have we received this message before ? If so just ignore it, it's a
+ * resend for someone else's benefit */
-+ if (!(header->flags & (MSG_NOACK >> 16)) &&
++ if (!(header->flags & MSG_NOACK) &&
+ rem_node && le16_to_cpu(header->seq) == rem_node->last_seq_recv) {
+ P_COMMS
+ ("Discarding message - Already seen this sequence number %d\n",
+ rem_node->last_seq_recv);
+ /* Still need to ACK it though, in case it was the ACK that got
+ * lost */
-+ cl_sendack(csock, header->seq, addrlen, addr, header->port, 0);
-+ goto userport_finish;
++ cl_sendack(csock, header->seq, addrlen, addr, header->tgtport, 0);
++ goto incoming_finish;
+ }
+
+ /* Check that the message is from the node we think it is from */
+ header->srcid = cpu_to_le32(new_temp_nodeid(addr, addrlen));
+
+ P_COMMS("Got message: flags = %x, port = %d, we_are_a_member = %d\n",
-+ header->flags, header->port, we_are_a_cluster_member);
++ header->flags, header->tgtport, we_are_a_cluster_member);
+
+
+ /* If we are not part of the cluster then ignore multicast messages
+ * that need an ACK as we will confuse the sender who is only expecting
+ * ACKS from bona fide members */
-+ if (header->flags & (MSG_MULTICAST >> 16) &&
-+ !(header->flags & (MSG_NOACK >> 16)) && !we_are_a_cluster_member) {
++ if ((header->flags & MSG_MULTICAST) &&
++ !(header->flags & MSG_NOACK) && !we_are_a_cluster_member) {
+ P_COMMS
+ ("Discarding message - multicast and we are not a cluster member. port=%d flags=%x\n",
-+ header->port, header->flags);
-+ goto userport_finish;
++ header->tgtport, header->flags);
++ goto incoming_finish;
+ }
+
+ /* Save the sequence number of this message so we can ignore duplicates
+ * (above) */
-+ if (!(header->flags & (MSG_NOACK >> 16)) && rem_node) {
++ if (!(header->flags & MSG_NOACK) && rem_node) {
+ P_COMMS("Saving seq %d for node %s\n", le16_to_cpu(header->seq),
+ rem_node->name);
+ rem_node->last_seq_recv = le16_to_cpu(header->seq);
+ }
+
+ /* Is it a protocol message? */
-+ if (header->port == 0) {
++ if (header->tgtport == 0) {
+ process_cnxman_message(csock, data, len, addr, addrlen,
+ rem_node);
-+ goto userport_finish;
++ goto incoming_finish;
+ }
+
+ /* Skip past the header to the data */
-+ data += sizeof (struct cl_protheader);
++ msg->msg_iov[0].iov_base = data + sizeof (struct cl_protheader);
++ msg->msg_iov[0].iov_len -= sizeof (struct cl_protheader);
+ len -= sizeof (struct cl_protheader);
+
-+ /* Get the port number and look for a listener */
-+ down(&port_array_lock);
-+ if (port_array[header->port]) {
-+ int native_srcid;
-+ struct cluster_sock *c = cluster_sk(port_array[header->port]);
-+
-+ /* ACK it */
-+ if (!(header->flags & (MSG_NOACK >> 16)) &&
-+ !(header->flags & (MSG_REPLYEXP >> 16))) {
-+
-+ cl_sendack(csock, header->seq, addrlen, addr,
-+ header->port, 0);
-+ }
-+
-+ /* Call a callback if there is one */
-+ if (c->kernel_callback) {
-+ up(&port_array_lock);
-+ c->kernel_callback(data, len, addr, addrlen,
-+ le32_to_cpu(header->srcid));
-+ goto userport_finish;
-+ }
-+
-+ /* Otherwise put it into an SKB and pass it onto the recvmsg
-+ * mechanism */
-+ skb = alloc_skb(len, GFP_KERNEL);
-+ if (!skb) {
-+ up(&port_array_lock);
-+ printk(KERN_INFO CMAN_NAME
-+ ": Failed to allocate skb\n");
-+ return;
-+ }
-+
-+ skb_put(skb, len);
-+ memcpy(skb->data, data, len);
-+
-+ /* Put the nodeid into cb so we can pass it to the clients */
-+ skb->cb[0] = 0; /* Clear flags */
-+ native_srcid = le32_to_cpu(header->srcid);
-+ memcpy(skb->cb + 1, &native_srcid, sizeof(int));
-+
-+ if ((err =
-+ sock_queue_rcv_skb(port_array[header->port], skb)) < 0) {
-+
-+ printk(KERN_INFO CMAN_NAME
-+ ": Error queueing request to port %d: %d\n",
-+ header->port, err);
-+ kfree_skb(skb);
-+
-+ /* If the port was MEMBERSHIP then we have to die */
-+ if (header->port == CLUSTER_PORT_MEMBERSHIP) {
-+ up(&port_array_lock);
-+ send_leave(CLUSTER_LEAVEFLAG_PANIC);
-+ panic("membership stopped responding");
-+ }
-+ }
-+ up(&port_array_lock);
-+
-+ }
-+ else {
-+ /* ACK it, but set the flag bit so remote end knows no-one
-+ * caught it */
-+ if (!(header->flags & (MSG_NOACK >> 16)))
-+ cl_sendack(csock, header->seq, addrlen, addr,
-+ header->port, 1);
-+
-+ /* Nobody listening, drop it */
-+ up(&port_array_lock);
-+ }
++ send_to_user_port(csock, header, msg, msg->msg_iov, len);
+
-+ userport_finish:
++ incoming_finish:
+ return;
+}
+
+
+static int do_ioctl_set_nodeid(unsigned long arg)
+{
-+ // TODO
-+ return -ENOTSUPP;
++ int nodeid = (int)arg;
++
++ if (!capable(CAP_CLUSTER))
++ return -EPERM;
++ if (atomic_read(&cnxman_running))
++ return -EINVAL;
++ if (nodeid < 0 || nodeid > 4096)
++ return -EINVAL;
++
++ wanted_nodeid = (int)arg;
++ return 0;
+}
+
+static int do_ioctl_join_cluster(unsigned long arg)
+ wake_up_interruptible(&cnxman_waitq);
+
+ wait_for_completion(&cluster_thread_comp);
++ atomic_set(&use_count, 0);
+ return 0;
+}
+
+ int result;
+ struct iovec save_vectors[msg->msg_iovlen];
+
-+ /* Save a copy of the IO vectors as send_msg mucks around with them and
++ /* Save a copy of the IO vectors as sendmsg mucks around with them and
+ * we may want to send the same stuff out more than once (for different
+ * interfaces)
+ */
+{
+ struct sock *sk = sock->sk;
+ struct sockaddr_cl *sin = (struct sockaddr_cl *) msg->msg_name;
-+ struct cluster_sock *c = cluster_sk(sk);
+ struct sk_buff *skb;
++ struct cb_info *cbinfo;
+ int copied, err = 0;
-+ int isoob = 0;
+
+ /* Socket was notified of shutdown, remove any pending skbs and return
+ * EOF */
+ if (!skb)
+ goto out;
+
-+ /* Is it OOB */
-+ if (skb->cb[0] & 0x80)
-+ isoob = 1;
-+ else
-+ isoob = 0;
++ cbinfo = (struct cb_info *)skb->cb;
+
-+ /* If it is and the user doesn't want it, then throw it away. */
-+ if (isoob && !(flags & MSG_OOB)) {
++ /* If it is OOB and the user doesn't want it, then throw it away. */
++ if (cbinfo->oob && !(flags & MSG_OOB)) {
+ skb_free_datagram(sk, skb);
+
+ /* If we peeked (?) an OOB but the user doesn't want it
+ }
+ }
+ }
-+ while (isoob && !(flags & MSG_OOB));
++ while (cbinfo->oob && !(flags & MSG_OOB));
+
+ copied = skb->len;
+ if (copied > size) {
+
+ /* Nodeid is in native byte order - anything else is just
+ * perverse */
-+ memcpy(&sin->scl_nodeid, skb->cb + 1, sizeof(int));
++ sin->scl_nodeid = cbinfo->orig_nodeid;
+ }
+ msg->msg_namelen = sizeof (struct sockaddr_cl);
-+ sin->scl_port = c->port;
++ sin->scl_port = cbinfo->orig_port;
+ }
+
-+ /* Top bit set in cb[0] means this is an OOB message */
-+ if (skb->cb[0] & 0x80) {
++ if (cbinfo->oob) {
+ msg->msg_flags |= MSG_OOB;
+ }
+
+ struct sockaddr_cl *caddr = msg->msg_name;
+ struct cl_protheader header;
+ struct iovec vectors[msg->msg_iovlen + 1];
++ unsigned char srcport;
+ int nodeid = 0;
+
+ if (size > MAX_CLUSTER_MESSAGE)
+ return -ENOTCONN;
+ }
+
++ /* If there's no sending client socket then the source
++ port is 0: "us" */
++ if (sock) {
++ struct cluster_sock *csock = cluster_sk(sock->sk);
++ srcport = csock->port;
++ }
++ else {
++ srcport = 0;
++ }
++
+ /* We can only have one send outstanding at a time so we might as well
+ * lock the whole send mechanism */
+ down(&send_lock);
+ down(&send_lock);
+ }
+
++ set_task_state(tsk, TASK_RUNNING);
++ remove_wait_queue(&socket_waitq, &wq);
++
+ /* Going down */
+ if (quit_threads) {
+ up(&send_lock);
+ return -ENOTCONN;
+ }
+
-+ set_task_state(tsk, TASK_RUNNING);
-+ remove_wait_queue(&socket_waitq, &wq);
-+
+ if (signal_pending(current)) {
+ up(&send_lock);
+ return -ERESTARTSYS;
+ memset(&our_msg, 0, sizeof (our_msg));
+
+ /* Build the header */
-+ header.port = port;
-+ header.flags = msg->msg_flags >> 16;
++ header.tgtport = port;
++ header.srcport = srcport;
++ header.flags = msg->msg_flags;
+ header.cluster = cpu_to_le16(cluster_id);
+ header.srcid = us ? cpu_to_le32(us->node_id) : 0;
+ header.tgtid = caddr ? cpu_to_le32(nodeid) : 0;
+
+ /* Set the MULTICAST flag on messages with no particular destination */
+ if (!msg->msg_namelen) {
-+ header.flags |= MSG_MULTICAST >> 16;
++ header.flags |= MSG_MULTICAST;
+ header.tgtid = 0;
+ }
+
+ our_msg.msg_iovlen = msg->msg_iovlen + 1;
+ our_msg.msg_iov = vectors;
+
-+ /* Work out how many ACKS are wanted - *don't* reset acks_expected to
++ /* Loopback shortcut */
++ if (nodeid == us->node_id && nodeid != 0) {
++
++ up(&send_lock);
++ header.flags |= MSG_NOACK; /* Don't ack it! */
++
++ return send_to_user_port(NULL, &header, msg, msg->msg_iov, size);
++ }
++
++ /* Work out how many ACKS are wanted - *don't* reset acks_expected to
+ * zero if no acks are required as an ACK-needed message may still be
+ * outstanding */
+ if (!(msg->msg_flags & MSG_NOACK)) {
+ }
+ }
+
++ /* if the client wants a broadcast message sending back to itself
++ then loop it back */
++ if (nodeid == 0 && (flags & MSG_BCASTSELF)) {
++ header.flags |= MSG_NOACK; /* Don't ack it! */
++
++ result = send_to_user_port(NULL, &header, msg, msg->msg_iov, size);
++ }
++
+ /* Save a copy of the message if we're expecting an ACK */
+ if (!(flags & MSG_NOACK) && acks_expected) {
-+ mm_segment_t fs;
+ struct cl_protheader *savhdr = (struct cl_protheader *) saved_msg_buffer;
+
-+ fs = get_fs();
-+ set_fs(get_ds());
-+
-+ memcpy_fromiovec(saved_msg_buffer, our_msg.msg_iov,
-+ size + sizeof (header));
-+ set_fs(fs);
++ memcpy_fromkvec(saved_msg_buffer, our_msg.msg_iov,
++ size + sizeof (header));
+
+ saved_msg_len = size + sizeof (header);
+ retry_count = ack_count = 0;
+
+ /* Clear the REPLYEXPected flag so we force a real ACK
+ if it's necessary to resend this packet */
-+ savhdr->flags &= ~(MSG_REPLYEXP>>16);
++ savhdr->flags &= ~MSG_REPLYEXP;
+ start_ack_timer();
+ }
+
+ return result;
+}
+
-+static int queue_message(void *buf, int len, struct sockaddr_cl *caddr,
++static int queue_message(struct socket *sock, void *buf, int len,
++ struct sockaddr_cl *caddr,
+ unsigned char port, int flags)
+{
+ struct queued_message *qmsg;
+ }
+ qmsg->flags = flags;
+ qmsg->port = port;
-+ qmsg->socket = NULL;
++ qmsg->socket = sock;
+
+ down(&messages_list_lock);
+ list_add_tail(&qmsg->list, &messages_list);
+ /* If we have no process context then queue it up for kclusterd to
+ * send. */
+ if (in_interrupt() || flags & MSG_QUEUE) {
-+ return queue_message(buf, size, caddr, port,
++ return queue_message(sock, buf, size, caddr, port,
+ flags & ~MSG_QUEUE);
+ }
+
+/* Used where we are in kclusterd context and we can't allow the task to wait
+ * as we are also responsible to processing the ACKs that do the wake up. Try
+ * to send the message immediately and queue it if that's not possible */
-+static int send_or_queue_message(void *buf, int len, struct sockaddr_cl *caddr,
++static int send_or_queue_message(struct socket *sock, void *buf, int len,
++ struct sockaddr_cl *caddr,
+ unsigned int flags)
+{
+ struct iovec iovecs[1];
+ return status;
+ }
+
-+ return queue_message(buf, len, caddr, 0, flags);
++ return queue_message(sock, buf, len, caddr, 0, flags);
+}
+
+/* Send a listen request to a node */
+ caddr.scl_port = 0;
+ caddr.scl_nodeid = nodeid;
+
-+ send_or_queue_message(&listenmsg, sizeof(listenmsg), &caddr, MSG_REPLYEXP);
++ send_or_queue_message(NULL, &listenmsg, sizeof(listenmsg), &caddr, MSG_REPLYEXP);
+ return;
+}
+
+ caddr.scl_port = 0;
+ caddr.scl_nodeid = nodeid;
+
-+ status = send_or_queue_message(&listenmsg,
++ status = send_or_queue_message(NULL, &listenmsg,
+ sizeof (listenmsg),
+ &caddr, 0);
+
+ }
+
+ /* Build the header */
-+ ackmsg.header.port = 0; /* Protocol port */
++ ackmsg.header.tgtport = 0; /* Protocol port */
++ ackmsg.header.srcport = 0;
+ ackmsg.header.seq = 0;
-+ ackmsg.header.flags = MSG_NOACK >> 16;
++ ackmsg.header.flags = MSG_NOACK;
+ ackmsg.header.cluster = cpu_to_le16(cluster_id);
+ ackmsg.header.srcid = us ? cpu_to_le32(us->node_id) : 0;
+ ackmsg.header.ack = seq; /* already in LE order */
+ closemsg.cmd = CLUSTER_CMD_PORTCLOSED;
+ closemsg.port = port;
+
-+ send_or_queue_message(&closemsg, sizeof (closemsg), NULL, 0);
++ send_or_queue_message(NULL, &closemsg, sizeof (closemsg), NULL, 0);
+ return;
+}
+
+ struct cl_portclosed_oob *oobmsg;
+ struct sk_buff *skb;
+ struct sock *sock = port_array[port];
++ struct cb_info *cbinfo;
+
+ if (!sock) {
+ return; /* No-one listening */
+ oobmsg = (struct cl_portclosed_oob *) skb->data;
+ oobmsg->port = port;
+ oobmsg->cmd = CLUSTER_OOB_MSG_PORTCLOSED;
-+ skb->cb[0] = 0x80;
-+ memcpy(skb->cb + 1, &nodeid, sizeof(int));
++
++ cbinfo = (struct cb_info *)skb->cb;
++ cbinfo->oob = 1;
++ cbinfo->orig_nodeid = nodeid;
++ cbinfo->orig_port = port;
+
+ sock_queue_rcv_skb(sock, skb);
+
+ struct cl_client_socket *csock;
+ struct sk_buff *null_skb;
+
-+ printk(KERN_INFO CMAN_NAME ": we are leaving the cluster\n");
++ if (we_are_a_cluster_member)
++ printk(KERN_INFO CMAN_NAME ": we are leaving the cluster. %s\n",
++ us->leave_reason?leave_string(us->leave_reason):"");
+
+ atomic_set(&cnxman_running, 0);
+ unjam();
+ cluster_members = 0;
+ up(&cluster_members_lock);
+
-+ /* Clean the temop node IDs list. */
++ /* Clean the temp node IDs list. */
+ down(&tempnode_lock);
+ list_for_each_entry_safe(tn, tmp, &tempnode_list, list) {
+ list_del(&tn->list);
+ kcluster_pid = 0;
+ clear_bit(RESEND_NEEDED, &mainloop_flags);
+ acks_expected = 0;
++ wanted_nodeid = 0;
+}
+
+/* If "cluster_is_quorate" is 0 then all activity apart from protected ports is
+void queue_oob_skb(struct socket *sock, int cmd)
+{
+ struct sk_buff *skb;
++ struct cb_info *cbinfo;
+ struct cl_portclosed_oob *oobmsg;
+
+ skb = alloc_skb(sizeof (*oobmsg), GFP_KERNEL);
+
+ /* There is no remote node associated with this so
+ clear out the field to avoid any accidents */
-+ memset(skb->cb, 0, sizeof(int));
-+ skb->cb[0] = 0x80;
++ cbinfo = (struct cb_info *)skb->cb;
++ cbinfo->oob = 1;
++ cbinfo->orig_nodeid = 0;
++ cbinfo->orig_port = 0;
+
+ sock_queue_rcv_skb(sock->sk, skb);
+}
+ strcpy(bmsg.name, barrier->name);
+
+ P_BARRIER("Sending COMPLETE for %s\n", barrier->name);
-+ queue_message((char *) &bmsg, sizeof (bmsg), NULL, 0, 0);
++ queue_message(NULL, (char *) &bmsg, sizeof (bmsg), NULL, 0, 0);
+ }
+}
+
+ * cnxman and COMPLETE may /just/ slide in
+ * before WAIT if its in the queue
+ */
-+ P_BARRIER("Sending WAIT for %s\n", name);
-+ status = queue_message(&bmsg, sizeof (bmsg), NULL, 0, 0);
++ P_BARRIER("Sending WAIT for %s\n", barrier->name);
++ status = queue_message(NULL, &bmsg, sizeof (bmsg), NULL, 0, 0);
+ if (status < 0) {
+ up(&barrier->lock);
+ return status;
+}
+
+
-+
-+
+/* Quorum device functions */
+int kcl_register_quorum_device(char *name, int votes)
+{
+/* Socket registration stuff */
+static struct net_proto_family cl_family_ops = {
+ .family = AF_CLUSTER,
-+ .create = cl_create
++ .create = cl_create,
++ .owner = THIS_MODULE,
+};
+
+static struct proto_ops cl_proto_ops = {
+ .recvmsg = cl_recvmsg,
+ .mmap = sock_no_mmap,
+ .sendpage = sock_no_sendpage,
++ .owner = THIS_MODULE,
+};
+
+#ifdef MODULE
+ return -1;
+ }
+
-+#ifdef CONFIG_PROC_FS
-+ create_proc_entries();
-+#endif
++#ifdef CONFIG_PROC_FS
++ create_proc_entries();
++#endif
++
++ init_MUTEX(&start_thread_sem);
++ init_MUTEX(&send_lock);
++ init_MUTEX(&barrier_list_lock);
++ init_MUTEX(&cluster_members_lock);
++ init_MUTEX(&port_array_lock);
++ init_MUTEX(&messages_list_lock);
++ init_MUTEX(&listenreq_lock);
++ init_MUTEX(&client_socket_lock);
++ init_MUTEX(&new_dead_node_lock);
++ init_MUTEX(&event_listener_lock);
++ init_MUTEX(&kernel_listener_lock);
++ init_MUTEX(&tempnode_lock);
++ spin_lock_init(&active_socket_lock);
++ init_timer(&ack_timer);
++
++ INIT_LIST_HEAD(&event_listener_list);
++ INIT_LIST_HEAD(&kernel_listener_list);
++ INIT_LIST_HEAD(&socket_list);
++ INIT_LIST_HEAD(&client_socket_list);
++ INIT_LIST_HEAD(&active_socket_list);
++ INIT_LIST_HEAD(&barrier_list);
++ INIT_LIST_HEAD(&messages_list);
++ INIT_LIST_HEAD(&listenreq_list);
++ INIT_LIST_HEAD(&cluster_members_list);
++ INIT_LIST_HEAD(&new_dead_node_list);
++ INIT_LIST_HEAD(&tempnode_list);
++
++ atomic_set(&cnxman_running, 0);
++
++ sm_init();
++
++ return 0;
++}
++
++static void __exit cluster_exit(void)
++{
++#ifdef CONFIG_PROC_FS
++ cleanup_proc_entries();
++#endif
++
++ sock_unregister(AF_CLUSTER);
++ kmem_cache_destroy(cluster_sk_cachep);
++}
++
++module_init(cluster_init);
++module_exit(cluster_exit);
++
++EXPORT_SYMBOL(kcl_sendmsg);
++EXPORT_SYMBOL(kcl_register_read_callback);
++EXPORT_SYMBOL(kcl_add_callback);
++EXPORT_SYMBOL(kcl_remove_callback);
++EXPORT_SYMBOL(kcl_get_members);
++EXPORT_SYMBOL(kcl_get_member_ids);
++EXPORT_SYMBOL(kcl_get_all_members);
++EXPORT_SYMBOL(kcl_is_quorate);
++EXPORT_SYMBOL(kcl_get_node_by_addr);
++EXPORT_SYMBOL(kcl_get_node_by_name);
++EXPORT_SYMBOL(kcl_get_node_by_nodeid);
++EXPORT_SYMBOL(kcl_get_node_addresses);
++EXPORT_SYMBOL(kcl_addref_cluster);
++EXPORT_SYMBOL(kcl_releaseref_cluster);
++EXPORT_SYMBOL(kcl_cluster_name);
++
++EXPORT_SYMBOL(kcl_barrier_register);
++EXPORT_SYMBOL(kcl_barrier_setattr);
++EXPORT_SYMBOL(kcl_barrier_delete);
++EXPORT_SYMBOL(kcl_barrier_wait);
++EXPORT_SYMBOL(kcl_barrier_cancel);
++
++EXPORT_SYMBOL(kcl_register_quorum_device);
++EXPORT_SYMBOL(kcl_unregister_quorum_device);
++EXPORT_SYMBOL(kcl_quorum_device_available);
++
++EXPORT_SYMBOL(kcl_register_service);
++EXPORT_SYMBOL(kcl_unregister_service);
++EXPORT_SYMBOL(kcl_join_service);
++EXPORT_SYMBOL(kcl_leave_service);
++EXPORT_SYMBOL(kcl_global_service_id);
++EXPORT_SYMBOL(kcl_start_done);
++EXPORT_SYMBOL(kcl_get_services);
++EXPORT_SYMBOL(kcl_get_current_interface);
++
++/*
++ * Overrides for Emacs so that we follow Linus's tabbing style.
++ * Emacs will notice this stuff at the end of the file and automatically
++ * adjust the settings for this buffer only. This must remain at the end
++ * of the file.
++ * ---------------------------------------------------------------------------
++ * Local variables:
++ * c-file-style: "linux"
++ * End:
++ */
+diff -urN linux-orig/cluster/cman/cnxman-private.h linux-patched/cluster/cman/cnxman-private.h
+--- linux-orig/cluster/cman/cnxman-private.h 1969-12-31 18:00:00.000000000 -0600
++++ linux-patched/cluster/cman/cnxman-private.h 2004-10-22 11:04:52.654551302 -0500
+@@ -0,0 +1,432 @@
++/******************************************************************************
++*******************************************************************************
++**
++** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
++** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
++**
++** This copyrighted material is made available to anyone wishing to use,
++** modify, copy, or redistribute it subject to the terms and conditions
++** of the GNU General Public License v.2.
++**
++*******************************************************************************
++******************************************************************************/
++
++#ifndef __CNXMAN_PRIVATE_H
++#define __CNXMAN_PRIVATE_H
++
++/* Version triplet */
++#define CNXMAN_MAJOR_VERSION 3
++#define CNXMAN_MINOR_VERSION 0
++#define CNXMAN_PATCH_VERSION 1
++
++#define MAX_RETRIES 3 /* Maximum number of send retries */
++#define CAP_CLUSTER CAP_SYS_ADMIN /* Capability needed to manage the
++ * cluster */
++#ifdef __KERNEL__
++
++/* How we announce ourselves in console events */
++#define CMAN_NAME "CMAN"
++
++/* One of these per AF_CLUSTER socket */
++struct cluster_sock {
++ /* WARNING: sk has to be the first member */
++ struct sock sk;
++
++ unsigned char port; /* Bound port or zero */
++ int (*kernel_callback) (char *, int, char *, int, unsigned int);
++ void *service_data;
++};
++
++#define cluster_sk(__sk) ((struct cluster_sock *)__sk)
++
++/* We have one of these for each socket we use for communications */
++struct cl_comms_socket {
++ struct socket *sock;
++ int broadcast; /* This is a broadcast socket */
++ int recv_only; /* This is the unicast receive end of a
++ * multicast socket */
++ struct sockaddr_in6 saddr; /* Socket address, contains the sockaddr for
++ * the remote end(s) */
++ int addr_len; /* Length of above */
++ int number; /* Internal socket number, used to cycle around
++ * sockets in case of network errors */
++ struct file *file; /* file pointer for user-passed in sockets */
++
++ wait_queue_t wait;
++
++ /* The socket list */
++ struct list_head list;
++
++ /* On here when it has something to say */
++ struct list_head active_list;
++ unsigned long active;
++};
++
++/* A client socket. We keep a list of these so we can notify clients of cluster
++ * events */
++struct cl_client_socket {
++ struct socket *sock;
++ struct list_head list;
++};
++
++/* This structure is tacked onto the start of a cluster message packet for our
++ * own nefarious purposes. */
++struct cl_protheader {
++ unsigned char tgtport; /* Target port number */
++	unsigned char srcport;	/* Source (originating) port number */
++ unsigned short seq; /* Packet sequence number, little-endian */
++ unsigned short ack; /* Inline ACK */
++ unsigned short cluster; /* Our cluster number, little-endian */
++ unsigned int flags;
++ int srcid; /* Node ID of the sender */
++ int tgtid; /* Node ID of the target or 0 for multicast
++ * messages */
++};
++
++/* A cluster internal protocol message - port number 0 */
++struct cl_protmsg {
++ struct cl_protheader header;
++ unsigned char cmd;
++};
++
++/* A Cluster ACK message */
++struct cl_ackmsg {
++ struct cl_protheader header;
++ unsigned char cmd; /* Always CLUSTER_CMD_ACK */
++ unsigned char remport; /* Remote port number the original message was
++ * for */
++ unsigned char aflags; /* ACK flags 0=OK, 1=No listener */
++ unsigned char pad;
++};
++
++/* A Cluster LISTENREQ/LISTENRESP message */
++struct cl_listenmsg {
++ unsigned char cmd; /* CLUSTER_CMD_LISTENRESP/REQ */
++ unsigned char target_port; /* Port to probe */
++ unsigned char listening; /* Always 0 for LISTENREQ */
++ unsigned char pad;
++ unsigned short tag; /* PID of remote waiting process */
++};
++
++/* A Cluster PORTCLOSED message */
++struct cl_closemsg {
++ unsigned char cmd; /* CLUSTER_CMD_PORTCLOSED */
++ unsigned char port;
++};
++
++/* Structure of a newly dead node, passed from cnxman to kmembershipd */
++struct cl_new_dead_node {
++ struct list_head list;
++ struct cluster_node *node;
++};
++
++/* Subcommands for BARRIER message */
++#define BARRIER_REGISTER 1
++#define BARRIER_CHANGE 2
++#define BARRIER_WAIT 4
++#define BARRIER_COMPLETE 5
++
++/* A Cluster BARRIER message */
++struct cl_barriermsg {
++ unsigned char cmd; /* CLUSTER_CMD_BARRIER */
++ unsigned char subcmd; /* BARRIER sub command */
++ unsigned short pad;
++ unsigned int flags;
++ unsigned int nodes;
++ char name[MAX_BARRIER_NAME_LEN];
++};
++
++/* Membership services messages, the cl_protheader is added transparently */
++struct cl_mem_hello_msg {
++ unsigned char cmd;
++ unsigned char flags;
++ unsigned short members; /* Number of nodes in the cluster,
++ * little-endian */
++ unsigned int generation; /* Current cluster generation number */
++};
++
++struct cl_mem_endtrans_msg {
++ unsigned char cmd;
++ unsigned char pad1;
++ unsigned short pad2;
++ unsigned int quorum;
++ unsigned int total_votes;
++ unsigned int generation; /* Current cluster generation number */
++ unsigned int new_node_id; /* If reason is a new node joining */
++};
++
++/* ACK types for JOINACK message */
++#define JOINACK_TYPE_OK 1 /* You can join */
++#define JOINACK_TYPE_NAK 2 /* You can NOT join */
++#define JOINACK_TYPE_WAIT 3 /* Wait a bit longer - cluster is in transition
++ * already */
++
++struct cl_mem_joinack_msg {
++ unsigned char cmd;
++ unsigned char acktype;
++};
++
++/* This is used by JOINREQ message */
++struct cl_mem_join_msg {
++ unsigned char cmd;
++ unsigned char votes;
++ unsigned short num_addr; /* Number of addresses for this node */
++ unsigned int expected_votes;
++ unsigned int nodeid; /* node ID we want */
++ unsigned int major_version; /* Not backwards compatible */
++ unsigned int minor_version; /* Backwards compatible */
++ unsigned int patch_version; /* Backwards/forwards compatible */
++ unsigned int config_version;
++ unsigned int addr_len; /* length of node addresses */
++ char clustername[16];
++ /* Followed by <num_addr> addresses of `address_length` bytes and a
++ * NUL-terminated node name */
++};
++
++/* State transition start reasons: */
++#define TRANS_NEWNODE 1 /* A new node is joining the cluster */
++#define TRANS_REMNODE 2 /* a node has left the cluster */
++#define TRANS_ANOTHERREMNODE 3 /* A node left the cluster while we were in
++ * transition */
++#define TRANS_NEWMASTER 4 /* We have had an election and I am the new
++ * master */
++#define TRANS_CHECK 5 /* A consistency check was called for */
++#define TRANS_RESTART 6 /* Transition restarted because of a previous
++ * timeout */
++#define TRANS_DEADMASTER 7 /* The master died during transition and I have
++ * taken over */
++
++/* This is used to start a state transition */
++struct cl_mem_starttrans_msg {
++ unsigned char cmd;
++ unsigned char reason; /* Why a start transition was started - see
++ * above */
++ unsigned char flags;
++ unsigned char votes;
++ unsigned int expected_votes;
++ unsigned int generation; /* Incremented for each STARTTRANS sent
++ */
++ int nodeid; /* Node to be removed */
++ unsigned short num_addrs;
++ /* If reason == TRANS_NEWNODE: Followed by <num_addr> addresses of
++ * `address_length` bytes and a NUL-terminated node name */
++};
++
++struct cl_mem_startack_msg {
++ unsigned char cmd;
++ unsigned char reason;
++ unsigned short pad;
++ unsigned int generation;
++ unsigned int node_id; /* node_id we think new node should have */
++ unsigned int highest_node_id; /* highest node_id on this system */
++};
++
++/* Reconfigure a cluster parameter */
++struct cl_mem_reconfig_msg {
++ unsigned char cmd;
++ unsigned char param;
++ unsigned short pad;
++ unsigned int value;
++};
++
++/* Structure containing information about an outstanding listen request */
++struct cl_waiting_listen_request {
++ wait_queue_head_t waitq;
++ int result;
++ int waiting;
++ unsigned short tag;
++ int nodeid;
++ struct list_head list;
++};
++
++/* Messages from membership services */
++#define CLUSTER_MEM_JOINCONF 1
++#define CLUSTER_MEM_JOINREQ 2
++#define CLUSTER_MEM_LEAVE 3
++#define CLUSTER_MEM_HELLO 4
++#define CLUSTER_MEM_KILL 5
++#define CLUSTER_MEM_JOINACK 6
++#define CLUSTER_MEM_ENDTRANS 7
++#define CLUSTER_MEM_RECONFIG 8
++#define CLUSTER_MEM_MASTERVIEW 9
++#define CLUSTER_MEM_STARTTRANS 10
++#define CLUSTER_MEM_JOINREJ 11
++#define CLUSTER_MEM_VIEWACK 12
++#define CLUSTER_MEM_STARTACK 13
++#define CLUSTER_MEM_TRANSITION 14
++#define CLUSTER_MEM_NEWCLUSTER 15
++#define CLUSTER_MEM_CONFACK 16
++#define CLUSTER_MEM_NOMINATE 17
++
++/* Flags in the HELLO message */
++#define HELLO_FLAG_MASTER 1
++#define HELLO_FLAG_QUORATE 2
++
++/* Parameters for RECONFIG command */
++#define RECONFIG_PARAM_EXPECTED_VOTES 1
++#define RECONFIG_PARAM_NODE_VOTES 2
++#define RECONFIG_PARAM_CONFIG_VERSION 3
++
++/* Data associated with an outgoing socket */
++struct cl_socket {
++ struct file *file; /* The real file */
++ struct socket *socket; /* The real sock */
++ int num_nodes; /* On this link */
++ int retransmit_count;
++};
++
++/* There's one of these for each node in the cluster */
++struct cluster_node {
++ struct list_head list;
++ char *name; /* Node/host name of node */
++ struct list_head addr_list;
++ int us; /* This node is us */
++ unsigned int node_id; /* Unique node ID */
++ nodestate_t state;
++ unsigned short last_seq_recv;
++ unsigned short last_seq_acked;
++ unsigned short last_seq_sent;
++ unsigned int votes;
++ unsigned int expected_votes;
++ unsigned int leave_reason;
++ unsigned int incarnation; /* Incremented each time a node joins
++ * the cluster */
++ unsigned long last_hello; /* Jiffies */
++ struct timeval join_time;
++};
++
++/* This is how we keep a list of user processes that are listening for cluster
++ * membership events */
++struct notify_struct {
++ struct list_head list;
++ pid_t pid;
++ int signal;
++};
++
++/* This is how we keep a list of kernel callbacks that are registered for
++ * cluster membership events */
++struct kernel_notify_struct {
++ struct list_head list;
++ void (*callback) (kcl_callback_reason, long arg);
++};
++
++/* A message waiting to be sent */
++struct queued_message {
++ struct list_head list;
++
++ struct socket *socket;
++ struct sockaddr_cl addr;
++ int addr_len;
++ int msg_len;
++ unsigned char port;
++ unsigned int flags;
++ char msg_buffer[MAX_CLUSTER_MESSAGE];
++};
++
++/* A barrier */
++struct cl_barrier {
++ struct list_head list;
++
++ char name[MAX_BARRIER_NAME_LEN];
++ unsigned int flags;
++ enum { BARRIER_STATE_WAITING, BARRIER_STATE_INACTIVE,
++ BARRIER_STATE_COMPLETE } state;
++ unsigned int expected_nodes;
++ unsigned int registered_nodes;
++ atomic_t got_nodes;
++ atomic_t completed_nodes;
++ unsigned int inuse;
++ unsigned int waitsent;
++ unsigned int phase; /* Completion phase */
++ unsigned int endreason; /* Reason we were woken, usually 0 */
++ unsigned long timeout; /* In seconds */
+
-+ init_MUTEX(&start_thread_sem);
-+ init_MUTEX(&send_lock);
-+ init_MUTEX(&barrier_list_lock);
-+ init_MUTEX(&cluster_members_lock);
-+ init_MUTEX(&port_array_lock);
-+ init_MUTEX(&messages_list_lock);
-+ init_MUTEX(&listenreq_lock);
-+ init_MUTEX(&client_socket_lock);
-+ init_MUTEX(&new_dead_node_lock);
-+ init_MUTEX(&event_listener_lock);
-+ init_MUTEX(&kernel_listener_lock);
-+ init_MUTEX(&tempnode_lock);
-+ spin_lock_init(&active_socket_lock);
-+ init_timer(&ack_timer);
++ void (*callback) (char *name, int status);
++ wait_queue_head_t waitq;
++ struct semaphore lock; /* To synch with cnxman messages */
++ spinlock_t phase2_spinlock; /* Need to synchronise with timer
++ * interrupts */
++ struct timer_list timer;
++};
+
-+ INIT_LIST_HEAD(&event_listener_list);
-+ INIT_LIST_HEAD(&kernel_listener_list);
-+ INIT_LIST_HEAD(&socket_list);
-+ INIT_LIST_HEAD(&client_socket_list);
-+ INIT_LIST_HEAD(&active_socket_list);
-+ INIT_LIST_HEAD(&barrier_list);
-+ INIT_LIST_HEAD(&messages_list);
-+ INIT_LIST_HEAD(&listenreq_list);
-+ INIT_LIST_HEAD(&cluster_members_list);
-+ INIT_LIST_HEAD(&new_dead_node_list);
-+ INIT_LIST_HEAD(&tempnode_list);
++/* Cluster protocol commands sent to port 0 */
++#define CLUSTER_CMD_ACK 1
++#define CLUSTER_CMD_LISTENREQ 2
++#define CLUSTER_CMD_LISTENRESP 3
++#define CLUSTER_CMD_PORTCLOSED 4
++#define CLUSTER_CMD_BARRIER 5
+
-+ atomic_set(&cnxman_running, 0);
++extern struct cluster_node *find_node_by_addr(unsigned char *addr,
++ int addr_len);
++extern struct cluster_node *find_node_by_nodeid(unsigned int id);
++extern struct cluster_node *find_node_by_name(char *name);
++extern void set_quorate(int);
++extern void notify_kernel_listeners(kcl_callback_reason reason, long arg);
++extern void notify_listeners(void);
++extern void free_nodeid_array(void);
++extern int send_reconfigure(int param, unsigned int value);
++extern int calculate_quorum(int, int, int *);
++extern void recalculate_quorum(int);
++extern int send_leave(unsigned char);
++extern int get_quorum(void);
++extern void set_votes(int, int);
++extern void kcl_wait_for_all_acks(void);
++extern char *membership_state(char *, int);
++extern char *leave_string(int reason);
++extern void a_node_just_died(struct cluster_node *node);
++extern void check_barrier_returns(void);
++extern int in_transition(void);
++extern void get_local_addresses(struct cluster_node *node);
++extern int add_node_address(struct cluster_node *node, unsigned char *addr, int len);
++extern void create_proc_entries(void);
++extern void cleanup_proc_entries(void);
++extern unsigned int get_highest_nodeid(void);
++extern int allocate_nodeid_array(void);
++extern void queue_oob_skb(struct socket *sock, int cmd);
++extern int new_temp_nodeid(char *addr, int addrlen);
++extern int get_addr_from_temp_nodeid(int nodeid, char *addr, int *addrlen);
++extern void purge_temp_nodeids(void);
++extern inline char *print_addr(unsigned char *addr, int len, char *buf)
++{
++ int i;
++ int ptr = 0;
+
-+ sm_init();
++ for (i = 0; i < len; i++)
++ ptr += sprintf(buf + ptr, "%02x ", addr[i]);
+
-+ return 0;
++ return buf;
+}
+
-+static void __exit cluster_exit(void)
-+{
-+#ifdef CONFIG_PROC_FS
-+ cleanup_proc_entries();
-+#endif
++#define MAX_ADDR_PRINTED_LEN (address_length*3 + 1)
+
-+ sock_unregister(AF_CLUSTER);
-+ kmem_cache_destroy(cluster_sk_cachep);
-+}
++/* Debug enabling macros. Sorry about the C++ comments but they're easier to
++ * get rid of than C ones... */
+
-+module_init(cluster_init);
-+module_exit(cluster_exit);
++// #define DEBUG_MEMB
++// #define DEBUG_COMMS
++// #define DEBUG_BARRIER
+
-+EXPORT_SYMBOL(kcl_sendmsg);
-+EXPORT_SYMBOL(kcl_register_read_callback);
-+EXPORT_SYMBOL(kcl_add_callback);
-+EXPORT_SYMBOL(kcl_remove_callback);
-+EXPORT_SYMBOL(kcl_get_members);
-+EXPORT_SYMBOL(kcl_get_member_ids);
-+EXPORT_SYMBOL(kcl_get_all_members);
-+EXPORT_SYMBOL(kcl_is_quorate);
-+EXPORT_SYMBOL(kcl_get_node_by_addr);
-+EXPORT_SYMBOL(kcl_get_node_by_name);
-+EXPORT_SYMBOL(kcl_get_node_by_nodeid);
-+EXPORT_SYMBOL(kcl_get_node_addresses);
-+EXPORT_SYMBOL(kcl_addref_cluster);
-+EXPORT_SYMBOL(kcl_releaseref_cluster);
-+EXPORT_SYMBOL(kcl_cluster_name);
++/* Debug macros */
++#ifdef DEBUG_COMMS
++#define P_COMMS(fmt, args...) printk(KERN_DEBUG "cman comms: " fmt, ## args)
++#else
++#define P_COMMS(fmt, args...)
++#endif
+
-+EXPORT_SYMBOL(kcl_barrier_register);
-+EXPORT_SYMBOL(kcl_barrier_setattr);
-+EXPORT_SYMBOL(kcl_barrier_delete);
-+EXPORT_SYMBOL(kcl_barrier_wait);
-+EXPORT_SYMBOL(kcl_barrier_cancel);
++#ifdef DEBUG_BARRIER
++#define P_BARRIER(fmt, args...) printk(KERN_DEBUG "cman barrier: " fmt, ## args)
++#else
++#define P_BARRIER(fmt, args...)
++#endif
+
-+EXPORT_SYMBOL(kcl_register_quorum_device);
-+EXPORT_SYMBOL(kcl_unregister_quorum_device);
-+EXPORT_SYMBOL(kcl_quorum_device_available);
++#ifdef DEBUG_MEMB
++#define P_MEMB(fmt, args...) printk(KERN_DEBUG "cman memb: " fmt, ## args)
++#define C_MEMB(fmt, args...) printk(fmt, ## args)
++#else
++#define P_MEMB(fmt, args...)
++#define C_MEMB(fmt, args...)
++#endif
+
-+EXPORT_SYMBOL(kcl_register_service);
-+EXPORT_SYMBOL(kcl_unregister_service);
-+EXPORT_SYMBOL(kcl_join_service);
-+EXPORT_SYMBOL(kcl_leave_service);
-+EXPORT_SYMBOL(kcl_global_service_id);
-+EXPORT_SYMBOL(kcl_start_done);
-+EXPORT_SYMBOL(kcl_get_services);
-+EXPORT_SYMBOL(kcl_get_current_interface);
++#endif				/* __KERNEL__ */
+
-+/*
-+ * Overrides for Emacs so that we follow Linus's tabbing style.
-+ * Emacs will notice this stuff at the end of the file and automatically
-+ * adjust the settings for this buffer only. This must remain at the end
-+ * of the file.
-+ * ---------------------------------------------------------------------------
-+ * Local variables:
-+ * c-file-style: "linux"
-+ * End:
-+ */
++#endif
diff -urN linux-orig/cluster/cman/config.c linux-patched/cluster/cman/config.c
---- linux-orig/cluster/cman/config.c 1970-01-01 07:30:00.000000000 +0730
-+++ linux-patched/cluster/cman/config.c 2004-09-03 18:13:05.000000000 +0800
-@@ -0,0 +1,46 @@
+--- linux-orig/cluster/cman/config.c 1969-12-31 18:00:00.000000000 -0600
++++ linux-patched/cluster/cman/config.c 2004-10-22 11:04:52.690550732 -0500
+@@ -0,0 +1,49 @@
+/******************************************************************************
+*******************************************************************************
+**
+ * restarts before we die */
+#define DEFAULT_SM_DEBUG_SIZE 256 /* Size in bytes of SM debug buffer */
+
++#define DEFAULT_NEWCLUSTER_TIMEOUT 16 /* Time to send NEWCLUSTER messages */
++
+struct config_info cman_config = {
+ .joinwait_timeout = DEFAULT_JOIN_WAIT_TIME,
+ .joinconf_timeout = DEFAULT_JOINCONF_TIMER,
+ .transition_restarts = DEFAULT_TRANSITION_RESTARTS,
+ .max_nodes = DEFAULT_MAX_NODES,
+ .sm_debug_size = DEFAULT_SM_DEBUG_SIZE,
++ .newcluster_timeout = DEFAULT_NEWCLUSTER_TIMEOUT,
+};
diff -urN linux-orig/cluster/cman/config.h linux-patched/cluster/cman/config.h
---- linux-orig/cluster/cman/config.h 1970-01-01 07:30:00.000000000 +0730
-+++ linux-patched/cluster/cman/config.h 2004-09-03 18:13:05.000000000 +0800
-@@ -0,0 +1,31 @@
+--- linux-orig/cluster/cman/config.h 1969-12-31 18:00:00.000000000 -0600
++++ linux-patched/cluster/cman/config.h 2004-10-22 11:04:52.698550606 -0500
+@@ -0,0 +1,32 @@
+/******************************************************************************
+*******************************************************************************
+**
+ int transition_restarts;
+ int max_nodes;
+ int sm_debug_size;
++ int newcluster_timeout;
+};
+
+extern struct config_info cman_config;
+
+#endif /* __CONFIG_DOT_H__ */
diff -urN linux-orig/cluster/cman/kjoin.c linux-patched/cluster/cman/kjoin.c
---- linux-orig/cluster/cman/kjoin.c 1970-01-01 07:30:00.000000000 +0730
-+++ linux-patched/cluster/cman/kjoin.c 2004-09-03 18:13:05.000000000 +0800
+--- linux-orig/cluster/cman/kjoin.c 1969-12-31 18:00:00.000000000 -0600
++++ linux-patched/cluster/cman/kjoin.c 2004-10-22 11:04:52.704550511 -0500
@@ -0,0 +1,238 @@
+/******************************************************************************
+*******************************************************************************
+ * End:
+ */
diff -urN linux-orig/cluster/cman/membership.c linux-patched/cluster/cman/membership.c
---- linux-orig/cluster/cman/membership.c 1970-01-01 07:30:00.000000000 +0730
-+++ linux-patched/cluster/cman/membership.c 2004-09-03 18:13:05.000000000 +0800
-@@ -0,0 +1,3047 @@
+--- linux-orig/cluster/cman/membership.c 1969-12-31 18:00:00.000000000 -0600
++++ linux-patched/cluster/cman/membership.c 2004-10-22 11:04:52.727550147 -0500
+@@ -0,0 +1,3171 @@
+/******************************************************************************
+*******************************************************************************
+**
+/* Our node name, usually system_utsname.nodename, but can be overridden */
+char nodename[MAX_CLUSTER_MEMBER_NAME_LEN + 1];
+
++/* Node ID that we want. A default of zero means
++ * it will be allocated by the cluster join mechanism
++ */
++int wanted_nodeid;
++
+static spinlock_t members_by_nodeid_lock;
+static int sizeof_members_array; /* Can dynamically increase (vmalloc
+ * permitting) */
+static void join_or_form_cluster(void);
+static int do_timer_wakeup(void);
+static int start_transition(unsigned char reason, struct cluster_node *node);
++static uint32_t low32_of_ip(void);
+int send_leave(unsigned char);
+int send_reconfigure(int, unsigned int);
+
+#endif
+
+/* State of the node */
-+static enum { STARTING, JOINING, JOINWAIT, JOINACK, TRANSITION,
++static enum { STARTING, NEWCLUSTER, JOINING, JOINWAIT, JOINACK, TRANSITION,
+ TRANSITION_COMPLETE, MEMBER, REJECTED, LEFT_CLUSTER, MASTER
-+} node_state = STARTING;
++} node_state = LEFT_CLUSTER;
+
+/* Sub-state when we are MASTER */
+static enum { MASTER_START, MASTER_COLLECT, MASTER_CONFIRM,
+static int responses_expected;
+
+/* Current cluster generation number */
-+static int cluster_generation = 1;
++int cluster_generation = 1;
+
+/* When another node initiates a transtion then store it's pointer in here so
+ * we can check for other nodes trying to spoof us */
+ return;
+
+ node->node_id = nodeid;
-+ if (nodeid > sizeof_members_array) {
++ if (nodeid >= sizeof_members_array) {
+ int new_size = sizeof_members_array + MEMBER_INCREMENT_SIZE;
-+ struct cluster_node **new_array =
-+ vmalloc((new_size) * sizeof (struct cluster_node *));
++ struct cluster_node **new_array;
++
++ if (new_size < nodeid)
++ new_size = nodeid + MEMBER_INCREMENT_SIZE;
++
++ new_array = vmalloc((new_size) * sizeof (struct cluster_node *));
+ if (new_array) {
+ spin_lock(&members_by_nodeid_lock);
+ memcpy(new_array, members_by_nodeid,
+ sizeof_members_array *
+ sizeof (struct cluster_node *));
+ memset(&new_array[sizeof_members_array], 0,
-+ MEMBER_INCREMENT_SIZE *
++ (new_size - sizeof_members_array) *
+ sizeof (struct cluster_node *));
+ vfree(members_by_nodeid);
++
+ members_by_nodeid = new_array;
+ sizeof_members_array = new_size;
+ spin_unlock(&members_by_nodeid_lock);
+static int membership_kthread(void *unused)
+{
+ struct task_struct *tsk = current;
-+ struct socket *tmp_socket;
+ sigset_t tmpsig;
+
+ daemonize("cman_memb");
+
+ /* Got a JOINACK but no JOIN-CONF, start waiting for HELLO
+ * messages again */
-+ if (node_state == JOINACK
-+ && time_after(jiffies,
-+ join_time + cman_config.join_timeout * HZ)) {
++ if (node_state == JOINACK &&
++ time_after(jiffies,
++ join_time + cman_config.join_timeout * HZ)) {
+ P_MEMB
+ ("Waited a long time for a join-conf, going back to JOINWAIT state\n");
+ node_state = JOINWAIT;
+ joinwait_time = jiffies;
+ }
+
++ /* Have we had an ACK for our JOINREQ message ? */
++ if (node_state == JOINING &&
++ time_after(jiffies,
++ join_time + cman_config.join_timeout * HZ)) {
++ P_MEMB("didn't get JOINACK, going back to JOINWAIT\n");
++ node_state = JOINWAIT;
++ joinwait_time = jiffies;
++ }
++
+ /* Have we been in joinwait for too long... */
-+ if (node_state == JOINWAIT
-+ && time_after(jiffies, joinwait_time +
-+ cman_config.join_timeout * HZ)) {
++ if (node_state == JOINWAIT &&
++ time_after(jiffies,
++ joinwait_time + cman_config.joinwait_timeout * HZ)) {
+ printk(CMAN_NAME
+ ": Been in JOINWAIT for too long - giving up\n");
+ goto leave_cluster;
+ P_MEMB("closing down\n");
+ quit_threads = 1; /* force other thread to exit too */
+
-+ /* Close the socket, NULL the pointer first so it doesn't get used
-+ * by send_leave()
-+ */
-+ tmp_socket = mem_socket;
-+ mem_socket = NULL;
-+ sock_release(tmp_socket);
++ send_leave(us->leave_reason);
++ sock_release(mem_socket);
+ highest_nodeid = 0;
+ complete(&member_thread_comp);
+ return 0;
+ printk(KERN_INFO CMAN_NAME ": forming a new cluster\n");
+ node_state = MEMBER;
+ we_are_a_cluster_member = TRUE;
-+ us->node_id = 1;
+ us->state = NODESTATE_MEMBER;
-+ set_nodeid(us, 1);
++ if (wanted_nodeid)
++ set_nodeid(us, wanted_nodeid);
++ else
++ set_nodeid(us, 1);
+ recalculate_quorum(0);
+ sm_member_update(cluster_is_quorate);
+ send_hello();
+
+ printk(KERN_INFO CMAN_NAME
+ ": Waiting to join or form a Linux-cluster\n");
++
++ restart_joinwait:
+ join_time = 0;
+ start_time = jiffies;
+ joinwait_time = jiffies;
+ last_hello = 0;
-+ send_newcluster();
+
-+ /* Listen for a reply */
++ /* Listen for HELLO or NEWCLUSTER messages */
+ do {
+ DECLARE_WAITQUEUE(wait, current);
+ set_task_state(current, TASK_INTERRUPTIBLE);
+ while (time_before(jiffies, start_time + cman_config.joinwait_timeout * HZ) &&
+ node_state == STARTING);
+
-+ /* If we didn't hear any HELLO messages then form a new cluster */
+ if (node_state == STARTING) {
++ start_time = jiffies;
++ joinwait_time = jiffies;
++ node_state = NEWCLUSTER;
++ }
++
++ /* If we didn't hear any HELLO messages then start sending NEWCLUSTER messages */
++ while (time_before(jiffies, start_time + cman_config.newcluster_timeout * HZ) &&
++ node_state == NEWCLUSTER) {
++
++ DECLARE_WAITQUEUE(wait, current);
++
++ send_newcluster();
++
++ set_task_state(current, TASK_INTERRUPTIBLE);
++ add_wait_queue(mem_socket->sk->sk_sleep, &wait);
++
++ if (!skb_peek(&mem_socket->sk->sk_receive_queue))
++ schedule_timeout((cman_config.joinwait_timeout * HZ) /
++ 5);
++
++ set_task_state(current, TASK_RUNNING);
++ remove_wait_queue(mem_socket->sk->sk_sleep, &wait);
++
++ while (skb_peek(&mem_socket->sk->sk_receive_queue)) {
++ dispatch_messages(mem_socket);
++ }
++ /* Did we get a lower "NEWCLUSTER" message ? */
++ if (node_state == STARTING) {
++ P_MEMB("NEWCLUSTER: restarting joinwait\n");
++ goto restart_joinwait;
++ }
++
++ if (quit_threads)
++ node_state = LEFT_CLUSTER;
++
++ }
++
++
++ /* If we didn't hear any HELLO messages then form a new cluster */
++ if (node_state == NEWCLUSTER) {
+ form_cluster();
+ }
+ else
+ msg->cmd = CLUSTER_MEM_JOINREQ;
+ msg->votes = votes;
+ msg->expected_votes = cpu_to_le32(expected_votes);
-+ msg->major_version = cpu_to_le32(CNXMAN_MAJOR_VERSION);
-+ msg->minor_version = cpu_to_le32(CNXMAN_MINOR_VERSION);
-+ msg->patch_version = cpu_to_le32(CNXMAN_PATCH_VERSION);
++ msg->nodeid = cpu_to_le32(wanted_nodeid);
++ msg->major_version = cpu_to_le32(CNXMAN_MAJOR_VERSION);
++ msg->minor_version = cpu_to_le32(CNXMAN_MINOR_VERSION);
++ msg->patch_version = cpu_to_le32(CNXMAN_PATCH_VERSION);
+ msg->config_version = cpu_to_le32(config_version);
+ msg->addr_len = cpu_to_le32(address_length);
+ strcpy(msg->clustername, cluster_name);
+
+static int send_newcluster()
+{
-+ char buf[1];
++ char buf[5];
++ uint32_t lowip;
+
+ buf[0] = CLUSTER_MEM_NEWCLUSTER;
++ lowip = cpu_to_le32(low32_of_ip());
++ memcpy(&buf[1], &lowip, sizeof(lowip));
+
-+ return kcl_sendmsg(mem_socket, buf, 1, NULL, 0,
++ return kcl_sendmsg(mem_socket, buf, sizeof(uint32_t)+1,
++ NULL, 0,
+ MSG_NOACK);
+}
+
+ int status;
+
+ if (!mem_socket)
-+ return 0;
++ return 0;
+
+ saddr.scl_family = AF_CLUSTER;
+ saddr.scl_port = CLUSTER_PORT_MEMBERSHIP;
+ node = find_node_by_name(name);
+ if (node && node->state != NODESTATE_DEAD) {
+
-+ if ((node->state == NODESTATE_JOINING ||
-+ node->state == NODESTATE_REMOTEMEMBER))
++ if (node->state == NODESTATE_JOINING)
+ return +1;
+
+ printk(KERN_WARNING CMAN_NAME
+ (node = find_node_by_addr(addr, addrlen)) &&
+ node->state != NODESTATE_DEAD) {
+
-+ if ((node->state == NODESTATE_JOINING ||
-+ node->state == NODESTATE_REMOTEMEMBER))
++ if (node->state == NODESTATE_JOINING)
+ return +1;
+
+ printk(KERN_WARNING CMAN_NAME
+ && ++transition_restarts > cman_config.transition_restarts) {
+ printk(KERN_WARNING CMAN_NAME
+ ": too many transition restarts - will die\n");
-+ send_leave(CLUSTER_LEAVEFLAG_INCONSISTENT);
++ us->leave_reason = CLUSTER_LEAVEFLAG_INCONSISTENT;
+ node_state = LEFT_CLUSTER;
+ quit_threads = 1;
+ wake_up_process(membership_task);
+ list_for_each_safe(nodelist, temp, &cluster_members_list) {
+ node = list_entry(nodelist, struct cluster_node, list);
+
-+ if (node->state == NODESTATE_MEMBER) {
++ if (node->state == NODESTATE_MEMBER || node->state == NODESTATE_DEAD) {
+ unsigned int evotes;
+ unsigned int node_id;
+ unsigned short num_addrs = 0;
+ strcpy(&message[ptr], node->name);
+ ptr += len;
+
++ message[ptr++] = node->state;
++
+ /* Count the number of addresses this node has */
+ list_for_each(addrlist, &node->addr_list) {
+ num_addrs++;
+ list_for_each(addrlist, &node->addr_list) {
+
+ struct cluster_node_addr *nodeaddr =
-+ list_entry(addrlist,
-+ struct cluster_node_addr, list);
++ list_entry(addrlist,
++ struct cluster_node_addr, list);
+
+ memcpy(&message[ptr], nodeaddr->addr,
+ address_length);
+ message[1] = first_packet_flag;
+
+ up(&cluster_members_lock);
-+ status =
-+ kcl_sendmsg(mem_socket, message,
-+ last_node_start, saddr,
-+ saddr ? sizeof (struct sockaddr_cl) : 0,
-+ flags);
++ status = kcl_sendmsg(mem_socket, message,
++ last_node_start, saddr,
++ saddr ? sizeof (struct sockaddr_cl) : 0,
++ flags);
+
+ if (status < 0)
+ goto send_fail;
+ newnode->last_seq_acked = 0;
+ newnode->last_seq_sent = 0;
+ newnode->incarnation++;
++ do_gettimeofday(&newnode->join_time);
+ /* Don't overwrite the node ID */
+
+ if (state == NODESTATE_MEMBER) {
+ newnode->last_seq_acked = 0;
+ newnode->last_seq_sent = 0;
+ newnode->incarnation = 0;
++ do_gettimeofday(&newnode->join_time);
+ INIT_LIST_HEAD(&newnode->addr_list);
+ set_nodeid(newnode, node_id);
+
+ if (node->us) {
+ printk(KERN_INFO CMAN_NAME
+ ": killed by STARTTRANS or NOMINATE\n");
++ node_state = LEFT_CLUSTER;
+ quit_threads = 1;
+ wake_up_process(membership_task);
+ wake_up_interruptible(&cnxman_waitq);
+ /* We must leave the cluster as we are in a minority,
+ * the rest of them can fight it out amongst
+ * themselves. */
-+ send_leave(CLUSTER_LEAVEFLAG_INCONSISTENT);
-+
++ us->leave_reason = CLUSTER_LEAVEFLAG_INCONSISTENT;
+ agreeing_nodes = 0;
+ dissenting_nodes = 0;
+ kfree(node_opinion);
+
+ quorum = le32_to_cpu(endmsg->quorum);
+ set_quorate(le32_to_cpu(endmsg->total_votes));
++ highest_nodeid = get_highest_nodeid();
+
+ /* Tell any waiting barriers that we had a transition */
+ check_barrier_returns();
+ le32_to_cpu(startmsg->nodeid));
+
+ /* If the old master has died then remove it */
-+ node =
-+ find_node_by_nodeid(le32_to_cpu(startmsg->nodeid));
-+
-+ if (startmsg->reason == TRANS_DEADMASTER &&
-+ node && node->state == NODESTATE_MEMBER) {
-+ down(&cluster_members_lock);
-+ node->state = NODESTATE_DEAD;
-+ cluster_members--;
-+ up(&cluster_members_lock);
++ if (startmsg->reason == TRANS_DEADMASTER) {
++ remove_node(le32_to_cpu(startmsg->nodeid));
+ }
+
+ /* Store new master */
+ /* Another node has died (or been killed) */
+ if (startmsg->reason == TRANS_ANOTHERREMNODE) {
+ /* Remove new dead node */
-+ node =
-+ find_node_by_nodeid(le32_to_cpu(startmsg->nodeid));
-+ if (node && node->state == NODESTATE_MEMBER) {
-+ down(&cluster_members_lock);
-+ node->state = NODESTATE_DEAD;
-+ cluster_members--;
-+ up(&cluster_members_lock);
-+ }
++ remove_node(le32_to_cpu(startmsg->nodeid));
+ }
+ /* Restart the timer */
+ del_timer(&transition_timer);
+ joinwait_time = jiffies;
+ }
+
-+ return 0;
-+}
-+
-+/* Request to join the cluster. This makes us the master for this state
-+ * transition */
-+static int do_process_joinreq(struct msghdr *msg, int len)
-+{
-+ int status;
-+ static unsigned long last_joinreq = 0;
-+ static char last_name[MAX_CLUSTER_MEMBER_NAME_LEN];
-+ struct cl_mem_join_msg *joinmsg = msg->msg_iov->iov_base;
-+ struct cluster_node *node;
-+
-+ /* If we are in a state transition then tell the new node to wait a bit
-+ * longer */
-+ if (node_state != MEMBER) {
-+ if (node_state == MASTER || node_state == TRANSITION) {
-+ send_joinack(msg->msg_name, msg->msg_namelen,
-+ JOINACK_TYPE_WAIT);
-+ }
-+ return 0;
-+ }
-+
-+ /* Check version number */
++ return 0;
++}
++
++/* Check a JOINREQ message for validity,
++ return -1 if we can't let the node join our cluster */
++static int validate_joinmsg(struct cl_mem_join_msg *joinmsg, int len)
++{
++ struct cluster_node *node;
++
++ /* Check version number */
+ if (le32_to_cpu(joinmsg->major_version) == CNXMAN_MAJOR_VERSION) {
+ char *ptr = (char *) joinmsg;
+ char *name;
+ printk(KERN_WARNING CMAN_NAME
+ ": num_addr in JOIN-REQ message is rubbish: %d\n",
+ le16_to_cpu(joinmsg->num_addr));
-+ return 0;
++ return -1;
+ }
+
+ /* Check the cluster name matches */
+ printk(KERN_WARNING CMAN_NAME
+ ": attempt to join with cluster name '%s' refused\n",
+ joinmsg->clustername);
-+ send_joinack(msg->msg_name, msg->msg_namelen,
-+ JOINACK_TYPE_NAK);
-+ return 0;
++ return -1;
+ }
+
+ /* Check we are not exceeding the maximum number of nodes */
+ printk(KERN_WARNING CMAN_NAME
+ ": Join request from %s rejected, exceeds maximum number of nodes\n",
+ name);
-+ send_joinack(msg->msg_name, msg->msg_namelen,
-+ JOINACK_TYPE_NAK);
-+ return 0;
++ return -1;
+ }
+
+ /* Check that we don't exceed the two_node limit, if applicable */
+ if (two_node && cluster_members == 2) {
+ printk(KERN_WARNING CMAN_NAME ": Join request from %s "
+ "rejected, exceeds two node limit\n", name);
-+ send_joinack(msg->msg_name, msg->msg_namelen,
-+ JOINACK_TYPE_NAK);
-+ return 0;
++ return -1;
+ }
+
+ if (le32_to_cpu(joinmsg->config_version) != config_version) {
+ "rejected, config version local %u remote %u\n",
+ name, config_version,
+ le32_to_cpu(joinmsg->config_version));
-+ send_joinack(msg->msg_name, msg->msg_namelen,
-+ JOINACK_TYPE_NAK);
-+ return 0;
++ return -1;
++ }
++
++ /* Validate requested static node ID */
++ if (joinmsg->nodeid &&
++ (node = find_node_by_nodeid(le32_to_cpu(joinmsg->nodeid))) &&
++ (node->state != NODESTATE_DEAD ||
++ (strcmp(node->name, name)))) {
++ printk(KERN_WARNING CMAN_NAME ": Join request from %s "
++ "rejected, node ID %d already in use by %s\n",
++ name, node->node_id, node->name);
++ return -1;
++ }
++ if (joinmsg->nodeid &&
++ (node = find_node_by_name(name)) &&
++ (node->state != NODESTATE_DEAD ||
++ node->node_id != le32_to_cpu(joinmsg->nodeid))) {
++ printk(KERN_WARNING CMAN_NAME ": Join request from %s "
++ "rejected, wanted node %d but previously had %d\n",
++ name, le32_to_cpu(joinmsg->nodeid), node->node_id);
++ return -1;
+ }
+
-+ /* If these don't match then I don't know how the message
++ /* If these don't match then I don't know how the message
+ arrived! However, I can't take the chance */
+ if (le32_to_cpu(joinmsg->addr_len) != address_length) {
+ printk(KERN_WARNING CMAN_NAME ": Join request from %s "
+ "rejected, address length local: %u remote %u\n",
+ name, address_length,
+ le32_to_cpu(joinmsg->addr_len));
-+ send_joinack(msg->msg_name, msg->msg_namelen,
-+ JOINACK_TYPE_NAK);
-+ return 0;
-+ }
-+
-+ /* Duplicate checking: Because joining messages do not have
-+ * sequence numbers we may get as many JOINREQ messages as we
-+ * have interfaces. This bit of code here just checks for
-+ * JOINREQ messages that come in from the same node in a small
-+ * period of time and removes the duplicates */
-+ if (time_before(jiffies, last_joinreq + 10 * HZ)
-+ && strcmp(name, last_name) == 0) {
-+ return 0;
-+ }
-+
-+ /* Do we already know about this node? */
-+ status = check_duplicate_node(name, msg, len);
-+
-+ if (status < 0) {
-+ send_joinack(msg->msg_name, msg->msg_namelen,
-+ JOINACK_TYPE_NAK);
-+ return 0;
-+ }
-+
-+ /* OK, you can be in my gang */
-+ if (status == 0) {
-+ int i;
-+ struct sockaddr_cl *addr = msg->msg_name;
-+
-+ last_joinreq = jiffies;
-+ strcpy(last_name, name);
-+
-+ node =
-+ add_new_node(name, joinmsg->votes,
-+ le32_to_cpu(joinmsg->expected_votes),
-+ 0, NODESTATE_JOINING);
-+
-+ /* Add the node's addresses */
-+ if (list_empty(&node->addr_list)) {
-+ for (i = 0; i < le16_to_cpu(joinmsg->num_addr);
-+ i++) {
-+ add_node_address(node, ptr, address_length);
-+ ptr += address_length;
-+ }
-+ }
-+ send_joinack(msg->msg_name, msg->msg_namelen,
-+ JOINACK_TYPE_OK);
-+ joining_node = node;
-+ joining_temp_nodeid = addr->scl_nodeid;
-+
-+ /* Start the state transition */
-+ start_transition(TRANS_NEWNODE, node);
++ return -1;
+ }
+ }
+ else {
+ /* Version number mismatch, don't use any part of the message
+ * other than the version numbers as things may have moved */
-+ char buf[MAX_ADDR_PRINTED_LEN];
-+
+ printk(KERN_INFO CMAN_NAME
-+ ": Got join message from node running incompatible software. (us: %d.%d.%d, them: %d.%d.%d) addr: %s\n",
++ ": Got join message from node running incompatible software. (us: %d.%d.%d, them: %d.%d.%d)\n",
+ CNXMAN_MAJOR_VERSION, CNXMAN_MINOR_VERSION,
+ CNXMAN_PATCH_VERSION,
+ le32_to_cpu(joinmsg->major_version),
+ le32_to_cpu(joinmsg->minor_version),
-+ le32_to_cpu(joinmsg->patch_version),
-+ print_addr(msg->msg_name, msg->msg_namelen, buf));
++ le32_to_cpu(joinmsg->patch_version));
++ return -1;
++ }
++ return 0;
++}
++
++
++/* Request to join the cluster. This makes us the master for this state
++ * transition */
++static int do_process_joinreq(struct msghdr *msg, int len)
++{
++ static unsigned long last_joinreq = 0;
++ static char last_name[MAX_CLUSTER_MEMBER_NAME_LEN];
++ struct cl_mem_join_msg *joinmsg = msg->msg_iov->iov_base;
++ struct cluster_node *node;
++ char *ptr = (char *) joinmsg;
++ char *name;
++ int i;
++ struct sockaddr_cl *addr = msg->msg_name;
++
++ ptr += sizeof (*joinmsg);
++ name = ptr + le16_to_cpu(joinmsg->num_addr) * address_length;
+
++ /* If we are in a state transition then tell the new node to wait a bit
++ * longer */
++ if (node_state != MEMBER) {
++ if (node_state == MASTER || node_state == TRANSITION) {
++ send_joinack(msg->msg_name, msg->msg_namelen,
++ JOINACK_TYPE_WAIT);
++ }
++ return 0;
++ }
++
++ /* Reject application if message is invalid for any reason */
++ if (validate_joinmsg(joinmsg, len)) {
++ send_joinack(msg->msg_name, msg->msg_namelen,
++ JOINACK_TYPE_NAK);
++ return 0;
++ }
++
++ /* Do we already know about this node? */
++ if (check_duplicate_node(name, msg, len) < 0) {
+ send_joinack(msg->msg_name, msg->msg_namelen,
-+ JOINACK_TYPE_NAK);
++ JOINACK_TYPE_NAK);
++ return 0;
++ }
++
++ /* Duplicate checking: Because joining messages do not have
++ * sequence numbers we may get as many JOINREQ messages as we
++ * have interfaces. This bit of code here just checks for
++ * JOINREQ messages that come in from the same node in a small
++ * period of time and removes the duplicates */
++ if (time_before(jiffies, last_joinreq + 10 * HZ)
++ && strcmp(name, last_name) == 0) {
+ return 0;
+ }
+
++ /* OK, you can be in my gang */
++ last_joinreq = jiffies;
++ strcpy(last_name, name);
++
++ node = add_new_node(name, joinmsg->votes,
++ le32_to_cpu(joinmsg->expected_votes),
++ le32_to_cpu(joinmsg->nodeid),
++ NODESTATE_JOINING);
++
++ /* Add the node's addresses */
++ if (list_empty(&node->addr_list)) {
++ for (i = 0; i < le16_to_cpu(joinmsg->num_addr);
++ i++) {
++ add_node_address(node, ptr, address_length);
++ ptr += address_length;
++ }
++ }
++ send_joinack(msg->msg_name, msg->msg_namelen,
++ JOINACK_TYPE_OK);
++ joining_node = node;
++ joining_temp_nodeid = addr->scl_nodeid;
++
++ /* Start the state transition */
++ start_transition(TRANS_NEWNODE, node);
++
+ return 0;
+}
+
+ for (i=0; i<strlen(nodename); i++) {
+ value += nodename[i];
+ }
-+ return value & 0xF;
++ return (value & 0xF) + 1;
++}
++
++
++/* Return the low 32 bits of our IP address */
++static uint32_t low32_of_ip()
++{
++ struct cluster_node_addr *addr;
++ uint32_t lowip;
++
++ addr = list_entry(us->addr_list.next, struct cluster_node_addr, list);
++ memcpy(&lowip, addr->addr+address_length-sizeof(uint32_t), sizeof(uint32_t));
++ if (!lowip)
++		memcpy(&lowip, addr->addr + address_length - sizeof(uint32_t)*2, sizeof(uint32_t));
++
++ return lowip;
+}
+
+/* A new node has stated its intent to form a new cluster. we may have
+ P_MEMB("got NEWCLUSTER, backing down for %d seconds\n", node_hash());
+ start_time = jiffies + node_hash() * HZ;
+ }
++
++ if (node_state == NEWCLUSTER) {
++ uint32_t otherip;
++ char *newcmsg = (char *)msg->msg_iov->iov_base;
++
++ memcpy(&otherip, newcmsg+1, sizeof(otherip));
++ otherip = le32_to_cpu(otherip);
++ P_MEMB("got NEWCLUSTER, remote ip = %x, us = %x\n", otherip, low32_of_ip());
++ if (otherip < low32_of_ip())
++ node_state = STARTING;
++ }
++
+ if (node_state == MEMBER)
+ send_hello();
+
+
+ if (node->votes != newnode->votes ||
+ node->node_id != newnode->node_id ||
-+ node->state != NODESTATE_MEMBER) {
-+ C_MEMB
-+ (" - wrong info: votes=%d(exp: %d) id=%d(exp: %d) state = %d\n",
-+ node->votes, newnode->votes, node->node_id,
-+ newnode->node_id, node->state);
++ node->state != newnode->state) {
++ C_MEMB(" - wrong info: votes=%d(exp: %d) id=%d(exp: %d) state = %d\n",
++ node->votes, newnode->votes, node->node_id,
++ newnode->node_id, node->state);
+ return -1;
+ }
+ C_MEMB(" - OK\n");
+
+ if ((newnode =
+ add_new_node(node->name, node->votes, node->expected_votes,
-+ node->node_id, NODESTATE_MEMBER)) == NULL) {
++ node->node_id, node->state)) == NULL) {
+ P_MEMB("Error adding node\n");
+ return -1;
+ }
+ nodename[namelen] = '\0';
+ ptr += namelen;
+
++ node.state = buf[ptr++];
++
+ memcpy(&num_addr, &buf[ptr], sizeof (short));
+ num_addr = le16_to_cpu(num_addr);
+ ptr += sizeof (short);
+ /* Call the callback routine */
+ if (routine(&node, addrs, num_addr) < 0)
+ return -1;
-+ num_nodes++;
++
++ /* Return the number of MEMBER nodes */
++ if (node.state == NODESTATE_MEMBER)
++ num_nodes++;
+ }
+ return num_nodes;
+}
+ if (unpack_nodes(message + 2, len - 2, add_node) < 0) {
+ printk(CMAN_NAME
+		       ": Error processing joinconf message - giving up on cluster join\n");
-+ send_leave(CLUSTER_LEAVEFLAG_PANIC);
++ us->leave_reason = CLUSTER_LEAVEFLAG_PANIC;
++ node_state = LEFT_CLUSTER;
+ return -1;
+ }
+
+ if (message[1] & 1)
+ num_nodes = 0;
+
-+ num_nodes +=
-+ unpack_nodes(msg->msg_iov->iov_base + 2, len - 2, check_node);
++ num_nodes += unpack_nodes(msg->msg_iov->iov_base + 2,
++ len - 2, check_node);
+
+ /* Last message, check the count and reply */
+ if (message[1] & 2) {
+
+ if (node->state != NODESTATE_DEAD) {
+ printk(KERN_INFO CMAN_NAME
-+ ": Node %s is leaving the cluster, reason %d\n",
-+ node->name, reason);
++ ": Node %s is leaving the cluster, %s\n",
++ node->name, leave_string(reason));
+
+ node->leave_reason = reason;
+ }
+
+ /* We are starting up. Send a join message to the node whose HELLO we
+ * just received */
-+ if (node_state == STARTING || node_state == JOINWAIT) {
++ if (node_state == STARTING || node_state == JOINWAIT ||
++ node_state == JOINING || node_state == NEWCLUSTER) {
+ struct sockaddr_cl *addr = msg->msg_name;
+
+ printk(KERN_INFO CMAN_NAME ": sending membership request\n");
+ case STARTING:
+ strncpy(buf, "Starting", buflen);
+ break;
++ case NEWCLUSTER:
++ strncpy(buf, "New-Cluster?", buflen);
++ break;
+ case JOINING:
+ strncpy(buf, "Joining", buflen);
+ break;
+ strncpy(buf, "Rejected", buflen);
+ break;
+ case LEFT_CLUSTER:
-+ strncpy(buf, "Left-Cluster", buflen);
++ strncpy(buf, "Not-in-Cluster", buflen);
+ break;
+ case TRANSITION_COMPLETE:
+ strncpy(buf, "Transition-Complete", buflen);
+ return buf;
+}
+
++char *leave_string(int reason)
++{
++ switch (reason)
++ {
++ case CLUSTER_LEAVEFLAG_DOWN:
++ return "Shutdown";
++ case CLUSTER_LEAVEFLAG_KILLED:
++ return "Killed by another node";
++ case CLUSTER_LEAVEFLAG_PANIC:
++ return "Panic";
++ case CLUSTER_LEAVEFLAG_REMOVED:
++ return "Removed";
++ case CLUSTER_LEAVEFLAG_REJECTED:
++ return "Membership rejected";
++ default:
++ return "Don't know why";
++ }
++}
++
+#ifdef DEBUG_MEMB
+static char *msgname(int msg)
+{
+ * End:
+ */
diff -urN linux-orig/cluster/cman/proc.c linux-patched/cluster/cman/proc.c
---- linux-orig/cluster/cman/proc.c 1970-01-01 07:30:00.000000000 +0730
-+++ linux-patched/cluster/cman/proc.c 2004-09-03 18:13:05.000000000 +0800
-@@ -0,0 +1,371 @@
+--- linux-orig/cluster/cman/proc.c 1969-12-31 18:00:00.000000000 -0600
++++ linux-patched/cluster/cman/proc.c 2004-10-22 11:04:52.735550020 -0500
+@@ -0,0 +1,372 @@
+/******************************************************************************
+*******************************************************************************
+**
+ .name = "sm_debug_size",
+ .value = &cman_config.sm_debug_size,
+ },
++ {
++ .name = "newcluster_timeout",
++ .value = &cman_config.newcluster_timeout,
++ },
+};
+
+
+ int c = 0;
+ char node_buf[MAX_CLUSTER_MEMBER_NAME_LEN];
+
-+ if (!we_are_a_cluster_member) {
-+ c += sprintf(b+c, "Not a cluster member. State: %s\n",
-+ membership_state(node_buf,
-+ sizeof (node_buf)));
++ c += sprintf(b+c,
++ "Version: %d.%d.%d\n",
++ CNXMAN_MAJOR_VERSION, CNXMAN_MINOR_VERSION,
++ CNXMAN_PATCH_VERSION);
++
++ c += sprintf(b+c,
++ "Config version: %d\nCluster name: %s\nCluster ID: %d\nMembership state: %s\n",
++ config_version,
++ cluster_name, cluster_id,
++ membership_state(node_buf, sizeof (node_buf)));
++
++ if (!we_are_a_cluster_member)
+ return c;
-+ }
+
+ /* Total the votes */
+ down(&cluster_members_lock);
+ total_votes += quorum_device->votes;
+
+ c += sprintf(b+c,
-+ "Version: %d.%d.%d\nConfig version: %d\nCluster name: %s\nCluster ID: %d\nMembership state: %s\n",
-+ CNXMAN_MAJOR_VERSION, CNXMAN_MINOR_VERSION,
-+ CNXMAN_PATCH_VERSION,
-+ config_version,
-+ cluster_name, cluster_id,
-+ membership_state(node_buf, sizeof (node_buf)));
-+ c += sprintf(b+c,
+ "Nodes: %d\nExpected_votes: %d\nTotal_votes: %d\nQuorum: %d %s\n",
+ cluster_members, max_expected, total_votes,
+ get_quorum(),
+
+ /* Print the header */
+ if (*pos == 0) {
-+ seq_printf(m,
-+ "Node Votes Exp Sts Name\n");
-+ return csi;
++ seq_printf(m, "Node Votes Exp Sts Name\n");
+ }
+ return csi;
+}
+ case NODESTATE_JOINING:
+ state = 'J';
+ break;
-+ case NODESTATE_REMOTEMEMBER:
-+ state = 'R';
-+ break;
+ case NODESTATE_DEAD:
+ state = 'X';
+ break;
+ }
-+ seq_printf(m, " %3d %3d %3d %c %s\n",
++ seq_printf(m, "%4d %3d %3d %c %s\n",
+ node->node_id,
+ node->votes,
+ node->expected_votes,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release,
++ .owner = THIS_MODULE,
+};
+
+static struct file_operations service_fops = {
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release,
++ .owner = THIS_MODULE,
+};
+
+static int cman_config_read_proc(char *page, char **start, off_t off, int count,
+
+ procentry = create_proc_entry("cluster/status", S_IRUGO, NULL);
+ if (procentry)
-+ procentry->get_info = proc_cluster_status;
-+
-+ procentry = create_proc_entry("cluster/services", S_IRUGO, NULL);
-+ if (procentry)
-+ procentry->proc_fops = &service_fops;
-+
-+ /* Config entries */
-+ proc_cman_config = proc_mkdir("cluster/config/cman", 0);
-+ if (!proc_cman_config)
-+ return;
-+
-+ for (i=0; i<sizeof(config_proc)/sizeof(struct config_proc_info); i++) {
-+ procentry = create_proc_entry(config_proc[i].name, 0660,
-+ proc_cman_config);
-+ if (procentry) {
-+ procentry->data = &config_proc[i];
-+ procentry->write_proc = cman_config_write_proc;
-+ procentry->read_proc = cman_config_read_proc;
-+ }
-+ }
-+
-+ procentry = create_proc_entry("cluster/sm_debug", S_IRUGO, NULL);
-+ if (procentry)
-+ procentry->get_info = sm_debug_info;
-+}
-+
-+void cleanup_proc_entries(void)
-+{
-+ int i, config_count;
-+
-+ remove_proc_entry("cluster/sm_debug", NULL);
-+
-+ config_count = sizeof(config_proc) / sizeof(struct config_proc_info);
-+
-+ if (proc_cman_config) {
-+ for (i=0; i<config_count; i++)
-+ remove_proc_entry(config_proc[i].name, proc_cman_config);
-+ }
-+ remove_proc_entry("cluster/config/cman", NULL);
-+ remove_proc_entry("cluster/config", NULL);
-+
-+ remove_proc_entry("cluster/nodes", NULL);
-+ remove_proc_entry("cluster/status", NULL);
-+ remove_proc_entry("cluster/services", NULL);
-+ remove_proc_entry("cluster/config", NULL);
-+ remove_proc_entry("cluster", NULL);
-+}
-diff -urN linux-orig/cluster/cman/sm.h linux-patched/cluster/cman/sm.h
---- linux-orig/cluster/cman/sm.h 1970-01-01 07:30:00.000000000 +0730
-+++ linux-patched/cluster/cman/sm.h 2004-09-03 18:13:05.000000000 +0800
-@@ -0,0 +1,108 @@
-+/******************************************************************************
-+*******************************************************************************
-+**
-+** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
-+** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
-+**
-+** This copyrighted material is made available to anyone wishing to use,
-+** modify, copy, or redistribute it subject to the terms and conditions
-+** of the GNU General Public License v.2.
-+**
-+*******************************************************************************
-+******************************************************************************/
-+
-+#ifndef __SM_DOT_H__
-+#define __SM_DOT_H__
-+
-+/*
-+ * This is the main header file to be included in each Service Manager source
-+ * file.
-+ */
-+
-+#include <linux/list.h>
-+#include <linux/socket.h>
-+#include <linux/kernel.h>
-+#include <linux/sched.h>
-+#include <linux/file.h>
-+#include <net/sock.h>
-+
-+#include <cluster/cnxman.h>
-+#include <cluster/service.h>
-+
-+#define SG_LEVELS (4)
-+
-+#include "sm_internal.h"
-+#include "sm_barrier.h"
-+#include "sm_control.h"
-+#include "sm_daemon.h"
-+#include "sm_joinleave.h"
-+#include "sm_membership.h"
-+#include "sm_message.h"
-+#include "sm_misc.h"
-+#include "sm_recover.h"
-+#include "sm_services.h"
-+
-+extern struct list_head sm_sg[SG_LEVELS];
-+extern struct semaphore sm_sglock;
-+
-+#ifndef TRUE
-+#define TRUE (1)
-+#endif
-+
-+#ifndef FALSE
-+#define FALSE (0)
-+#endif
-+
-+#define SM_ASSERT(x, do) \
-+{ \
-+ if (!(x)) \
-+ { \
-+ printk("\nSM: Assertion failed on line %d of file %s\n" \
-+ "SM: assertion: \"%s\"\n" \
-+ "SM: time = %lu\n", \
-+ __LINE__, __FILE__, #x, jiffies); \
-+ {do} \
-+ printk("\n"); \
-+ panic("SM: Record message above and reboot.\n"); \
-+ } \
-+}
-+
-+#define SM_RETRY(do_this, until_this) \
-+for (;;) \
-+{ \
-+ do { do_this; } while (0); \
-+ if (until_this) \
-+ break; \
-+ printk("SM: out of memory: %s, %u\n", __FILE__, __LINE__); \
-+ schedule();\
-+}
++ procentry->get_info = proc_cluster_status;
+
++ procentry = create_proc_entry("cluster/services", S_IRUGO, NULL);
++ if (procentry)
++ procentry->proc_fops = &service_fops;
+
-+#define log_print(fmt, args...) printk("SM: "fmt"\n", ##args)
++ /* Config entries */
++ proc_cman_config = proc_mkdir("cluster/config/cman", 0);
++ if (!proc_cman_config)
++ return;
+
-+#define log_error(sg, fmt, args...) \
-+ printk("SM: %08x " fmt "\n", (sg)->global_id , ##args)
++ for (i=0; i<sizeof(config_proc)/sizeof(struct config_proc_info); i++) {
++ procentry = create_proc_entry(config_proc[i].name, 0660,
++ proc_cman_config);
++ if (procentry) {
++ procentry->data = &config_proc[i];
++ procentry->write_proc = cman_config_write_proc;
++ procentry->read_proc = cman_config_read_proc;
++ }
++ }
+
++ procentry = create_proc_entry("cluster/sm_debug", S_IRUGO, NULL);
++ if (procentry)
++ procentry->get_info = sm_debug_info;
++}
+
-+#define SM_DEBUG_LOG
++void cleanup_proc_entries(void)
++{
++ int i, config_count;
+
-+#ifdef SM_DEBUG_CONSOLE
-+#define log_debug(sg, fmt, args...) \
-+ printk("SM: %08x " fmt "\n", (sg)->global_id , ##args)
-+#endif
++ remove_proc_entry("cluster/sm_debug", NULL);
+
-+#ifdef SM_DEBUG_LOG
-+#define log_debug(sg, fmt, args...) sm_debug_log(sg, fmt, ##args);
-+#endif
++ config_count = sizeof(config_proc) / sizeof(struct config_proc_info);
+
-+#ifdef SM_DEBUG_ALL
-+#define log_debug(sg, fmt, args...) \
-+do \
-+{ \
-+ printk("SM: %08x "fmt"\n", (sg)->global_id, ##args); \
-+ sm_debug_log(sg, fmt, ##args); \
-+} \
-+while (0)
-+#endif
++ if (proc_cman_config) {
++ for (i=0; i<config_count; i++)
++ remove_proc_entry(config_proc[i].name, proc_cman_config);
++ }
++ remove_proc_entry("cluster/config/cman", NULL);
++ remove_proc_entry("cluster/config", NULL);
+
-+#endif /* __SM_DOT_H__ */
++ remove_proc_entry("cluster/nodes", NULL);
++ remove_proc_entry("cluster/status", NULL);
++ remove_proc_entry("cluster/services", NULL);
++ remove_proc_entry("cluster/config", NULL);
++ remove_proc_entry("cluster", NULL);
++}
diff -urN linux-orig/cluster/cman/sm_barrier.c linux-patched/cluster/cman/sm_barrier.c
---- linux-orig/cluster/cman/sm_barrier.c 1970-01-01 07:30:00.000000000 +0730
-+++ linux-patched/cluster/cman/sm_barrier.c 2004-09-03 18:13:05.000000000 +0800
+--- linux-orig/cluster/cman/sm_barrier.c 1969-12-31 18:00:00.000000000 -0600
++++ linux-patched/cluster/cman/sm_barrier.c 2004-10-22 11:04:52.778549340 -0500
@@ -0,0 +1,232 @@
+/******************************************************************************
+*******************************************************************************
+ }
+}
diff -urN linux-orig/cluster/cman/sm_barrier.h linux-patched/cluster/cman/sm_barrier.h
---- linux-orig/cluster/cman/sm_barrier.h 1970-01-01 07:30:00.000000000 +0730
-+++ linux-patched/cluster/cman/sm_barrier.h 2004-09-03 18:13:05.000000000 +0800
+--- linux-orig/cluster/cman/sm_barrier.h 1969-12-31 18:00:00.000000000 -0600
++++ linux-patched/cluster/cman/sm_barrier.h 2004-10-22 11:04:52.782549276 -0500
@@ -0,0 +1,29 @@
+/******************************************************************************
+*******************************************************************************
+
+#endif
diff -urN linux-orig/cluster/cman/sm_control.c linux-patched/cluster/cman/sm_control.c
---- linux-orig/cluster/cman/sm_control.c 1970-01-01 07:30:00.000000000 +0730
-+++ linux-patched/cluster/cman/sm_control.c 2004-09-03 18:13:05.000000000 +0800
+--- linux-orig/cluster/cman/sm_control.c 1969-12-31 18:00:00.000000000 -0600
++++ linux-patched/cluster/cman/sm_control.c 2004-10-22 11:04:52.805548913 -0500
@@ -0,0 +1,156 @@
+/******************************************************************************
+*******************************************************************************
+ return error;
+}
diff -urN linux-orig/cluster/cman/sm_control.h linux-patched/cluster/cman/sm_control.h
---- linux-orig/cluster/cman/sm_control.h 1970-01-01 07:30:00.000000000 +0730
-+++ linux-patched/cluster/cman/sm_control.h 2004-09-03 18:13:05.000000000 +0800
+--- linux-orig/cluster/cman/sm_control.h 1969-12-31 18:00:00.000000000 -0600
++++ linux-patched/cluster/cman/sm_control.h 2004-10-22 11:04:52.812548802 -0500
@@ -0,0 +1,22 @@
+/******************************************************************************
+*******************************************************************************
+
+#endif
diff -urN linux-orig/cluster/cman/sm_daemon.c linux-patched/cluster/cman/sm_daemon.c
---- linux-orig/cluster/cman/sm_daemon.c 1970-01-01 07:30:00.000000000 +0730
-+++ linux-patched/cluster/cman/sm_daemon.c 2004-09-03 18:13:05.000000000 +0800
-@@ -0,0 +1,120 @@
+--- linux-orig/cluster/cman/sm_daemon.c 1969-12-31 18:00:00.000000000 -0600
++++ linux-patched/cluster/cman/sm_daemon.c 2004-10-22 11:04:52.819548691 -0500
+@@ -0,0 +1,100 @@
+/******************************************************************************
+*******************************************************************************
+**
+
+static unsigned long daemon_flags;
+static struct task_struct * daemon_task;
-+static struct completion daemon_done;
-+static wait_queue_head_t daemon_wait;
+extern int sm_quorum;
+
+void init_serviced(void)
+{
+ daemon_flags = 0;
+ daemon_task = NULL;
-+ init_completion(&daemon_done);
-+ init_waitqueue_head(&daemon_wait);
+}
+
+void wake_serviced(int do_flag)
+{
+ set_bit(do_flag, &daemon_flags);
-+ wake_up(&daemon_wait);
++ wake_up_process(daemon_task);
+}
+
+static inline int got_work(void)
+
+static int serviced(void *arg)
+{
-+ DECLARE_WAITQUEUE(wait, current);
-+
-+ daemonize("cman_serviced");
-+ daemon_task = current;
-+ set_bit(DO_RUN, &daemon_flags);
-+ complete(&daemon_done);
-+
-+ for (;;) {
++ while (!kthread_should_stop()) {
+ if (test_and_clear_bit(DO_START_RECOVERY, &daemon_flags))
+ process_nodechange();
+
+ process_membership();
+ }
+
-+ if (!test_bit(DO_RUN, &daemon_flags))
-+ break;
-+
-+ current->state = TASK_INTERRUPTIBLE;
-+ add_wait_queue(&daemon_wait, &wait);
-+ if (!got_work() && test_bit(DO_RUN, &daemon_flags))
++ set_current_state(TASK_INTERRUPTIBLE);
++ if (!got_work())
+ schedule();
-+ remove_wait_queue(&daemon_wait, &wait);
-+ current->state = TASK_RUNNING;
++ set_current_state(TASK_RUNNING);
+ }
+
-+ complete(&daemon_done);
+ return 0;
+}
+
+int start_serviced(void)
+{
-+ int error;
-+
-+ error = kernel_thread(serviced, NULL, 0);
-+ if (error < 0)
-+ goto out;
++ struct task_struct *p;
+
-+ error = 0;
-+ wait_for_completion(&daemon_done);
++ p = kthread_run(serviced, NULL, "cman_serviced");
++ if (IS_ERR(p)) {
++ printk("can't start cman_serviced daemon");
++ return (IS_ERR(p));
++ }
+
-+ out:
-+ return error;
++ daemon_task = p;
++ return 0;
+}
+
+void stop_serviced(void)
+{
-+ clear_bit(DO_RUN, &daemon_flags);
-+ wake_up(&daemon_wait);
-+ wait_for_completion(&daemon_done);
++ kthread_stop(daemon_task);
+}
diff -urN linux-orig/cluster/cman/sm_daemon.h linux-patched/cluster/cman/sm_daemon.h
---- linux-orig/cluster/cman/sm_daemon.h 1970-01-01 07:30:00.000000000 +0730
-+++ linux-patched/cluster/cman/sm_daemon.h 2004-09-03 18:13:05.000000000 +0800
+--- linux-orig/cluster/cman/sm_daemon.h 1969-12-31 18:00:00.000000000 -0600
++++ linux-patched/cluster/cman/sm_daemon.h 2004-10-22 11:04:52.827548564 -0500
@@ -0,0 +1,32 @@
+/******************************************************************************
+*******************************************************************************
+int start_serviced(void);
+
+#endif
+diff -urN linux-orig/cluster/cman/sm.h linux-patched/cluster/cman/sm.h
+--- linux-orig/cluster/cman/sm.h 1969-12-31 18:00:00.000000000 -0600
++++ linux-patched/cluster/cman/sm.h 2004-10-22 11:04:52.831548501 -0500
+@@ -0,0 +1,109 @@
++/******************************************************************************
++*******************************************************************************
++**
++** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
++** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
++**
++** This copyrighted material is made available to anyone wishing to use,
++** modify, copy, or redistribute it subject to the terms and conditions
++** of the GNU General Public License v.2.
++**
++*******************************************************************************
++******************************************************************************/
++
++#ifndef __SM_DOT_H__
++#define __SM_DOT_H__
++
++/*
++ * This is the main header file to be included in each Service Manager source
++ * file.
++ */
++
++#include <linux/list.h>
++#include <linux/socket.h>
++#include <linux/kernel.h>
++#include <linux/sched.h>
++#include <linux/file.h>
++#include <linux/kthread.h>
++#include <net/sock.h>
++
++#include <cluster/cnxman.h>
++#include <cluster/service.h>
++
++#define SG_LEVELS (4)
++
++#include "sm_internal.h"
++#include "sm_barrier.h"
++#include "sm_control.h"
++#include "sm_daemon.h"
++#include "sm_joinleave.h"
++#include "sm_membership.h"
++#include "sm_message.h"
++#include "sm_misc.h"
++#include "sm_recover.h"
++#include "sm_services.h"
++
++extern struct list_head sm_sg[SG_LEVELS];
++extern struct semaphore sm_sglock;
++
++#ifndef TRUE
++#define TRUE (1)
++#endif
++
++#ifndef FALSE
++#define FALSE (0)
++#endif
++
++#define SM_ASSERT(x, do) \
++{ \
++ if (!(x)) \
++ { \
++ printk("\nSM: Assertion failed on line %d of file %s\n" \
++ "SM: assertion: \"%s\"\n" \
++ "SM: time = %lu\n", \
++ __LINE__, __FILE__, #x, jiffies); \
++ {do} \
++ printk("\n"); \
++ panic("SM: Record message above and reboot.\n"); \
++ } \
++}
++
++#define SM_RETRY(do_this, until_this) \
++for (;;) \
++{ \
++ do { do_this; } while (0); \
++ if (until_this) \
++ break; \
++ printk("SM: out of memory: %s, %u\n", __FILE__, __LINE__); \
++ schedule();\
++}
++
++
++#define log_print(fmt, args...) printk("SM: "fmt"\n", ##args)
++
++#define log_error(sg, fmt, args...) \
++ printk("SM: %08x " fmt "\n", (sg)->global_id , ##args)
++
++
++#define SM_DEBUG_LOG
++
++#ifdef SM_DEBUG_CONSOLE
++#define log_debug(sg, fmt, args...) \
++ printk("SM: %08x " fmt "\n", (sg)->global_id , ##args)
++#endif
++
++#ifdef SM_DEBUG_LOG
++#define log_debug(sg, fmt, args...) sm_debug_log(sg, fmt, ##args);
++#endif
++
++#ifdef SM_DEBUG_ALL
++#define log_debug(sg, fmt, args...) \
++do \
++{ \
++ printk("SM: %08x "fmt"\n", (sg)->global_id, ##args); \
++ sm_debug_log(sg, fmt, ##args); \
++} \
++while (0)
++#endif
++
++#endif /* __SM_DOT_H__ */
diff -urN linux-orig/cluster/cman/sm_internal.h linux-patched/cluster/cman/sm_internal.h
---- linux-orig/cluster/cman/sm_internal.h 1970-01-01 07:30:00.000000000 +0730
-+++ linux-patched/cluster/cman/sm_internal.h 2004-09-03 18:13:05.000000000 +0800
+--- linux-orig/cluster/cman/sm_internal.h 1969-12-31 18:00:00.000000000 -0600
++++ linux-patched/cluster/cman/sm_internal.h 2004-10-22 11:04:52.841548343 -0500
@@ -0,0 +1,231 @@
+/******************************************************************************
+*******************************************************************************
+
+#endif /* __SM_INTERNAL_DOT_H__ */
diff -urN linux-orig/cluster/cman/sm_joinleave.c linux-patched/cluster/cman/sm_joinleave.c
---- linux-orig/cluster/cman/sm_joinleave.c 1970-01-01 07:30:00.000000000 +0730
-+++ linux-patched/cluster/cman/sm_joinleave.c 2004-09-03 18:13:05.000000000 +0800
+--- linux-orig/cluster/cman/sm_joinleave.c 1969-12-31 18:00:00.000000000 -0600
++++ linux-patched/cluster/cman/sm_joinleave.c 2004-10-22 11:04:52.866547947 -0500
@@ -0,0 +1,1291 @@
+/******************************************************************************
+*******************************************************************************
+ }
+}
diff -urN linux-orig/cluster/cman/sm_joinleave.h linux-patched/cluster/cman/sm_joinleave.h
---- linux-orig/cluster/cman/sm_joinleave.h 1970-01-01 07:30:00.000000000 +0730
-+++ linux-patched/cluster/cman/sm_joinleave.h 2004-09-03 18:13:05.000000000 +0800
+--- linux-orig/cluster/cman/sm_joinleave.h 1969-12-31 18:00:00.000000000 -0600
++++ linux-patched/cluster/cman/sm_joinleave.h 2004-10-22 11:04:52.873547836 -0500
@@ -0,0 +1,23 @@
+/******************************************************************************
+*******************************************************************************
+
+#endif
diff -urN linux-orig/cluster/cman/sm_membership.c linux-patched/cluster/cman/sm_membership.c
---- linux-orig/cluster/cman/sm_membership.c 1970-01-01 07:30:00.000000000 +0730
-+++ linux-patched/cluster/cman/sm_membership.c 2004-09-03 18:13:05.000000000 +0800
+--- linux-orig/cluster/cman/sm_membership.c 1969-12-31 18:00:00.000000000 -0600
++++ linux-patched/cluster/cman/sm_membership.c 2004-10-22 11:04:52.881547710 -0500
@@ -0,0 +1,696 @@
+/******************************************************************************
+*******************************************************************************
+ up(&sm_sglock);
+}
diff -urN linux-orig/cluster/cman/sm_membership.h linux-patched/cluster/cman/sm_membership.h
---- linux-orig/cluster/cman/sm_membership.h 1970-01-01 07:30:00.000000000 +0730
-+++ linux-patched/cluster/cman/sm_membership.h 2004-09-03 18:13:05.000000000 +0800
+--- linux-orig/cluster/cman/sm_membership.h 1969-12-31 18:00:00.000000000 -0600
++++ linux-patched/cluster/cman/sm_membership.h 2004-10-22 11:04:52.885547647 -0500
@@ -0,0 +1,20 @@
+/******************************************************************************
+*******************************************************************************
+
+#endif
diff -urN linux-orig/cluster/cman/sm_message.c linux-patched/cluster/cman/sm_message.c
---- linux-orig/cluster/cman/sm_message.c 1970-01-01 07:30:00.000000000 +0730
-+++ linux-patched/cluster/cman/sm_message.c 2004-09-03 18:13:05.000000000 +0800
+--- linux-orig/cluster/cman/sm_message.c 1969-12-31 18:00:00.000000000 -0600
++++ linux-patched/cluster/cman/sm_message.c 2004-10-22 11:04:52.931546919 -0500
@@ -0,0 +1,856 @@
+/******************************************************************************
+*******************************************************************************
+ return error;
+}
diff -urN linux-orig/cluster/cman/sm_message.h linux-patched/cluster/cman/sm_message.h
---- linux-orig/cluster/cman/sm_message.h 1970-01-01 07:30:00.000000000 +0730
-+++ linux-patched/cluster/cman/sm_message.h 2004-09-03 18:13:05.000000000 +0800
+--- linux-orig/cluster/cman/sm_message.h 1969-12-31 18:00:00.000000000 -0600
++++ linux-patched/cluster/cman/sm_message.h 2004-10-22 11:04:52.935546855 -0500
@@ -0,0 +1,34 @@
+/******************************************************************************
+*******************************************************************************
+
+#endif
diff -urN linux-orig/cluster/cman/sm_misc.c linux-patched/cluster/cman/sm_misc.c
---- linux-orig/cluster/cman/sm_misc.c 1970-01-01 07:30:00.000000000 +0730
-+++ linux-patched/cluster/cman/sm_misc.c 2004-09-03 18:13:05.000000000 +0800
+--- linux-orig/cluster/cman/sm_misc.c 1969-12-31 18:00:00.000000000 -0600
++++ linux-patched/cluster/cman/sm_misc.c 2004-10-22 11:04:52.967546349 -0500
@@ -0,0 +1,442 @@
+/******************************************************************************
+*******************************************************************************
+
+#endif
diff -urN linux-orig/cluster/cman/sm_misc.h linux-patched/cluster/cman/sm_misc.h
---- linux-orig/cluster/cman/sm_misc.h 1970-01-01 07:30:00.000000000 +0730
-+++ linux-patched/cluster/cman/sm_misc.h 2004-09-03 18:13:05.000000000 +0800
+--- linux-orig/cluster/cman/sm_misc.h 1969-12-31 18:00:00.000000000 -0600
++++ linux-patched/cluster/cman/sm_misc.h 2004-10-22 11:04:52.971546286 -0500
@@ -0,0 +1,29 @@
+/******************************************************************************
+*******************************************************************************
+
+#endif
diff -urN linux-orig/cluster/cman/sm_recover.c linux-patched/cluster/cman/sm_recover.c
---- linux-orig/cluster/cman/sm_recover.c 1970-01-01 07:30:00.000000000 +0730
-+++ linux-patched/cluster/cman/sm_recover.c 2004-09-03 18:13:05.000000000 +0800
+--- linux-orig/cluster/cman/sm_recover.c 1969-12-31 18:00:00.000000000 -0600
++++ linux-patched/cluster/cman/sm_recover.c 2004-10-22 11:04:52.987546033 -0500
@@ -0,0 +1,524 @@
+/******************************************************************************
+*******************************************************************************
+ wake_serviced(DO_RECOVERIES);
+}
diff -urN linux-orig/cluster/cman/sm_recover.h linux-patched/cluster/cman/sm_recover.h
---- linux-orig/cluster/cman/sm_recover.h 1970-01-01 07:30:00.000000000 +0730
-+++ linux-patched/cluster/cman/sm_recover.h 2004-09-03 18:13:05.000000000 +0800
+--- linux-orig/cluster/cman/sm_recover.h 1969-12-31 18:00:00.000000000 -0600
++++ linux-patched/cluster/cman/sm_recover.h 2004-10-22 11:04:52.997545874 -0500
@@ -0,0 +1,23 @@
+/******************************************************************************
+*******************************************************************************
+
+#endif
diff -urN linux-orig/cluster/cman/sm_services.c linux-patched/cluster/cman/sm_services.c
---- linux-orig/cluster/cman/sm_services.c 1970-01-01 07:30:00.000000000 +0730
-+++ linux-patched/cluster/cman/sm_services.c 2004-09-03 18:13:05.000000000 +0800
+--- linux-orig/cluster/cman/sm_services.c 1969-12-31 18:00:00.000000000 -0600
++++ linux-patched/cluster/cman/sm_services.c 2004-10-22 11:04:53.008545700 -0500
@@ -0,0 +1,426 @@
+/******************************************************************************
+*******************************************************************************
+struct list_head sm_sg[SG_LEVELS];
+struct semaphore sm_sglock;
diff -urN linux-orig/cluster/cman/sm_services.h linux-patched/cluster/cman/sm_services.h
---- linux-orig/cluster/cman/sm_services.h 1970-01-01 07:30:00.000000000 +0730
-+++ linux-patched/cluster/cman/sm_services.h 2004-09-03 18:13:05.000000000 +0800
+--- linux-orig/cluster/cman/sm_services.h 1969-12-31 18:00:00.000000000 -0600
++++ linux-patched/cluster/cman/sm_services.h 2004-10-22 11:04:53.025545431 -0500
@@ -0,0 +1,20 @@
+/******************************************************************************
+*******************************************************************************
+
+#endif
diff -urN linux-orig/cluster/cman/sm_user.c linux-patched/cluster/cman/sm_user.c
---- linux-orig/cluster/cman/sm_user.c 1970-01-01 07:30:00.000000000 +0730
-+++ linux-patched/cluster/cman/sm_user.c 2004-09-03 18:13:05.000000000 +0800
+--- linux-orig/cluster/cman/sm_user.c 1969-12-31 18:00:00.000000000 -0600
++++ linux-patched/cluster/cman/sm_user.c 2004-10-22 11:04:53.033545305 -0500
@@ -0,0 +1,569 @@
+/******************************************************************************
+*******************************************************************************
+ }
+}
diff -urN linux-orig/cluster/cman/sm_user.h linux-patched/cluster/cman/sm_user.h
---- linux-orig/cluster/cman/sm_user.h 1970-01-01 07:30:00.000000000 +0730
-+++ linux-patched/cluster/cman/sm_user.h 2004-09-03 18:13:05.000000000 +0800
+--- linux-orig/cluster/cman/sm_user.h 1969-12-31 18:00:00.000000000 -0600
++++ linux-patched/cluster/cman/sm_user.h 2004-10-22 11:04:53.040545194 -0500
@@ -0,0 +1,21 @@
+/******************************************************************************
+*******************************************************************************
+*******************************************************************************
+******************************************************************************/
+
-+#ifndef __SM_USER_DOT_H__
-+#define __SM_USER_DOT_H__
++#ifndef __SM_USER_DOT_H__
++#define __SM_USER_DOT_H__
++
++int sm_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg);
++void sm_sock_release(struct socket *sock);
++void sm_sock_bind(struct socket *sock);
++
++#endif
+diff -urN linux-orig/include/cluster/cnxman.h linux-patched/include/cluster/cnxman.h
+--- linux-orig/include/cluster/cnxman.h 1969-12-31 18:00:00.000000000 -0600
++++ linux-patched/include/cluster/cnxman.h 2004-10-22 11:04:52.648551397 -0500
+@@ -0,0 +1,87 @@
++/******************************************************************************
++*******************************************************************************
++**
++** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
++** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
++**
++** This copyrighted material is made available to anyone wishing to use,
++** modify, copy, or redistribute it subject to the terms and conditions
++** of the GNU General Public License v.2.
++**
++*******************************************************************************
++******************************************************************************/
++
++#ifndef __CNXMAN_H
++#define __CNXMAN_H
++
++#include "linux/in6.h"
++#include "cluster/cnxman-socket.h"
++
++/* In-kernel API */
++
++/* This is the structure, per node, returned from the membership request */
++struct kcl_cluster_node {
++ unsigned int size;
++ unsigned int node_id;
++ unsigned int us;
++ unsigned int leave_reason;
++ unsigned int incarnation;
++ nodestate_t state;
++ struct list_head list;
++ char name[MAX_CLUSTER_MEMBER_NAME_LEN];
++ unsigned char votes;
++};
++
++struct cluster_node_addr {
++ struct list_head list;
++ unsigned char addr[sizeof(struct sockaddr_in6)];/* A large sockaddr */
++ int addr_len;
++};
++
++
++/* Reasons for a kernel membership callback */
++typedef enum { CLUSTER_RECONFIG, DIED, LEAVING, NEWNODE } kcl_callback_reason;
++
++/* Kernel version of above, the void *sock is a struct socket */
++struct kcl_multicast_sock {
++ void *sock;
++ int number; /* Socket number, to match up recvonly & bcast
++ * sockets */
++};
++
++extern int kcl_sendmsg(struct socket *sock, void *buf, int size,
++ struct sockaddr_cl *caddr, int addr_len,
++ unsigned int flags);
++extern int kcl_register_read_callback(struct socket *sock,
++ int (*routine) (char *, int, char *, int,
++ unsigned int));
++extern int kcl_add_callback(void (*callback) (kcl_callback_reason, long));
++extern int kcl_remove_callback(void (*callback) (kcl_callback_reason, long));
++extern int kcl_get_members(struct list_head *list);
++extern int kcl_get_member_ids(uint32_t * idbuf, int size);
++extern int kcl_get_all_members(struct list_head *list);
++extern int kcl_get_node_by_addr(unsigned char *addr, int addr_len,
++ struct kcl_cluster_node *n);
++extern int kcl_get_node_by_name(unsigned char *name,
++ struct kcl_cluster_node *n);
++extern int kcl_get_node_by_nodeid(int nodeid, struct kcl_cluster_node *n);
++extern int kcl_is_quorate(void);
++extern int kcl_addref_cluster(void);
++extern int kcl_releaseref_cluster(void);
++extern int kcl_cluster_name(char **cname);
++extern int kcl_get_current_interface(void);
++extern struct list_head *kcl_get_node_addresses(int nodeid);
++
++extern int kcl_barrier_register(char *name, unsigned int flags,
++ unsigned int nodes);
++extern int kcl_barrier_setattr(char *name, unsigned int attr,
++ unsigned long arg);
++extern int kcl_barrier_delete(char *name);
++extern int kcl_barrier_wait(char *name);
++extern int kcl_barrier_cancel(char *name);
+
-+int sm_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg);
-+void sm_sock_release(struct socket *sock);
-+void sm_sock_bind(struct socket *sock);
++extern int kcl_register_quorum_device(char *name, int votes);
++extern int kcl_unregister_quorum_device(void);
++extern int kcl_quorum_device_available(int yesno);
+
+#endif
diff -urN linux-orig/include/cluster/cnxman-socket.h linux-patched/include/cluster/cnxman-socket.h
---- linux-orig/include/cluster/cnxman-socket.h 1970-01-01 07:30:00.000000000 +0730
-+++ linux-patched/include/cluster/cnxman-socket.h 2004-09-03 18:13:05.000000000 +0800
-@@ -0,0 +1,226 @@
+--- linux-orig/include/cluster/cnxman-socket.h 1969-12-31 18:00:00.000000000 -0600
++++ linux-patched/include/cluster/cnxman-socket.h 2004-10-22 11:04:52.682550859 -0500
+@@ -0,0 +1,227 @@
+/******************************************************************************
+*******************************************************************************
+**
+ */
+#define MSG_ALLINT 0x100000 /* Send out of all interfaces */
+#define MSG_REPLYEXP 0x200000 /* Reply is expected */
++#define MSG_BCASTSELF 0x400000 /* Broadcast message also gets sent to us */
+
-+typedef enum { NODESTATE_REMOTEMEMBER, NODESTATE_JOINING, NODESTATE_MEMBER,
++typedef enum { NODESTATE_JOINING=1, NODESTATE_MEMBER,
+ NODESTATE_DEAD } nodestate_t;
+
+
+#define BARRIER_SETATTR_TIMEOUT 6
+
+#endif
-diff -urN linux-orig/include/cluster/cnxman.h linux-patched/include/cluster/cnxman.h
---- linux-orig/include/cluster/cnxman.h 1970-01-01 07:30:00.000000000 +0730
-+++ linux-patched/include/cluster/cnxman.h 2004-09-03 18:13:05.000000000 +0800
-@@ -0,0 +1,87 @@
-+/******************************************************************************
-+*******************************************************************************
-+**
-+** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
-+** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
-+**
-+** This copyrighted material is made available to anyone wishing to use,
-+** modify, copy, or redistribute it subject to the terms and conditions
-+** of the GNU General Public License v.2.
-+**
-+*******************************************************************************
-+******************************************************************************/
-+
-+#ifndef __CNXMAN_H
-+#define __CNXMAN_H
-+
-+#include "linux/in6.h"
-+#include "cluster/cnxman-socket.h"
-+
-+/* In-kernel API */
-+
-+/* This is the structure, per node, returned from the membership request */
-+struct kcl_cluster_node {
-+ unsigned int size;
-+ unsigned int node_id;
-+ unsigned int us;
-+ unsigned int leave_reason;
-+ unsigned int incarnation;
-+ nodestate_t state;
-+ struct list_head list;
-+ char name[MAX_CLUSTER_MEMBER_NAME_LEN];
-+ unsigned char votes;
-+};
-+
-+struct cluster_node_addr {
-+ struct list_head list;
-+ unsigned char addr[sizeof(struct sockaddr_in6)];/* A large sockaddr */
-+ int addr_len;
-+};
-+
-+
-+/* Reasons for a kernel membership callback */
-+typedef enum { CLUSTER_RECONFIG, DIED, LEAVING, NEWNODE } kcl_callback_reason;
-+
-+/* Kernel version of above, the void *sock is a struct socket */
-+struct kcl_multicast_sock {
-+ void *sock;
-+ int number; /* Socket number, to match up recvonly & bcast
-+ * sockets */
-+};
-+
-+extern int kcl_sendmsg(struct socket *sock, void *buf, int size,
-+ struct sockaddr_cl *caddr, int addr_len,
-+ unsigned int flags);
-+extern int kcl_register_read_callback(struct socket *sock,
-+ int (*routine) (char *, int, char *, int,
-+ unsigned int));
-+extern int kcl_add_callback(void (*callback) (kcl_callback_reason, long));
-+extern int kcl_remove_callback(void (*callback) (kcl_callback_reason, long));
-+extern int kcl_get_members(struct list_head *list);
-+extern int kcl_get_member_ids(uint32_t * idbuf, int size);
-+extern int kcl_get_all_members(struct list_head *list);
-+extern int kcl_get_node_by_addr(unsigned char *addr, int addr_len,
-+ struct kcl_cluster_node *n);
-+extern int kcl_get_node_by_name(unsigned char *name,
-+ struct kcl_cluster_node *n);
-+extern int kcl_get_node_by_nodeid(int nodeid, struct kcl_cluster_node *n);
-+extern int kcl_is_quorate(void);
-+extern int kcl_addref_cluster(void);
-+extern int kcl_releaseref_cluster(void);
-+extern int kcl_cluster_name(char **cname);
-+extern int kcl_get_current_interface(void);
-+extern struct list_head *kcl_get_node_addresses(int nodeid);
-+
-+extern int kcl_barrier_register(char *name, unsigned int flags,
-+ unsigned int nodes);
-+extern int kcl_barrier_setattr(char *name, unsigned int attr,
-+ unsigned long arg);
-+extern int kcl_barrier_delete(char *name);
-+extern int kcl_barrier_wait(char *name);
-+extern int kcl_barrier_cancel(char *name);
-+
-+extern int kcl_register_quorum_device(char *name, int votes);
-+extern int kcl_unregister_quorum_device(void);
-+extern int kcl_quorum_device_available(int yesno);
-+
-+#endif
diff -urN linux-orig/include/cluster/service.h linux-patched/include/cluster/service.h
---- linux-orig/include/cluster/service.h 1970-01-01 07:30:00.000000000 +0730
-+++ linux-patched/include/cluster/service.h 2004-09-03 18:13:05.000000000 +0800
+--- linux-orig/include/cluster/service.h 1969-12-31 18:00:00.000000000 -0600
++++ linux-patched/include/cluster/service.h 2004-10-22 11:04:52.741549925 -0500
@@ -0,0 +1,102 @@
+/******************************************************************************
+*******************************************************************************
+void kcl_start_done(uint32_t local_id, int event_id);
+
+#endif
+diff -urN linux-orig/arch/alpha/Kconfig linux-orig2/arch/alpha/Kconfig
+--- linux-orig/arch/alpha/Kconfig 2004-10-18 16:55:37.000000000 -0500
++++ linux-orig2/arch/alpha/Kconfig 2004-10-22 11:29:33.507218717 -0500
+@@ -600,3 +600,4 @@
+
+ source "lib/Kconfig"
+
++source "cluster/Kconfig"
+diff -urN linux-orig/arch/arm/Kconfig linux-orig2/arch/arm/Kconfig
+--- linux-orig/arch/arm/Kconfig 2004-10-18 16:54:31.000000000 -0500
++++ linux-orig2/arch/arm/Kconfig 2004-10-22 11:30:56.358918506 -0500
+@@ -690,3 +690,5 @@
+ source "crypto/Kconfig"
+
+ source "lib/Kconfig"
++
++source "cluster/Kconfig"
+diff -urN linux-orig/arch/arm26/Kconfig linux-orig2/arch/arm26/Kconfig
+--- linux-orig/arch/arm26/Kconfig 2004-10-18 16:54:32.000000000 -0500
++++ linux-orig2/arch/arm26/Kconfig 2004-10-22 11:29:33.531218341 -0500
+@@ -222,3 +222,4 @@
+
+ source "lib/Kconfig"
+
++source "cluster/Kconfig"
+diff -urN linux-orig/arch/cris/Kconfig linux-orig2/arch/cris/Kconfig
+--- linux-orig/arch/cris/Kconfig 2004-10-18 16:55:07.000000000 -0500
++++ linux-orig2/arch/cris/Kconfig 2004-10-22 11:31:11.965673644 -0500
+@@ -174,3 +174,5 @@
+ source "crypto/Kconfig"
+
+ source "lib/Kconfig"
++
++source "cluster/Kconfig"
+diff -urN linux-orig/arch/i386/Kconfig linux-orig2/arch/i386/Kconfig
+--- linux-orig/arch/i386/Kconfig 2004-10-18 16:53:22.000000000 -0500
++++ linux-orig2/arch/i386/Kconfig 2004-10-22 11:29:33.533218309 -0500
+@@ -1194,6 +1194,8 @@
+
+ source "lib/Kconfig"
+
++source "cluster/Kconfig"
++
+ config X86_SMP
+ bool
+ depends on SMP && !X86_VOYAGER
+diff -urN linux-orig/arch/ia64/Kconfig linux-orig2/arch/ia64/Kconfig
+--- linux-orig/arch/ia64/Kconfig 2004-10-18 16:55:27.000000000 -0500
++++ linux-orig2/arch/ia64/Kconfig 2004-10-22 11:29:33.534218294 -0500
+@@ -390,3 +390,5 @@
+ source "security/Kconfig"
+
+ source "crypto/Kconfig"
++
++source "cluster/Kconfig"
+diff -urN linux-orig/arch/m68k/Kconfig linux-orig2/arch/m68k/Kconfig
+--- linux-orig/arch/m68k/Kconfig 2004-10-18 16:54:32.000000000 -0500
++++ linux-orig2/arch/m68k/Kconfig 2004-10-22 11:31:38.187262279 -0500
+@@ -655,3 +655,5 @@
+ source "crypto/Kconfig"
+
+ source "lib/Kconfig"
++
++source "cluster/Kconfig"
+diff -urN linux-orig/arch/mips/Kconfig linux-orig2/arch/mips/Kconfig
+--- linux-orig/arch/mips/Kconfig 2004-10-18 16:54:08.000000000 -0500
++++ linux-orig2/arch/mips/Kconfig 2004-10-22 11:29:33.541218184 -0500
+@@ -1563,3 +1563,5 @@
+ source "crypto/Kconfig"
+
+ source "lib/Kconfig"
++
++source "cluster/Kconfig"
+diff -urN linux-orig/arch/parisc/Kconfig linux-orig2/arch/parisc/Kconfig
+--- linux-orig/arch/parisc/Kconfig 2004-10-18 16:54:37.000000000 -0500
++++ linux-orig2/arch/parisc/Kconfig 2004-10-22 11:31:57.146964867 -0500
+@@ -195,3 +195,5 @@
+ source "crypto/Kconfig"
+
+ source "lib/Kconfig"
++
++source "cluster/Kconfig"
+diff -urN linux-orig/arch/ppc/Kconfig linux-orig2/arch/ppc/Kconfig
+--- linux-orig/arch/ppc/Kconfig 2004-10-18 16:55:29.000000000 -0500
++++ linux-orig2/arch/ppc/Kconfig 2004-10-22 11:29:33.550218043 -0500
+@@ -1231,3 +1231,5 @@
+ source "security/Kconfig"
+
+ source "crypto/Kconfig"
++
++source "cluster/Kconfig"
+diff -urN linux-orig/arch/ppc64/Kconfig linux-orig2/arch/ppc64/Kconfig
+--- linux-orig/arch/ppc64/Kconfig 2004-10-18 16:54:31.000000000 -0500
++++ linux-orig2/arch/ppc64/Kconfig 2004-10-22 11:32:11.150745212 -0500
+@@ -352,3 +352,5 @@
+ source "crypto/Kconfig"
+
+ source "lib/Kconfig"
++
++source "cluster/Kconfig"
+diff -urN linux-orig/arch/s390/Kconfig linux-orig2/arch/s390/Kconfig
+--- linux-orig/arch/s390/Kconfig 2004-10-18 16:53:51.000000000 -0500
++++ linux-orig2/arch/s390/Kconfig 2004-10-22 11:32:31.175431141 -0500
+@@ -466,3 +466,5 @@
+ source "crypto/Kconfig"
+
+ source "lib/Kconfig"
++
++source "cluster/Kconfig"
+diff -urN linux-orig/arch/sh/Kconfig linux-orig2/arch/sh/Kconfig
+--- linux-orig/arch/sh/Kconfig 2004-10-18 16:55:29.000000000 -0500
++++ linux-orig2/arch/sh/Kconfig 2004-10-22 11:32:47.169180310 -0500
+@@ -748,3 +748,5 @@
+ source "crypto/Kconfig"
+
+ source "lib/Kconfig"
++
++source "cluster/Kconfig"
+diff -urN linux-orig/arch/sparc/Kconfig linux-orig2/arch/sparc/Kconfig
+--- linux-orig/arch/sparc/Kconfig 2004-10-18 16:53:05.000000000 -0500
++++ linux-orig2/arch/sparc/Kconfig 2004-10-22 11:33:06.891871022 -0500
+@@ -386,3 +386,5 @@
+ source "crypto/Kconfig"
+
+ source "lib/Kconfig"
++
++source "cluster/Kconfig"
+diff -urN linux-orig/arch/sparc64/Kconfig linux-orig2/arch/sparc64/Kconfig
+--- linux-orig/arch/sparc64/Kconfig 2004-10-18 16:55:06.000000000 -0500
++++ linux-orig2/arch/sparc64/Kconfig 2004-10-22 11:33:19.290676599 -0500
+@@ -613,3 +613,5 @@
+ source "crypto/Kconfig"
+
+ source "lib/Kconfig"
++
++source "cluster/Kconfig"
+diff -urN linux-orig/arch/um/Kconfig linux-orig2/arch/um/Kconfig
+--- linux-orig/arch/um/Kconfig 2004-10-18 16:54:08.000000000 -0500
++++ linux-orig2/arch/um/Kconfig 2004-10-22 11:29:33.564217823 -0500
+@@ -225,6 +225,8 @@
+
+ source "lib/Kconfig"
+
++source "cluster/Kconfig"
++
+ menu "SCSI support"
+ depends on BROKEN
+
+diff -urN linux-orig/arch/x86_64/Kconfig linux-orig2/arch/x86_64/Kconfig
+--- linux-orig/arch/x86_64/Kconfig 2004-10-18 16:54:55.000000000 -0500
++++ linux-orig2/arch/x86_64/Kconfig 2004-10-22 11:33:37.130396876 -0500
+@@ -424,3 +424,5 @@
+ source "crypto/Kconfig"
+
+ source "lib/Kconfig"
++
++source "cluster/Kconfig"
+diff -urN linux-orig/cluster/cman/Makefile linux-orig2/cluster/cman/Makefile
+--- linux-orig/cluster/cman/Makefile 1969-12-31 18:00:00.000000000 -0600
++++ linux-orig2/cluster/cman/Makefile 2004-10-22 11:29:33.566217791 -0500
+@@ -0,0 +1,6 @@
++cman-objs := cnxman.o config.o membership.o proc.o\
++ sm_barrier.o sm_control.o sm_daemon.o sm_joinleave.o\
++ sm_membership.o sm_message.o sm_misc.o sm_recover.o sm_services.o \
++ sm_user.o
++
++obj-$(CONFIG_CLUSTER) := cman.o
+diff -urN linux-orig/cluster/Kconfig linux-orig2/cluster/Kconfig
+--- linux-orig/cluster/Kconfig 1969-12-31 18:00:00.000000000 -0600
++++ linux-orig2/cluster/Kconfig 2004-10-22 11:29:33.565217807 -0500
+@@ -0,0 +1,13 @@
++menu "Cluster Support"
++
++config CLUSTER
++ tristate "Cluster support"
++ ---help---
++ Enable clustering support. This is not the high-performance clustering
++ made famous by Beowulf. It is a high-availability cluster often using
++ shared storage.
++ The cluster manager is the heart(beat) of the cluster system. It is
++ needed by all the other components. It provides membership services
++ for those other subsystems.
++
++endmenu
+diff -urN linux-orig/cluster/Makefile linux-orig2/cluster/Makefile
+--- linux-orig/cluster/Makefile 1969-12-31 18:00:00.000000000 -0600
++++ linux-orig2/cluster/Makefile 2004-10-22 11:29:33.566217791 -0500
+@@ -0,0 +1,3 @@
++obj-y := nocluster.o
++
++obj-$(CONFIG_CLUSTER) += cman/
+diff -urN linux-orig/cluster/nocluster.c linux-orig2/cluster/nocluster.c
+--- linux-orig/cluster/nocluster.c 1969-12-31 18:00:00.000000000 -0600
++++ linux-orig2/cluster/nocluster.c 2004-10-22 11:29:33.567217776 -0500
+@@ -0,0 +1,20 @@
++/*
++ * cluster/nocluster.c
++ *
++ * Copied from net/nonet.c
++ * Dummy functions to allow us to configure cluster support entirely
++ * out of the kernel.
++ *
++ * Distributed under the terms of the GNU GPL version 2.
++ * Copyright (c) Matthew Wilcox 2003
++ */
++
++#include <linux/module.h>
++#include <linux/errno.h>
++#include <linux/fs.h>
++#include <linux/init.h>
++#include <linux/kernel.h>
++
++void __init nocluster_init(void)
++{
++}
+diff -urN linux-orig/Makefile linux-orig2/Makefile
+--- linux-orig/Makefile 2004-10-18 16:54:38.000000000 -0500
++++ linux-orig2/Makefile 2004-10-22 11:29:33.507218717 -0500
+@@ -445,7 +445,7 @@
+
+ # Objects we will link into vmlinux / subdirs we need to visit
+ init-y := init/
+-drivers-y := drivers/ sound/
++drivers-y := drivers/ sound/ cluster/
+ net-y := net/
+ libs-y := lib/
+ core-y := usr/
# Add lock harness to the build system.
-diff -urN -p linux-2.6.9-rc1-mm3/fs/Kconfig linux/fs/Kconfig
---- linux-2.6.9-rc1-mm3/fs/Kconfig 2004-09-03 13:48:43.462073082 -0500
-+++ linux/fs/Kconfig 2004-09-03 13:48:46.109467766 -0500
-@@ -1779,6 +1779,14 @@ config AFS_FS
+diff -urN -p linux-2.6.9/fs/Kconfig linux/fs/Kconfig
+--- linux-2.6.9/fs/Kconfig 2004-10-27 02:11:48.029120217 -0500
++++ linux/fs/Kconfig 2004-10-27 02:11:54.814210981 -0500
+@@ -1768,6 +1768,14 @@ config AFS_FS
config RXRPC
tristate
endmenu
menu "Partition Types"
-diff -urN -p linux-2.6.9-rc1-mm3/fs/Makefile linux/fs/Makefile
---- linux-2.6.9-rc1-mm3/fs/Makefile 2004-09-03 13:48:43.462073082 -0500
-+++ linux/fs/Makefile 2004-09-03 13:48:46.110467538 -0500
-@@ -93,3 +93,5 @@
+diff -urN -p linux-2.6.9/fs/Makefile linux/fs/Makefile
+--- linux-2.6.9/fs/Makefile 2004-10-27 02:11:48.029120217 -0500
++++ linux/fs/Makefile 2004-10-27 02:11:54.815210700 -0500
+@@ -93,3 +93,4 @@ obj-$(CONFIG_AFS_FS) += afs/
obj-$(CONFIG_BEFS_FS) += befs/
obj-$(CONFIG_HOSTFS) += hostfs/
obj-$(CONFIG_HPPFS) += hppfs/
+obj-$(CONFIG_LOCK_HARNESS) += gfs_locking/
-+obj-$(CONFIG_GFS_FS) += gfs/
-diff -urN -p linux-2.6.9-rc1-mm3/fs/gfs_locking/Makefile linux/fs/gfs_locking/Makefile
---- linux-2.6.9-rc1-mm3/fs/gfs_locking/Makefile 1969-12-31 18:00:00.000000000 -0600
-+++ linux/fs/gfs_locking/Makefile 2004-09-03 13:48:46.110467538 -0500
+diff -urN -p linux-2.6.9/fs/gfs_locking/Makefile linux/fs/gfs_locking/Makefile
+--- linux-2.6.9/fs/gfs_locking/Makefile 1969-12-31 18:00:00.000000000 -0600
++++ linux/fs/gfs_locking/Makefile 2004-10-27 02:11:54.815210700 -0500
@@ -0,0 +1,14 @@
+###############################################################################
+###############################################################################
+
+obj-$(CONFIG_LOCK_HARNESS) += lock_harness/
+
-diff -urN -p linux-2.6.9-rc1-mm3/fs/gfs_locking/lock_harness/Makefile linux/fs/gfs_locking/lock_harness/Makefile
---- linux-2.6.9-rc1-mm3/fs/gfs_locking/lock_harness/Makefile 1969-12-31 18:00:00.000000000 -0600
-+++ linux/fs/gfs_locking/lock_harness/Makefile 2004-09-03 13:48:46.110467538 -0500
+diff -urN -p linux-2.6.9/fs/gfs_locking/lock_harness/Makefile linux/fs/gfs_locking/lock_harness/Makefile
+--- linux-2.6.9/fs/gfs_locking/lock_harness/Makefile 1969-12-31 18:00:00.000000000 -0600
++++ linux/fs/gfs_locking/lock_harness/Makefile 2004-10-27 02:11:54.815210700 -0500
@@ -0,0 +1,16 @@
+###############################################################################
+###############################################################################
+lock_harness-y := main.o
+
# Add GFS to the build system.
-diff -urN -p linux-2.6.9-rc1-mm3/fs/Kconfig linux/fs/Kconfig
---- linux-2.6.9-rc1-mm3/fs/Kconfig 2004-09-03 13:48:49.783627367 -0500
-+++ linux/fs/Kconfig 2004-09-03 13:48:49.795624622 -0500
-@@ -285,13 +285,13 @@ config JFS_STATISTICS
+diff -urN -p linux-2.6.9/fs/Kconfig linux/fs/Kconfig
+--- linux-2.6.9/fs/Kconfig 2004-10-27 02:11:58.446188798 -0500
++++ linux/fs/Kconfig 2004-10-27 02:11:58.457185703 -0500
+@@ -283,13 +283,13 @@ config JFS_STATISTICS
to be made available to the user in the /proc/fs/jfs/ directory.
config FS_POSIX_ACL
-# Posix ACL utility routines (for now, only ext2/ext3/jfs/reiserfs)
-+# Posix ACL utility routines (for now, only ext2/ext3/jfs/reiserfs/gfs)
++# Posix ACL utility routines (for now, only ext2/ext3/jfs/reiserfs/GFS)
#
# NOTE: you can implement Posix ACLs without these helpers (XFS does).
# Never use this symbol for ifdefs.
#
bool
- depends on EXT2_FS_POSIX_ACL || EXT3_FS_POSIX_ACL || JFS_POSIX_ACL || REISERFS_FS_POSIX_ACL || NFSD_V4
-+ depends on EXT2_FS_POSIX_ACL || EXT3_FS_POSIX_ACL || JFS_POSIX_ACL || REISERFS_FS_POSIX_ACL || GFS_FS || NFSD_V4
++ depends on EXT2_FS_POSIX_ACL || EXT3_FS_POSIX_ACL || JFS_POSIX_ACL || REISERFS_FS_POSIX_ACL || NFSD_V4 || GFS_FS
default y
config XFS_FS
-@@ -1787,6 +1787,20 @@ config LOCK_HARNESS
+@@ -1776,6 +1776,20 @@ config LOCK_HARNESS
If you want to use GFS (a cluster filesystem) say Y here.
endmenu
menu "Partition Types"
-diff -urN -p linux-2.6.9-rc1-mm3/fs/gfs/Makefile linux/fs/gfs/Makefile
---- linux-2.6.9-rc1-mm3/fs/gfs/Makefile 1969-12-31 18:00:00.000000000 -0600
-+++ linux/fs/gfs/Makefile 2004-09-03 13:48:49.795624622 -0500
-@@ -0,0 +1,50 @@
+diff -urN -p linux-2.6.9/fs/Makefile linux/fs/Makefile
+--- linux-2.6.9/fs/Makefile 2004-10-27 02:11:58.446188798 -0500
++++ linux/fs/Makefile 2004-10-27 02:11:58.457185703 -0500
+@@ -94,3 +94,4 @@ obj-$(CONFIG_BEFS_FS) += befs/
+ obj-$(CONFIG_HOSTFS) += hostfs/
+ obj-$(CONFIG_HPPFS) += hppfs/
+ obj-$(CONFIG_LOCK_HARNESS) += gfs_locking/
++obj-$(CONFIG_GFS_FS) += gfs/
+diff -urN -p linux-2.6.9/fs/gfs/Makefile linux/fs/gfs/Makefile
+--- linux-2.6.9/fs/gfs/Makefile 1969-12-31 18:00:00.000000000 -0600
++++ linux/fs/gfs/Makefile 2004-10-27 02:11:58.458185422 -0500
+@@ -0,0 +1,51 @@
+###############################################################################
+###############################################################################
+##
+ daemon.o \
+ dio.o \
+ dir.o \
++ eaops.o \
+ eattr.o \
+ file.o \
+ glock.o \
+ glops.o \
+ inode.o \
+ ioctl.o \
-+ locking.o \
+ log.o \
+ lops.o \
+ lvb.o \
+ ops_super.o \
+ ops_vm.o \
+ page.o \
++ proc.o \
+ quota.o \
+ recovery.o \
+ rgrp.o \
+ util.o
+
# Add lock_nolock to the build system.
-diff -urN -p linux-2.6.9-rc1-mm3/fs/Kconfig linux/fs/Kconfig
---- linux-2.6.9-rc1-mm3/fs/Kconfig 2004-09-03 13:48:53.622748847 -0500
-+++ linux/fs/Kconfig 2004-09-03 13:48:53.633746330 -0500
-@@ -1801,6 +1801,12 @@ config GFS_FS
+diff -urN -p linux-2.6.9/fs/Kconfig linux/fs/Kconfig
+--- linux-2.6.9/fs/Kconfig 2004-10-27 02:12:02.208130033 -0500
++++ linux/fs/Kconfig 2004-10-27 02:12:02.219126938 -0500
+@@ -1790,6 +1790,12 @@ config GFS_FS
GFS is perfect consistency -- changes made to the filesystem on one
machine show up immediately on all other machines in the cluster.
endmenu
menu "Partition Types"
-diff -urN -p linux-2.6.9-rc1-mm3/fs/gfs_locking/Makefile linux/fs/gfs_locking/Makefile
---- linux-2.6.9-rc1-mm3/fs/gfs_locking/Makefile 2004-09-03 13:48:49.784627138 -0500
-+++ linux/fs/gfs_locking/Makefile 2004-09-03 13:48:53.634746101 -0500
+diff -urN -p linux-2.6.9/fs/gfs_locking/Makefile linux/fs/gfs_locking/Makefile
+--- linux-2.6.9/fs/gfs_locking/Makefile 2004-10-27 02:11:58.446188798 -0500
++++ linux/fs/gfs_locking/Makefile 2004-10-27 02:12:02.220126656 -0500
@@ -11,4 +11,5 @@
###############################################################################
obj-$(CONFIG_LOCK_HARNESS) += lock_harness/
+obj-$(CONFIG_LOCK_NOLOCK) += lock_nolock/
-diff -urN -p linux-2.6.9-rc1-mm3/fs/gfs_locking/lock_nolock/Makefile linux/fs/gfs_locking/lock_nolock/Makefile
---- linux-2.6.9-rc1-mm3/fs/gfs_locking/lock_nolock/Makefile 1969-12-31 18:00:00.000000000 -0600
-+++ linux/fs/gfs_locking/lock_nolock/Makefile 2004-09-03 13:48:53.634746101 -0500
+diff -urN -p linux-2.6.9/fs/gfs_locking/lock_nolock/Makefile linux/fs/gfs_locking/lock_nolock/Makefile
+--- linux-2.6.9/fs/gfs_locking/lock_nolock/Makefile 1969-12-31 18:00:00.000000000 -0600
++++ linux/fs/gfs_locking/lock_nolock/Makefile 2004-10-27 02:12:02.220126656 -0500
@@ -0,0 +1,16 @@
+###############################################################################
+###############################################################################
+lock_nolock-y := main.o
+
# Add lock_dlm to the build system.
-diff -urN -p linux-2.6.9-rc1-mm3/fs/Kconfig linux/fs/Kconfig
---- linux-2.6.9-rc1-mm3/fs/Kconfig 2004-09-03 13:48:57.245919384 -0500
-+++ linux/fs/Kconfig 2004-09-03 13:48:57.256916866 -0500
-@@ -1807,6 +1807,12 @@ config LOCK_NOLOCK
+diff -urN -p linux-2.6.9/fs/Kconfig linux/fs/Kconfig
+--- linux-2.6.9/fs/Kconfig 2004-10-27 02:12:05.867100243 -0500
++++ linux/fs/Kconfig 2004-10-27 02:12:05.877097429 -0500
+@@ -1796,6 +1796,12 @@ config LOCK_NOLOCK
help
A "fake" lock module that allows GFS to run as a local filesystem.
endmenu
menu "Partition Types"
-diff -urN -p linux-2.6.9-rc1-mm3/fs/gfs_locking/Makefile linux/fs/gfs_locking/Makefile
---- linux-2.6.9-rc1-mm3/fs/gfs_locking/Makefile 2004-09-03 13:48:57.245919384 -0500
-+++ linux/fs/gfs_locking/Makefile 2004-09-03 13:48:57.256916866 -0500
+diff -urN -p linux-2.6.9/fs/gfs_locking/Makefile linux/fs/gfs_locking/Makefile
+--- linux-2.6.9/fs/gfs_locking/Makefile 2004-10-27 02:12:05.867100243 -0500
++++ linux/fs/gfs_locking/Makefile 2004-10-27 02:12:05.878097148 -0500
@@ -12,4 +12,5 @@
obj-$(CONFIG_LOCK_HARNESS) += lock_harness/
obj-$(CONFIG_LOCK_NOLOCK) += lock_nolock/
+obj-$(CONFIG_LOCK_DLM) += lock_dlm/
-diff -urN -p linux-2.6.9-rc1-mm3/fs/gfs_locking/lock_dlm/Makefile linux/fs/gfs_locking/lock_dlm/Makefile
---- linux-2.6.9-rc1-mm3/fs/gfs_locking/lock_dlm/Makefile 1969-12-31 18:00:00.000000000 -0600
-+++ linux/fs/gfs_locking/lock_dlm/Makefile 2004-09-03 13:48:57.257916637 -0500
+diff -urN -p linux-2.6.9/fs/gfs_locking/lock_dlm/Makefile linux/fs/gfs_locking/lock_dlm/Makefile
+--- linux-2.6.9/fs/gfs_locking/lock_dlm/Makefile 1969-12-31 18:00:00.000000000 -0600
++++ linux/fs/gfs_locking/lock_dlm/Makefile 2004-10-27 02:12:05.878097148 -0500
@@ -0,0 +1,16 @@
+###############################################################################
+###############################################################################
+lock_dlm-y := main.o group.o lock.o mount.o thread.o plock.o
+
# Add lock_gulm to the build system.
-diff -urN -p linux-2.6.9-rc1-mm3/fs/Kconfig linux/fs/Kconfig
---- linux-2.6.9-rc1-mm3/fs/Kconfig 2004-09-03 13:49:00.962068274 -0500
-+++ linux/fs/Kconfig 2004-09-03 13:49:00.973065755 -0500
-@@ -1813,6 +1813,12 @@ config LOCK_DLM
+diff -urN -p linux-2.6.9/fs/Kconfig linux/fs/Kconfig
+--- linux-2.6.9/fs/Kconfig 2004-10-27 02:12:09.627042029 -0500
++++ linux/fs/Kconfig 2004-10-27 02:12:09.638038934 -0500
+@@ -1802,6 +1802,12 @@ config LOCK_DLM
help
A lock module that allows GFS to use a Distributed Lock Manager.
endmenu
menu "Partition Types"
-diff -urN -p linux-2.6.9-rc1-mm3/fs/gfs_locking/Makefile linux/fs/gfs_locking/Makefile
---- linux-2.6.9-rc1-mm3/fs/gfs_locking/Makefile 2004-09-03 13:49:00.962068274 -0500
-+++ linux/fs/gfs_locking/Makefile 2004-09-03 13:49:00.973065755 -0500
+diff -urN -p linux-2.6.9/fs/gfs_locking/Makefile linux/fs/gfs_locking/Makefile
+--- linux-2.6.9/fs/gfs_locking/Makefile 2004-10-27 02:12:09.628041748 -0500
++++ linux/fs/gfs_locking/Makefile 2004-10-27 02:12:09.639038653 -0500
@@ -13,4 +13,5 @@
obj-$(CONFIG_LOCK_HARNESS) += lock_harness/
obj-$(CONFIG_LOCK_NOLOCK) += lock_nolock/
obj-$(CONFIG_LOCK_DLM) += lock_dlm/
+obj-$(CONFIG_LOCK_GULM) += lock_gulm/
-diff -urN -p linux-2.6.9-rc1-mm3/fs/gfs_locking/lock_gulm/Makefile linux/fs/gfs_locking/lock_gulm/Makefile
---- linux-2.6.9-rc1-mm3/fs/gfs_locking/lock_gulm/Makefile 1969-12-31 18:00:00.000000000 -0600
-+++ linux/fs/gfs_locking/lock_gulm/Makefile 2004-09-03 13:49:00.973065755 -0500
-@@ -0,0 +1,32 @@
+diff -urN -p linux-2.6.9/fs/gfs_locking/lock_gulm/Makefile linux/fs/gfs_locking/lock_gulm/Makefile
+--- linux-2.6.9/fs/gfs_locking/lock_gulm/Makefile 1969-12-31 18:00:00.000000000 -0600
++++ linux/fs/gfs_locking/lock_gulm/Makefile 2004-10-27 02:12:09.639038653 -0500
+@@ -0,0 +1,31 @@
+###############################################################################
+###############################################################################
+##
+obj-$(CONFIG_LOCK_GULM) += lock_gulm.o
+
+lock_gulm-y := gulm_core.o \
++ gulm_firstlock.o \
+ gulm_fs.o \
+ gulm_jid.o \
++ gulm_lock_queue.o \
+ gulm_lt.o \
-+ gulm_procinfo.o \
++ gulm_main.o \
++ gulm_plock.o \
+ handler.o \
+ lg_core.o \
+ lg_lock.o \
+ lg_main.o \
-+ linux_gulm_main.o \
-+ load_info.o \
-+ util.o \
-+ utils_crc.o \
+ utils_tostr.o \
+ xdr_base.o \
+ xdr_io.o \
+ xdr_socket.o
+
-diff -urN linux-2.6.9-rc1-mm3/fs/gfs/acl.c linux/fs/gfs/acl.c
---- linux-2.6.9-rc1-mm3/fs/gfs/acl.c 1969-12-31 18:00:00.000000000 -0600
-+++ linux/fs/gfs/acl.c 2004-09-07 16:26:15.728555458 -0500
-@@ -0,0 +1,397 @@
+diff -urN linux-orig/fs/gfs/acl.c linux-patched/fs/gfs/acl.c
+--- linux-orig/fs/gfs/acl.c 1969-12-31 18:00:00.000000000 -0600
++++ linux-patched/fs/gfs/acl.c 2004-10-27 15:27:10.877631457 -0500
+@@ -0,0 +1,375 @@
+/******************************************************************************
+*******************************************************************************
+**
+#include <asm/semaphore.h>
+#include <linux/completion.h>
+#include <linux/buffer_head.h>
++#include <linux/posix_acl.h>
++#include <linux/posix_acl_xattr.h>
+#include <linux/xattr_acl.h>
+
+#include "gfs.h"
+#include "acl.h"
-+#include "dio.h"
+#include "eattr.h"
-+#include "glock.h"
-+#include "trans.h"
+#include "inode.h"
+
-+/*
-+ * Check to make sure that the acl is actually valid
-+ */
++/**
++ * gfs_acl_validate_set -
++ * @ip:
++ * @access:
++ * @er:
++ * @mode:
++ * @remove:
++ *
++ * Returns: errno
++ */
++
+int
-+gfs_validate_acl(struct gfs_inode *ip, const char *value, int size, int access)
++gfs_acl_validate_set(struct gfs_inode *ip, int access,
++ struct gfs_ea_request *er,
++ mode_t *mode, int *remove)
+{
-+ int err = 0;
-+ struct posix_acl *acl = NULL;
-+ struct gfs_sbd *sdp = ip->i_sbd;
++ struct posix_acl *acl;
++ int error;
+
-+ if ((current->fsuid != ip->i_di.di_uid) && !capable(CAP_FOWNER))
-+ return -EPERM;
-+ if (ip->i_di.di_type == GFS_FILE_LNK)
-+ return -EOPNOTSUPP;
-+ if (!access && ip->i_di.di_type != GFS_FILE_DIR)
-+ return -EACCES;
-+ if (!sdp->sd_args.ar_posixacls)
-+ return -EOPNOTSUPP;
++ error = gfs_acl_validate_remove(ip, access);
++ if (error)
++ return error;
+
-+ if (value) {
-+ acl = posix_acl_from_xattr(value, size);
-+ if (IS_ERR(acl))
-+ return PTR_ERR(acl);
-+ else if (acl) {
-+ err = posix_acl_valid(acl);
-+ posix_acl_release(acl);
-+ }
++ if (!er->er_data)
++ return -EINVAL;
++
++ acl = posix_acl_from_xattr(er->er_data, er->er_data_len);
++ if (IS_ERR(acl))
++ return PTR_ERR(acl);
++
++ error = posix_acl_valid(acl);
++ if (error) {
++ posix_acl_release(acl);
++ return error;
+ }
-+ return err;
-+}
+
-+void
-+gfs_acl_set_mode(struct gfs_inode *ip, struct posix_acl *acl)
-+{
-+ struct inode *inode;
-+ mode_t mode;
++ if (access) {
++ error = posix_acl_equiv_mode(acl, mode);
++ posix_acl_release(acl);
++ if (error < 0)
++ return error;
++ if (!error)
++ *remove = TRUE;
++ }
+
-+ inode = gfs_iget(ip, NO_CREATE);
-+ mode = inode->i_mode;
-+ posix_acl_equiv_mode(acl, &mode);
-+ inode->i_mode = mode;
-+ iput(inode);
-+ gfs_inode_attr_out(ip);
++ return 0;
+}
+
-+
+/**
-+ * gfs_replace_acl - replace the value of the ea to the value of the acl
++ * gfs_acl_validate_remove -
++ * @ip:
++ * @access:
+ *
-+ * NOTE: The new value must be the same size as the old one.
++ * Returns: errno
+ */
++
+int
-+gfs_replace_acl(struct inode *inode, struct posix_acl *acl, int access,
-+ struct gfs_ea_location location)
++gfs_acl_validate_remove(struct gfs_inode *ip, int access)
+{
-+ struct gfs_inode *ip = vn2ip(inode);
-+ struct gfs_easet_io req;
-+ int size;
-+ void *data;
-+ int error;
-+
-+ size = posix_acl_to_xattr(acl, NULL, 0);
-+ GFS_ASSERT(size == GFS_EA_DATA_LEN(location.ea),
-+ printk("new acl size = %d, ea size = %u\n", size,
-+ GFS_EA_DATA_LEN(location.ea)););
-+
-+ data = gmalloc(size);
-+
-+ posix_acl_to_xattr(acl, data, size);
-+
-+ req.es_data = data;
-+ req.es_name = (access) ? GFS_POSIX_ACL_ACCESS : GFS_POSIX_ACL_DEFAULT;
-+ req.es_data_len = size;
-+ req.es_name_len = (access) ? GFS_POSIX_ACL_ACCESS_LEN : GFS_POSIX_ACL_DEFAULT_LEN;
-+ req.es_cmd = GFS_EACMD_REPLACE;
-+ req.es_type = GFS_EATYPE_SYS;
-+
-+ error = replace_ea(ip->i_sbd, ip, location.ea, &req);
-+ if (!error)
-+ gfs_trans_add_bh(ip->i_gl, location.bh);
-+
-+ kfree(data);
++ if (!ip->i_sbd->sd_args.ar_posix_acls)
++ return -EOPNOTSUPP;
++ if (current->fsuid != ip->i_di.di_uid && !capable(CAP_FOWNER))
++ return -EPERM;
++ if (ip->i_di.di_type == GFS_FILE_LNK)
++ return -EOPNOTSUPP;
++ if (!access && ip->i_di.di_type != GFS_FILE_DIR)
++ return -EACCES;
+
-+ return error;
++ return 0;
+}
+
+/**
-+ * gfs_findacl - returns the requested posix acl
++ * gfs_acl_get -
++ * @ip:
++ * @access:
++ * @acl:
+ *
-+ * this function does not log the inode. It assumes that a lock is already
-+ * held on it.
++ * Returns: errno
+ */
++
+int
-+gfs_findacl(struct gfs_inode *ip, int access, struct posix_acl **acl_ptr,
-+ struct gfs_ea_location *location)
++gfs_acl_get(struct gfs_inode *ip, int access, struct posix_acl **acl)
+{
-+ struct gfs_sbd *sdp = ip->i_sbd;
-+ struct posix_acl *acl;
-+ uint32_t avail_size;
-+ void *data;
++ struct gfs_ea_request er;
++ struct gfs_ea_location el;
+ int error;
+
-+ avail_size = sdp->sd_sb.sb_bsize - sizeof(struct gfs_meta_header);
-+ *acl_ptr = NULL;
-+
+ if (!ip->i_di.di_eattr)
+ return 0;
+
-+ error = find_eattr(ip,
-+ (access) ? GFS_POSIX_ACL_ACCESS : GFS_POSIX_ACL_DEFAULT,
-+ (access) ? GFS_POSIX_ACL_ACCESS_LEN : GFS_POSIX_ACL_DEFAULT_LEN,
-+ GFS_EATYPE_SYS, location);
-+ if (error <= 0)
-+ return error;
-+
-+ data = gmalloc(GFS_EA_DATA_LEN(location->ea));
-+
-+ error = 0;
-+ if (GFS_EA_IS_UNSTUFFED(location->ea))
-+ error = read_unstuffed(data, ip, sdp, location->ea, avail_size,
-+ gfs_ea_memcpy);
-+ else
-+ gfs_ea_memcpy(data, GFS_EA_DATA(location->ea),
-+ GFS_EA_DATA_LEN(location->ea));
-+ if (error)
-+ goto out;
-+
-+ acl = posix_acl_from_xattr(data, GFS_EA_DATA_LEN(location->ea));
-+ if (IS_ERR(acl))
-+ error = PTR_ERR(acl);
-+ else
-+ *acl_ptr = acl;
++ memset(&er, 0, sizeof(struct gfs_ea_request));
++ if (access) {
++ er.er_name = GFS_POSIX_ACL_ACCESS;
++ er.er_name_len = GFS_POSIX_ACL_ACCESS_LEN;
++ } else {
++ er.er_name = GFS_POSIX_ACL_DEFAULT;
++ er.er_name_len = GFS_POSIX_ACL_DEFAULT_LEN;
++ }
++ er.er_type = GFS_EATYPE_SYS;
+
-+ out:
-+ kfree(data);
++ error = gfs_ea_find(ip, &er, &el);
+ if (error)
-+ brelse(location->bh);
-+
-+ return error;
-+}
-+
-+int
-+gfs_getacl(struct inode *inode, int access, struct posix_acl **acl_ptr)
-+{
-+ struct gfs_inode *ip = vn2ip(inode);
-+ struct gfs_sbd *sdp = ip->i_sbd;
-+ struct gfs_eaget_io req;
-+ struct posix_acl *acl;
-+ int size;
-+ void *data;
-+ int error = 0;
-+
-+ *acl_ptr = NULL;
-+
-+ if (!sdp->sd_args.ar_posixacls)
++ return error;
++ if (!el.el_ea)
+ return 0;
++ if (!GFS_EA_DATA_LEN(el.el_ea))
++ goto out;
+
-+ req.eg_name = (access) ? GFS_POSIX_ACL_ACCESS : GFS_POSIX_ACL_DEFAULT;
-+ req.eg_name_len = (access) ? GFS_POSIX_ACL_ACCESS_LEN : GFS_POSIX_ACL_DEFAULT_LEN;
-+ req.eg_type = GFS_EATYPE_SYS;
-+ req.eg_len = NULL;
-+ req.eg_data = NULL;
-+ req.eg_data_len = 0;
++ er.er_data = kmalloc(GFS_EA_DATA_LEN(el.el_ea), GFP_KERNEL);
++ error = -ENOMEM;
++ if (!er.er_data)
++ goto out;
+
-+ error = gfs_ea_read_permission(&req, ip);
++ error = gfs_ea_get_copy(ip, &el, er.er_data);
+ if (error)
-+ return error;
++ goto out_kfree;
+
-+ if (!ip->i_di.di_eattr)
-+ return error;
++ *acl = posix_acl_from_xattr(er.er_data, GFS_EA_DATA_LEN(el.el_ea));
++ if (IS_ERR(*acl))
++ error = PTR_ERR(*acl);
+
-+ size = get_ea(sdp, ip, &req, gfs_ea_memcpy);
-+ if (size < 0) {
-+ if (size != -ENODATA)
-+ error = size;
-+ return error;
-+ }
-+
-+ data = gmalloc(size);
-+
-+ req.eg_data = data;
-+ req.eg_data_len = size;
-+
-+ size = get_ea(sdp, ip, &req, gfs_ea_memcpy);
-+ if (size < 0) {
-+ error = size;
-+ goto out_free;
-+ }
-+
-+ acl = posix_acl_from_xattr(data, size);
-+ if (IS_ERR(acl))
-+ error = PTR_ERR(acl);
-+ else
-+ *acl_ptr = acl;
++ out_kfree:
++ kfree(er.er_data);
+
-+ out_free:
-+ kfree(data);
++ out:
++ brelse(el.el_bh);
+
+ return error;
+}
+
++/**
++ * gfs_acl_new_prep -
++ * @dip:
++ * @type:
++ * @mode:
++ * @a_acl:
++ * @d_acl:
++ * @blocks:
++ * @data:
++ *
++ * Returns: errno
++ */
++
+int
-+gfs_setup_new_acl(struct gfs_inode *dip,
-+ unsigned int type, unsigned int *mode,
-+ struct posix_acl **acl_ptr)
++gfs_acl_new_prep(struct gfs_inode *dip,
++ unsigned int type, mode_t *mode,
++ void **a_data, void **d_data,
++ unsigned int *size,
++ unsigned int *blocks)
+{
-+ struct gfs_ea_location location;
+ struct posix_acl *acl = NULL;
-+ mode_t access_mode = *mode;
++ int set_a = FALSE, set_d = FALSE;
+ int error;
+
++ if (!dip->i_sbd->sd_args.ar_posix_acls)
++ return 0;
+ if (type == GFS_FILE_LNK)
+ return 0;
+
-+ error = gfs_findacl(dip, FALSE, &acl, &location);
++ error = gfs_acl_get(dip, FALSE, &acl);
+ if (error)
+ return error;
+ if (!acl) {
+ (*mode) &= ~current->fs->umask;
+ return 0;
+ }
-+ brelse(location.bh);
+
-+ if (type == GFS_FILE_DIR) {
-+ *acl_ptr = acl;
-+ return 0;
++ {
++ struct posix_acl *clone = posix_acl_clone(acl, GFP_KERNEL);
++ error = -ENOMEM;
++ if (!clone)
++ goto out;
++ posix_acl_release(acl);
++ acl = clone;
+ }
+
-+ error = posix_acl_create_masq(acl, &access_mode);
-+ *mode = access_mode;
++ error = posix_acl_create_masq(acl, mode);
++ if (error < 0)
++ goto out;
+ if (error > 0) {
-+ *acl_ptr = acl;
-+ return 0;
++ set_a = TRUE;
++ error = 0;
+ }
++ if (type == GFS_FILE_DIR)
++ set_d = TRUE;
++
++ if (set_a || set_d) {
++ struct gfs_ea_request er;
++ void *d;
++ unsigned int s = posix_acl_xattr_size(acl->a_count);
++ unsigned int b;
+
++ memset(&er, 0, sizeof(struct gfs_ea_request));
++ er.er_name_len = GFS_POSIX_ACL_DEFAULT_LEN;
++ er.er_data_len = s;
++ error = gfs_ea_check_size(dip->i_sbd, &er);
++ if (error)
++ goto out;
++
++ b = DIV_RU(er.er_data_len, dip->i_sbd->sd_jbsize);
++ if (set_a && set_d)
++ b *= 2;
++ b++;
++
++ d = kmalloc(s, GFP_KERNEL);
++ error = -ENOMEM;
++ if (!d)
++ goto out;
++ posix_acl_to_xattr(acl, d, s);
++
++ if (set_a)
++ *a_data = d;
++ if (set_d)
++ *d_data = d;
++ *size = s;
++ *blocks = b;
++
++ error = 0;
++ }
++
++ out:
+ posix_acl_release(acl);
+
+ return error;
+}
+
+/**
-+ * gfs_init_default_acl - initializes the default acl
++ * gfs_acl_new_init -
++ * @dip:
++ * @ip:
++ * @a_data:
++ * @d_data:
++ * @size:
+ *
-+ * NOTE: gfs_init_access_acl must be called first
++ * Returns: errno
+ */
-+int
-+gfs_create_default_acl(struct gfs_inode *dip, struct gfs_inode *ip, void *data,
-+ int size)
++
++int gfs_acl_new_init(struct gfs_inode *dip, struct gfs_inode *ip,
++ void *a_data, void *d_data, unsigned int size)
+{
-+ struct gfs_easet_io req;
-+ struct gfs_ea_location avail;
-+ int error;
++ void *data = (a_data) ? a_data : d_data;
++ unsigned int x;
++ int error = 0;
+
-+ memset(&avail, 0, sizeof(struct gfs_ea_location));
++ ip->i_alloc = dip->i_alloc; /* Cheesy, but it works. */
+
-+ req.es_data = data;
-+ req.es_name = GFS_POSIX_ACL_DEFAULT;
-+ req.es_data_len = size;
-+ req.es_name_len = GFS_POSIX_ACL_DEFAULT_LEN;
-+ req.es_cmd = GFS_EACMD_CREATE;
-+ req.es_type = GFS_EATYPE_SYS;
++ for (x = 0; x < 2; x++) {
++ struct gfs_ea_request er;
+
-+ error = find_sys_space(dip, ip, size, &avail);
-+ if (error)
-+ return error;
++ memset(&er, 0, sizeof(struct gfs_ea_request));
++ if (x) {
++ if (!a_data)
++ continue;
++ er.er_name = GFS_POSIX_ACL_ACCESS;
++ er.er_name_len = GFS_POSIX_ACL_ACCESS_LEN;
++ } else {
++ if (!d_data)
++ continue;
++ er.er_name = GFS_POSIX_ACL_DEFAULT;
++ er.er_name_len = GFS_POSIX_ACL_DEFAULT_LEN;
++ }
++ er.er_data = data;
++ er.er_data_len = size;
++ er.er_type = GFS_EATYPE_SYS;
+
-+ avail.ea = prep_ea(avail.ea);
++ error = gfs_ea_acl_init(ip, &er);
++ if (error)
++ break;
++ }
+
-+ error = write_ea(ip->i_sbd, dip, ip, avail.ea, &req);
-+ if (!error)
-+ gfs_trans_add_bh(ip->i_gl, avail.bh); /* Huh!?! */
++ ip->i_alloc = NULL;
+
-+ brelse(avail.bh);
++ kfree(data);
+
+ return error;
+}
+
+/**
-+ * gfs_init_access_acl - initialized the access acl
++ * gfs_acl_chmod -
++ * @ip:
++ * @attr:
+ *
-+ * NOTE: This must be the first extended attribute that is created for
-+ * this inode.
++ * Returns: errno
+ */
-+int
-+gfs_init_access_acl(struct gfs_inode *dip, struct gfs_inode *ip, void *data,
-+ int size)
-+{
-+ struct gfs_easet_io req;
-+
-+ req.es_data = data;
-+ req.es_name = GFS_POSIX_ACL_ACCESS;
-+ req.es_data_len = size;
-+ req.es_name_len = GFS_POSIX_ACL_ACCESS_LEN;
-+ req.es_cmd = GFS_EACMD_CREATE;
-+ req.es_type = GFS_EATYPE_SYS;
-+
-+ return init_new_inode_eattr(dip, ip, &req);
-+}
+
+int
-+gfs_init_acl(struct gfs_inode *dip, struct gfs_inode *ip, unsigned int type,
-+ struct posix_acl *acl)
++gfs_acl_chmod(struct gfs_inode *ip, struct iattr *attr)
+{
-+ struct buffer_head *dibh;
-+ void *data;
-+ int size;
++ struct gfs_ea_request er;
++ struct gfs_ea_location el;
++ struct posix_acl *acl;
+ int error;
+
-+ size = posix_acl_to_xattr(acl, NULL, 0);
-+
-+ data = gmalloc(size);
++ if (!ip->i_di.di_eattr)
++ goto simple;
+
-+ posix_acl_to_xattr(acl, data, size);
++ memset(&er, 0, sizeof(struct gfs_ea_request));
++ er.er_name = GFS_POSIX_ACL_ACCESS;
++ er.er_name_len = GFS_POSIX_ACL_ACCESS_LEN;
++ er.er_type = GFS_EATYPE_SYS;
+
-+ error = gfs_get_inode_buffer(ip, &dibh);
++ error = gfs_ea_find(ip, &er, &el);
+ if (error)
++ return error;
++ if (!el.el_ea)
++ goto simple;
++ if (!GFS_EA_DATA_LEN(el.el_ea))
++ goto simple;
++
++ er.er_data = kmalloc(GFS_EA_DATA_LEN(el.el_ea), GFP_KERNEL);
++ error = -ENOMEM;
++ if (!er.er_data)
+ goto out;
+
-+ error = gfs_init_access_acl(dip, ip, data, size);
++ error = gfs_ea_get_copy(ip, &el, er.er_data);
+ if (error)
-+ goto out_relse;
++ goto out_kfree;
+
-+ if (type == GFS_FILE_DIR) {
-+ error = gfs_create_default_acl(dip, ip, data, size);
-+ if (error)
-+ goto out_relse;
++ acl = posix_acl_from_xattr(er.er_data, GFS_EA_DATA_LEN(el.el_ea));
++ if (IS_ERR(acl)) {
++ error = PTR_ERR(acl);
++ goto out_kfree;
+ }
+
-+ gfs_trans_add_bh(ip->i_gl, dibh);
-+ gfs_dinode_out(&ip->i_di, dibh->b_data);
-+
-+ out_relse:
-+ brelse(dibh);
-+
-+ out:
-+ kfree(data);
-+ posix_acl_release(acl);
-+
-+ return error;
-+}
-+
-+int
-+gfs_acl_setattr(struct inode *inode)
-+{
-+ struct gfs_inode *ip = vn2ip(inode);
-+ struct posix_acl *acl;
-+ struct gfs_ea_location location;
-+ int error;
++ error = posix_acl_chmod_masq(acl, attr->ia_mode);
++ if (error)
++ goto out_acl;
+
-+ if (S_ISLNK(inode->i_mode))
-+ return 0;
++ posix_acl_to_xattr(acl, er.er_data, GFS_EA_DATA_LEN(el.el_ea));
+
-+ memset(&location, 0, sizeof(struct gfs_ea_location));
++ error = gfs_ea_acl_chmod(ip, &el, attr, er.er_data);
+
-+ error = gfs_findacl(ip, TRUE, &acl, &location); /* Check error here? */
-+ if (!location.ea)
-+ return error;
++ out_acl:
++ posix_acl_release(acl);
+
-+ error = posix_acl_chmod_masq(acl, inode->i_mode);
-+ if (!error)
-+ error = gfs_replace_acl(inode, acl, TRUE, location);
++ out_kfree:
++ kfree(er.er_data);
+
-+ posix_acl_release(acl);
-+ brelse(location.bh);
++ out:
++ brelse(el.el_bh);
+
+ return error;
++
++ simple:
++ return gfs_setattr_simple(ip, attr);
+}
-diff -urN linux-2.6.9-rc1-mm3/fs/gfs/acl.h linux/fs/gfs/acl.h
---- linux-2.6.9-rc1-mm3/fs/gfs/acl.h 1969-12-31 18:00:00.000000000 -0600
-+++ linux/fs/gfs/acl.h 2004-09-07 16:26:15.728555458 -0500
-@@ -0,0 +1,28 @@
+diff -urN linux-orig/fs/gfs/acl.h linux-patched/fs/gfs/acl.h
+--- linux-orig/fs/gfs/acl.h 1969-12-31 18:00:00.000000000 -0600
++++ linux-patched/fs/gfs/acl.h 2004-10-27 15:27:10.877631457 -0500
+@@ -0,0 +1,46 @@
+/******************************************************************************
+*******************************************************************************
+**
+#ifndef __ACL_DOT_H__
+#define __ACL_DOT_H__
+
-+int gfs_setup_new_acl(struct gfs_inode *dip,
-+ unsigned int type, unsigned int *mode,
-+ struct posix_acl **acl_ptr);
-+int gfs_getacl(struct inode *inode, int access, struct posix_acl **acl_ptr);
-+int gfs_init_acl(struct gfs_inode *dip, struct gfs_inode *ip, unsigned int type,
-+ struct posix_acl *acl);
-+int gfs_acl_setattr(struct inode *inode);
-+int gfs_validate_acl(struct gfs_inode *ip, const char *value, int size,
-+ int access);
-+void gfs_acl_set_mode(struct gfs_inode *ip, struct posix_acl *acl);
++#define GFS_POSIX_ACL_ACCESS "posix_acl_access"
++#define GFS_POSIX_ACL_ACCESS_LEN (16)
++#define GFS_POSIX_ACL_DEFAULT "posix_acl_default"
++#define GFS_POSIX_ACL_DEFAULT_LEN (17)
++
++#define GFS_ACL_IS_ACCESS(name, len) \
++ ((len) == GFS_POSIX_ACL_ACCESS_LEN && \
++ !memcmp(GFS_POSIX_ACL_ACCESS, (name), (len)))
++
++#define GFS_ACL_IS_DEFAULT(name, len) \
++ ((len) == GFS_POSIX_ACL_DEFAULT_LEN && \
++ !memcmp(GFS_POSIX_ACL_DEFAULT, (name), (len)))
++
++struct gfs_ea_request;
++
++int gfs_acl_validate_set(struct gfs_inode *ip, int access,
++ struct gfs_ea_request *er,
++ mode_t *mode, int *remove);
++int gfs_acl_validate_remove(struct gfs_inode *ip, int access);
++int gfs_acl_get(struct gfs_inode *ip, int access, struct posix_acl **acl);
++int gfs_acl_new_prep(struct gfs_inode *dip,
++ unsigned int type, mode_t *mode,
++ void **a_data, void **d_data,
++ unsigned int *size,
++ unsigned int *blocks);
++int gfs_acl_new_init(struct gfs_inode *dip, struct gfs_inode *ip,
++ void *a_data, void *d_data, unsigned int size);
++int gfs_acl_chmod(struct gfs_inode *ip, struct iattr *attr);
+
+#endif /* __ACL_DOT_H__ */
-diff -urN linux-2.6.9-rc1-mm3/fs/gfs/bits.c linux/fs/gfs/bits.c
---- linux-2.6.9-rc1-mm3/fs/gfs/bits.c 1969-12-31 18:00:00.000000000 -0600
-+++ linux/fs/gfs/bits.c 2004-09-07 16:26:15.729555238 -0500
+diff -urN linux-orig/fs/gfs/bits.c linux-patched/fs/gfs/bits.c
+--- linux-orig/fs/gfs/bits.c 1969-12-31 18:00:00.000000000 -0600
++++ linux-patched/fs/gfs/bits.c 2004-10-27 15:27:10.877631457 -0500
@@ -0,0 +1,183 @@
+/******************************************************************************
+*******************************************************************************
+
+ return count;
+}
-diff -urN linux-2.6.9-rc1-mm3/fs/gfs/bits.h linux/fs/gfs/bits.h
---- linux-2.6.9-rc1-mm3/fs/gfs/bits.h 1969-12-31 18:00:00.000000000 -0600
-+++ linux/fs/gfs/bits.h 2004-09-07 16:26:15.729555238 -0500
+diff -urN linux-orig/fs/gfs/bits.h linux-patched/fs/gfs/bits.h
+--- linux-orig/fs/gfs/bits.h 1969-12-31 18:00:00.000000000 -0600
++++ linux-patched/fs/gfs/bits.h 2004-10-27 15:27:10.877631457 -0500
@@ -0,0 +1,32 @@
+/******************************************************************************
+*******************************************************************************
+ unsigned char state);
+
+#endif /* __BITS_DOT_H__ */
-diff -urN linux-2.6.9-rc1-mm3/fs/gfs/bmap.c linux/fs/gfs/bmap.c
---- linux-2.6.9-rc1-mm3/fs/gfs/bmap.c 1969-12-31 18:00:00.000000000 -0600
-+++ linux/fs/gfs/bmap.c 2004-09-07 16:26:15.731554797 -0500
-@@ -0,0 +1,1404 @@
+diff -urN linux-orig/fs/gfs/bmap.c linux-patched/fs/gfs/bmap.c
+--- linux-orig/fs/gfs/bmap.c 1969-12-31 18:00:00.000000000 -0600
++++ linux-patched/fs/gfs/bmap.c 2004-10-27 15:27:10.878631225 -0500
+@@ -0,0 +1,1402 @@
+/******************************************************************************
+*******************************************************************************
+**
+
+ metadata = (height != ip->i_di.di_height - 1) || gfs_is_jdata(ip);
+
-+ error = gfs_rindex_hold(sdp, &ri_gh);
++ error = gfs_rindex_hold(sdp, &ip->i_alloc->al_ri_gh);
+ if (error)
+ return error;
+
+
+ gfs_rlist_alloc(&rlist, LM_ST_EXCLUSIVE, 0);
+
-+ error = gfs_glock_nq_m(rlist.rl_rgrps, rlist.rl_ghs);
-+ if (error)
-+ goto fail;
-+
+ for (x = 0; x < rlist.rl_rgrps; x++) {
+ struct gfs_rgrpd *rgd;
+ rgd = gl2rgd(rlist.rl_ghs[x].gh_gl);
+ rg_blocks += rgd->rd_ri.ri_length;
+ }
+
++ error = gfs_glock_nq_m(rlist.rl_rgrps, rlist.rl_ghs);
++ if (error)
++ goto fail;
++
+ /* Trans may require:
+ All the bitmaps that were reserved.
+ One block for the dinode.
+ }
+
+ *p = 0;
++ GFS_ASSERT_INODE(ip->i_di.di_blocks, ip,);
+ ip->i_di.di_blocks--;
+ }
-+
+ if (bstart) {
+ if (metadata)
+ gfs_metafree(ip, bstart, blen);
+ gfs_rlist_free(&rlist);
+
+ out:
-+ gfs_glock_dq_uninit(&ri_gh);
++ gfs_glock_dq_uninit(&ip->i_alloc->al_ri_gh);
+
+ return 0;
+
+ *
+ * If this is a journaled file, copy out the data too.
+ *
-+ * Returns: 0 on success, -EXXX on failure
++ * Returns: errno
+ */
+
+static int
+ struct buffer_head *bh, uint64_t *top, uint64_t *bottom,
+ unsigned int height, void *data)
+{
-+ struct gfs_sbd *sdp = ip->i_sbd;
+ struct gfs_user_buffer *ub = (struct gfs_user_buffer *)data;
-+ struct buffer_head *data_bh;
-+ uint64_t *bp, bn;
+ int error;
+
+ error = gfs_add_bh_to_ub(ub, bh);
+ height + 1 != ip->i_di.di_height)
+ return 0;
+
-+ for (bp = top; bp < bottom; bp++)
-+ if (*bp) {
-+ bn = gfs64_to_cpu(*bp);
++ for (; top < bottom; top++)
++ if (*top) {
++ struct buffer_head *data_bh;
+
-+ error = gfs_dread(sdp, bn, ip->i_gl,
-+ DIO_START | DIO_WAIT, &data_bh);
++ error = gfs_dread(ip->i_sbd,
++ gfs64_to_cpu(*top), ip->i_gl,
++ DIO_START | DIO_WAIT,
++ &data_bh);
+ if (error)
+ return error;
+
+ * @ip: the file
+ * @ub: the structure representing the meta
+ *
-+ * Returns: 0 on success, -EXXX on failure
++ * Returns: errno
+ */
+
+int
+gfs_get_file_meta(struct gfs_inode *ip, struct gfs_user_buffer *ub)
+{
-+ struct buffer_head *dibh;
-+ struct metapath *mp;
+ int error;
+
+ if (gfs_is_stuffed(ip)) {
++ struct buffer_head *dibh;
+ error = gfs_get_inode_buffer(ip, &dibh);
+ if (!error) {
+ error = gfs_add_bh_to_ub(ub, dibh);
+ brelse(dibh);
+ }
+ } else {
-+ mp = find_metapath(ip, 0);
++ struct metapath *mp = find_metapath(ip, 0);
+ error = recursive_scan(ip, NULL, mp, 0, 0, TRUE, do_gfm, ub);
+ kfree(mp);
+ }
+
+ return error;
+}
-diff -urN linux-2.6.9-rc1-mm3/fs/gfs/bmap.h linux/fs/gfs/bmap.h
---- linux-2.6.9-rc1-mm3/fs/gfs/bmap.h 1969-12-31 18:00:00.000000000 -0600
-+++ linux/fs/gfs/bmap.h 2004-09-07 16:26:15.731554797 -0500
+diff -urN linux-orig/fs/gfs/bmap.h linux-patched/fs/gfs/bmap.h
+--- linux-orig/fs/gfs/bmap.h 1969-12-31 18:00:00.000000000 -0600
++++ linux-patched/fs/gfs/bmap.h 2004-10-27 15:27:10.878631225 -0500
@@ -0,0 +1,48 @@
+/******************************************************************************
+*******************************************************************************
+int gfs_get_file_meta(struct gfs_inode *ip, struct gfs_user_buffer *ub);
+
+#endif /* __BMAP_DOT_H__ */
-diff -urN linux-2.6.9-rc1-mm3/fs/gfs/daemon.c linux/fs/gfs/daemon.c
---- linux-2.6.9-rc1-mm3/fs/gfs/daemon.c 1969-12-31 18:00:00.000000000 -0600
-+++ linux/fs/gfs/daemon.c 2004-09-07 16:26:15.732554576 -0500
+diff -urN linux-orig/fs/gfs/daemon.c linux-patched/fs/gfs/daemon.c
+--- linux-orig/fs/gfs/daemon.c 1969-12-31 18:00:00.000000000 -0600
++++ linux-patched/fs/gfs/daemon.c 2004-10-27 15:27:10.878631225 -0500
@@ -0,0 +1,259 @@
+/******************************************************************************
+*******************************************************************************
+
+ return 0;
+}
-diff -urN linux-2.6.9-rc1-mm3/fs/gfs/daemon.h linux/fs/gfs/daemon.h
---- linux-2.6.9-rc1-mm3/fs/gfs/daemon.h 1969-12-31 18:00:00.000000000 -0600
-+++ linux/fs/gfs/daemon.h 2004-09-07 16:26:15.732554576 -0500
+diff -urN linux-orig/fs/gfs/daemon.h linux-patched/fs/gfs/daemon.h
+--- linux-orig/fs/gfs/daemon.h 1969-12-31 18:00:00.000000000 -0600
++++ linux-patched/fs/gfs/daemon.h 2004-10-27 15:27:10.878631225 -0500
@@ -0,0 +1,24 @@
+/******************************************************************************
+*******************************************************************************
+int gfs_inoded(void *data);
+
+#endif /* __DAEMON_DOT_H__ */
-diff -urN linux-2.6.9-rc1-mm3/fs/gfs/dio.c linux/fs/gfs/dio.c
---- linux-2.6.9-rc1-mm3/fs/gfs/dio.c 1969-12-31 18:00:00.000000000 -0600
-+++ linux/fs/gfs/dio.c 2004-09-07 16:26:15.733554356 -0500
-@@ -0,0 +1,1302 @@
+diff -urN linux-orig/fs/gfs/dio.c linux-patched/fs/gfs/dio.c
+--- linux-orig/fs/gfs/dio.c 1969-12-31 18:00:00.000000000 -0600
++++ linux-patched/fs/gfs/dio.c 2004-10-27 15:27:10.878631225 -0500
+@@ -0,0 +1,1305 @@
+/******************************************************************************
+*******************************************************************************
+**
+ * gfs_sync_meta - sync all the buffers in a filesystem
+ * @sdp: the filesystem
+ *
++ * Flush metadata blocks to on-disk journal, then
++ * Flush metadata blocks (now in AIL) to on-disk in-place locations
++ * Periodically keep checking until done (AIL empty)
+ */
+
+void
+ out:
+ brelse(first_bh);
+}
-diff -urN linux-2.6.9-rc1-mm3/fs/gfs/dio.h linux/fs/gfs/dio.h
---- linux-2.6.9-rc1-mm3/fs/gfs/dio.h 1969-12-31 18:00:00.000000000 -0600
-+++ linux/fs/gfs/dio.h 2004-09-07 16:26:15.734554135 -0500
+diff -urN linux-orig/fs/gfs/dio.h linux-patched/fs/gfs/dio.h
+--- linux-orig/fs/gfs/dio.h 1969-12-31 18:00:00.000000000 -0600
++++ linux-patched/fs/gfs/dio.h 2004-10-27 15:27:10.878631225 -0500
@@ -0,0 +1,195 @@
+/******************************************************************************
+*******************************************************************************
+}
+
+#endif /* __DIO_DOT_H__ */
-diff -urN linux-2.6.9-rc1-mm3/fs/gfs/dir.c linux/fs/gfs/dir.c
---- linux-2.6.9-rc1-mm3/fs/gfs/dir.c 1969-12-31 18:00:00.000000000 -0600
-+++ linux/fs/gfs/dir.c 2004-09-07 16:26:15.737553474 -0500
-@@ -0,0 +1,2273 @@
+diff -urN linux-orig/fs/gfs/dir.c linux-patched/fs/gfs/dir.c
+--- linux-orig/fs/gfs/dir.c 1969-12-31 18:00:00.000000000 -0600
++++ linux-patched/fs/gfs/dir.c 2004-10-27 15:27:10.878631225 -0500
+@@ -0,0 +1,2274 @@
+/******************************************************************************
+*******************************************************************************
+**
+ uint64_t leaf_no, void *data)
+{
+ struct gfs_sbd *sdp = dip->i_sbd;
-+ struct gfs_holder ri_gh;
+ struct gfs_leaf tmp_leaf;
+ struct gfs_rgrp_list rlist;
+ struct buffer_head *bh, *dibh;
+ if (error)
+ goto fail;
+
-+ error = gfs_rindex_hold(sdp, &ri_gh);
++ error = gfs_rindex_hold(sdp, &dip->i_alloc->al_ri_gh);
+ if (error)
+ goto fail_qs;
+
+
+ gfs_rlist_alloc(&rlist, LM_ST_EXCLUSIVE, 0);
+
-+ error = gfs_glock_nq_m(rlist.rl_rgrps, rlist.rl_ghs);
-+ if (error)
-+ goto fail_rlist;
-+
+ for (x = 0; x < rlist.rl_rgrps; x++) {
+ struct gfs_rgrpd *rgd;
+ rgd = gl2rgd(rlist.rl_ghs[x].gh_gl);
+ rg_blocks += rgd->rd_ri.ri_length;
+ }
+
++ error = gfs_glock_nq_m(rlist.rl_rgrps, rlist.rl_ghs);
++ if (error)
++ goto fail_rlist;
++
+ /* Trans may require:
+ All the bitmaps that were reserved.
+ One block for the dinode.
+
+ gfs_metafree(dip, blk, 1);
+
++ GFS_ASSERT_INODE(dip->i_di.di_blocks, dip,);
+ dip->i_di.di_blocks--;
+ }
+
+
+ gfs_glock_dq_m(rlist.rl_rgrps, rlist.rl_ghs);
+ gfs_rlist_free(&rlist);
-+ gfs_glock_dq_uninit(&ri_gh);
++ gfs_glock_dq_uninit(&dip->i_alloc->al_ri_gh);
+ gfs_quota_unhold_m(dip);
+ gfs_alloc_put(dip);
+ kfree(ht);
+
+ fail_rlist:
+ gfs_rlist_free(&rlist);
-+ gfs_glock_dq_uninit(&ri_gh);
++ gfs_glock_dq_uninit(&dip->i_alloc->al_ri_gh);
+
+ fail_qs:
+ gfs_quota_unhold_m(dip);
+ * @leaf_no: the leaf number
+ * @data: a pointer to a struct gfs_user_buffer structure
+ *
-+ * Returns: 0 on success, -EXXX on failure
++ * Returns: errno
+ */
+
+static int
-+do_gdm(struct gfs_inode *dip, uint32_t index, uint32_t len, uint64_t leaf_no,
++do_gdm(struct gfs_inode *dip,
++ uint32_t index, uint32_t len, uint64_t leaf_no,
+ void *data)
+{
+ struct gfs_user_buffer *ub = (struct gfs_user_buffer *)data;
+ * @dip: the directory
+ * @ub: the structure representing the meta
+ *
-+ * Returns: 0 on success, -EXXX on failure
++ * Returns: errno
+ */
+
+int
+ GFS_ASSERT_INODE(dip->i_di.di_type == GFS_FILE_DIR, dip,);
+ return foreach_leaf(dip, do_gdm, ub);
+}
-diff -urN linux-2.6.9-rc1-mm3/fs/gfs/dir.h linux/fs/gfs/dir.h
---- linux-2.6.9-rc1-mm3/fs/gfs/dir.h 1969-12-31 18:00:00.000000000 -0600
-+++ linux/fs/gfs/dir.h 2004-09-07 16:26:15.737553474 -0500
+diff -urN linux-orig/fs/gfs/dir.h linux-patched/fs/gfs/dir.h
+--- linux-orig/fs/gfs/dir.h 1969-12-31 18:00:00.000000000 -0600
++++ linux-patched/fs/gfs/dir.h 2004-10-27 15:27:10.879630993 -0500
@@ -0,0 +1,55 @@
+/******************************************************************************
+*******************************************************************************
+int gfs_get_dir_meta(struct gfs_inode *ip, struct gfs_user_buffer *ub);
+
+#endif /* __DIR_DOT_H__ */
-diff -urN linux-2.6.9-rc1-mm3/fs/gfs/eattr.c linux/fs/gfs/eattr.c
---- linux-2.6.9-rc1-mm3/fs/gfs/eattr.c 1969-12-31 18:00:00.000000000 -0600
-+++ linux/fs/gfs/eattr.c 2004-09-07 16:26:15.740552812 -0500
-@@ -0,0 +1,2340 @@
+diff -urN linux-orig/fs/gfs/eaops.c linux-patched/fs/gfs/eaops.c
+--- linux-orig/fs/gfs/eaops.c 1969-12-31 18:00:00.000000000 -0600
++++ linux-patched/fs/gfs/eaops.c 2004-10-27 15:27:10.879630993 -0500
+@@ -0,0 +1,235 @@
+/******************************************************************************
+*******************************************************************************
+**
+#include <linux/completion.h>
+#include <linux/buffer_head.h>
+#include <asm/uaccess.h>
++#include <linux/xattr.h>
+#include <linux/xattr_acl.h>
+
+#include "gfs.h"
+#include "acl.h"
-+#include "dio.h"
++#include "eaops.h"
+#include "eattr.h"
-+#include "glock.h"
-+#include "inode.h"
-+#include "ioctl.h"
-+#include "quota.h"
-+#include "rgrp.h"
-+#include "trans.h"
+
-+#define GFS_EA_REC_LEN(x) gfs32_to_cpu((x)->ea_rec_len)
-+#define GFS_EA_NAME(x) ((char *)(x) + sizeof(struct gfs_ea_header))
-+#define GFS_EA_DATA_PTRS(x) ((uint64_t *)((char *)(x) + sizeof(struct gfs_ea_header) + (((x)->ea_name_len + 7) & ~7)))
-+
-+#define GFS_EA_NEXT(x) (struct gfs_ea_header *)((char *)(x) + GFS_EA_REC_LEN(x))
-+#define GFS_EA_FREESPACE(x) (struct gfs_ea_header *)((char *)(x) + GFS_EA_SIZE(x))
++/**
++ * gfs_ea_name2type - get the type of the ea, and truncate the type prefix from the name
++ * @namep: ea name, possibly with type appended
++ *
++ * Returns: GFS_EATYPE_XXX
++ */
+
-+#define GFS_EAREQ_IS_STUFFED(x, y) (((sizeof(struct gfs_ea_header) + (x)->es_data_len + (x)->es_name_len + 7) & ~7) <= y)
++unsigned int
++gfs_ea_name2type(const char *name, char **truncated_name)
++{
++ unsigned int type;
+
-+#define GFS_EADATA_NUM_PTRS(x, y) (((x) + (y) - 1) / (y))
++ if (strncmp(name, "system.", 7) == 0) {
++ type = GFS_EATYPE_SYS;
++ if (truncated_name)
++ *truncated_name = strchr(name, '.') + 1;
++ } else if (strncmp(name, "user.", 5) == 0) {
++ type = GFS_EATYPE_USR;
++ if (truncated_name)
++ *truncated_name = strchr(name, '.') + 1;
++ } else {
++ type = GFS_EATYPE_UNUSED;
++ if (truncated_name)
++ *truncated_name = NULL;
++ }
+
-+#define GFS_EA_SIZE(x) ((sizeof(struct gfs_ea_header) + (x)->ea_name_len + (GFS_EA_IS_UNSTUFFED(x)? (8 * (x)->ea_num_ptrs) : GFS_EA_DATA_LEN(x)) + 7) & ~ 7)
++ return type;
++}
+
-+#define GFS_EACMD_VALID(x) ((x) <= GFS_EACMD_REMOVE)
++/**
++ * user_eo_get -
++ * @ip:
++ * @er:
++ *
++ * Returns: errno
++ */
+
-+#define GFS_EA_IS_LAST(x) ((x)->ea_flags & GFS_EAFLAG_LAST)
++static int
++user_eo_get(struct gfs_inode *ip, struct gfs_ea_request *er)
++{
++ {
++ struct inode *inode = ip->i_vnode;
++ int error = permission(inode, MAY_READ, NULL);
++ if (error)
++ return error;
++ }
+
-+#define GFS_EA_STRLEN(x) ((x)->ea_name_len + 1 + (((x)->ea_type == GFS_EATYPE_USR)? 5 : 7))
++ return gfs_ea_get_i(ip, er);
++}
+
-+#define GFS_FIRST_EA(x) ((struct gfs_ea_header *) ((x)->b_data + sizeof(struct gfs_meta_header)))
++/**
++ * user_eo_set -
++ * @ip:
++ * @er:
++ *
++ * Returns: errno
++ */
+
-+#define EA_ALLOC 1
-+#define EA_DEALLOC 2
++static int
++user_eo_set(struct gfs_inode *ip, struct gfs_ea_request *er)
++{
++ {
++ struct inode *inode = ip->i_vnode;
++ if (S_ISREG(inode->i_mode) ||
++ (S_ISDIR(inode->i_mode) && !(inode->i_mode & S_ISVTX))) {
++ int error = permission(inode, MAY_WRITE, NULL);
++ if (error)
++ return error;
++ } else
++ return -EPERM;
++ }
+
-+static struct buffer_head *alloc_eattr_blk(struct gfs_sbd *sdp,
-+ struct gfs_inode *alloc_ip,
-+ struct gfs_inode *ip,
-+ uint64_t * block);
++ return gfs_ea_set_i(ip, er);
++}
+
+/**
-+ * can_replace - returns true if ea is large enough to hold the data in
-+ * the request
++ * user_eo_remove -
++ * @ip:
++ * @er:
++ *
++ * Returns: errno
+ */
+
-+static __inline__ int
-+can_replace(struct gfs_ea_header *ea, struct gfs_easet_io *req,
-+ uint32_t avail_size)
++static int
++user_eo_remove(struct gfs_inode *ip, struct gfs_ea_request *er)
+{
-+ int data_space =
-+ GFS_EA_REC_LEN(ea) - sizeof (struct gfs_ea_header) -
-+ ea->ea_name_len;
++ {
++ struct inode *inode = ip->i_vnode;
++ if (S_ISREG(inode->i_mode) ||
++ (S_ISDIR(inode->i_mode) && !(inode->i_mode & S_ISVTX))) {
++ int error = permission(inode, MAY_WRITE, NULL);
++ if (error)
++ return error;
++ } else
++ return -EPERM;
++ }
+
-+ if (GFS_EAREQ_IS_STUFFED(req, avail_size) && !GFS_EA_IS_UNSTUFFED(ea))
-+ return (req->es_data_len <= data_space);
-+ else
-+ return (GFS_EADATA_NUM_PTRS(req->es_data_len, avail_size) <=
-+ ea->ea_num_ptrs);
++ return gfs_ea_remove_i(ip, er);
+}
+
+/**
-+ * get_req_size - returns the acutal number of bytes the request will take up
-+ * (not counting any unstuffed data blocks)
++ * system_eo_get -
++ * @ip:
++ * @er:
++ *
++ * Returns: errno
+ */
+
-+static __inline__ uint32_t
-+get_req_size(struct gfs_easet_io *req, uint32_t avail_size)
++static int
++system_eo_get(struct gfs_inode *ip, struct gfs_ea_request *er)
+{
-+ uint32_t size =
-+ ((sizeof (struct gfs_ea_header) + req->es_data_len +
-+ req->es_name_len + 7) & ~7);
-+
-+ if (size <= avail_size)
-+ return size;
++ if (!GFS_ACL_IS_ACCESS(er->er_name, er->er_name_len) &&
++ !GFS_ACL_IS_DEFAULT(er->er_name, er->er_name_len) &&
++ !capable(CAP_SYS_ADMIN))
++ return -EPERM;
+
-+ return ((sizeof (struct gfs_ea_header) + req->es_name_len + 7) & ~7) +
-+ (8 * GFS_EADATA_NUM_PTRS(req->es_data_len, avail_size));
++ return gfs_ea_get_i(ip, er);
+}
+
+/**
-+ * gfs_ea_write_permission - decides if the user has permission to write to
-+ * the ea
-+ * @req: the write request
-+ * @ip: inode of file with the ea
++ * system_eo_set -
++ * @ip:
++ * @er:
+ *
-+ * Returns: 0 on success, -EXXX on error
++ * Returns: errno
+ */
+
-+int
-+gfs_ea_write_permission(struct gfs_easet_io *req, struct gfs_inode *ip)
++static int
++system_eo_set(struct gfs_inode *ip, struct gfs_ea_request *er)
+{
-+ struct inode *inode = gfs_iget(ip, NO_CREATE);
-+ int error = 0;
++ if (GFS_ACL_IS_ACCESS(er->er_name, er->er_name_len)) {
++ int remove = FALSE;
++ int error;
+
-+ GFS_ASSERT_INODE(inode, ip,);
++ er->er_mode = ip->i_vnode->i_mode;
++ error = gfs_acl_validate_set(ip, TRUE, er,
++ &er->er_mode, &remove);
++ if (error)
++ return error;
++ error = gfs_ea_set_i(ip, er);
++ if (error)
++ return error;
++ if (remove)
++ gfs_ea_remove_i(ip, er);
++ return 0;
+
-+ if (req->es_type == GFS_EATYPE_USR) {
-+ if (!S_ISREG(inode->i_mode) &&
-+ (!S_ISDIR(inode->i_mode) || inode->i_mode & S_ISVTX))
-+ error = -EPERM;
-+ else {
-+ error = permission(inode, MAY_WRITE, NULL);
-+ if (error == -EACCES)
-+ error = -EPERM;
-+ }
-+ } else if (req->es_type == GFS_EATYPE_SYS) {
-+ if (IS_ACCESS_ACL(req->es_name, req->es_name_len))
-+ error = gfs_validate_acl(ip, req->es_data,
-+ req->es_data_len, 1);
-+ else if (IS_DEFAULT_ACL(req->es_name, req->es_name_len))
-+ error = gfs_validate_acl(ip, req->es_data,
-+ req->es_data_len, 0);
-+ else {
-+ if (!capable(CAP_SYS_ADMIN))
-+ error = -EPERM;
-+ }
-+ } else
-+ error = -EOPNOTSUPP;
++ } else if (GFS_ACL_IS_DEFAULT(er->er_name, er->er_name_len)) {
++ int error = gfs_acl_validate_set(ip, FALSE, er,
++ NULL, NULL);
++ if (error)
++ return error;
++ return gfs_ea_set_i(ip, er);
+
-+ iput(inode);
++ }
+
-+ return error;
++ return -EPERM;
+}
+
+/**
-+ * gfs_ea_read_permission - decides if the user has permission to read from
-+ * the ea
-+ * @req: the read request
-+ * @ip: inode of file with the ea
++ * system_eo_remove -
++ * @ip:
++ * @er:
+ *
-+ * Returns: 0 on success, -EXXX on error
++ * Returns: errno
+ */
+
-+int
-+gfs_ea_read_permission(struct gfs_eaget_io *req, struct gfs_inode *ip)
++static int
++system_eo_remove(struct gfs_inode *ip, struct gfs_ea_request *er)
+{
-+ struct inode *inode = gfs_iget(ip, NO_CREATE);
-+ int error = 0;
++ if (GFS_ACL_IS_ACCESS(er->er_name, er->er_name_len)) {
++ int error = gfs_acl_validate_remove(ip, TRUE);
++ if (error)
++ return error;
+
-+ GFS_ASSERT_INODE(inode, ip,);
++ } else if (GFS_ACL_IS_DEFAULT(er->er_name, er->er_name_len)) {
++ int error = gfs_acl_validate_remove(ip, FALSE);
++ if (error)
++ return error;
+
-+ if (req->eg_type == GFS_EATYPE_USR){
-+ error = permission(inode, MAY_READ, NULL);
-+ if (error == -EACCES)
-+ error = -EPERM;
-+ }
-+ else if (req->eg_type == GFS_EATYPE_SYS) {
-+ if (IS_ACCESS_ACL(req->eg_name, req->eg_name_len) ||
-+ IS_DEFAULT_ACL(req->eg_name, req->eg_name_len))
-+ error = 0;
-+ else{
-+ if (!capable(CAP_SYS_ADMIN))
-+ error = -EPERM;
-+ }
+ } else
-+ error = -EOPNOTSUPP;
-+
-+ iput(inode);
++ return -EPERM;
+
-+ return error;
++ return gfs_ea_remove_i(ip, er);
+}
+
-+/**
-+ * gfs_es_memcpy - gfs memcpy wrapper with a return value
-+ *
-+ */
++struct gfs_eattr_operations gfs_user_eaops = {
++ .eo_get = user_eo_get,
++ .eo_set = user_eo_set,
++ .eo_remove = user_eo_remove,
++ .eo_name = "user",
++};
+
-+int
-+gfs_ea_memcpy(void *dest, void *src, unsigned long size)
-+{
-+ memcpy(dest, src, size);
-+ return 0;
-+}
++struct gfs_eattr_operations gfs_system_eaops = {
++ .eo_get = system_eo_get,
++ .eo_set = system_eo_set,
++ .eo_remove = system_eo_remove,
++ .eo_name = "system",
++};
+
-+/**
-+ * gfs_ea_copy_to_user - copy_to_user wrapper
-+ */
++struct gfs_eattr_operations *gfs_ea_ops[] = {
++ NULL,
++ &gfs_user_eaops,
++ &gfs_system_eaops,
++};
+
-+int
-+gfs_ea_copy_to_user(void *dest, void *src, unsigned long size)
-+{
-+ int error;
-+ error = (copy_to_user(dest, src, size)) ? -EFAULT : 0;
-+ return error;
-+}
++
+diff -urN linux-orig/fs/gfs/eaops.h linux-patched/fs/gfs/eaops.h
+--- linux-orig/fs/gfs/eaops.h 1969-12-31 18:00:00.000000000 -0600
++++ linux-patched/fs/gfs/eaops.h 2004-10-27 15:27:10.879630993 -0500
+@@ -0,0 +1,34 @@
++/******************************************************************************
++*******************************************************************************
++**
++** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
++** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
++**
++** This copyrighted material is made available to anyone wishing to use,
++** modify, copy, or redistribute it subject to the terms and conditions
++** of the GNU General Public License v.2.
++**
++*******************************************************************************
++******************************************************************************/
++
++#ifndef __EAOPS_DOT_H__
++#define __EAOPS_DOT_H__
++
++struct gfs_ea_request;
++
++struct gfs_eattr_operations {
++ int (*eo_get) (struct gfs_inode *ip, struct gfs_ea_request *er);
++ int (*eo_set) (struct gfs_inode *ip, struct gfs_ea_request *er);
++ int (*eo_remove) (struct gfs_inode *ip, struct gfs_ea_request *er);
++ char *eo_name;
++};
++
++unsigned int gfs_ea_name2type(const char *name, char **truncated_name);
++
++extern struct gfs_eattr_operations gfs_user_eaops;
++extern struct gfs_eattr_operations gfs_system_eaops;
++
++extern struct gfs_eattr_operations *gfs_ea_ops[];
++
++#endif /* __EAOPS_DOT_H__ */
++
+diff -urN linux-orig/fs/gfs/eattr.c linux-patched/fs/gfs/eattr.c
+--- linux-orig/fs/gfs/eattr.c 1969-12-31 18:00:00.000000000 -0600
++++ linux-patched/fs/gfs/eattr.c 2004-10-27 15:27:10.879630993 -0500
+@@ -0,0 +1,1968 @@
++/******************************************************************************
++*******************************************************************************
++**
++** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
++** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
++**
++** This copyrighted material is made available to anyone wishing to use,
++** modify, copy, or redistribute it subject to the terms and conditions
++** of the GNU General Public License v.2.
++**
++*******************************************************************************
++******************************************************************************/
++
++#include <linux/sched.h>
++#include <linux/slab.h>
++#include <linux/smp_lock.h>
++#include <linux/spinlock.h>
++#include <asm/semaphore.h>
++#include <linux/completion.h>
++#include <linux/buffer_head.h>
++#include <asm/uaccess.h>
++#include <linux/xattr.h>
++#include <linux/xattr_acl.h>
++
++#include "gfs.h"
++#include "acl.h"
++#include "dio.h"
++#include "eaops.h"
++#include "eattr.h"
++#include "glock.h"
++#include "inode.h"
++#include "ioctl.h"
++#include "quota.h"
++#include "rgrp.h"
++#include "trans.h"
+
+/**
-+ * Returns: 1 if find_direct_eattr should stop checking (if the eattr was found
-+ * location will be set)
-+ * 0 if find_eattr should keep on checking
-+ * -EXXX on error
++ * ea_calc_size - returns the actual number of bytes the request will take up
++ * (not counting any unstuffed data blocks)
++ * @sdp:
++ * @er:
++ * @size:
++ *
++ * Returns: TRUE if the EA should be stuffed
+ */
-+int
-+find_direct_eattr(struct gfs_inode *ip, uint64_t blkno, char *name,
-+ int name_len, int type, struct gfs_ea_location *location)
-+{
-+ int err;
-+ struct buffer_head *bh;
-+ struct gfs_sbd *sdp = ip->i_sbd;
-+ struct gfs_ea_header *curr, *prev = NULL;
-+
-+ err = gfs_dread(sdp, blkno, ip->i_gl, DIO_START | DIO_WAIT, &bh);
-+ if (err)
-+ goto out;
-+ gfs_metatype_check(sdp, bh, GFS_METATYPE_EA);
-+ curr =
-+ (struct gfs_ea_header *) ((bh)->b_data +
-+ sizeof (struct gfs_meta_header));
-+ if (curr->ea_type == GFS_EATYPE_UNUSED) {
-+ if (GFS_EA_IS_LAST(curr))
-+ goto out_drelse;
-+ GFS_ASSERT_INODE(GFS_EA_REC_LEN(curr), ip,);
-+ prev = curr;
-+ curr = GFS_EA_NEXT(curr);
-+ }
-+ if (type != curr->ea_type && ip->i_di.di_flags & GFS_DIF_EA_INDIRECT) {
-+ if (type == GFS_EATYPE_SYS)
-+ err = 1;
-+ goto out_drelse;
-+ }
-+ while (1) {
-+ GFS_ASSERT_INODE(GFS_EA_REC_LEN(curr), ip,);
-+
-+ if (type == curr->ea_type && name_len == curr->ea_name_len &&
-+ !memcmp(name, GFS_EA_NAME(curr), name_len)) {
-+ location->bh = bh;
-+ location->ea = curr;
-+ location->prev = prev;
-+ err = 1;
-+ goto out;
-+ }
-+ if (GFS_EA_IS_LAST(curr))
-+ break;
-+ prev = curr;
-+ curr = GFS_EA_NEXT(curr);
-+ }
+
-+ out_drelse:
-+ brelse(bh);
++static int
++ea_calc_size(struct gfs_sbd *sdp,
++ struct gfs_ea_request *er,
++ unsigned int *size)
++{
++ *size = GFS_EAREQ_SIZE_STUFFED(er);
++ if (*size <= sdp->sd_jbsize)
++ return TRUE;
+
-+ out:
-+ return err;
++ *size = GFS_EAREQ_SIZE_UNSTUFFED(sdp, er);
++ return FALSE;
+}
+
+/**
-+ * find_eattr - find a matching eattr
++ * gfs_ea_check_size -
++ * @ip:
++ * @er:
+ *
-+ * Returns: 1 if ea found, 0 if no ea found, -EXXX on error
++ * Returns: errno
+ */
++
+int
-+find_eattr(struct gfs_inode *ip, char *name, int name_len, int type,
-+ struct gfs_ea_location *location)
++gfs_ea_check_size(struct gfs_sbd *sdp, struct gfs_ea_request *er)
+{
-+ int err;
-+ struct buffer_head *bh;
-+ struct gfs_sbd *sdp = ip->i_sbd;
-+ uint64_t *eablk, *end;
++ unsigned int size;
+
-+ memset(location, 0, sizeof (struct gfs_ea_location));
-+
-+ if (ip->i_di.di_flags & GFS_DIF_EA_INDIRECT) {
-+ err =
-+ gfs_dread(sdp, ip->i_di.di_eattr, ip->i_gl,
-+ DIO_START | DIO_WAIT, &bh);
-+ if (err)
-+ goto fail;
-+ gfs_metatype_check(sdp, bh, GFS_METATYPE_IN);
-+ eablk =
-+ (uint64_t *) ((bh)->b_data + sizeof (struct gfs_indirect));
-+ end =
-+ eablk +
-+ ((sdp->sd_sb.sb_bsize - sizeof (struct gfs_indirect)) / 8);
-+ while (eablk < end && *eablk) {
-+ err =
-+ find_direct_eattr(ip, gfs64_to_cpu(*eablk), name,
-+ name_len, type, location);
-+ if (err || location->ea)
-+ break;
-+ eablk++;
-+ }
-+ brelse(bh);
-+ if (err < 0)
-+ goto fail;
-+ } else {
-+ err =
-+ find_direct_eattr(ip, ip->i_di.di_eattr, name, name_len,
-+ type, location);
-+ if (err < 0)
-+ goto fail;
-+ }
++ if (er->er_data_len > GFS_EA_MAX_DATA_LEN)
++ return -ERANGE;
+
-+ return (location->ea != NULL);
++ ea_calc_size(sdp, er, &size);
++ if (size > sdp->sd_jbsize)
++ return -ERANGE; /* This can only happen with 512 byte blocks */
+
-+ fail:
-+ return err;
++ return 0;
+}
+
-+static void
-+make_space(struct gfs_inode *ip, struct buffer_head *bh, uint32_t size,
-+ uint64_t blkno, struct gfs_ea_location *avail)
-+{
-+ struct gfs_sbd *sdp = ip->i_sbd;
-+ uint32_t free_size, avail_size;
-+ struct gfs_ea_header *ea, *new_ea;
-+ void *buf;
-+
-+ free_size = 0;
-+ avail_size = sdp->sd_sb.sb_bsize - sizeof (struct gfs_meta_header);
-+ ea = GFS_FIRST_EA(bh);
-+ GFS_ASSERT_INODE(GFS_EA_REC_LEN(ea), ip,);
-+ if (ea->ea_type == GFS_EATYPE_UNUSED) {
-+ free_size = GFS_EA_REC_LEN(ea);
-+ ea = GFS_EA_NEXT(ea);
-+ }
-+ while (free_size < size) {
-+ free_size += (GFS_EA_REC_LEN(ea) - GFS_EA_SIZE(ea));
-+ if (GFS_EA_IS_LAST(ea))
-+ break;
-+ ea = GFS_EA_NEXT(ea);
-+ }
-+ if (free_size < size)
-+ goto out;
-+ buf = gmalloc(avail_size);
-+
-+ free_size = avail_size;
-+ ea = GFS_FIRST_EA(bh);
-+ if (ea->ea_type == GFS_EATYPE_UNUSED)
-+ ea = GFS_EA_NEXT(ea);
-+ new_ea = (struct gfs_ea_header *) buf;
-+ new_ea->ea_flags = 0;
-+ new_ea->ea_rec_len = cpu_to_gfs32(size);
-+ new_ea->ea_num_ptrs = 0;
-+ new_ea->ea_type = GFS_EATYPE_UNUSED;
-+ free_size -= size;
-+ new_ea = GFS_EA_NEXT(new_ea);
-+ while (1) {
-+ memcpy(new_ea, ea, GFS_EA_SIZE(ea));
-+ if (GFS_EA_IS_LAST(ea))
-+ break;
-+ new_ea->ea_rec_len = cpu_to_gfs32(GFS_EA_SIZE(ea));
-+ free_size -= GFS_EA_SIZE(ea);
-+ ea = GFS_EA_NEXT(ea);
-+ new_ea = GFS_EA_NEXT(new_ea);
-+ }
-+ new_ea->ea_rec_len = cpu_to_gfs32(free_size);
-+ memcpy(GFS_FIRST_EA(bh), buf, avail_size);
-+ kfree(buf);
-+ avail->ea = GFS_FIRST_EA(bh);
-+ avail->prev = NULL;
-+ avail->bh = bh;
++typedef int (*ea_call_t) (struct gfs_inode *ip,
++ struct buffer_head *bh,
++ struct gfs_ea_header *ea,
++ struct gfs_ea_header *prev,
++ void *private);
+
-+ out:
-+ return;
-+}
++/**
++ * ea_foreach_i -
++ * @ip:
++ * @bh:
++ * @eabc:
++ * @data:
++ *
++ * Returns: errno
++ */
+
+static int
-+expand_to_indirect(struct gfs_inode *alloc_ip, struct gfs_inode *ip,
-+ struct buffer_head **bh)
++ea_foreach_i(struct gfs_inode *ip,
++ struct buffer_head *bh,
++ ea_call_t ea_call, void *data)
+{
-+ int err;
-+ struct gfs_sbd *sdp = ip->i_sbd;
-+ struct buffer_head *bh1 = NULL, *bh2 = NULL, *indbh = NULL;
-+ uint64_t blkno, *blkptr;
-+ uint32_t free_size, avail_size;
-+ struct gfs_ea_header *prev, *curr, *new_ea = NULL;
-+
-+ avail_size = sdp->sd_sb.sb_bsize - sizeof (struct gfs_meta_header);
-+ free_size = avail_size;
-+ ip->i_di.di_flags |= GFS_DIF_EA_INDIRECT;
-+ blkno = ip->i_di.di_eattr;
-+ err = gfs_metaalloc(alloc_ip, &ip->i_di.di_eattr);
-+ if (err)
-+ goto out;
-+ ip->i_di.di_blocks++;
-+ err = gfs_dread(sdp, ip->i_di.di_eattr, ip->i_gl, DIO_NEW | DIO_START |
-+ DIO_WAIT, &indbh);
-+ if (err)
-+ goto out;
-+ bh1 = *bh;
-+ *bh = indbh;
-+ gfs_trans_add_bh(ip->i_gl, indbh);
-+ gfs_metatype_set(sdp, indbh, GFS_METATYPE_IN, GFS_FORMAT_IN);
-+ memset((indbh)->b_data + sizeof (struct gfs_meta_header), 0,
-+ sdp->sd_sb.sb_bsize - sizeof (struct gfs_meta_header));
-+ blkptr = (uint64_t *) ((indbh)->b_data + sizeof (struct gfs_indirect));
-+ *blkptr++ = cpu_to_gfs64(blkno);
-+ prev = NULL;
-+ curr = GFS_FIRST_EA(bh1);
-+ while (curr->ea_type != GFS_EATYPE_USR) {
-+ if (GFS_EA_IS_LAST(curr))
-+ goto out_drelse1;
-+ free_size -= GFS_EA_REC_LEN(curr);
-+ prev = curr;
-+ curr = GFS_EA_NEXT(curr);
-+ }
-+ if (!prev || prev->ea_type == GFS_EATYPE_UNUSED)
-+ goto out_drelse1;
-+ gfs_trans_add_bh(ip->i_gl, bh1);
-+ prev->ea_rec_len = cpu_to_gfs32(GFS_EA_REC_LEN(prev) + free_size);
-+ prev->ea_flags |= GFS_EAFLAG_LAST;
-+ bh2 = alloc_eattr_blk(sdp, alloc_ip, ip, &blkno);
-+ if (!bh2) {
-+ err = -EIO;
-+ goto out_drelse1;
-+ }
-+ free_size = avail_size;
-+ new_ea = GFS_FIRST_EA(bh2);
-+ while (1) {
-+ memcpy(new_ea, curr, GFS_EA_SIZE(curr));
-+ if (GFS_EA_IS_LAST(curr))
-+ break;
-+ new_ea->ea_rec_len = cpu_to_gfs32(GFS_EA_SIZE(curr));
-+ free_size -= GFS_EA_SIZE(curr);
-+ curr = GFS_EA_NEXT(curr);
-+ new_ea = GFS_EA_NEXT(new_ea);
-+ }
-+ new_ea->ea_rec_len = cpu_to_gfs32(free_size);
-+ *blkptr = cpu_to_gfs64(blkno);
-+ brelse(bh2);
++ struct gfs_ea_header *ea, *prev = NULL;
++ int error = 0;
+
-+ out_drelse1:
-+ brelse(bh1);
++ gfs_metatype_check(ip->i_sbd, bh, GFS_METATYPE_EA);
+
-+ out:
-+ return err;
-+}
++ for (ea = GFS_EA_BH2FIRST(bh);; prev = ea, ea = GFS_EA2NEXT(ea)) {
++ GFS_ASSERT_INODE(GFS_EA_REC_LEN(ea), ip,);
++ GFS_ASSERT_INODE(bh->b_data <= (char *)ea &&
++ (char *)GFS_EA2NEXT(ea) <=
++ bh->b_data + bh->b_size, ip,);
++ GFS_ASSERT_INODE(GFS_EATYPE_VALID(ea->ea_type), ip,);
+
-+static void
-+find_direct_sys_space(struct gfs_inode *ip, int size, struct buffer_head *bh,
-+ struct gfs_ea_location *avail)
-+{
-+ struct gfs_ea_header *curr, *prev = NULL;
-+
-+ curr = GFS_FIRST_EA(bh);
-+ GFS_ASSERT_INODE(GFS_EA_REC_LEN(curr), ip,);
-+ if (curr->ea_type == GFS_EATYPE_UNUSED) {
-+ if (GFS_EA_REC_LEN(curr) >= size) {
-+ avail->ea = curr;
-+ avail->prev = NULL;
-+ avail->bh = bh;
-+ goto out;
-+ }
-+ prev = curr;
-+ curr = GFS_EA_NEXT(curr);
-+ }
-+ while (curr->ea_type == GFS_EATYPE_SYS) {
-+ GFS_ASSERT_INODE(GFS_EA_REC_LEN(curr), ip,);
-+ if (GFS_EA_REC_LEN(curr) >= GFS_EA_SIZE(curr) + size) {
-+ avail->ea = curr;
-+ avail->prev = prev;
-+ avail->bh = bh;
-+ goto out;
-+ }
-+ if (GFS_EA_IS_LAST(curr))
++ error = ea_call(ip, bh, ea, prev, data);
++ if (error)
++ return error;
++
++ if (GFS_EA_IS_LAST(ea)) {
++ GFS_ASSERT_INODE((char *)GFS_EA2NEXT(ea) ==
++ bh->b_data + bh->b_size,
++ ip,);
+ break;
-+ prev = curr;
-+ curr = GFS_EA_NEXT(curr);
++ }
+ }
-+ make_space(ip, bh, size, ip->i_di.di_eattr, avail);
+
-+ out:
-+ return;
++ return error;
+}
+
+/**
-+ * int find_indirect_space
-+ *
-+ * @space:
-+ * @blktype: returns the type of block GFS_EATYPE_...
++ * ea_foreach -
++ * @ip:
++ * @ea_call:
++ * @data:
+ *
-+ * returns 0 on success, -EXXX on failure
++ * Returns: errno
+ */
++
+static int
-+find_indirect_space(struct gfs_inode *ip, uint64_t blkno, int type,
-+ int size, struct gfs_ea_location *avail, int *blktype)
++ea_foreach(struct gfs_inode *ip,
++ ea_call_t ea_call,
++ void *data)
+{
-+ int err;
+ struct buffer_head *bh;
-+ struct gfs_sbd *sdp = ip->i_sbd;
-+ struct gfs_ea_header *curr, *prev = NULL;
++ int error;
+
-+ err = gfs_dread(sdp, blkno, ip->i_gl, DIO_START | DIO_WAIT, &bh);
-+ if (err)
-+ goto out;
-+ gfs_metatype_check(sdp, bh, GFS_METATYPE_EA);
-+ curr = GFS_FIRST_EA(bh);
-+ GFS_ASSERT_INODE(GFS_EA_REC_LEN(curr), ip,);
-+ if (curr->ea_type == GFS_EATYPE_UNUSED) {
-+ if (GFS_EA_IS_LAST(curr)) {
-+ avail->ea = curr;
-+ avail->prev = NULL;
-+ avail->bh = bh;
-+ *blktype = GFS_EATYPE_UNUSED;
-+ goto out;
-+ }
-+ prev = curr;
-+ curr = GFS_EA_NEXT(curr);
-+ }
-+ if (type != curr->ea_type) {
-+ *blktype = curr->ea_type;
-+ goto out_drelse;
-+ } else
-+ *blktype = type;
-+ if (prev && GFS_EA_REC_LEN(prev) >= size) {
-+ avail->ea = prev;
-+ avail->prev = NULL;
-+ avail->bh = bh;
-+ goto out;
-+ }
-+ while (1) {
-+ GFS_ASSERT_INODE(GFS_EA_REC_LEN(curr), ip,);
-+ if (GFS_EA_REC_LEN(curr) >= GFS_EA_SIZE(curr) + size) {
-+ avail->ea = curr;
-+ avail->prev = prev;
-+ avail->bh = bh;
-+ goto out;
++ error = gfs_dread(ip->i_sbd,
++ ip->i_di.di_eattr, ip->i_gl,
++ DIO_START | DIO_WAIT, &bh);
++ if (error)
++ return error;
++
++ if (!(ip->i_di.di_flags & GFS_DIF_EA_INDIRECT))
++ error = ea_foreach_i(ip, bh, ea_call, data);
++ else {
++ struct buffer_head *eabh;
++ uint64_t *eablk, *end;
++
++ gfs_metatype_check(ip->i_sbd, bh, GFS_METATYPE_IN);
++
++ eablk = (uint64_t *)(bh->b_data + sizeof(struct gfs_indirect));
++ end = eablk + ip->i_sbd->sd_inptrs;
++
++ for (; eablk < end; eablk++) {
++ uint64_t bn;
++
++ if (!*eablk)
++ break;
++ bn = gfs64_to_cpu(*eablk);
++
++ error = gfs_dread(ip->i_sbd, bn, ip->i_gl,
++ DIO_START | DIO_WAIT, &eabh);
++ if (error)
++ break;
++ error = ea_foreach_i(ip, eabh, ea_call, data);
++ brelse(eabh);
++ if (error)
++ break;
+ }
-+ if (GFS_EA_IS_LAST(curr))
-+ break;
-+ prev = curr;
-+ curr = GFS_EA_NEXT(curr);
+ }
+
-+ out_drelse:
+ brelse(bh);
+
-+ out:
-+ return err;
++ return error;
+}
+
++struct ea_find {
++ struct gfs_ea_request *ef_er;
++ struct gfs_ea_location *ef_el;
++};
++
++/**
++ * ea_find_i -
++ * @ip:
++ * @bh:
++ * @ea:
++ * @prev:
++ * @private:
++ *
++ * Returns: -errno on error, 1 if search is over,
++ * 0 if search should continue
++ */
++
+static int
-+find_indirect_sys_space(struct gfs_inode *alloc_ip, struct gfs_inode *ip,
-+ int size, struct buffer_head *bh,
-+ struct gfs_ea_location *avail)
++ea_find_i(struct gfs_inode *ip,
++ struct buffer_head *bh,
++ struct gfs_ea_header *ea,
++ struct gfs_ea_header *prev,
++ void *private)
+{
-+ int err = 0;
-+ struct gfs_sbd *sdp = ip->i_sbd;
-+ uint64_t *eablk, *end, *first_usr_blk = NULL;
-+ int blktype;
-+ uint64_t blkno;
++ struct ea_find *ef = (struct ea_find *)private;
++ struct gfs_ea_request *er = ef->ef_er;
+
-+ eablk = (uint64_t *) ((bh)->b_data + sizeof (struct gfs_indirect));
-+ end =
-+ eablk + ((sdp->sd_sb.sb_bsize - sizeof (struct gfs_indirect)) / 8);
++ if (ea->ea_type == GFS_EATYPE_UNUSED)
++ return 0;
+
-+ while (eablk < end && *eablk) {
-+ err =
-+ find_indirect_space(ip, gfs64_to_cpu(*eablk),
-+ GFS_EATYPE_SYS, size, avail, &blktype);
-+ if (err)
-+ goto out;
-+ if (blktype == GFS_EATYPE_USR && !first_usr_blk)
-+ first_usr_blk = eablk;
-+ if (avail->ea) {
-+ if (!first_usr_blk)
-+ goto out;
-+ gfs_trans_add_bh(ip->i_gl, bh);
-+ blkno = *eablk;
-+ *eablk = *first_usr_blk;
-+ *first_usr_blk = blkno;
-+ goto out;
++ if (ea->ea_type == er->er_type) {
++ if (ea->ea_name_len == er->er_name_len &&
++ !memcmp(GFS_EA2NAME(ea), er->er_name, ea->ea_name_len)) {
++ struct gfs_ea_location *el = ef->ef_el;
++ get_bh(bh);
++ el->el_bh = bh;
++ el->el_ea = ea;
++ el->el_prev = prev;
++ return 1;
+ }
-+ eablk++;
-+ }
-+ if (eablk >= end) {
-+ err = -ENOSPC;
-+ goto out;
-+ }
-+ avail->bh = alloc_eattr_blk(sdp, alloc_ip, ip, &blkno);
-+ if (!avail->bh) {
-+ err = -EIO;
-+ goto out;
+ }
-+ avail->ea = GFS_FIRST_EA(avail->bh);
-+ avail->prev = NULL;
-+ gfs_trans_add_bh(ip->i_gl, bh);
-+ if (first_usr_blk) {
-+ *eablk = *first_usr_blk;
-+ *first_usr_blk = cpu_to_gfs64(blkno);
-+ } else
-+ *eablk = cpu_to_gfs64(blkno);
+
-+ out:
-+ return err;
++#if 0
++ else if ((ip->i_di.di_flags & GFS_DIF_EA_PACKED) &&
++ er->er_type == GFS_EATYPE_SYS)
++ return 1;
++#endif
++
++ return 0;
+}
+
++/**
++ * gfs_ea_find - find a matching eattr
++ * @ip:
++ * @er:
++ * @el:
++ *
++ * Returns: errno
++ */
++
+int
-+find_sys_space(struct gfs_inode *alloc_ip, struct gfs_inode *ip, int size,
-+ struct gfs_ea_location *avail)
++gfs_ea_find(struct gfs_inode *ip,
++ struct gfs_ea_request *er,
++ struct gfs_ea_location *el)
+{
-+ int err;
-+ struct buffer_head *bh;
-+ struct gfs_sbd *sdp = ip->i_sbd;
++ struct ea_find ef;
++ int error;
+
-+ err =
-+ gfs_dread(sdp, ip->i_di.di_eattr, ip->i_gl, DIO_START | DIO_WAIT,
-+ &bh);
-+ if (err)
-+ goto out;
++ ef.ef_er = er;
++ ef.ef_el = el;
+
-+ if (ip->i_di.di_flags & GFS_DIF_EA_INDIRECT) {
-+ gfs_metatype_check(sdp, bh, GFS_METATYPE_IN);
-+ err = find_indirect_sys_space(alloc_ip, ip, size, bh, avail);
-+ } else {
-+ gfs_metatype_check(sdp, bh, GFS_METATYPE_EA);
-+ find_direct_sys_space(ip, size, bh, avail);
-+ if (!avail->ea) {
-+ err = expand_to_indirect(alloc_ip, ip, &bh);
-+ if (err)
-+ goto out_drelse;
-+ err =
-+ find_indirect_sys_space(alloc_ip, ip, size, bh,
-+ avail);
-+ }
-+ }
++ memset(el, 0, sizeof(struct gfs_ea_location));
+
-+ out_drelse:
-+ if (avail->bh != bh)
-+ brelse(bh);
++ error = ea_foreach(ip, ea_find_i, &ef);
++ if (error > 0)
++ return 0;
+
-+ out:
-+ return err;
++ return error;
+}
+
++/**
++ * ea_dealloc_unstuffed -
++ * @ip:
++ * @bh:
++ * @ea:
++ * @prev:
++ * @private:
++ *
++ * Take advantage of the fact that all unstuffed blocks are
++ * allocated from the same RG. But watch, this may not always
++ * be true.
++ *
++ * Returns: errno
++ */
++
+static int
-+get_blk_type(struct gfs_inode *ip, uint64_t blkno, int *blktype)
++ea_dealloc_unstuffed(struct gfs_inode *ip,
++ struct buffer_head *bh,
++ struct gfs_ea_header *ea,
++ struct gfs_ea_header *prev,
++ void *private)
+{
-+ int err = 0;
++ int *leave = (int *)private;
+ struct gfs_sbd *sdp = ip->i_sbd;
-+ struct buffer_head *bh;
-+ struct gfs_ea_header *ea;
-+
-+ err = gfs_dread(sdp, blkno, ip->i_gl, DIO_START | DIO_WAIT, &bh);
-+ if (err)
-+ goto out;
-+ gfs_metatype_check(sdp, bh, GFS_METATYPE_EA);
-+ ea = GFS_FIRST_EA(bh);
-+ GFS_ASSERT_INODE(GFS_EA_REC_LEN(ea), ip,);
-+ if (ea->ea_type == GFS_EATYPE_UNUSED) {
-+ if (GFS_EA_IS_LAST(ea)) {
-+ *blktype = GFS_EATYPE_UNUSED;
-+ goto out_drelse;
-+ }
-+ ea = GFS_EA_NEXT(ea);
-+ GFS_ASSERT_INODE(GFS_EA_REC_LEN(ea), ip,);
-+ }
-+ *blktype = ea->ea_type;
-+
-+ out_drelse:
-+ brelse(bh);
++ struct gfs_rgrpd *rgd;
++ struct gfs_holder rg_gh;
++ struct buffer_head *dibh;
++ uint64_t *dataptrs, bn = 0;
++ uint64_t bstart = 0;
++ unsigned int blen = 0;
++ unsigned int x;
++ int error;
+
-+ out:
-+ return err;
-+}
++ if (GFS_EA_IS_STUFFED(ea))
++ return 0;
+
-+static void
-+find_direct_usr_space(struct gfs_inode *ip, int size, struct buffer_head *bh,
-+ struct gfs_ea_location *avail)
-+{
-+ struct gfs_ea_header *curr, *prev = NULL;
-+
-+ curr = GFS_FIRST_EA(bh);
-+ GFS_ASSERT_INODE(GFS_EA_REC_LEN(curr), ip,);
-+ if (curr->ea_type == GFS_EATYPE_UNUSED) {
-+ if (GFS_EA_IS_LAST(curr)) {
-+ avail->ea = curr;
-+ avail->prev = NULL;
-+ avail->bh = bh;
-+ goto out;
-+ }
-+ prev = curr;
-+ curr = GFS_EA_NEXT(curr);
-+ if (curr->ea_type == GFS_EATYPE_USR
-+ && GFS_EA_REC_LEN(prev) >= size) {
-+ avail->ea = prev;
-+ avail->prev = NULL;
-+ avail->bh = bh;
-+ goto out;
-+ }
-+ }
-+ while (curr->ea_type != GFS_EATYPE_USR) {
-+ GFS_ASSERT_INODE(GFS_EA_REC_LEN(curr), ip,);
-+ if (GFS_EA_IS_LAST(curr))
++ dataptrs = GFS_EA2DATAPTRS(ea);
++ for (x = 0; x < ea->ea_num_ptrs; x++, dataptrs++)
++ if (*dataptrs) {
++ bn = gfs64_to_cpu(*dataptrs);
+ break;
-+ prev = curr;
-+ curr = GFS_EA_NEXT(curr);
-+ }
-+ while (1) {
-+ GFS_ASSERT_INODE(GFS_EA_REC_LEN(curr), ip,);
-+ if (GFS_EA_REC_LEN(curr) >= GFS_EA_SIZE(curr) + size) {
-+ avail->ea = curr;
-+ avail->prev = prev;
-+ avail->bh = bh;
-+ goto out;
+ }
-+ if (GFS_EA_IS_LAST(curr))
-+ break;
-+ prev = curr;
-+ curr = GFS_EA_NEXT(curr);
-+ }
++ if (!bn)
++ return 0;
+
-+ out:
-+ return;
-+}
++ rgd = gfs_blk2rgrpd(sdp, bn);
++ GFS_ASSERT_INODE(rgd, ip,);
+
-+static int
-+find_indirect_usr_space(struct gfs_inode *ip, int size, struct buffer_head *bh,
-+ struct gfs_ea_location *avail)
-+{
-+ int err = 0;
-+ struct gfs_sbd *sdp = ip->i_sbd;
-+ uint64_t *eablk, *end, *last_sys_blk = NULL, *first_usr_blk = NULL;
-+ int blktype;
-+ uint64_t blkno;
++ error = gfs_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, &rg_gh);
++ if (error)
++ return error;
+
-+ eablk = (uint64_t *) ((bh)->b_data + sizeof (struct gfs_indirect));
-+ end =
-+ eablk + ((sdp->sd_sb.sb_bsize - sizeof (struct gfs_indirect)) / 8);
++ error = gfs_trans_begin(sdp, 2 + rgd->rd_ri.ri_length, 1);
++ if (error)
++ goto out_gunlock;
+
-+ while (eablk < end && *eablk) {
-+ err =
-+ find_indirect_space(ip, gfs64_to_cpu(*eablk),
-+ GFS_EATYPE_USR, size, avail, &blktype);
-+ if (err)
-+ goto out;
-+ if (blktype == GFS_EATYPE_SYS)
-+ last_sys_blk = eablk;
-+ if (blktype == GFS_EATYPE_USR && !first_usr_blk)
-+ first_usr_blk = eablk;
-+ if (avail->ea) {
-+ if (first_usr_blk)
-+ goto out;
-+ first_usr_blk = eablk + 1;
-+ while (first_usr_blk < end && *first_usr_blk) {
-+ err =
-+ get_blk_type(ip,
-+ gfs64_to_cpu(*first_usr_blk),
-+ &blktype);
-+ if (blktype == GFS_EATYPE_SYS)
-+ last_sys_blk = first_usr_blk;
-+ if (blktype == GFS_EATYPE_USR)
-+ break;
-+ first_usr_blk++;
-+ }
-+ if (last_sys_blk > eablk) {
-+ gfs_trans_add_bh(ip->i_gl, bh);
-+ blkno = *eablk;
-+ *eablk = *last_sys_blk;
-+ *last_sys_blk = blkno;
-+ }
-+ goto out;
++ gfs_trans_add_bh(ip->i_gl, bh);
++
++ dataptrs = GFS_EA2DATAPTRS(ea);
++ for (x = 0; x < ea->ea_num_ptrs; x++, dataptrs++) {
++ if (!*dataptrs)
++ break;
++ bn = gfs64_to_cpu(*dataptrs);
++
++ if (bstart + blen == bn)
++ blen++;
++ else {
++ if (bstart)
++ gfs_metafree(ip, bstart, blen);
++ bstart = bn;
++ blen = 1;
+ }
-+ eablk++;
-+ }
+
-+ if (eablk >= end) {
-+ err = -ENOSPC;
-+ goto out;
-+ }
-+ avail->bh = alloc_eattr_blk(sdp, ip, ip, &blkno);
-+ if (!avail->bh) {
-+ err = -EIO;
-+ goto out;
++ *dataptrs = 0;
++ GFS_ASSERT_INODE(ip->i_di.di_blocks, ip,);
++ ip->i_di.di_blocks--;
+ }
-+ avail->ea = GFS_FIRST_EA(avail->bh);
-+ avail->prev = NULL;
-+ gfs_trans_add_bh(ip->i_gl, bh);
-+ *eablk = cpu_to_gfs64(blkno);
-+
-+ out:
-+ return err;
-+}
++ if (bstart)
++ gfs_metafree(ip, bstart, blen);
+
-+static int
-+find_usr_space(struct gfs_inode *ip, int size, struct gfs_ea_location *avail)
-+{
-+ int err;
-+ struct buffer_head *bh;
-+ struct gfs_sbd *sdp = ip->i_sbd;
++ if (prev && !leave) {
++ uint32_t len;
+
-+ err =
-+ gfs_dread(sdp, ip->i_di.di_eattr, ip->i_gl, DIO_START | DIO_WAIT,
-+ &bh);
-+ if (err)
-+ goto out;
++ len = GFS_EA_REC_LEN(prev) + GFS_EA_REC_LEN(ea);
++ prev->ea_rec_len = cpu_to_gfs32(len);
+
-+ if (ip->i_di.di_flags & GFS_DIF_EA_INDIRECT) {
-+ gfs_metatype_check(sdp, bh, GFS_METATYPE_IN);
-+ err = find_indirect_usr_space(ip, size, bh, avail);
++ if (GFS_EA_IS_LAST(ea))
++ prev->ea_flags |= GFS_EAFLAG_LAST;
+ } else {
-+ gfs_metatype_check(sdp, bh, GFS_METATYPE_EA);
-+ find_direct_usr_space(ip, size, bh, avail);
-+ if (!avail->ea) {
-+ err = expand_to_indirect(ip, ip, &bh);
-+ if (err)
-+ goto out_drelse;
-+ err = find_indirect_usr_space(ip, size, bh, avail);
-+ }
++ ea->ea_type = GFS_EATYPE_UNUSED;
++ ea->ea_num_ptrs = 0;
+ }
+
-+ out_drelse:
-+ if (avail->bh != bh)
-+ brelse(bh);
++ error = gfs_get_inode_buffer(ip, &dibh);
++ if (!error) {
++ ip->i_di.di_ctime = get_seconds();
++ gfs_trans_add_bh(ip->i_gl, dibh);
++ gfs_dinode_out(&ip->i_di, dibh->b_data);
++ brelse(dibh);
++ }
+
-+ out:
-+ return err;
++ gfs_trans_end(sdp);
++
++ out_gunlock:
++ gfs_glock_dq_uninit(&rg_gh);
++
++ return error;
+}
+
++/**
++ * ea_remove_unstuffed -
++ * @ip:
++ * @bh:
++ * @ea:
++ * @prev:
++ * @leave:
++ *
++ * Returns: errno
++ */
++
+static int
-+find_space(struct gfs_inode *ip, int size, int type,
-+ struct gfs_ea_location *avail)
++ea_remove_unstuffed(struct gfs_inode *ip,
++ struct buffer_head *bh,
++ struct gfs_ea_header *ea,
++ struct gfs_ea_header *prev,
++ int leave)
+{
-+ int err;
++ struct gfs_alloc *al;
++ int error;
+
-+ memset(avail, 0, sizeof (struct gfs_ea_location));
++ al = gfs_alloc_get(ip);
+
-+ if (type == GFS_EATYPE_SYS)
-+ err = find_sys_space(ip, ip, size, avail);
-+ else
-+ err = find_usr_space(ip, size, avail);
++ error = gfs_quota_hold_m(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
++ if (error)
++ goto out_alloc;
+
-+ return err;
-+}
++ error = gfs_rindex_hold(ip->i_sbd, &al->al_ri_gh);
++ if (error)
++ goto out_quota;
+
-+static int
-+can_replace_in_block(struct gfs_inode *ip, int size,
-+ struct gfs_ea_location found, struct gfs_ea_header **space)
++ error = ea_dealloc_unstuffed(ip,
++ bh, ea, prev,
++ (leave) ? &error : NULL);
++
++ gfs_glock_dq_uninit(&al->al_ri_gh);
++
++ out_quota:
++ gfs_quota_unhold_m(ip);
++
++ out_alloc:
++ gfs_alloc_put(ip);
++
++ return error;
++}
++
++/**************************************************************************************************/
++
++/**
++ * gfs_ea_repack_i -
++ * @ip:
++ *
++ * Returns: errno
++ */
++
++int
++gfs_ea_repack_i(struct gfs_inode *ip)
+{
-+ struct gfs_ea_header *curr, *prev = NULL;
++ return -ENOSYS;
++}
+
-+ *space = NULL;
-+ curr = GFS_FIRST_EA(found.bh);
-+ GFS_ASSERT_INODE(GFS_EA_REC_LEN(curr), ip,);
-+ if (curr->ea_type == GFS_EATYPE_UNUSED) {
-+ if (GFS_EA_REC_LEN(curr) >= size) {
-+ *space = curr;
-+ goto out;
-+ }
-+ prev = curr;
-+ curr = GFS_EA_NEXT(curr);
-+ }
-+ while (1) {
-+ GFS_ASSERT_INODE(GFS_EA_REC_LEN(curr), ip,);
-+ if (curr == found.ea) {
-+ /*
-+ * See if there will be enough space after the old version of the eattr
-+ * is deleted.
-+ */
-+ if (prev) {
-+ if (prev->ea_type == GFS_EATYPE_UNUSED) {
-+ if (GFS_EA_REC_LEN(prev) +
-+ GFS_EA_REC_LEN(curr) >= size) {
-+ *space = prev;
-+ goto out;
-+ }
-+ } else if (GFS_EA_REC_LEN(prev) +
-+ GFS_EA_REC_LEN(curr) >=
-+ GFS_EA_SIZE(prev) + size) {
-+ *space = prev;
-+ goto out;
-+ }
-+ } else if (GFS_EA_REC_LEN(curr) >= size) {
-+ *space = curr;
-+ goto out;
-+ }
-+ } else if (GFS_EA_REC_LEN(curr) >= GFS_EA_SIZE(curr) + size) {
-+ *space = curr;
-+ goto out;
++/**
++ * gfs_ea_repack -
++ * @ip:
++ *
++ * Returns: errno
++ */
++
++int gfs_ea_repack(struct gfs_inode *ip)
++{
++ struct gfs_holder gh;
++ int error;
++
++ error = gfs_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
++ if (error)
++ return error;
++
++ /* Some sort of permissions checking would be nice */
++
++ error = gfs_ea_repack_i(ip);
++
++ gfs_glock_dq_uninit(&gh);
++
++ return error;
++}
++
++struct ea_list {
++ struct gfs_ea_request *ei_er;
++ unsigned int ei_size;
++};
++
++/**
++ * ea_list_i -
++ * @ip:
++ * @bh:
++ * @ea:
++ * @prev:
++ * @private:
++ *
++ * Returns: errno
++ */
++
++static int
++ea_list_i(struct gfs_inode *ip,
++ struct buffer_head *bh,
++ struct gfs_ea_header *ea,
++ struct gfs_ea_header *prev,
++ void *private)
++{
++ struct ea_list *ei = (struct ea_list *)private;
++ struct gfs_ea_request *er = ei->ei_er;
++ unsigned int ea_size = GFS_EA_STRLEN(ea);
++
++ if (ea->ea_type == GFS_EATYPE_UNUSED)
++ return 0;
++
++ if (er->er_data_len) {
++ char *prefix;
++ unsigned int l;
++ char c = 0;
++
++ if (ei->ei_size + ea_size > er->er_data_len)
++ return -ERANGE;
++
++ if (ea->ea_type == GFS_EATYPE_USR) {
++ prefix = "user.";
++ l = 5;
++ } else {
++ prefix = "system.";
++ l = 7;
+ }
-+ if (GFS_EA_IS_LAST(curr))
-+ break;
-+ prev = curr;
-+ curr = GFS_EA_NEXT(curr);
++
++ memcpy(er->er_data + ei->ei_size,
++ prefix, l);
++ memcpy(er->er_data + ei->ei_size + l,
++ GFS_EA2NAME(ea),
++ ea->ea_name_len);
++ memcpy(er->er_data + ei->ei_size +
++ ea_size - 1,
++ &c, 1);
+ }
+
-+ out:
-+ return (*space != NULL);
++ ei->ei_size += ea_size;
++
++ return 0;
+}
+
+/**
-+ * read_unstuffed - actually copies the unstuffed data into the
-+ * request buffer
++ * gfs_ea_list -
++ * @ip:
++ * @er:
++ *
++ * Returns: actual size of data on success, -errno on error
+ */
+
+int
-+read_unstuffed(void *dest, struct gfs_inode *ip, struct gfs_sbd *sdp,
-+ struct gfs_ea_header *ea, uint32_t avail_size,
-+ gfs_ea_copy_fn_t copy_fn)
++gfs_ea_list(struct gfs_inode *ip, struct gfs_ea_request *er)
+{
-+ struct buffer_head *bh[66]; /* This is the maximum number of data ptrs possible */
-+ int err = 0;
-+ int max = GFS_EADATA_NUM_PTRS(GFS_EA_DATA_LEN(ea), avail_size);
-+ int i, j, left = GFS_EA_DATA_LEN(ea);
-+ char *outptr, *buf;
-+ uint64_t *indptr = GFS_EA_DATA_PTRS(ea);
++ struct gfs_holder i_gh;
++ int error;
+
-+ for (i = 0; i < max; i++) {
-+ err =
-+ gfs_dread(sdp, gfs64_to_cpu(*indptr), ip->i_gl, DIO_START,
-+ &bh[i]);
-+ indptr++;
-+ if (err) {
-+ for (j = 0; j < i; j++)
-+ brelse(bh[j]);
-+ goto out;
-+ }
++ if (!er->er_data || !er->er_data_len) {
++ er->er_data = NULL;
++ er->er_data_len = 0;
+ }
+
-+ outptr = dest;
++ error = gfs_glock_nq_init(ip->i_gl,
++ LM_ST_SHARED, LM_FLAG_ANY,
++ &i_gh);
++ if (error)
++ return error;
++
++ if (ip->i_di.di_eattr) {
++ struct ea_list ei = { .ei_er = er, .ei_size = 0 };
++
++ error = ea_foreach(ip, ea_list_i, &ei);
++ if (!error)
++ error = ei.ei_size;
++ }
++
++ gfs_glock_dq_uninit(&i_gh);
++
++ return error;
++}
++
++/**
++ * ea_get_unstuffed - actually copies the unstuffed data into the
++ * request buffer
++ * @ip:
++ * @ea:
++ * @data:
++ *
++ * Returns: errno
++ */
++
++static int
++ea_get_unstuffed(struct gfs_inode *ip, struct gfs_ea_header *ea,
++ char *data)
++{
++ struct gfs_sbd *sdp = ip->i_sbd;
++ struct buffer_head **bh;
++ unsigned int amount = GFS_EA_DATA_LEN(ea);
++ unsigned int nptrs = DIV_RU(amount, sdp->sd_jbsize);
++ uint64_t *dataptrs = GFS_EA2DATAPTRS(ea);
++ unsigned int x;
++ int error = 0;
+
-+ for (i = 0; i < max; i++) {
-+ err = gfs_dreread(sdp, bh[i], DIO_WAIT);
-+ if (err) {
-+ for (j = i; j < max; j++)
-+ brelse(bh[j]);
++ bh = kmalloc(nptrs * sizeof(struct buffer_head *), GFP_KERNEL);
++ if (!bh)
++ return -ENOMEM;
++
++ for (x = 0; x < nptrs; x++) {
++ error = gfs_dread(sdp, gfs64_to_cpu(*dataptrs), ip->i_gl,
++ DIO_START, bh + x);
++ if (error) {
++ while (x--)
++ brelse(bh[x]);
+ goto out;
+ }
-+ gfs_metatype_check(sdp, bh[i], GFS_METATYPE_EA);
-+ buf = (bh[i])->b_data + sizeof (struct gfs_meta_header);
-+ err =
-+ copy_fn(outptr, buf,
-+ (avail_size > left) ? left : avail_size);
-+ if (err) {
-+ for (j = i; j < max; j++)
-+ brelse(bh[j]);
++ dataptrs++;
++ }
++
++ for (x = 0; x < nptrs; x++) {
++ error = gfs_dreread(sdp, bh[x], DIO_WAIT);
++ if (error) {
++ for (; x < nptrs; x++)
++ brelse(bh[x]);
+ goto out;
+ }
-+ left -= avail_size;
-+ outptr += avail_size;
-+ brelse(bh[i]);
++
++ gfs_metatype_check2(sdp, bh[x], GFS_METATYPE_ED, GFS_METATYPE_EA);
++
++ memcpy(data,
++ bh[x]->b_data + sizeof(struct gfs_meta_header),
++ (sdp->sd_jbsize > amount) ? amount : sdp->sd_jbsize);
++
++ amount -= sdp->sd_jbsize;
++ data += sdp->sd_jbsize;
++
++ brelse(bh[x]);
+ }
+
-+ out:
++ out:
++ kfree(bh);
+
-+ return err;
++ return error;
+}
+
+/**
-+ * functionname - summary
-+ * @param1: description
-+ * @param2: description
-+ * @param3: description
++ * gfs_ea_get_copy -
++ * @ip:
++ * @el:
++ * @data:
+ *
-+ * Function description
++ * Returns: errno
++ */
++
++int
++gfs_ea_get_copy(struct gfs_inode *ip,
++ struct gfs_ea_location *el,
++ char *data)
++{
++ if (GFS_EA_IS_STUFFED(el->el_ea)) {
++ memcpy(data,
++ GFS_EA2DATA(el->el_ea),
++ GFS_EA_DATA_LEN(el->el_ea));
++ return 0;
++ } else
++ return ea_get_unstuffed(ip, el->el_ea,
++ data);
++}
++
++/**
++ * gfs_ea_get_i -
++ * @ip:
++ * @er:
+ *
-+ * Returns: what is returned
++ * Returns: actual size of data on success, -errno on error
+ */
++
+int
-+get_ea(struct gfs_sbd *sdp, struct gfs_inode *ip, struct gfs_eaget_io *req,
-+ gfs_ea_copy_fn_t copy_fn)
++gfs_ea_get_i(struct gfs_inode *ip, struct gfs_ea_request *er)
+{
-+ int err;
-+ struct gfs_ea_location location;
-+ uint32_t avail_size;
++ struct gfs_ea_location el;
++ int error;
+
-+ avail_size = sdp->sd_sb.sb_bsize - sizeof (struct gfs_meta_header);
++ if (!ip->i_di.di_eattr)
++ return -ENODATA;
+
-+ err = find_eattr(ip, req->eg_name, req->eg_name_len, req->eg_type,
-+ &location);
-+ if (err != 1) {
-+ if (err == 0)
-+ err = -ENODATA;
-+ goto out;
-+ }
++ error = gfs_ea_find(ip, er, &el);
++ if (error)
++ return error;
++ if (!el.el_ea)
++ return -ENODATA;
+
-+ if (req->eg_data_len) {
-+ if (req->eg_data_len < GFS_EA_DATA_LEN(location.ea))
-+ err = -ERANGE;
-+ else if (GFS_EA_IS_UNSTUFFED(location.ea))
-+ err =
-+ read_unstuffed(req->eg_data, ip, sdp, location.ea,
-+ avail_size, copy_fn);
++ if (er->er_data_len) {
++ if (GFS_EA_DATA_LEN(el.el_ea) > er->er_data_len)
++ error = -ERANGE;
+ else
-+ err = copy_fn(req->eg_data, GFS_EA_DATA(location.ea),
-+ GFS_EA_DATA_LEN(location.ea));
-+ if (!err)
-+ err = GFS_EA_DATA_LEN(location.ea);
-+ } else
-+ err = GFS_EA_DATA_LEN(location.ea);
++ error = gfs_ea_get_copy(ip, &el, er->er_data);
++ }
++ if (!error)
++ error = GFS_EA_DATA_LEN(el.el_ea);
+
-+ brelse(location.bh);
++ brelse(el.el_bh);
+
-+ out:
-+ return err;
++ return error;
+}
+
+/**
-+ * functionname - summary
-+ * @param1: description
-+ * @param2: description
-+ * @param3: description
-+ *
-+ * Function description
++ * gfs_ea_get -
++ * @ip:
++ * @er:
+ *
-+ * Returns: what is returned
++ * Returns: actual size of data on success, -errno on error
+ */
+
-+struct gfs_ea_header *
-+prep_ea(struct gfs_ea_header *ea)
++int
++gfs_ea_get(struct gfs_inode *ip, struct gfs_ea_request *er)
+{
-+ struct gfs_ea_header *new = ea;
++ struct gfs_holder i_gh;
++ int error;
+
-+ if (ea->ea_type == GFS_EATYPE_UNUSED) {
-+ if (GFS_EA_IS_LAST(ea))
-+ ea->ea_flags = GFS_EAFLAG_LAST;
-+ else
-+ ea->ea_flags = 0;
-+ } else {
-+ new = GFS_EA_FREESPACE(ea);
-+ new->ea_rec_len =
-+ cpu_to_gfs32(GFS_EA_REC_LEN(ea) - GFS_EA_SIZE(ea));
-+ ea->ea_rec_len = cpu_to_gfs32(GFS_EA_SIZE(ea));
-+ if (GFS_EA_IS_LAST(ea)) {
-+ ea->ea_flags &= ~GFS_EAFLAG_LAST;
-+ new->ea_flags = GFS_EAFLAG_LAST;
-+ } else
-+ new->ea_flags = 0;
++ if (!er->er_name_len ||
++ er->er_name_len > GFS_EA_MAX_NAME_LEN)
++ return -EINVAL;
++ if (!er->er_data || !er->er_data_len) {
++ er->er_data = NULL;
++ er->er_data_len = 0;
+ }
+
-+ return new;
++ error = gfs_glock_nq_init(ip->i_gl,
++ LM_ST_SHARED, LM_FLAG_ANY,
++ &i_gh);
++ if (error)
++ return error;
++
++ error = gfs_ea_ops[er->er_type]->eo_get(ip, er);
++
++ gfs_glock_dq_uninit(&i_gh);
++
++ return error;
+}
+
+/**
-+ * replace_ea - replaces the existing data with the request data
++ * ea_alloc_blk - allocates a new block for extended attributes.
++ * @ip: A pointer to the inode that's getting extended attributes
++ * @bhp:
++ *
++ * Returns: errno
+ */
-+int
-+replace_ea(struct gfs_sbd *sdp, struct gfs_inode *ip, struct gfs_ea_header *ea,
-+ struct gfs_easet_io *req)
++
++static int
++ea_alloc_blk(struct gfs_inode *ip,
++ struct buffer_head **bhp)
+{
-+ int err = 0;
-+ int i;
-+ uint32_t copy_size, data_left = req->es_data_len;
-+ struct buffer_head *bh;
-+ uint64_t *datablk = GFS_EA_DATA_PTRS(ea);
-+ const char *dataptr = req->es_data;
-+ uint32_t avail_size =
-+ sdp->sd_sb.sb_bsize - sizeof (struct gfs_meta_header);
-+
-+ ea->ea_data_len = cpu_to_gfs32(req->es_data_len);
-+ if (!GFS_EA_IS_UNSTUFFED(ea))
-+ memcpy(GFS_EA_DATA(ea), req->es_data, req->es_data_len);
-+ else {
-+ for (i = 0; i < ea->ea_num_ptrs && data_left > 0; i++) {
-+ err = gfs_dread(sdp, gfs64_to_cpu(*datablk), ip->i_gl,
-+ DIO_START | DIO_WAIT, &bh);
-+ if (err)
-+ goto out;
-+ gfs_trans_add_bh(ip->i_gl, bh);
-+ gfs_metatype_check(sdp, bh, GFS_METATYPE_EA);
-+ copy_size =
-+ (data_left > avail_size) ? avail_size : data_left;
-+ memcpy((bh)->b_data + sizeof (struct gfs_meta_header),
-+ dataptr, copy_size);
-+ dataptr += copy_size;
-+ data_left -= copy_size;
-+ datablk++;
-+ brelse(bh);
-+ }
-+ GFS_ASSERT_INODE(data_left == 0, ip,
-+ printk
-+ ("req->es_data_len = %u, ea->ea_num_ptrs = %d\n",
-+ req->es_data_len, ea->ea_num_ptrs);
-+ );
-+ }
++ struct gfs_sbd *sdp = ip->i_sbd;
++ struct gfs_ea_header *ea;
++ uint64_t block;
++ int error;
+
-+ out:
-+ return err;
++ error = gfs_metaalloc(ip, &block);
++ if (error)
++ return error;
++
++ error = gfs_dread(sdp, block, ip->i_gl,
++ DIO_NEW | DIO_START | DIO_WAIT, bhp);
++ if (error)
++ return error;
++
++ gfs_trans_add_bh(ip->i_gl, *bhp);
++ gfs_metatype_set(sdp, *bhp, GFS_METATYPE_EA, GFS_FORMAT_EA);
++
++ ea = GFS_EA_BH2FIRST(*bhp);
++ ea->ea_rec_len = cpu_to_gfs32(sdp->sd_jbsize);
++ ea->ea_type = GFS_EATYPE_UNUSED;
++ ea->ea_flags = GFS_EAFLAG_LAST;
++ ea->ea_num_ptrs = 0;
++
++ ip->i_di.di_blocks++;
++
++ return 0;
+}
+
+/**
-+ * write_ea - writes the request info to an ea, creating new blocks if
++ * ea_write - writes the request info to an ea, creating new blocks if
+ * necessary
-+ *
-+ * @sdp: superblock pointer
-+ * @alloc_ip: inode that has the blocks reserved for allocation
+ * @ip: inode that is being modified
+ * @ea: the location of the new ea in a block
-+ * @req: the write request
++ * @er: the write request
+ *
+ * Note: does not update ea_rec_len or the GFS_EAFLAG_LAST bin of ea_flags
+ *
-+ * returns : 0 on success, -EXXX on error
++ * returns : errno
+ */
+
-+int
-+write_ea(struct gfs_sbd *sdp, struct gfs_inode *alloc_ip, struct gfs_inode *ip,
-+ struct gfs_ea_header *ea, struct gfs_easet_io *req)
++static int
++ea_write(struct gfs_inode *ip,
++ struct gfs_ea_header *ea,
++ struct gfs_ea_request *er)
+{
-+ int err = 0;
-+ uint64_t *blkptr;
-+ uint64_t temp;
-+ const char *dataptr;
-+ uint32_t data_left, copy;
-+ uint32_t avail_size =
-+ sdp->sd_sb.sb_bsize - sizeof (struct gfs_meta_header);
-+ int i;
-+ struct buffer_head *bh = NULL;
++ struct gfs_sbd *sdp = ip->i_sbd;
+
-+ ea->ea_data_len = cpu_to_gfs32(req->es_data_len);
-+ ea->ea_name_len = req->es_name_len;
-+ ea->ea_type = req->es_type;
++ ea->ea_data_len = cpu_to_gfs32(er->er_data_len);
++ ea->ea_name_len = er->er_name_len;
++ ea->ea_type = er->er_type;
+ ea->ea_pad = 0;
+
-+ memcpy(GFS_EA_NAME(ea), req->es_name, req->es_name_len);
++ memcpy(GFS_EA2NAME(ea), er->er_name, er->er_name_len);
+
-+ if (GFS_EAREQ_IS_STUFFED(req, avail_size)) {
++ if (GFS_EAREQ_SIZE_STUFFED(er) <= sdp->sd_jbsize) {
+ ea->ea_num_ptrs = 0;
-+ memcpy(GFS_EA_DATA(ea), req->es_data, req->es_data_len);
++ memcpy(GFS_EA2DATA(ea), er->er_data, er->er_data_len);
+ } else {
-+ blkptr = GFS_EA_DATA_PTRS(ea);
-+ dataptr = req->es_data;
-+ data_left = req->es_data_len;
-+ ea->ea_num_ptrs =
-+ GFS_EADATA_NUM_PTRS(req->es_data_len, avail_size);
-+
-+ for (i = 0; i < ea->ea_num_ptrs; i++) {
-+ if ((bh =
-+ alloc_eattr_blk(sdp, alloc_ip, ip,
-+ &temp)) == NULL) {
-+ err = -EIO;
-+ goto out;
-+ }
-+ copy =
-+ (data_left > avail_size) ? avail_size : data_left;
-+ memcpy((bh)->b_data + sizeof (struct gfs_meta_header),
-+ dataptr, copy);
-+ *blkptr = cpu_to_gfs64(temp);
-+ dataptr += copy;
-+ data_left -= copy;
-+ blkptr++;
++ uint64_t *dataptr = GFS_EA2DATAPTRS(ea);
++ const char *data = er->er_data;
++ unsigned int data_len = er->er_data_len;
++ unsigned int copy;
++ unsigned int x;
++
++ ea->ea_num_ptrs = DIV_RU(er->er_data_len, sdp->sd_jbsize);
++ for (x = 0; x < ea->ea_num_ptrs; x++) {
++ struct buffer_head *bh;
++ uint64_t block;
++ int error;
++
++ error = gfs_metaalloc(ip, &block);
++ if (error)
++ return error;
++
++ error = gfs_dread(sdp, block, ip->i_gl,
++ DIO_NEW | DIO_START | DIO_WAIT, &bh);
++ if (error)
++ return error;
++
++ gfs_trans_add_bh(ip->i_gl, bh);
++ gfs_metatype_set(sdp, bh, GFS_METATYPE_ED, GFS_FORMAT_ED);
++ ip->i_di.di_blocks++;
++
++ copy = (data_len > sdp->sd_jbsize) ? sdp->sd_jbsize : data_len;
++ memcpy(bh->b_data + sizeof(struct gfs_meta_header),
++ data,
++ copy);
++
++ *dataptr++ = cpu_to_gfs64((uint64_t)bh->b_blocknr);
++ data += copy;
++ data_len -= copy;
++
+ brelse(bh);
+ }
+
-+ GFS_ASSERT_INODE(!data_left, ip,);
++ GFS_ASSERT_INODE(!data_len, ip,);
+ }
+
-+ out:
-+
-+ return err;
++ return 0;
+}
+
++typedef int (*ea_skeleton_call_t) (struct gfs_inode *ip,
++ struct gfs_ea_request *er,
++ void *private);
+/**
-+ * erase_ea_data_ptrs - deallocate all the unstuffed data blocks pointed to
-+ * ea records in this block
-+ * @sdp: the superblock
-+ * @ip: the inode
-+ * @blk: the block to check for data pointers
-+ *
++ * ea_alloc_skeleton -
++ * @ip:
++ * @er:
++ * @blks:
++ * @skeleton_call:
++ * @private:
+ *
-+ * Returns: 0 on success, -EXXX on failure
++ * Returns: errno
+ */
+
+static int
-+erase_ea_data_ptrs(struct gfs_sbd *sdp, struct gfs_inode *ip,
-+ struct buffer_head *dibh, uint64_t blk)
++ea_alloc_skeleton(struct gfs_inode *ip, struct gfs_ea_request *er,
++ unsigned int blks,
++ ea_skeleton_call_t skeleton_call, void *private)
+{
-+ struct gfs_holder rgd_gh;
-+ int i, err = 0;
-+ uint64_t *datablk;
-+ struct buffer_head *eabh;
-+ char *buf;
-+ struct gfs_ea_header *ea;
-+ struct gfs_rgrpd *rgd = NULL;
++ struct gfs_alloc *al;
++ struct buffer_head *dibh;
++ int error;
+
-+ err = gfs_dread(sdp, blk, ip->i_gl, DIO_WAIT | DIO_START, &eabh);
-+ if (err)
-+ goto fail;
++ al = gfs_alloc_get(ip);
+
-+ gfs_metatype_check(sdp, eabh, GFS_METATYPE_EA);
-+ buf = (eabh)->b_data + sizeof (struct gfs_meta_header);
-+ ea = (struct gfs_ea_header *) buf;
++ error = gfs_quota_lock_m(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
++ if (error)
++ goto out;
+
-+ while (1) {
-+ GFS_ASSERT_INODE(GFS_EA_REC_LEN(ea), ip,);
-+ if (GFS_EA_IS_UNSTUFFED(ea)) {
-+ datablk = GFS_EA_DATA_PTRS(ea);
-+ rgd = gfs_blk2rgrpd(sdp, gfs64_to_cpu(*datablk));
-+ GFS_ASSERT_INODE(rgd, ip,
-+ printk("block = %" PRIu64 "\n",
-+ gfs64_to_cpu(*datablk)););
-+ err =
-+ gfs_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0,
-+ &rgd_gh);
-+ if (err)
-+ goto fail_eabh;
-+ /* Trans may require:
-+ One block for the RG header. One block for each ea data block. One
-+ One block for the dinode. One block for the current ea block.
-+ One block for a quote change.
-+ FIXME */
-+ err =
-+ gfs_trans_begin(sdp,
-+ 3 + ea->ea_num_ptrs, 1);
-+ if (err)
-+ goto fail_glock_rg;
-+ gfs_trans_add_bh(ip->i_gl, dibh);
-+ for (i = 0; i < ea->ea_num_ptrs; i++, datablk++) {
-+ gfs_metafree(ip, gfs64_to_cpu(*datablk), 1);
-+ ip->i_di.di_blocks--;
-+ }
-+ ea->ea_num_ptrs = 0;
-+ gfs_trans_add_bh(ip->i_gl, eabh);
-+ gfs_dinode_out(&ip->i_di, (dibh)->b_data);
-+ gfs_trans_end(sdp);
-+ gfs_glock_dq_uninit(&rgd_gh);
++ error = gfs_quota_check(ip, ip->i_di.di_uid, ip->i_di.di_gid);
++ if (error)
++ goto out_gunlock_q;
++
++ al->al_requested_meta = blks;
++
++ error = gfs_inplace_reserve(ip);
++ if (error)
++ goto out_gunlock_q;
++
++ /* Trans may require:
++ A modified dinode, multiple EA metadata blocks, and all blocks for a RG
++ bitmap */
++
++ error = gfs_trans_begin(ip->i_sbd,
++ 1 + blks + al->al_rgd->rd_ri.ri_length, 1);
++ if (error)
++ goto out_ipres;
++
++ error = skeleton_call(ip, er, private);
++ if (error)
++ goto out_end_trans;
++
++ error = gfs_get_inode_buffer(ip, &dibh);
++ if (!error) {
++ if (er->er_mode) {
++ ip->i_vnode->i_mode = er->er_mode;
++ gfs_inode_attr_out(ip);
+ }
-+ if (GFS_EA_IS_LAST(ea))
-+ break;
-+ ea = GFS_EA_NEXT(ea);
++ ip->i_di.di_ctime = get_seconds();
++ gfs_trans_add_bh(ip->i_gl, dibh);
++ gfs_dinode_out(&ip->i_di, dibh->b_data);
++ brelse(dibh);
+ }
+
-+ brelse(eabh);
++ out_end_trans:
++ gfs_trans_end(ip->i_sbd);
+
-+ return err;
++ out_ipres:
++ gfs_inplace_release(ip);
+
-+ fail_glock_rg:
-+ gfs_glock_dq_uninit(&rgd_gh);
++ out_gunlock_q:
++ gfs_quota_unlock_m(ip);
+
-+ fail_eabh:
-+ brelse(eabh);
++ out:
++ gfs_alloc_put(ip);
+
-+ fail:
-+ return err;
++ return error;
+}
+
+/**
-+ * gfs_ea_dealloc - deallocate the extended attribute fork
-+ * @ip: the inode
++ * ea_init_i - initializes a new eattr block
++ * @ip:
++ * @er:
++ * @private:
+ *
-+ * Returns: 0 on success, -EXXX on failure
++ * Returns: errno
+ */
+
-+int
-+gfs_ea_dealloc(struct gfs_inode *ip)
++static int
++ea_init_i(struct gfs_inode *ip,
++ struct gfs_ea_request *er,
++ void *private)
+{
-+ struct gfs_holder ri_gh, rgd_gh;
-+ int err = 0;
-+ struct gfs_sbd *sdp = ip->i_sbd;
-+ struct buffer_head *dibh, *indbh = NULL;
-+ uint64_t *startblk, *eablk, *end, *next;
-+ uint64_t temp;
-+ int num_blks;
-+ struct gfs_rgrpd *rgd = NULL;
++ struct buffer_head *bh;
++ int error;
+
-+ if (!ip->i_di.di_eattr)
-+ goto out;
++ error = ea_alloc_blk(ip, &bh);
++ if (error)
++ return error;
+
-+ gfs_alloc_get(ip);
++ ip->i_di.di_eattr = bh->b_blocknr;
++ error = ea_write(ip, GFS_EA_BH2FIRST(bh), er);
+
-+ err = gfs_quota_hold_m(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
-+ if (err)
-+ goto out_alloc;
++ brelse(bh);
+
-+ err = gfs_rindex_hold(sdp, &ri_gh);
-+ if (err)
-+ goto out_unhold_q;
++ return error;
++}
+
-+ err = gfs_get_inode_buffer(ip, &dibh);
-+ if (err)
-+ goto out_rindex_release;
++/**
++ * ea_init - initializes a new eattr block
++ * @ip:
++ * @er:
++ *
++ * Returns: errno
++ */
+
-+ if (ip->i_di.di_flags & GFS_DIF_EA_INDIRECT) {
-+ err =
-+ gfs_dread(sdp, ip->i_di.di_eattr, ip->i_gl,
-+ DIO_WAIT | DIO_START, &indbh);
-+ if (err)
-+ goto out_dibh;
++static int
++ea_init(struct gfs_inode *ip, struct gfs_ea_request *er)
++{
++ unsigned int jbsize = ip->i_sbd->sd_jbsize;
++ unsigned int blks = 1;
+
-+ gfs_metatype_check(sdp, indbh, GFS_METATYPE_IN);
++ if (GFS_EAREQ_SIZE_STUFFED(er) > jbsize)
++ blks += DIV_RU(er->er_data_len, jbsize);
+
-+ eablk =
-+ (uint64_t *) ((indbh)->b_data +
-+ sizeof (struct gfs_indirect));
-+ end =
-+ eablk +
-+ ((sdp->sd_sb.sb_bsize - sizeof (struct gfs_indirect)) / 8);
-+
-+ while (*eablk && eablk < end) {
-+ err =
-+ erase_ea_data_ptrs(sdp, ip, dibh,
-+ gfs64_to_cpu(*eablk));
-+ if (err)
-+ goto out_indbh;
-+ eablk++;
-+ }
-+
-+ startblk = eablk - 1;
-+ end =
-+ (uint64_t *) ((indbh)->b_data +
-+ sizeof (struct gfs_indirect));
-+
-+ while (startblk >= end) {
-+ rgd = gfs_blk2rgrpd(sdp, gfs64_to_cpu(*startblk));
-+ GFS_ASSERT_INODE(rgd, ip,);
-+
-+ num_blks = 1;
-+ next = eablk = startblk - 1;
-+
-+ while (eablk >= end) {
-+ if (rgd ==
-+ gfs_blk2rgrpd(sdp, gfs64_to_cpu(*eablk))) {
-+ if (eablk != next) {
-+ temp = *eablk;
-+ *eablk = *next;
-+ *next = temp;
-+ }
-+ num_blks++;
-+ next--;
-+ }
-+ eablk--;
-+ }
++ return ea_alloc_skeleton(ip, er,
++ blks,
++ ea_init_i, NULL);
++}
+
-+ err =
-+ gfs_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0,
-+ &rgd_gh);
-+ if (err)
-+ goto out_rindex_release;
++/**
++ * ea_split_ea -
++ * @ea:
++ *
++ * Returns: the new ea
++ */
+
-+ /* Trans may require:
-+ One block for the RG header. One block for each block from this
-+ resource group. One block for the indirect ea block,
-+ One block for the quote change */
++static struct gfs_ea_header *
++ea_split_ea(struct gfs_ea_header *ea)
++{
++ uint32_t ea_size = GFS_EA_SIZE(ea);
++ struct gfs_ea_header *new = (struct gfs_ea_header *)((char *)ea + ea_size);
++ uint32_t new_size = GFS_EA_REC_LEN(ea) - ea_size;
++ int last = ea->ea_flags & GFS_EAFLAG_LAST;
+
-+ err =
-+ gfs_trans_begin(sdp, 3 + num_blks,
-+ 1);
-+ if (err)
-+ goto out_gunlock_rg;
++ ea->ea_rec_len = cpu_to_gfs32(ea_size);
++ ea->ea_flags ^= last;
+
-+ gfs_trans_add_bh(ip->i_gl, dibh);
++ new->ea_rec_len = cpu_to_gfs32(new_size);
++ new->ea_flags = last;
+
-+ while (startblk > next) {
-+ gfs_metafree(ip, gfs64_to_cpu(*startblk), 1);
-+ ip->i_di.di_blocks--;
-+ *startblk = 0;
-+ startblk--;
-+ }
++ return new;
++}
+
-+ gfs_trans_add_bh(ip->i_gl, indbh);
-+ gfs_dinode_out(&ip->i_di, (dibh)->b_data);
++/**
++ * ea_set_remove_stuffed -
++ * @ip:
++ * @ea:
++ *
++ */
+
-+ gfs_trans_end(sdp);
++static void
++ea_set_remove_stuffed(struct gfs_inode *ip, struct gfs_ea_location *el)
++{
++ struct gfs_ea_header *ea = el->el_ea;
++ struct gfs_ea_header *prev = el->el_prev;
++ uint32_t len;
+
-+ gfs_glock_dq_uninit(&rgd_gh);
-+ }
++ gfs_trans_add_bh(ip->i_gl, el->el_bh);
+
-+ brelse(indbh);
-+ indbh = NULL;
-+ } else {
-+ err = erase_ea_data_ptrs(sdp, ip, dibh, ip->i_di.di_eattr);
-+ if (err)
-+ goto out_rindex_release;
++ if (!prev || !GFS_EA_IS_STUFFED(ea)) {
++ ea->ea_type = GFS_EATYPE_UNUSED;
++ return;
++ } else if (GFS_EA2NEXT(prev) != ea) {
++ prev = GFS_EA2NEXT(prev);
++ GFS_ASSERT_INODE(GFS_EA2NEXT(prev) == ea, ip,);
+ }
+
-+ rgd = gfs_blk2rgrpd(sdp, ip->i_di.di_eattr);
-+ GFS_ASSERT_INODE(rgd, ip,
-+ printk("block = %" PRIu64 "\n", ip->i_di.di_eattr);
-+ );
++ len = GFS_EA_REC_LEN(prev) + GFS_EA_REC_LEN(ea);
++ prev->ea_rec_len = cpu_to_gfs32(len);
+
-+ err = gfs_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, &rgd_gh);
-+ if (err)
-+ goto out_rindex_release;
++ if (GFS_EA_IS_LAST(ea))
++ prev->ea_flags |= GFS_EAFLAG_LAST;
++}
+
-+ err = gfs_trans_begin(sdp, 3, 1);
-+ if (err)
-+ goto out_gunlock_rg;
++struct ea_set {
++ int ea_split;
+
-+ gfs_metafree(ip, ip->i_di.di_eattr, 1);
++ struct gfs_ea_request *es_er;
++ struct gfs_ea_location *es_el;
+
-+ ip->i_di.di_blocks--;
-+ ip->i_di.di_eattr = 0;
++ struct buffer_head *es_bh;
++ struct gfs_ea_header *es_ea;
++};
+
-+ gfs_trans_add_bh(ip->i_gl, dibh);
-+ gfs_dinode_out(&ip->i_di, (dibh)->b_data);
++/**
++ * ea_set_simple_noalloc -
++ * @ip:
++ * @ea:
++ * @es:
++ *
++ * Returns: errno
++ */
+
-+ gfs_trans_end(sdp);
++static int
++ea_set_simple_noalloc(struct gfs_inode *ip,
++ struct buffer_head *bh,
++ struct gfs_ea_header *ea,
++ struct ea_set *es)
++{
++ struct gfs_ea_request *er = es->es_er;
++ int error;
+
-+ out_gunlock_rg:
-+ gfs_glock_dq_uninit(&rgd_gh);
++ error = gfs_trans_begin(ip->i_sbd, 3, 0);
++ if (error)
++ return error;
+
-+ out_indbh:
-+ if (indbh)
-+ brelse(indbh);
++ gfs_trans_add_bh(ip->i_gl, bh);
+
-+ out_dibh:
-+ brelse(dibh);
++ if (es->ea_split)
++ ea = ea_split_ea(ea);
+
-+ out_rindex_release:
-+ gfs_glock_dq_uninit(&ri_gh);
++ ea_write(ip, ea, er);
+
-+ out_unhold_q:
-+ gfs_quota_unhold_m(ip);
++ if (es->es_el)
++ ea_set_remove_stuffed(ip, es->es_el);
+
-+ out_alloc:
-+ gfs_alloc_put(ip);
++ {
++ struct buffer_head *dibh;
++ error = gfs_get_inode_buffer(ip, &dibh);
++ if (!error) {
++ if (er->er_mode) {
++ ip->i_vnode->i_mode = er->er_mode;
++ gfs_inode_attr_out(ip);
++ }
++ ip->i_di.di_ctime = get_seconds();
++ gfs_trans_add_bh(ip->i_gl, dibh);
++ gfs_dinode_out(&ip->i_di, dibh->b_data);
++ brelse(dibh);
++ }
++ }
+
-+ out:
++ gfs_trans_end(ip->i_sbd);
+
-+ return err;
++ return error;
+}
+
+/**
-+ * functionname - summary
-+ * @param1: description
-+ * @param2: description
-+ * @param3: description
-+ *
-+ * Function description
++ * ea_set_simple_alloc -
++ * @ip:
++ * @er:
++ * @private:
+ *
-+ * Returns: what is returned
++ * Returns: errno
+ */
+
-+static void
-+remove_ea(struct gfs_inode *ip, struct gfs_ea_header *ea,
-+ struct gfs_ea_header *prev)
++static int
++ea_set_simple_alloc(struct gfs_inode *ip,
++ struct gfs_ea_request *er,
++ void *private)
+{
-+ uint64_t *datablk;
-+ int i;
-+
-+ if (GFS_EA_IS_UNSTUFFED(ea)) {
-+ datablk = GFS_EA_DATA_PTRS(ea);
-+ for (i = 0; i < ea->ea_num_ptrs; i++, datablk++) {
-+ gfs_metafree(ip, gfs64_to_cpu(*datablk), 1);
-+ ip->i_di.di_blocks--;
-+ }
-+ }
++ struct ea_set *es = (struct ea_set *)private;
++ struct gfs_ea_header *ea = es->es_ea;
++ int error;
+
-+ ea->ea_type = GFS_EATYPE_UNUSED;
-+ ea->ea_num_ptrs = 0;
++ gfs_trans_add_bh(ip->i_gl, es->es_bh);
+
-+ if (prev && prev != ea) {
-+ prev->ea_rec_len =
-+ cpu_to_gfs32(GFS_EA_REC_LEN(prev) + GFS_EA_REC_LEN(ea));
-+ if (GFS_EA_IS_LAST(ea))
-+ prev->ea_flags |= GFS_EAFLAG_LAST;
-+ }
-+}
++ if (es->ea_split)
++ ea = ea_split_ea(ea);
+
-+int
-+init_new_inode_eattr(struct gfs_inode *dip, struct gfs_inode *ip,
-+ struct gfs_easet_io *req)
-+{
-+ int err;
-+ struct buffer_head *bh;
-+ struct gfs_sbd *sdp = ip->i_sbd;
-+ struct gfs_ea_header *ea;
-+
-+ err = gfs_metaalloc(dip, &ip->i_di.di_eattr);
-+ if (err)
-+ goto out;
++ error = ea_write(ip, ea, er);
++ if (error)
++ return error;
+
-+ err = gfs_dread(sdp, ip->i_di.di_eattr, ip->i_gl,
-+ DIO_NEW | DIO_START | DIO_WAIT, &bh);
-+ if (err)
-+ goto out;
++ if (es->es_el)
++ ea_set_remove_stuffed(ip, es->es_el);
+
-+ gfs_metatype_set(sdp, bh, GFS_METATYPE_EA, GFS_FORMAT_EA);
++ return 0;
++}
+
-+ ip->i_di.di_blocks++;
++/**
++ * ea_set_simple -
++ * @ip:
++ * @el:
++ *
++ * Returns: errno
++ */
+
-+ ea = GFS_FIRST_EA(bh);
-+ ea->ea_flags = GFS_EAFLAG_LAST;
-+ ea->ea_rec_len =
-+ cpu_to_gfs32(sdp->sd_sb.sb_bsize - sizeof (struct gfs_meta_header));
-+ ea->ea_num_ptrs = 0;
-+ ea->ea_type = GFS_EATYPE_UNUSED;
-+ err = write_ea(sdp, dip, ip, ea, req);
-+ if (err)
-+ goto out_drelse;
++static int
++ea_set_simple(struct gfs_inode *ip,
++ struct buffer_head *bh,
++ struct gfs_ea_header *ea,
++ struct gfs_ea_header *prev,
++ void *private)
++{
++ struct ea_set *es = (struct ea_set *)private;
++ unsigned int size;
++ int stuffed;
++ int error;
+
-+ gfs_trans_add_bh(ip->i_gl, bh);
++ stuffed = ea_calc_size(ip->i_sbd, es->es_er, &size);
+
-+ out_drelse:
-+ brelse(bh);
++ if (ea->ea_type == GFS_EATYPE_UNUSED) {
++ if (GFS_EA_REC_LEN(ea) < size)
++ return 0;
++ if (!GFS_EA_IS_STUFFED(ea)) {
++ error = ea_remove_unstuffed(ip, bh, ea, prev, TRUE);
++ if (error)
++ return error;
++ }
++ es->ea_split = FALSE;
++ } else if (GFS_EA_REC_LEN(ea) - GFS_EA_SIZE(ea) >= size)
++ es->ea_split = TRUE;
++ else
++ return 0;
+
-+ out:
-+ return err;
-+}
++ if (stuffed) {
++ error = ea_set_simple_noalloc(ip, bh, ea, es);
++ if (error)
++ return error;
++ } else {
++ unsigned int blks;
+
-+int
-+do_init_eattr(struct gfs_sbd *sdp, struct gfs_inode *ip,
-+ struct gfs_easet_io *req)
-+{
-+ int err;
-+ struct buffer_head *bh;
-+ struct gfs_ea_header *ea;
++ es->es_bh = bh;
++ es->es_ea = ea;
++ blks = 2 + DIV_RU(es->es_er->er_data_len,
++ ip->i_sbd->sd_jbsize);
+
-+ bh = alloc_eattr_blk(sdp, ip, ip, &ip->i_di.di_eattr);
-+ if (bh) {
-+ ea = GFS_FIRST_EA(bh);
-+ err = write_ea(sdp, ip, ip, ea, req);
-+ brelse(bh);
-+ } else
-+ err = -EIO;
++ error = ea_alloc_skeleton(ip, es->es_er,
++ blks,
++ ea_set_simple_alloc, es);
++ if (error)
++ return error;
++ }
+
-+ return err;
++ return 1;
+}
+
+/**
-+ * init_eattr - initializes a new eattr block
++ * ea_set_block -
++ * @ip:
++ * @er:
++ * @private:
++ *
++ * Returns: errno
+ */
+
+static int
-+init_eattr(struct gfs_sbd *sdp, struct gfs_inode *ip, struct gfs_easet_io *req)
++ea_set_block(struct gfs_inode *ip,
++ struct gfs_ea_request *er,
++ void *private)
+{
-+ int err = 0;
-+ struct gfs_alloc *al;
-+ uint32_t ea_metablks;
-+ struct buffer_head *dibh;
-+ struct posix_acl *acl = NULL;
-+ uint32_t avail_size =
-+ sdp->sd_sb.sb_bsize - sizeof (struct gfs_meta_header);
-+
-+ ea_metablks =
-+ GFS_EAREQ_IS_STUFFED(req,
-+ avail_size) ? 1 : (1 +
-+ GFS_EADATA_NUM_PTRS(req->
-+ es_data_len,
-+ avail_size));
-+
-+ if (IS_ACCESS_ACL(req->es_name, req->es_name_len)){
-+ acl = posix_acl_from_xattr(req->es_data, req->es_data_len);
-+ if (IS_ERR(acl)) {
-+ err = PTR_ERR(acl);
-+ goto out;
-+ }
-+ }
++ struct gfs_sbd *sdp = ip->i_sbd;
++ struct buffer_head *indbh, *newbh;
++ uint64_t *eablk;
++ int error;
+
-+ al = gfs_alloc_get(ip);
++ if (ip->i_di.di_flags & GFS_DIF_EA_INDIRECT) {
++ uint64_t *end;
+
-+ err = gfs_quota_lock_m(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
-+ if (err)
-+ goto out_alloc;
++ error = gfs_dread(sdp,
++ ip->i_di.di_eattr, ip->i_gl,
++ DIO_START | DIO_WAIT, &indbh);
++ if (error)
++ return error;
+
-+ al->al_requested_meta = ea_metablks;
++ gfs_metatype_check(sdp, indbh, GFS_METATYPE_IN);
+
-+ err = gfs_inplace_reserve(ip);
-+ if (err)
-+ goto out_gunlock_q;
++ eablk = (uint64_t *)(indbh->b_data + sizeof(struct gfs_indirect));
++ end = eablk + sdp->sd_inptrs;
+
-+ err = gfs_quota_check(ip, ip->i_di.di_uid, ip->i_di.di_gid);
-+ if (err)
-+ goto out_ipres;
++ for (; eablk < end; eablk++)
++ if (!*eablk)
++ break;
+
-+ err = gfs_get_inode_buffer(ip, &dibh);
-+ if (err)
-+ goto out_ipres;
++ if (eablk == end) {
++ brelse(indbh);
++ return -ENOSPC;
++ }
+
-+ /* Trans may require:
-+ A modified dinode, multiple EA metadata blocks, and all blocks for a RG
-+ bitmap */
++ gfs_trans_add_bh(ip->i_gl, indbh);
++ } else {
++ uint64_t blk;
+
-+ err =
-+ gfs_trans_begin(sdp,
-+ 1 + ea_metablks + al->al_rgd->rd_ri.ri_length, 1);
-+ if (err)
-+ goto out_dibh;
++ error = gfs_metaalloc(ip, &blk);
++ if (error)
++ return error;
+
-+ err = do_init_eattr(sdp, ip, req);
-+ if (err)
-+ goto out_end_trans;
++ error = gfs_dread(sdp,
++ blk, ip->i_gl,
++ DIO_NEW | DIO_START | DIO_WAIT, &indbh);
++ if (error)
++ return error;
+
-+ if (acl)
-+ gfs_acl_set_mode(ip, acl);
++ gfs_trans_add_bh(ip->i_gl, indbh);
++ gfs_metatype_set(sdp, indbh, GFS_METATYPE_IN, GFS_FORMAT_IN);
++ gfs_buffer_clear_tail(indbh, sizeof(struct gfs_meta_header));
+
-+ gfs_trans_add_bh(ip->i_gl, dibh);
-+ gfs_dinode_out(&ip->i_di, (dibh)->b_data);
++ eablk = (uint64_t *)(indbh->b_data + sizeof(struct gfs_indirect));
++ *eablk = cpu_to_gfs64(ip->i_di.di_eattr);
++ ip->i_di.di_eattr = blk;
++ ip->i_di.di_flags |= GFS_DIF_EA_INDIRECT;
++ ip->i_di.di_blocks++;
+
-+ out_end_trans:
-+ gfs_trans_end(sdp);
++ eablk++;
++ }
+
-+ out_dibh:
-+ brelse(dibh);
++ error = ea_alloc_blk(ip, &newbh);
++ if (error)
++ goto out;
+
-+ out_ipres:
-+ gfs_inplace_release(ip);
++ *eablk = cpu_to_gfs64((uint64_t)newbh->b_blocknr);
++ error = ea_write(ip, GFS_EA_BH2FIRST(newbh), er);
++ brelse(newbh);
++ if (error)
++ goto out;
+
-+ out_gunlock_q:
-+ gfs_quota_unlock_m(ip);
++ if (private)
++ ea_set_remove_stuffed(ip, (struct gfs_ea_location *)private);
+
-+ out_alloc:
-+ gfs_alloc_put(ip);
-+ posix_acl_release(acl);
++ out:
++ brelse(indbh);
+
-+ out:
-+ return err;
++ return error;
+}
+
+/**
-+ * alloc_eattr_blk - allocates a new block for extended attributes.
-+ * @sdp: A pointer to the superblock
-+ * @alloc_ip: A pointer to the inode that has reserved the blocks for
-+ * allocation
-+ * @ip: A pointer to the inode that's getting extended attributes
-+ * @block: the block allocated
++ * ea_set_i -
++ * @ip:
++ * @el:
+ *
-+ * Returns: the buffer head on success, NULL on failure
++ * Returns: errno
+ */
+
-+static struct buffer_head *
-+alloc_eattr_blk(struct gfs_sbd *sdp, struct gfs_inode *alloc_ip,
-+ struct gfs_inode *ip, uint64_t * block)
++static int
++ea_set_i(struct gfs_inode *ip,
++ struct gfs_ea_request *er,
++ struct gfs_ea_location *el)
+{
-+ int err = 0;
-+ struct buffer_head *bh = NULL;
-+ struct gfs_ea_header *ea;
-+
-+ err = gfs_metaalloc(alloc_ip, block);
-+ if (err)
-+ goto out;
-+
-+ err =
-+ gfs_dread(sdp, *block, ip->i_gl, DIO_NEW | DIO_START | DIO_WAIT, &bh);
-+ if (err)
-+ goto out;
++ {
++ struct ea_set es;
++ int error;
+
-+ gfs_metatype_set(sdp, bh, GFS_METATYPE_EA, GFS_FORMAT_EA);
++ memset(&es, 0, sizeof(struct ea_set));
++ es.es_er = er;
++ es.es_el = el;
+
-+ ip->i_di.di_blocks++;
++ error = ea_foreach(ip, ea_set_simple, &es);
++ if (error > 0)
++ return 0;
++ if (error)
++ return error;
++ }
++ {
++ unsigned int blks = 2;
++ if (!(ip->i_di.di_flags & GFS_DIF_EA_INDIRECT))
++ blks++;
++ if (GFS_EAREQ_SIZE_STUFFED(er) > ip->i_sbd->sd_jbsize)
++ blks += DIV_RU(er->er_data_len,
++ ip->i_sbd->sd_jbsize);
+
-+ ea = GFS_FIRST_EA(bh);
-+ ea->ea_flags = GFS_EAFLAG_LAST;
-+ ea->ea_rec_len =
-+ cpu_to_gfs32(sdp->sd_sb.sb_bsize - sizeof (struct gfs_meta_header));
-+ ea->ea_num_ptrs = 0;
-+ ea->ea_type = GFS_EATYPE_UNUSED;
++ return ea_alloc_skeleton(ip, er, blks, ea_set_block, el);
++ }
++}
+
-+ gfs_trans_add_bh(ip->i_gl, bh);
++/**
++ * ea_set_remove_unstuffed -
++ * @ip:
++ * @el:
++ *
++ * Returns: errno
++ */
+
-+ out:
++static int
++ea_set_remove_unstuffed(struct gfs_inode *ip, struct gfs_ea_location *el)
++{
++ if (el->el_prev && GFS_EA2NEXT(el->el_prev) != el->el_ea) {
++ el->el_prev = GFS_EA2NEXT(el->el_prev);
++ GFS_ASSERT_INODE(GFS_EA2NEXT(el->el_prev) == el->el_ea, ip,);
++ }
+
-+ return bh;
++ return ea_remove_unstuffed(ip, el->el_bh, el->el_ea, el->el_prev, FALSE);
+}
+
+/**
-+ * functionname - summary
-+ * @param1: description
-+ * @param2: description
-+ * @param3: description
-+ *
-+ * Function description
++ * gfs_ea_set_i -
++ * @ip:
++ * @er:
+ *
-+ * Returns: what is returned
++ * Returns: errno
+ */
+
-+static int
-+list_direct_ea(struct gfs_sbd *sdp, struct gfs_inode *ip,
-+ struct buffer_head *bh, struct gfs_eaget_io *req,
-+ gfs_ea_copy_fn_t copy_fn, uint32_t * size)
++int
++gfs_ea_set_i(struct gfs_inode *ip, struct gfs_ea_request *er)
+{
-+ int err = 0;
-+ struct gfs_ea_header *ea;
-+ char buf[256];
-+ char *ptr;
-+
-+ gfs_metatype_check(sdp, bh, GFS_METATYPE_EA);
++ struct gfs_ea_location el;
++ int error;
+
-+ ea = (struct gfs_ea_header *) ((bh)->b_data +
-+ sizeof (struct gfs_meta_header));
-+ if (ea->ea_type == GFS_EATYPE_UNUSED) {
-+ if (GFS_EA_IS_LAST(ea))
-+ goto out;
-+ else
-+ ea = GFS_EA_NEXT(ea);
++ if (!ip->i_di.di_eattr) {
++ if (er->er_flags & XATTR_REPLACE)
++ return -ENODATA;
++ return ea_init(ip, er);
+ }
+
-+ while (1) {
-+ GFS_ASSERT_INODE(GFS_EA_REC_LEN(ea), ip,);
-+
-+ if (req->eg_data_len) {
-+ if (*size > req->eg_data_len) {
-+ err = -ERANGE;
-+ break;
-+ }
-+ ptr = buf;
++ error = gfs_ea_find(ip, er, &el);
++ if (error)
++ return error;
+
-+ GFS_ASSERT_INODE(GFS_EATYPE_VALID(ea->ea_type), ip,);
-+ if (ea->ea_type == GFS_EATYPE_USR) {
-+ memcpy(ptr, "user.", 5);
-+ ptr += 5;
-+ } else {
-+ memcpy(ptr, "system.", 7);
-+ ptr += 7;
-+ }
-+ memcpy(ptr, GFS_EA_NAME(ea), ea->ea_name_len);
-+ ptr += ea->ea_name_len;
-+ *ptr = 0;
-+ err =
-+ copy_fn(req->eg_data + *size, buf,
-+ GFS_EA_STRLEN(ea));
-+ if (err)
-+ break;
++ if (el.el_ea) {
++ if (IS_APPEND(ip->i_vnode)) {
++ brelse(el.el_bh);
++ return -EPERM;
+ }
+
-+ *size = *size + GFS_EA_STRLEN(ea);
++ error = -EEXIST;
++ if (!(er->er_flags & XATTR_CREATE)) {
++ int unstuffed = !GFS_EA_IS_STUFFED(el.el_ea);
++ error = ea_set_i(ip, er, &el);
++ if (!error && unstuffed)
++ ea_set_remove_unstuffed(ip, &el);
++ }
+
-+ if (GFS_EA_IS_LAST(ea))
-+ break;
-+ ea = GFS_EA_NEXT(ea);
++ brelse(el.el_bh);
++ } else {
++ error = -ENODATA;
++ if (!(er->er_flags & XATTR_REPLACE))
++ error = ea_set_i(ip, er, NULL);
+ }
+
-+ out:
-+
-+ return err;
++ return error;
+}
+
+/**
-+ * functionname - summary
-+ * @param1: description
-+ * @param2: description
-+ * @param3: description
-+ *
-+ * Function description
++ * gfs_ea_set -
++ * @ip:
++ * @er:
+ *
-+ * Returns: what is returned
++ * Returns: errno
+ */
+
-+static int
-+list_ea(struct gfs_sbd *sdp, struct gfs_inode *ip, struct gfs_eaget_io *req,
-+ gfs_ea_copy_fn_t copy_fn)
++int
++gfs_ea_set(struct gfs_inode *ip, struct gfs_ea_request *er)
+{
-+ int err;
-+ struct buffer_head *bh, *eabh;
-+ uint64_t *eablk, *end;
-+ uint32_t size = 0;
-+
-+ err =
-+ gfs_dread(sdp, ip->i_di.di_eattr, ip->i_gl, DIO_START | DIO_WAIT,
-+ &bh);
-+ if (err)
-+ goto out;
++ struct gfs_holder i_gh;
++ int error;
+
-+ if (ip->i_di.di_flags & GFS_DIF_EA_INDIRECT) {
-+ gfs_metatype_check(sdp, bh, GFS_METATYPE_IN);
-+ eablk =
-+ (uint64_t *) ((bh)->b_data + sizeof (struct gfs_indirect));
-+ end =
-+ eablk +
-+ ((sdp->sd_sb.sb_bsize - sizeof (struct gfs_indirect)) / 8);
-+
-+ while (*eablk && eablk < end) {
-+ err =
-+ gfs_dread(sdp, gfs64_to_cpu(*eablk), ip->i_gl,
-+ DIO_START | DIO_WAIT, &eabh);
-+ if (err)
-+ goto out_drelse;
-+ err = list_direct_ea(sdp, ip, eabh, req, copy_fn, &size);
-+ brelse(eabh);
-+ if (err)
-+ goto out_drelse;
-+ eablk++;
-+ }
-+ } else {
-+ err = list_direct_ea(sdp, ip, bh, req, copy_fn, &size);
-+ if (err)
-+ goto out_drelse;
++ if (!er->er_name_len ||
++ er->er_name_len > GFS_EA_MAX_NAME_LEN)
++ return -EINVAL;
++ if (!er->er_data || !er->er_data_len) {
++ er->er_data = NULL;
++ er->er_data_len = 0;
+ }
++ error = gfs_ea_check_size(ip->i_sbd, er);
++ if (error)
++ return error;
+
-+ if (!err)
-+ err = size;
++ error = gfs_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &i_gh);
++ if (error)
++ return error;
+
-+ out_drelse:
-+ brelse(bh);
++ if (IS_IMMUTABLE(ip->i_vnode))
++ error = -EPERM;
++ else
++ error = gfs_ea_ops[er->er_type]->eo_set(ip, er);
+
-+ out:
++ gfs_glock_dq_uninit(&i_gh);
+
-+ return err;
++ return error;
+}
+
+/**
-+ * gfs_get_eattr - read an extended attribute, or a list of ea names
-+ * @sdp: pointer to the superblock
-+ * @ip: pointer to the inode for the target file
-+ * @req: the request information
-+ * @copy_fn: the function to use to do the actual copying
++ * ea_remove_stuffed -
++ * @ip:
++ * @el:
++ *
+ *
-+ * Returns: actual size of data on success, -EXXX on error
++ * Returns: errno
+ */
-+int
-+gfs_get_eattr(struct gfs_sbd *sdp, struct gfs_inode *ip,
-+ struct gfs_eaget_io *req, gfs_ea_copy_fn_t copy_fn)
++
++static int
++ea_remove_stuffed(struct gfs_inode *ip,
++ struct gfs_ea_location *el)
+{
-+ struct gfs_holder i_gh;
-+ int err;
++ struct gfs_ea_header *ea = el->el_ea;
++ struct gfs_ea_header *prev = el->el_prev;
++ int error;
+
-+ if (req->eg_name) {
-+ err = gfs_ea_read_permission(req, ip);
-+ if (err)
-+ goto out;
-+ }
++ error = gfs_trans_begin(ip->i_sbd, 2, 0);
++ if (error)
++ return error;
+
-+ /* This seems to be a read. Are we sure we don't want to acquire the lock in LM_ST_SHARED? */
++ gfs_trans_add_bh(ip->i_gl, el->el_bh);
+
-+ err = gfs_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh);
-+ if (err)
-+ goto out;
++ if (prev) {
++ uint32_t len;
+
-+ if (ip->i_di.di_eattr == 0) {
-+ if (!req->eg_name) {
-+ if (!req->eg_data_len && req->eg_len) {
-+ uint32_t no_data = 0;
++ len = GFS_EA_REC_LEN(prev) + GFS_EA_REC_LEN(ea);
++ prev->ea_rec_len = cpu_to_gfs32(len);
+
-+ err =
-+ copy_fn(req->eg_len, &no_data,
-+ sizeof (uint32_t));
-+ }
-+ } else
-+ err = -ENODATA;
++ if (GFS_EA_IS_LAST(ea))
++ prev->ea_flags |= GFS_EAFLAG_LAST;
++ } else
++ ea->ea_type = GFS_EATYPE_UNUSED;
+
-+ goto out_gunlock;
++ {
++ struct buffer_head *dibh;
++ error = gfs_get_inode_buffer(ip, &dibh);
++ if (!error) {
++ ip->i_di.di_ctime = get_seconds();
++ gfs_trans_add_bh(ip->i_gl, dibh);
++ gfs_dinode_out(&ip->i_di, dibh->b_data);
++ brelse(dibh);
++ }
+ }
+
-+ if (req->eg_name)
-+ err = get_ea(sdp, ip, req, copy_fn);
-+ else
-+ err = list_ea(sdp, ip, req, copy_fn);
-+
-+ out_gunlock:
-+ gfs_glock_dq_uninit(&i_gh);
-+
-+ out:
++ gfs_trans_end(ip->i_sbd);
+
-+ return err;
++ return 0;
+}
+
-+static int
-+do_set_ea(struct gfs_sbd *sdp, struct gfs_inode *ip, struct gfs_easet_io *req,
-+ struct gfs_ea_location location)
++/**
++ * gfs_ea_remove_i -
++ * @ip:
++ * @er:
++ *
++ * Returns: errno
++ */
++
++int
++gfs_ea_remove_i(struct gfs_inode *ip, struct gfs_ea_request *er)
+{
-+ int err = 0;
-+ int req_size;
-+ uint32_t avail_size =
-+ sdp->sd_sb.sb_bsize - sizeof (struct gfs_meta_header);
-+ struct gfs_ea_location space;
-+
-+ req_size = get_req_size(req, avail_size);
-+
-+ if (location.ea) {
-+ struct gfs_ea_header *new_space;
-+ if (req->es_cmd == GFS_EACMD_REMOVE) {
-+ remove_ea(ip, location.ea, location.prev);
-+ gfs_trans_add_bh(ip->i_gl, location.bh);
-+ goto out;
-+ }
-+ if (can_replace(location.ea, req, avail_size)) {
-+ err = replace_ea(sdp, ip, location.ea, req);
-+ if (!err)
-+ gfs_trans_add_bh(ip->i_gl, location.bh);
-+ goto out;
-+ }
-+ /*
-+ * This part is kind of confusing. If the inode has direct EAs
-+ * Then adding another EA can't run it out of space, so it is safe to
-+ * delete the EA before looking for space. If the inode has indirect
-+ * EAs, there may not be enough space left, so first you check for space
-+ * and they you delete the EA.
-+ */
-+ if ((ip->i_di.di_flags & GFS_DIF_EA_INDIRECT) == 0) {
-+ remove_ea(ip, location.ea, location.prev);
-+ err = find_space(ip, req_size, req->es_type, &space);
-+ if (err)
-+ goto out;
-+ new_space = prep_ea(space.ea);
-+ err = write_ea(sdp, ip, ip, new_space, req);
-+ if (!err) {
-+ gfs_trans_add_bh(ip->i_gl, location.bh);
-+ gfs_trans_add_bh(ip->i_gl, space.bh);
-+ }
-+ brelse(space.bh);
-+ goto out;
-+ }
-+ if (can_replace_in_block(ip, req_size, location, &new_space)) {
-+ remove_ea(ip, location.ea, location.prev);
-+ new_space = prep_ea(new_space);
-+ err = write_ea(sdp, ip, ip, new_space, req);
-+ if (!err)
-+ gfs_trans_add_bh(ip->i_gl, location.bh);
-+ goto out;
-+ }
-+ err = find_space(ip, req_size, req->es_type, &space);
-+ if (err)
-+ /* You can return a non IO error here. If there is no space left,
-+ * you can return -ENOSPC. So you must not have added a buffer to
-+ * the transaction yet.
-+ */
-+ goto out;
-+ remove_ea(ip, location.ea, location.prev);
-+ new_space = prep_ea(space.ea);
-+ err = write_ea(sdp, ip, ip, new_space, req);
-+ if (!err) {
-+ gfs_trans_add_bh(ip->i_gl, location.bh);
-+ gfs_trans_add_bh(ip->i_gl, space.bh);
-+ }
-+ brelse(space.bh);
-+ goto out;
-+ }
-+ err = find_space(ip, req_size, req->es_type, &space);
-+ if (err)
-+ /* you can also get -ENOSPC here */
-+ goto out;
-+ space.ea = prep_ea(space.ea);
-+ err = write_ea(sdp, ip, ip, space.ea, req);
-+ if (!err)
-+ gfs_trans_add_bh(ip->i_gl, space.bh);
-+ brelse(space.bh);
++ struct gfs_ea_location el;
++ int error;
+
-+ out:
-+ return err;
++ if (!ip->i_di.di_eattr)
++ return -ENODATA;
++
++ error = gfs_ea_find(ip, er, &el);
++ if (error)
++ return error;
++ if (!el.el_ea)
++ return -ENODATA;
++
++ if (GFS_EA_IS_STUFFED(el.el_ea))
++ error = ea_remove_stuffed(ip, &el);
++ else
++ error = ea_remove_unstuffed(ip, el.el_bh, el.el_ea, el.el_prev, FALSE);
++
++ brelse(el.el_bh);
++
++ return error;
+}
+
-+static int
-+set_ea(struct gfs_sbd *sdp, struct gfs_inode *ip, struct gfs_easet_io *req,
-+ struct gfs_ea_location location)
-+{
-+ int err;
-+ struct gfs_alloc *al;
-+ struct gfs_rgrpd *rgd = NULL;
-+ struct buffer_head *dibh;
-+ uint32_t avail_size =
-+ sdp->sd_sb.sb_bsize - sizeof (struct gfs_meta_header);
-+ int unstuffed_ea_blks = 0;
-+ struct gfs_holder ri_gh, rgd_gh;
-+ struct posix_acl *acl = NULL;
++/**
++ * gfs_ea_remove - removes an extended attribute from a file
++ * @ip: pointer to the inode of the target file
++ * @er: request information
++ *
++ * Returns: errno
++ */
+
-+ if (IS_ACCESS_ACL(req->es_name, req->es_name_len) && req->es_data){
-+ acl = posix_acl_from_xattr(req->es_data, req->es_data_len);
-+ if (IS_ERR(acl)) {
-+ err = PTR_ERR(acl);
-+ goto out;
-+ }
-+ }
++int
++gfs_ea_remove(struct gfs_inode *ip, struct gfs_ea_request *er)
++{
++ struct gfs_holder i_gh;
++ int error;
+
-+ err = gfs_get_inode_buffer(ip, &dibh);
-+ if (err)
-+ goto out_acl;
-+ al = gfs_alloc_get(ip);
++ if (!er->er_name_len ||
++ er->er_name_len > GFS_EA_MAX_NAME_LEN)
++ return -EINVAL;
+
-+ err = gfs_quota_lock_m(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
-+ if (err)
-+ goto out_alloc;
++ error = gfs_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &i_gh);
++ if (error)
++ return error;
+
-+ /*
-+ * worst case, you need to switch from direct to indirect, which can
-+ * take up to 3 new blocks, and you need to create enough unstuffed data
-+ * blocks to hold all the data
-+ */
-+ al->al_requested_meta = 3 + GFS_EADATA_NUM_PTRS(req->es_data_len, avail_size);
++ if (IS_IMMUTABLE(ip->i_vnode) || IS_APPEND(ip->i_vnode))
++ error = -EPERM;
++ else
++ error = gfs_ea_ops[er->er_type]->eo_remove(ip, er);
+
-+ err = gfs_inplace_reserve(ip);
-+ if (err)
-+ goto out_lock_quota;
++ gfs_glock_dq_uninit(&i_gh);
+
-+ err = gfs_quota_check(ip, ip->i_di.di_uid, ip->i_di.di_gid);
-+ if (err)
-+ goto out_reserve;
++ return error;
++}
+
-+ if (location.ea && GFS_EA_IS_UNSTUFFED(location.ea)) {
-+ /*
-+ * If there is an EA, we might need to delete it.
-+ * Since all unstuffed data blocks are added at the same time,
-+ * they are all from the same resource group.
-+ */
-+ err = gfs_rindex_hold(sdp, &ri_gh);
-+ if (err)
-+ goto out_reserve;
-+ rgd =
-+ gfs_blk2rgrpd(sdp,
-+ gfs64_to_cpu(*GFS_EA_DATA_PTRS(location.ea)));
-+ GFS_ASSERT_INODE(rgd, ip,
-+ printk("block = %" PRIu64 "\n",
-+ gfs64_to_cpu(*GFS_EA_DATA_PTRS
-+ (location.ea)));
-+ );
-+ err =
-+ gfs_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, &rgd_gh);
-+ if (err)
-+ goto out_rindex;
-+ unstuffed_ea_blks = location.ea->ea_num_ptrs;
-+ }
++/**
++ * gfs_ea_acl_init -
++ * @ip:
++ * @er:
++ *
++ * Returns: errno
++ */
+
-+ /*
-+ * The transaction may require:
-+ * Modifying the dinode block, Modifying the indirect ea block,
-+ * modifying an ea block, all the allocation blocks, all the blocks for
-+ * a RG bitmap, the RG header block, a RG block for each unstuffed data
-+ * block you might be deleting.
-+ */
-+ err = gfs_trans_begin(sdp, 4 + al->al_requested_meta +
-+ al->al_rgd->rd_ri.ri_length + unstuffed_ea_blks,
-+ 1);
-+ if (err)
-+ goto out_lock_rg;
++int
++gfs_ea_acl_init(struct gfs_inode *ip, struct gfs_ea_request *er)
++{
++ int error;
+
-+ err = do_set_ea(sdp, ip, req, location);
++ if (!ip->i_di.di_eattr)
++ return ea_init_i(ip, er, NULL);
+
-+ if (!err) {
-+ if (acl)
-+ gfs_acl_set_mode(ip, acl);
-+ gfs_trans_add_bh(ip->i_gl, dibh);
-+ gfs_dinode_out(&ip->i_di, (dibh)->b_data);
-+ }
++ {
++ struct buffer_head *bh;
++ struct gfs_ea_header *ea;
++ unsigned int size;
+
-+ gfs_trans_end(sdp);
++ ea_calc_size(ip->i_sbd, er, &size);
+
-+ out_lock_rg:
-+ if (rgd)
-+ gfs_glock_dq_uninit(&rgd_gh);
++ error = gfs_dread(ip->i_sbd,
++ ip->i_di.di_eattr, ip->i_gl,
++ DIO_START | DIO_WAIT, &bh);
++ if (error)
++ return error;
+
-+ out_rindex:
-+ if (rgd)
-+ gfs_glock_dq_uninit(&ri_gh);
++ gfs_metatype_check(ip->i_sbd, bh, GFS_METATYPE_EA);
+
-+ out_reserve:
-+ gfs_inplace_release(ip);
++ ea = GFS_EA_BH2FIRST(bh);
++ if (GFS_EA_REC_LEN(ea) - GFS_EA_SIZE(ea) >= size) {
++ ea = ea_split_ea(ea);
++ ea_write(ip, ea, er);
++ brelse(bh);
++ return 0;
++ }
+
-+ out_lock_quota:
-+ gfs_quota_unlock_m(ip);
++ brelse(bh);
++ }
+
-+ out_alloc:
-+ gfs_alloc_put(ip);
-+ brelse(dibh);
++ error = ea_set_block(ip, er, NULL);
++ GFS_ASSERT_INODE(error != -ENOSPC, ip,);
++ if (error)
++ return error;
+
-+ out_acl:
-+ posix_acl_release(acl);
++ {
++ struct buffer_head *dibh;
++ error = gfs_get_inode_buffer(ip, &dibh);
++ if (error)
++ return error;
++ gfs_dinode_out(&ip->i_di, dibh->b_data);
++ brelse(dibh);
++ }
+
-+ out:
-+ return err;
++ return error;
+}
+
+/**
-+ * gfs_set_eattr - sets (or creates or replaces) an extended attribute
-+ * @sdp: pointer to the superblock
-+ * @ip: pointer to the inode of the target file
-+ * @req: request information
++ * ea_acl_chmod_unstuffed -
++ * @ip:
++ * @ea:
++ * @data:
+ *
-+ * Returns: 0 on success -EXXX on error
++ * Returns: errno
+ */
-+int
-+gfs_set_eattr(struct gfs_sbd *sdp, struct gfs_inode *ip,
-+ struct gfs_easet_io *req)
-+{
-+ struct gfs_holder i_gh;
-+ int err;
-+ uint32_t req_size;
-+ uint32_t avail_size =
-+ sdp->sd_sb.sb_bsize - sizeof (struct gfs_meta_header);
-+ struct gfs_ea_location location;
+
-+ if (!GFS_EACMD_VALID(req->es_cmd)) {
-+ err = -EOPNOTSUPP;
-+ goto out;
-+ }
++static int
++ea_acl_chmod_unstuffed(struct gfs_inode *ip,
++ struct gfs_ea_header *ea,
++ char *data)
++{
++ struct gfs_sbd *sdp = ip->i_sbd;
++ struct buffer_head **bh;
++ unsigned int amount = GFS_EA_DATA_LEN(ea);
++ unsigned int nptrs = DIV_RU(amount, sdp->sd_jbsize);
++ uint64_t *dataptrs = GFS_EA2DATAPTRS(ea);
++ unsigned int x;
++ int error;
+
-+ if (strlen(req->es_name) == 0) {
-+ err = -EINVAL;
-+ goto out;
-+ }
++ bh = kmalloc(nptrs * sizeof(struct buffer_head *), GFP_KERNEL);
++ if (!bh)
++ return -ENOMEM;
+
-+ err = gfs_ea_write_permission(req, ip);
-+ if (err)
++ error = gfs_trans_begin(sdp, 1 + nptrs, 0);
++ if (error)
+ goto out;
+
-+ if ((req_size = get_req_size(req, avail_size)) > avail_size) {
-+ /* This can only happen with 512 byte blocks */
-+ err = -ERANGE;
-+ goto out;
++ for (x = 0; x < nptrs; x++) {
++ error = gfs_dread(sdp, gfs64_to_cpu(*dataptrs), ip->i_gl,
++ DIO_START, bh + x);
++ if (error) {
++ while (x--)
++ brelse(bh[x]);
++ goto fail;
++ }
++ dataptrs++;
+ }
-+ err = gfs_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &i_gh);
-+ if (err)
-+ goto out;
+
-+ if (ip->i_di.di_eattr == 0) {
-+ if (req->es_cmd == GFS_EACMD_REPLACE
-+ || req->es_cmd == GFS_EACMD_REMOVE) {
-+ err = -ENODATA;
-+ goto out_gunlock;
++ for (x = 0; x < nptrs; x++) {
++ error = gfs_dreread(sdp, bh[x], DIO_WAIT);
++ if (error) {
++ for (; x < nptrs; x++)
++ brelse(bh[x]);
++ goto fail;
+ }
-+ err = init_eattr(sdp, ip, req);
-+ goto out_gunlock;
-+ }
+
-+ err = find_eattr(ip, req->es_name, req->es_name_len, req->es_type,
-+ &location);
-+ if (err < 0)
-+ goto out_gunlock;
-+ if (err == 0 && (req->es_cmd == GFS_EACMD_REPLACE ||
-+ req->es_cmd == GFS_EACMD_REMOVE)) {
-+ err = -ENODATA;
-+ goto out_relse;
++ gfs_metatype_check2(sdp, bh[x], GFS_METATYPE_ED, GFS_METATYPE_EA);
++ gfs_trans_add_bh(ip->i_gl, bh[x]);
++
++ memcpy(bh[x]->b_data + sizeof(struct gfs_meta_header),
++ data,
++ (sdp->sd_jbsize > amount) ? amount : sdp->sd_jbsize);
++
++ amount -= sdp->sd_jbsize;
++ data += sdp->sd_jbsize;
++
++ brelse(bh[x]);
+ }
-+ err = set_ea(sdp, ip, req, location);
+
-+ out_relse:
-+ if (location.bh)
-+ brelse(location.bh);
++ out:
++ kfree(bh);
+
-+ out_gunlock:
-+ gfs_glock_dq_uninit(&i_gh);
++ return error;
+
-+ out:
-+ return err;
++ fail:
++ gfs_trans_end(sdp);
++ kfree(bh);
++
++ return error;
+}
+
+/**
-+ * gfs_set_eattr_ioctl - creates, modifies, or removes an extended attribute.
-+ * @sdp: pointer to the superblock
-+ * @ip: a pointer to the gfs inode for the file
-+ * @arg: a pointer to gfs_set_eattr_io_t struct with the request
++ * gfs_ea_acl_chmod -
++ * @ip:
++ * @el:
++ * @attr:
++ * @data:
+ *
-+ * Notes: ioctl wrapper for gfs_set_eattr
-+ * Returns: 0 on success, -EXXX or error
++ * Returns: errno
+ */
+
+int
-+gfs_set_eattr_ioctl(struct gfs_sbd *sdp, struct gfs_inode *ip, void *arg)
++gfs_ea_acl_chmod(struct gfs_inode *ip, struct gfs_ea_location *el,
++ struct iattr *attr, char *data)
+{
-+ struct gfs_easet_io req;
-+ int err = 0;
-+ char *name = NULL;
-+ char *data = NULL;
++ struct buffer_head *dibh;
++ int error;
+
-+ if (copy_from_user(&req, arg, sizeof (struct gfs_easet_io))) {
-+ err = -EFAULT;
-+ goto out;
-+ }
++ if (GFS_EA_IS_STUFFED(el->el_ea)) {
++ error = gfs_trans_begin(ip->i_sbd, 2, 0);
++ if (error)
++ return error;
+
-+ name = gmalloc(req.es_name_len);
++ gfs_trans_add_bh(ip->i_gl, el->el_bh);
++ memcpy(GFS_EA2DATA(el->el_ea),
++ data,
++ GFS_EA_DATA_LEN(el->el_ea));
++ } else
++ error = ea_acl_chmod_unstuffed(ip, el->el_ea, data);
+
-+ if (req.es_data) {
-+ data = gmalloc(req.es_data_len);
++ if (error)
++ return error;
+
-+ if (copy_from_user(data, req.es_data, req.es_data_len)) {
-+ err = -EFAULT;
-+ goto out_free;
-+ }
-+ }
-+ if (copy_from_user(name, req.es_name, req.es_name_len)) {
-+ err = -EFAULT;
-+ goto out_free;
++ error = gfs_get_inode_buffer(ip, &dibh);
++ if (!error) {
++ inode_setattr(ip->i_vnode, attr);
++ gfs_inode_attr_out(ip);
++ gfs_trans_add_bh(ip->i_gl, dibh);
++ gfs_dinode_out(&ip->i_di, dibh->b_data);
++ brelse(dibh);
+ }
-+ req.es_data = data;
-+ req.es_name = name;
-+ err = gfs_set_eattr(sdp, ip, &req);
+
-+ out_free:
-+ kfree(name);
-+ if (data)
-+ kfree(data);
++ gfs_trans_end(ip->i_sbd);
+
-+ out:
-+ return err;
++ return error;
+}
+
+/**
-+ * gfs_get_eattr_ioctl - gets the value for the requested attribute name,
-+ * or a list of all the extended attribute names.
-+ * @sdp: pointer to the superblock
-+ * @ip: a pointer to the inode for the file
-+ * @arg: a pointer to the struct gfs_eaget_io struct holding the request
++ * ea_dealloc_indirect -
++ * @ip:
+ *
-+ * Notes: ioctl wrapper for the gfs_get_eattr function
-+ * Returns: 0 on success, -EXXX on error.
++ * Returns: errno
+ */
+
-+int
-+gfs_get_eattr_ioctl(struct gfs_sbd *sdp, struct gfs_inode *ip, void *arg)
++static int
++ea_dealloc_indirect(struct gfs_inode *ip)
+{
-+ struct gfs_eaget_io req;
-+ int result = 0;
-+ char *name = NULL;
-+ uint32_t size;
++ struct gfs_sbd *sdp = ip->i_sbd;
++ struct gfs_rgrp_list rlist;
++ struct buffer_head *indbh, *dibh;
++ uint64_t *eablk, *end;
++ unsigned int rg_blocks = 0;
++ uint64_t bstart = 0;
++ unsigned int blen = 0;
++ unsigned int x;
++ int error;
++
++ memset(&rlist, 0, sizeof(struct gfs_rgrp_list));
++
++ error = gfs_dread(sdp,
++ ip->i_di.di_eattr, ip->i_gl,
++ DIO_START | DIO_WAIT, &indbh);
++ if (error)
++ return error;
++
++ gfs_metatype_check(sdp, indbh, GFS_METATYPE_IN);
++
++ eablk = (uint64_t *)(indbh->b_data + sizeof(struct gfs_indirect));
++ end = eablk + sdp->sd_inptrs;
++
++ for (; eablk < end; eablk++) {
++ uint64_t bn;
++
++ if (!*eablk)
++ break;
++ bn = gfs64_to_cpu(*eablk);
+
-+ if (copy_from_user(&req, arg, sizeof (struct gfs_eaget_io))) {
-+ result = -EFAULT;
++ if (bstart + blen == bn)
++ blen++;
++ else {
++ if (bstart)
++ gfs_rlist_add(sdp, &rlist, bstart);
++ bstart = bn;
++ blen = 1;
++ }
++ }
++ if (bstart)
++ gfs_rlist_add(sdp, &rlist, bstart);
++ else
+ goto out;
++
++ gfs_rlist_alloc(&rlist, LM_ST_EXCLUSIVE, 0);
++
++ for (x = 0; x < rlist.rl_rgrps; x++) {
++ struct gfs_rgrpd *rgd;
++ rgd = gl2rgd(rlist.rl_ghs[x].gh_gl);
++ rg_blocks += rgd->rd_ri.ri_length;
+ }
+
-+ if (req.eg_name) {
-+ name = gmalloc(req.eg_name_len);
++ error = gfs_glock_nq_m(rlist.rl_rgrps, rlist.rl_ghs);
++ if (error)
++ goto out_rlist_free;
++
++ error = gfs_trans_begin(sdp, 2 + rg_blocks, 1);
++ if (error)
++ goto out_gunlock;
++
++ gfs_trans_add_bh(ip->i_gl, indbh);
++
++ eablk = (uint64_t *)(indbh->b_data + sizeof(struct gfs_indirect));
++ bstart = 0;
++ blen = 0;
+
-+ if (copy_from_user(name, req.eg_name, req.eg_name_len)) {
-+ result = -EFAULT;
-+ goto out_free;
++ for (; eablk < end; eablk++) {
++ uint64_t bn;
++
++ if (!*eablk)
++ break;
++ bn = gfs64_to_cpu(*eablk);
++
++ if (bstart + blen == bn)
++ blen++;
++ else {
++ if (bstart)
++ gfs_metafree(ip, bstart, blen);
++ bstart = bn;
++ blen = 1;
+ }
-+ req.eg_name = name;
++
++ *eablk = 0;
++ GFS_ASSERT_INODE(ip->i_di.di_blocks, ip,);
++ ip->i_di.di_blocks--;
+ }
-+ result = gfs_get_eattr(sdp, ip, &req, gfs_ea_copy_to_user);
++ if (bstart)
++ gfs_metafree(ip, bstart, blen);
+
-+ out_free:
-+ if (name)
-+ kfree(name);
++ ip->i_di.di_flags &= ~GFS_DIF_EA_INDIRECT;
+
-+ if (result >= 0) {
-+ size = result;
-+ result =
-+ gfs_ea_copy_to_user(req.eg_len, &size, sizeof(uint32_t));
++ error = gfs_get_inode_buffer(ip, &dibh);
++ if (!error) {
++ gfs_trans_add_bh(ip->i_gl, dibh);
++ gfs_dinode_out(&ip->i_di, dibh->b_data);
++ brelse(dibh);
+ }
+
-+ out:
++ gfs_trans_end(sdp);
+
-+ return result;
++ out_gunlock:
++ gfs_glock_dq_m(rlist.rl_rgrps, rlist.rl_ghs);
++
++ out_rlist_free:
++ gfs_rlist_free(&rlist);
++
++ out:
++ brelse(indbh);
++
++ return error;
+}
+
+/**
-+ * functionname - summary
-+ * @param1: description
-+ * @param2: description
-+ * @param3: description
-+ *
-+ * Function description
++ * ea_dealloc_block -
++ * @ip:
+ *
-+ * Returns: what is returned
++ * Returns: errno
+ */
+
+static int
-+gfs_get_direct_eattr_meta(struct gfs_inode *ip, struct gfs_user_buffer *ub,
-+ uint64_t blk)
++ea_dealloc_block(struct gfs_inode *ip)
+{
+ struct gfs_sbd *sdp = ip->i_sbd;
-+ struct buffer_head *databh, *bh;
-+ struct gfs_ea_header *ea;
-+ uint64_t *datablk;
-+ unsigned int i;
++ struct gfs_alloc *al = ip->i_alloc;
++ struct gfs_rgrpd *rgd;
++ struct buffer_head *dibh;
+ int error;
+
-+ error = gfs_dread(sdp, blk, ip->i_gl, DIO_START | DIO_WAIT, &bh);
++ rgd = gfs_blk2rgrpd(sdp, ip->i_di.di_eattr);
++ GFS_ASSERT_INODE(rgd, ip,);
++
++ error = gfs_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, &al->al_rgd_gh);
+ if (error)
-+ goto out;
++ return error;
+
-+ error = gfs_add_bh_to_ub(ub, bh);
++ error = gfs_trans_begin(sdp, 1 + rgd->rd_ri.ri_length, 1);
++ if (error)
++ goto out_gunlock;
+
-+ ea = (struct gfs_ea_header *) ((bh)->b_data +
-+ sizeof (struct gfs_meta_header));
-+ for (;;) {
-+ GFS_ASSERT_INODE(GFS_EA_REC_LEN(ea), ip,);
++ gfs_metafree(ip, ip->i_di.di_eattr, 1);
+
-+ datablk = GFS_EA_DATA_PTRS(ea);
++ ip->i_di.di_eattr = 0;
++ GFS_ASSERT_INODE(ip->i_di.di_blocks, ip,);
++ ip->i_di.di_blocks--;
+
-+ for (i = 0; i < ea->ea_num_ptrs; i++) {
-+ error =
-+ gfs_dread(sdp, gfs64_to_cpu(*datablk), ip->i_gl,
-+ DIO_START | DIO_WAIT, &databh);
-+ if (error)
-+ goto out_relse;
++ error = gfs_get_inode_buffer(ip, &dibh);
++ if (!error) {
++ gfs_trans_add_bh(ip->i_gl, dibh);
++ gfs_dinode_out(&ip->i_di, dibh->b_data);
++ brelse(dibh);
++ }
+
-+ error = gfs_add_bh_to_ub(ub, databh);
++ gfs_trans_end(sdp);
+
-+ brelse(databh);
++ out_gunlock:
++ gfs_glock_dq_uninit(&al->al_rgd_gh);
+
-+ if (error)
-+ goto out_relse;
++ return error;
++}
+
-+ datablk++;
-+ }
++/**
++ * gfs_ea_dealloc - deallocate the extended attribute fork
++ * @ip: the inode
++ *
++ * Returns: errno
++ */
+
-+ if (GFS_EA_IS_LAST(ea))
-+ break;
-+ ea = GFS_EA_NEXT(ea);
++int
++gfs_ea_dealloc(struct gfs_inode *ip)
++{
++ struct gfs_alloc *al;
++ int error;
++
++ al = gfs_alloc_get(ip);
++
++ error = gfs_quota_hold_m(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
++ if (error)
++ goto out_alloc;
++
++ error = gfs_rindex_hold(ip->i_sbd, &al->al_ri_gh);
++ if (error)
++ goto out_quota;
++
++ error = ea_foreach(ip, ea_dealloc_unstuffed, NULL);
++ if (error)
++ goto out_rindex;
++
++ if (ip->i_di.di_flags & GFS_DIF_EA_INDIRECT) {
++ error = ea_dealloc_indirect(ip);
++ if (error)
++ goto out_rindex;
+ }
+
-+ out_relse:
-+ brelse(bh);
++ error = ea_dealloc_block(ip);
+
-+ out:
++ out_rindex:
++ gfs_glock_dq_uninit(&al->al_ri_gh);
++
++ out_quota:
++ gfs_quota_unhold_m(ip);
++
++ out_alloc:
++ gfs_alloc_put(ip);
+
+ return error;
+}
+ * @dip: the directory
+ * @ub: the structure representing the user buffer to copy to
+ *
-+ * Returns: 0 on success, -EXXX on failure
++ * Returns: errno
+ */
+
+int
+gfs_get_eattr_meta(struct gfs_inode *ip, struct gfs_user_buffer *ub)
+{
-+ struct gfs_sbd *sdp = ip->i_sbd;
+ struct buffer_head *bh;
+ int error;
-+ uint64_t *eablk, *end;
++
++ error = gfs_dread(ip->i_sbd,
++ ip->i_di.di_eattr, ip->i_gl,
++ DIO_START | DIO_WAIT, &bh);
++ if (error)
++ return error;
++
++ gfs_add_bh_to_ub(ub, bh);
+
+ if (ip->i_di.di_flags & GFS_DIF_EA_INDIRECT) {
-+ error =
-+ gfs_dread(sdp, ip->i_di.di_eattr, ip->i_gl,
-+ DIO_WAIT | DIO_START, &bh);
-+ if (error)
-+ goto out;
++ struct buffer_head *eabh;
++ uint64_t *eablk, *end;
+
-+ error = gfs_add_bh_to_ub(ub, bh);
++ gfs_metatype_check(ip->i_sbd, bh, GFS_METATYPE_IN);
+
-+ eablk =
-+ (uint64_t *) ((bh)->b_data + sizeof (struct gfs_indirect));
-+ end =
-+ eablk +
-+ ((sdp->sd_sb.sb_bsize - sizeof (struct gfs_indirect)) / 8);
++ eablk = (uint64_t *)(bh->b_data + sizeof(struct gfs_indirect));
++ end = eablk + ip->i_sbd->sd_inptrs;
+
-+ while (*eablk && eablk < end) {
-+ error =
-+ gfs_get_direct_eattr_meta(ip, ub,
-+ gfs64_to_cpu(*eablk));
-+ if (error) {
-+ brelse(bh);
-+ goto out;
-+ }
-+ eablk++;
++ for (; eablk < end; eablk++) {
++ uint64_t bn;
++
++ if (!*eablk)
++ break;
++ bn = gfs64_to_cpu(*eablk);
++
++ error = gfs_dread(ip->i_sbd, bn, ip->i_gl,
++ DIO_START | DIO_WAIT, &eabh);
++ if (error)
++ break;
++ gfs_add_bh_to_ub(ub, eabh);
++ brelse(eabh);
++ if (error)
++ break;
+ }
-+ brelse(bh);
-+ } else
-+ error = gfs_get_direct_eattr_meta(ip, ub, ip->i_di.di_eattr);
++ }
+
-+ out:
++ brelse(bh);
+
+ return error;
+}
-diff -urN linux-2.6.9-rc1-mm3/fs/gfs/eattr.h linux/fs/gfs/eattr.h
---- linux-2.6.9-rc1-mm3/fs/gfs/eattr.h 1969-12-31 18:00:00.000000000 -0600
-+++ linux/fs/gfs/eattr.h 2004-09-07 16:26:15.740552812 -0500
-@@ -0,0 +1,90 @@
+diff -urN linux-orig/fs/gfs/eattr.h linux-patched/fs/gfs/eattr.h
+--- linux-orig/fs/gfs/eattr.h 1969-12-31 18:00:00.000000000 -0600
++++ linux-patched/fs/gfs/eattr.h 2004-10-27 15:27:10.879630993 -0500
+@@ -0,0 +1,95 @@
+/******************************************************************************
+*******************************************************************************
+**
+#ifndef __EATTR_DOT_H__
+#define __EATTR_DOT_H__
+
-+#define GFS_EA_MAY_WRITE 1
-+#define GFS_EA_MAY_READ 2
-+
-+#define GFS_EA_DATA_LEN(x) gfs32_to_cpu((x)->ea_data_len)
-+#define GFS_EA_IS_UNSTUFFED(x) ((x)->ea_num_ptrs)
-+#define GFS_EA_DATA(x) ((char *)(x) + sizeof(struct gfs_ea_header) + (x)->ea_name_len)
++#define GFS_EA_REC_LEN(ea) gfs32_to_cpu((ea)->ea_rec_len)
++#define GFS_EA_DATA_LEN(ea) gfs32_to_cpu((ea)->ea_data_len)
++
++#define GFS_EA_SIZE(ea) \
++MAKE_MULT8(sizeof(struct gfs_ea_header) + \
++ (ea)->ea_name_len + \
++ ((GFS_EA_IS_STUFFED(ea)) ? \
++ GFS_EA_DATA_LEN(ea) : \
++ (sizeof(uint64_t) * (ea)->ea_num_ptrs)))
++#define GFS_EA_STRLEN(ea) \
++((((ea)->ea_type == GFS_EATYPE_USR) ? 5 : 7) + \
++ (ea)->ea_name_len + 1)
++
++#define GFS_EA_IS_STUFFED(ea) (!(ea)->ea_num_ptrs)
++#define GFS_EA_IS_LAST(ea) ((ea)->ea_flags & GFS_EAFLAG_LAST)
++
++#define GFS_EAREQ_SIZE_STUFFED(er) \
++MAKE_MULT8(sizeof(struct gfs_ea_header) + \
++ (er)->er_name_len + (er)->er_data_len)
++#define GFS_EAREQ_SIZE_UNSTUFFED(sdp, er) \
++MAKE_MULT8(sizeof(struct gfs_ea_header) + \
++ (er)->er_name_len + \
++ sizeof(uint64_t) * DIV_RU((er)->er_data_len, (sdp)->sd_jbsize))
++
++#define GFS_EA2NAME(ea) ((char *)((struct gfs_ea_header *)(ea) + 1))
++#define GFS_EA2DATA(ea) (GFS_EA2NAME(ea) + (ea)->ea_name_len)
++#define GFS_EA2DATAPTRS(ea) \
++((uint64_t *)(GFS_EA2NAME(ea) + MAKE_MULT8((ea)->ea_name_len)))
++#define GFS_EA2NEXT(ea) \
++((struct gfs_ea_header *)((char *)(ea) + GFS_EA_REC_LEN(ea)))
++#define GFS_EA_BH2FIRST(bh) \
++((struct gfs_ea_header *)((bh)->b_data + \
++ sizeof(struct gfs_meta_header)))
++
++struct gfs_ea_request {
++ char *er_name;
++ char *er_data;
++ unsigned int er_name_len;
++ unsigned int er_data_len;
++ unsigned int er_type; /* GFS_EATYPE_... */
++ int er_flags;
++ mode_t er_mode;
++};
+
+struct gfs_ea_location {
-+ struct buffer_head *bh;
-+ struct gfs_ea_header *ea;
-+ struct gfs_ea_header *prev;
++ struct buffer_head *el_bh;
++ struct gfs_ea_header *el_ea;
++ struct gfs_ea_header *el_prev;
+};
+
-+#define GFS_POSIX_ACL_ACCESS "posix_acl_access"
-+#define GFS_POSIX_ACL_ACCESS_LEN 16
-+#define GFS_POSIX_ACL_DEFAULT "posix_acl_default"
-+#define GFS_POSIX_ACL_DEFAULT_LEN 17
-+
-+#define IS_ACCESS_ACL(name, len) \
-+ ((len) == GFS_POSIX_ACL_ACCESS_LEN && \
-+ !memcmp(GFS_POSIX_ACL_ACCESS, (name), (len)))
-+
-+#define IS_DEFAULT_ACL(name, len) \
-+ ((len) == GFS_POSIX_ACL_DEFAULT_LEN && \
-+ !memcmp(GFS_POSIX_ACL_DEFAULT, (name), (len)))
-+
-+#define GFS_MAX_EA_ACL_BLKS 66 /* 65 for unstuffed data blocks, 1 for the ea
-+ itself */
-+
-+typedef int (*gfs_ea_copy_fn_t) (void *dest, void *src, unsigned long size);
-+
-+int gfs_ea_memcpy(void *dest, void *src, unsigned long size);
-+int gfs_ea_copy_to_user(void *dest, void *src, unsigned long size);
-+
-+int find_sys_space(struct gfs_inode *alloc_ip, struct gfs_inode *ip, int size,
-+ struct gfs_ea_location *avail);
++int gfs_ea_repack(struct gfs_inode *ip);
+
-+struct gfs_ea_header *prep_ea(struct gfs_ea_header *ea);
++int gfs_ea_get_i(struct gfs_inode *ip, struct gfs_ea_request *er);
++int gfs_ea_set_i(struct gfs_inode *ip, struct gfs_ea_request *er);
++int gfs_ea_remove_i(struct gfs_inode *ip, struct gfs_ea_request *er);
+
-+int write_ea(struct gfs_sbd *sdp, struct gfs_inode *alloc_ip,
-+ struct gfs_inode *ip, struct gfs_ea_header *ea,
-+ struct gfs_easet_io *req);
-+
-+int gfs_get_eattr(struct gfs_sbd *sdp, struct gfs_inode *ip,
-+ struct gfs_eaget_io *req, gfs_ea_copy_fn_t copy_fn);
-+int gfs_set_eattr(struct gfs_sbd *sdp, struct gfs_inode *ip,
-+ struct gfs_easet_io *req);
-+
-+int gfs_set_eattr_ioctl(struct gfs_sbd *sdp, struct gfs_inode *ip, void *arg);
-+int gfs_get_eattr_ioctl(struct gfs_sbd *sdp, struct gfs_inode *ip, void *arg);
++int gfs_ea_list(struct gfs_inode *ip, struct gfs_ea_request *er);
++int gfs_ea_get(struct gfs_inode *ip, struct gfs_ea_request *er);
++int gfs_ea_set(struct gfs_inode *ip, struct gfs_ea_request *er);
++int gfs_ea_remove(struct gfs_inode *ip, struct gfs_ea_request *er);
+
+int gfs_ea_dealloc(struct gfs_inode *ip);
+
+int gfs_get_eattr_meta(struct gfs_inode *ip, struct gfs_user_buffer *ub);
+
-+int replace_ea(struct gfs_sbd *sdp, struct gfs_inode *ip,
-+ struct gfs_ea_header *ea, struct gfs_easet_io *req);
-+
-+int find_eattr(struct gfs_inode *ip, char *name, int name_len, int type,
-+ struct gfs_ea_location *location);
-+
-+int read_unstuffed(void *dest, struct gfs_inode *ip, struct gfs_sbd *sdp,
-+ struct gfs_ea_header *ea, uint32_t avail_size,
-+ gfs_ea_copy_fn_t copy_fn);
++/* Exported to acl.c */
+
-+int get_ea(struct gfs_sbd *sdp, struct gfs_inode *ip, struct gfs_eaget_io *req,
-+ gfs_ea_copy_fn_t copy_fn);
-+
-+int init_new_inode_eattr(struct gfs_inode *dip, struct gfs_inode *ip,
-+ struct gfs_easet_io *req);
-+
-+int gfs_ea_read_permission(struct gfs_eaget_io *req, struct gfs_inode *ip);
++int gfs_ea_check_size(struct gfs_sbd *sdp, struct gfs_ea_request *er);
++int gfs_ea_find(struct gfs_inode *ip,
++ struct gfs_ea_request *er,
++ struct gfs_ea_location *el);
++int gfs_ea_get_copy(struct gfs_inode *ip,
++ struct gfs_ea_location *el,
++ char *data);
++int gfs_ea_acl_init(struct gfs_inode *ip, struct gfs_ea_request *er);
++int gfs_ea_acl_chmod(struct gfs_inode *ip, struct gfs_ea_location *el,
++ struct iattr *attr, char *data);
+
+#endif /* __EATTR_DOT_H__ */
-diff -urN linux-2.6.9-rc1-mm3/fs/gfs/file.c linux/fs/gfs/file.c
---- linux-2.6.9-rc1-mm3/fs/gfs/file.c 1969-12-31 18:00:00.000000000 -0600
-+++ linux/fs/gfs/file.c 2004-09-07 16:26:15.741552591 -0500
-@@ -0,0 +1,382 @@
+diff -urN linux-orig/fs/gfs/file.c linux-patched/fs/gfs/file.c
+--- linux-orig/fs/gfs/file.c 1969-12-31 18:00:00.000000000 -0600
++++ linux-patched/fs/gfs/file.c 2004-10-27 15:27:10.879630993 -0500
+@@ -0,0 +1,392 @@
+/******************************************************************************
+*******************************************************************************
+**
+ char **p = (char **)buf;
+ int error = 0;
+
++ /* The dinode block always gets journaled */
+ if (bh->b_blocknr == ip->i_num.no_addr) {
+ GFS_ASSERT_INODE(!new, ip,);
+ gfs_trans_add_bh(ip->i_gl, bh);
+ memcpy(bh->b_data + offset, *p, size);
++
++ /* Data blocks for journaled files get added to the journal */
+ } else if (gfs_is_jdata(ip)) {
+ gfs_trans_add_bh(ip->i_gl, bh);
+ memcpy(bh->b_data + offset, *p, size);
+ if (new)
+ gfs_buffer_clear_ends(bh, offset, size, TRUE);
++
++ /* Non-journaled data blocks get written to in-place disk blocks */
+ } else {
+ memcpy(bh->b_data + offset, *p, size);
+ if (new)
+ char **p = (char **)buf;
+ int error = 0;
+
++ /* the dinode block always gets journaled */
+ if (bh->b_blocknr == ip->i_num.no_addr) {
+ GFS_ASSERT_INODE(!new, ip,);
+ gfs_trans_add_bh(ip->i_gl, bh);
+ if (copy_from_user(bh->b_data + offset, *p, size))
+ error = -EFAULT;
++
++ /* Data blocks for journaled files get added to the journal */
+ } else if (gfs_is_jdata(ip)) {
+ gfs_trans_add_bh(ip->i_gl, bh);
+ if (copy_from_user(bh->b_data + offset, *p, size))
+ if (error)
+ memset(bh->b_data + offset, 0, size);
+ }
++
++ /* non-journaled data blocks get written to in-place disk blocks */
+ } else {
+ if (copy_from_user(bh->b_data + offset, *p, size))
+ error = -EFAULT;
+ goto out;
+ return error;
+}
-diff -urN linux-2.6.9-rc1-mm3/fs/gfs/file.h linux/fs/gfs/file.h
---- linux-2.6.9-rc1-mm3/fs/gfs/file.h 1969-12-31 18:00:00.000000000 -0600
-+++ linux/fs/gfs/file.h 2004-09-07 16:26:15.741552591 -0500
+diff -urN linux-orig/fs/gfs/file.h linux-patched/fs/gfs/file.h
+--- linux-orig/fs/gfs/file.h 1969-12-31 18:00:00.000000000 -0600
++++ linux-patched/fs/gfs/file.h 2004-10-27 15:27:10.879630993 -0500
@@ -0,0 +1,51 @@
+/******************************************************************************
+*******************************************************************************
+#ifndef __FILE_DOT_H__
+#define __FILE_DOT_H__
+
-+typedef int (*read_copy_fn_t) (struct buffer_head * bh, void **buf,
++typedef int (*read_copy_fn_t) (struct buffer_head *bh, void **buf,
+ unsigned int offset, unsigned int size);
-+typedef int (*write_copy_fn_t) (struct gfs_inode * ip, struct buffer_head * bh,
++typedef int (*write_copy_fn_t) (struct gfs_inode *ip, struct buffer_head *bh,
+ void **buf, unsigned int offset,
+ unsigned int size, int new);
+
+}
+
+#endif /* __FILE_DOT_H__ */
-diff -urN linux-2.6.9-rc1-mm3/fs/gfs/fixed_div64.h linux/fs/gfs/fixed_div64.h
---- linux-2.6.9-rc1-mm3/fs/gfs/fixed_div64.h 1969-12-31 18:00:00.000000000 -0600
-+++ linux/fs/gfs/fixed_div64.h 2004-09-07 16:26:15.741552591 -0500
+diff -urN linux-orig/fs/gfs/fixed_div64.h linux-patched/fs/gfs/fixed_div64.h
+--- linux-orig/fs/gfs/fixed_div64.h 1969-12-31 18:00:00.000000000 -0600
++++ linux-patched/fs/gfs/fixed_div64.h 2004-10-27 15:27:10.879630993 -0500
@@ -0,0 +1,142 @@
+/*
+ * Copyright (c) 2000-2003 Silicon Graphics, Inc. All Rights Reserved.
+#define do_mod(a, b) fixed_div64_do_mod(&(a), (b), sizeof(a))
+
+#endif /* __FIXED_DIV64_DOT_H__ */
-diff -urN linux-2.6.9-rc1-mm3/fs/gfs/format.h linux/fs/gfs/format.h
---- linux-2.6.9-rc1-mm3/fs/gfs/format.h 1969-12-31 18:00:00.000000000 -0600
-+++ linux/fs/gfs/format.h 2004-09-07 16:26:15.742552371 -0500
+diff -urN linux-orig/fs/gfs/format.h linux-patched/fs/gfs/format.h
+--- linux-orig/fs/gfs/format.h 1969-12-31 18:00:00.000000000 -0600
++++ linux-patched/fs/gfs/format.h 2004-10-27 15:27:10.879630993 -0500
@@ -0,0 +1,30 @@
+/******************************************************************************
+*******************************************************************************
+};
+
+#endif /* __FORMAT_DOT_H__ */
-diff -urN linux-2.6.9-rc1-mm3/fs/gfs/gfs.h linux/fs/gfs/gfs.h
---- linux-2.6.9-rc1-mm3/fs/gfs/gfs.h 1969-12-31 18:00:00.000000000 -0600
-+++ linux/fs/gfs/gfs.h 2004-09-07 16:26:15.742552371 -0500
-@@ -0,0 +1,130 @@
+diff -urN linux-orig/fs/gfs/gfs.h linux-patched/fs/gfs/gfs.h
+--- linux-orig/fs/gfs/gfs.h 1969-12-31 18:00:00.000000000 -0600
++++ linux-patched/fs/gfs/gfs.h 2004-10-27 15:27:10.880630761 -0500
+@@ -0,0 +1,151 @@
+/******************************************************************************
+*******************************************************************************
+**
+#define SCNX64 "LX"
+#endif
+
-+/* Divide x by y. Round up if there is a remainder. */
-+#define DIV_RU(x, y) (((x) + (y) - 1) / (y))
++/* Divide num by den. Round up if there is a remainder. */
++#define DIV_RU(num, den) (((num) + (den) - 1) / (den))
++#define MAKE_MULT8(x) (((x) + 7) & ~7)
+
+#define GFS_FAST_NAME_SIZE (8)
+
+ meta_check_magic = gfs32_to_cpu(meta_check_magic); \
+ GFS_ASSERT_SBD(meta_check_magic == GFS_MAGIC, (sdp), \
+ struct gfs_meta_header meta_check_mh; \
-+ printk("Bad metadata at %"PRIu64"\n", (uint64_t)(bh)->b_blocknr); \
++ printk("Bad metadata at %"PRIu64"\n", \
++ (uint64_t)(bh)->b_blocknr); \
+ gfs_meta_header_in(&meta_check_mh, (bh)->b_data); \
+ gfs_meta_header_print(&meta_check_mh);); \
+} \
+ GFS_ASSERT_SBD(metatype_check_magic == GFS_MAGIC && \
+ metatype_check_type == (type), (sdp), \
+ struct gfs_meta_header metatype_check_mh; \
-+ printk("Bad metadata at %"PRIu64", should be %u\n", (uint64_t)(bh)->b_blocknr, (type)); \
++ printk("Bad metadata at %"PRIu64", should be %u\n", \
++ (uint64_t)(bh)->b_blocknr, (type)); \
++ gfs_meta_header_in(&metatype_check_mh, (bh)->b_data); \
++ gfs_meta_header_print(&metatype_check_mh);); \
++} \
++while (0)
++
++#define gfs_metatype_check2(sdp, bh, type1, type2) \
++do \
++{ \
++ uint32_t metatype_check_magic = ((struct gfs_meta_header *)(bh)->b_data)->mh_magic; \
++ uint32_t metatype_check_type = ((struct gfs_meta_header *)(bh)->b_data)->mh_type; \
++ metatype_check_magic = gfs32_to_cpu(metatype_check_magic); \
++ metatype_check_type = gfs32_to_cpu(metatype_check_type); \
++ GFS_ASSERT_SBD(metatype_check_magic == GFS_MAGIC && \
++ (metatype_check_type == (type1) || \
++ metatype_check_type == (type2)), (sdp), \
++ struct gfs_meta_header metatype_check_mh; \
++ printk("Bad metadata at %"PRIu64", should be %u or %u\n", \
++ (uint64_t)(bh)->b_blocknr, (type1), (type2)); \
+ gfs_meta_header_in(&metatype_check_mh, (bh)->b_data); \
+ gfs_meta_header_print(&metatype_check_mh);); \
+} \
+while (0)
+
+#endif /* __GFS_DOT_H__ */
-diff -urN linux-2.6.9-rc1-mm3/fs/gfs/glock.c linux/fs/gfs/glock.c
---- linux-2.6.9-rc1-mm3/fs/gfs/glock.c 1969-12-31 18:00:00.000000000 -0600
-+++ linux/fs/gfs/glock.c 2004-09-07 16:26:15.745551709 -0500
-@@ -0,0 +1,2524 @@
+diff -urN linux-orig/fs/gfs/glock.c linux-patched/fs/gfs/glock.c
+--- linux-orig/fs/gfs/glock.c 1969-12-31 18:00:00.000000000 -0600
++++ linux-patched/fs/gfs/glock.c 2004-10-27 15:27:10.880630761 -0500
+@@ -0,0 +1,2647 @@
+/******************************************************************************
+*******************************************************************************
+**
+ unsigned long gl_flags;
+};
+
++struct greedy {
++ struct gfs_holder gr_gh;
++ struct work_struct gr_work;
++};
++
+typedef void (*glock_examiner) (struct gfs_glock * gl);
+
+/**
+static __inline__ void
+glock_hold(struct gfs_glock *gl)
+{
++ GFS_ASSERT_GLOCK(atomic_read(&gl->gl_count) > 0, gl,);
+ atomic_inc(&gl->gl_count);
+}
+
+ if (!lm_name_equal(&gl->gl_name, name))
+ continue;
+
-+ glock_hold(gl);
++ atomic_inc(&gl->gl_count);
+
+ return gl;
+ }
+ GFS_ASSERT_GLOCK(list_empty(&gl->gl_holders), gl,);
+ GFS_ASSERT_GLOCK(list_empty(&gl->gl_waiters1), gl,);
+ GFS_ASSERT_GLOCK(list_empty(&gl->gl_waiters2), gl,);
++ GFS_ASSERT_GLOCK(list_empty(&gl->gl_waiters3), gl,);
+ GFS_ASSERT_GLOCK(gl->gl_state == LM_ST_UNLOCKED, gl,);
+ GFS_ASSERT_GLOCK(!gl->gl_object, gl,);
+ GFS_ASSERT_GLOCK(!gl->gl_lvb, gl,);
+ INIT_LIST_HEAD(&gl->gl_holders);
+ INIT_LIST_HEAD(&gl->gl_waiters1);
+ INIT_LIST_HEAD(&gl->gl_waiters2);
++ INIT_LIST_HEAD(&gl->gl_waiters3);
+
+ gl->gl_ops = glops;
+
+
+ gl->gl_sbd = sdp;
+
-+ INIT_LIST_HEAD(&gl->gl_dirty_buffers);
+ INIT_LIST_HEAD(&gl->gl_ail_bufs);
+
+ if (glops == &gfs_inode_glops ||
+void
+gfs_glock_hold(struct gfs_glock *gl)
+{
-+ GFS_ASSERT_GLOCK(atomic_read(&gl->gl_count) > 0, gl,);
+ glock_hold(gl);
+}
+
+
+ GFS_ASSERT_GLOCK(gh->gh_owner, gl,);
+
-+ for (head = &gl->gl_waiters2, tmp = head->next, next = tmp->next;
++ for (head = &gl->gl_waiters3, tmp = head->next, next = tmp->next;
+ tmp != head;
+ tmp = next, next = tmp->next) {
+ tmp_gh = list_entry(tmp, struct gfs_holder, gh_list);
+}
+
+/**
-+ * do_unrecurse - a recursive holder was just dropped of the waiters2 list
++ * do_unrecurse - a recursive holder was just dropped off the waiters3 list
+ * @gh: the holder
+ *
+ * If there is only one other recursive holder, clear is HIF_RECURSE bit.
+
+ GFS_ASSERT_GLOCK(gh->gh_owner, gl,);
+
-+ for (head = &gl->gl_waiters2, tmp = head->next;
++ for (head = &gl->gl_waiters3, tmp = head->next;
+ tmp != head;
+ tmp = tmp->next) {
+ tmp_gh = list_entry(tmp, struct gfs_holder, gh_list);
+ * rq_mutex - process a mutex request in the queue
+ * @gh: the glock holder
+ *
-+ * Returns: TRUE if the queue is blocked,
++ * Returns: TRUE if the queue is blocked
+ */
+
+static int
+ * @gh: the glock holder
+ * @promote_ok: It's ok to ask the LM to do promotes on a sync lock module
+ *
-+ * Returns: TRUE if the queue is blocked,
++ * Returns: TRUE if the queue is blocked
+ */
+
+static int
+ * rq_demote - process a demote request in the queue
+ * @gh: the glock holder
+ *
-+ * Returns: TRUE if the queue is blocked,
++ * Returns: TRUE if the queue is blocked
+ */
+
+static int
+}
+
+/**
++ * rq_greedy - process a greedy request in the queue
++ * @gh: the glock holder
++ *
++ * Returns: TRUE if the queue is blocked
++ */
++
++static int
++rq_greedy(struct gfs_holder *gh)
++{
++ struct gfs_glock *gl = gh->gh_gl;
++
++ list_del_init(&gh->gh_list);
++ /* gh->gh_error never examined. */
++ clear_bit(GLF_GREEDY, &gl->gl_flags);
++ spin_unlock(&gl->gl_spin);
++
++ gfs_holder_uninit(gh);
++ kfree(container_of(gh, struct greedy, gr_gh));
++
++ spin_lock(&gl->gl_spin);
++
++ return FALSE;
++}
++
++/**
+ * run_queue - process holder structures on a glock
+ * @gl: the glock
+ * @promote_ok: It's ok to ask the LM to do promotes on a sync lock module
+ else
+ GFS_ASSERT_GLOCK(FALSE, gl,);
+
-+ } else if (!list_empty(&gl->gl_waiters2)) {
++ } else if (!list_empty(&gl->gl_waiters2) &&
++ !test_bit(GLF_SKIP_WAITERS2, &gl->gl_flags)) {
+ gh = list_entry(gl->gl_waiters2.next,
+ struct gfs_holder, gh_list);
+
++ if (test_bit(HIF_DEMOTE, &gh->gh_iflags))
++ blocked = rq_demote(gh);
++ else if (test_bit(HIF_GREEDY, &gh->gh_iflags))
++ blocked = rq_greedy(gh);
++ else
++ GFS_ASSERT_GLOCK(FALSE, gl,);
++
++ } else if (!list_empty(&gl->gl_waiters3)) {
++ gh = list_entry(gl->gl_waiters3.next,
++ struct gfs_holder, gh_list);
++
+ if (test_bit(HIF_PROMOTE, &gh->gh_iflags))
+ blocked = rq_promote(gh, promote_ok);
-+ else if (test_bit(HIF_DEMOTE, &gh->gh_iflags))
-+ blocked = rq_demote(gh);
+ else
+ GFS_ASSERT_GLOCK(FALSE, gl,);
+
+ }
+
+ if (new_gh) {
-+ list_add(&new_gh->gh_list, &gl->gl_waiters2);
++ list_add_tail(&new_gh->gh_list, &gl->gl_waiters2);
+ new_gh = NULL;
+ } else {
+ spin_unlock(&gl->gl_spin);
+ spin_unlock(&gl->gl_spin);
+
+ } else
-+ GFS_ASSERT_GLOCK(FALSE, gl,);
++ GFS_ASSERT_GLOCK(FALSE, gl,
++ printk("ret = 0x%.8X\n", ret););
+
+ if (glops->go_xmote_bh)
+ glops->go_xmote_bh(gl);
+ }
+ }
+
-+ for (head = &gl->gl_waiters2, tmp = head->next;
++ for (head = &gl->gl_waiters3, tmp = head->next;
+ tmp != head;
+ tmp = tmp->next) {
+ tmp_gh = list_entry(tmp, struct gfs_holder, gh_list);
+ set_bit(HIF_RECURSE, &gh->gh_iflags);
+ set_bit(HIF_RECURSE, &tmp_gh->gh_iflags);
+
-+ list_add_tail(&gh->gh_list, &gl->gl_waiters2);
++ list_add_tail(&gh->gh_list, &gl->gl_waiters3);
+
+ return;
+ }
+ }
+
+ if (gh->gh_flags & LM_FLAG_PRIORITY)
-+ list_add(&gh->gh_list, &gl->gl_waiters2);
++ list_add(&gh->gh_list, &gl->gl_waiters3);
+ else
-+ list_add_tail(&gh->gh_list, &gl->gl_waiters2);
++ list_add_tail(&gh->gh_list, &gl->gl_waiters3);
+}
+
+/**
+ !list_empty(&gl->gl_holders) ||
+ !list_empty(&gl->gl_waiters1) ||
+ !list_empty(&gl->gl_waiters2) ||
++ !list_empty(&gl->gl_waiters3) ||
+ relaxed_state_ok(gl->gl_state, state, flags)) {
+ spin_unlock(&gl->gl_spin);
+ return;
+ gh.gh_owner = NULL;
+
+ spin_lock(&gl->gl_spin);
-+ list_add(&gh.gh_list, &gl->gl_waiters2);
++ list_add_tail(&gh.gh_list, &gl->gl_waiters2);
+ run_queue(gl, FALSE);
+ spin_unlock(&gl->gl_spin);
+
+}
+
+/**
++ * greedy_work - delayed-work handler that ends a glock's greedy period
++ * @data: the struct greedy holding the glock holder and work_struct
++ *
++ */
++
++static void
++greedy_work(void *data)
++{
++ struct greedy *gr = (struct greedy *)data;
++ struct gfs_holder *gh = &gr->gr_gh;
++ struct gfs_glock *gl = gh->gh_gl;
++ struct gfs_glock_operations *glops = gl->gl_ops;
++
++ clear_bit(GLF_SKIP_WAITERS2, &gl->gl_flags);
++
++ if (glops->go_greedy)
++ glops->go_greedy(gl);
++
++ spin_lock(&gl->gl_spin);
++
++ if (list_empty(&gl->gl_waiters2)) {
++ clear_bit(GLF_GREEDY, &gl->gl_flags);
++ spin_unlock(&gl->gl_spin);
++ gfs_holder_uninit(gh);
++ kfree(gr);
++ } else {
++ glock_hold(gl);
++ list_add_tail(&gh->gh_list, &gl->gl_waiters2);
++ run_queue(gl, FALSE);
++ spin_unlock(&gl->gl_spin);
++ glock_put(gl);
++ }
++}
++
++/**
++ * gfs_glock_be_greedy - begin a greedy period on a glock
++ * @gl: the glock
++ * @time: the duration of the greedy period, in jiffies
++ *
++ * Returns: 0 if go_greedy will be called, 1 otherwise
++ */
++
++int
++gfs_glock_be_greedy(struct gfs_glock *gl, unsigned int time)
++{
++ struct greedy *gr;
++ struct gfs_holder *gh;
++
++ if (!time ||
++ gl->gl_sbd->sd_args.ar_localcaching ||
++ test_and_set_bit(GLF_GREEDY, &gl->gl_flags))
++ return 1;
++
++ gr = kmalloc(sizeof(struct greedy), GFP_KERNEL);
++ if (!gr) {
++ clear_bit(GLF_GREEDY, &gl->gl_flags);
++ return 1;
++ }
++ gh = &gr->gr_gh;
++
++ gfs_holder_init(gl, 0, 0, gh);
++ set_bit(HIF_GREEDY, &gh->gh_iflags);
++ gh->gh_owner = NULL;
++ INIT_WORK(&gr->gr_work, greedy_work, gr);
++
++ set_bit(GLF_SKIP_WAITERS2, &gl->gl_flags);
++ schedule_delayed_work(&gr->gr_work, time);
++
++ return 0;
++}
++
++/**
+ * gfs_glock_nq_init - intialize a holder and enqueue it on a glock
+ * @gl: the glock
+ * @state: the state we're requesting
+ if (test_bit(GLF_PLUG, &gl->gl_flags))
+ continue;
+
-+ glock_hold(gl);
++ atomic_inc(&gl->gl_count);
+
+ break;
+ }
+ if (error)
+ goto out;
+ }
++ for (head = &gl->gl_waiters3, tmp = head->next;
++ tmp != head;
++ tmp = tmp->next) {
++ gh = list_entry(tmp, struct gfs_holder, gh_list);
++ error = dump_holder("Waiter3", gh, buf, size, count);
++ if (error)
++ goto out;
++ }
+ if (gl->gl_ops == &gfs_inode_glops && gl2ip(gl)) {
+ if (!test_bit(GLF_LOCK, &gl->gl_flags) &&
+ list_empty(&gl->gl_holders)) {
+
+ return error;
+}
-diff -urN linux-2.6.9-rc1-mm3/fs/gfs/glock.h linux/fs/gfs/glock.h
---- linux-2.6.9-rc1-mm3/fs/gfs/glock.h 1969-12-31 18:00:00.000000000 -0600
-+++ linux/fs/gfs/glock.h 2004-09-07 16:26:15.745551709 -0500
-@@ -0,0 +1,134 @@
+diff -urN linux-orig/fs/gfs/glock.h linux-patched/fs/gfs/glock.h
+--- linux-orig/fs/gfs/glock.h 1969-12-31 18:00:00.000000000 -0600
++++ linux-patched/fs/gfs/glock.h 2004-10-27 15:27:10.880630761 -0500
+@@ -0,0 +1,136 @@
+/******************************************************************************
+*******************************************************************************
+**
+void gfs_glock_prefetch(struct gfs_glock *gl, unsigned int state, int flags);
+void gfs_glock_force_drop(struct gfs_glock *gl);
+
++int gfs_glock_be_greedy(struct gfs_glock *gl, unsigned int time);
++
+int gfs_glock_nq_init(struct gfs_glock *gl, unsigned int state, int flags,
+ struct gfs_holder *gh);
+void gfs_glock_dq_uninit(struct gfs_holder *gh);
+int gfs_dump_lockstate(struct gfs_sbd *sdp, struct gfs_user_buffer *ub);
+
+#endif /* __GFS_GLOCK_DOT_H__ */
-diff -urN linux-2.6.9-rc1-mm3/fs/gfs/glops.c linux/fs/gfs/glops.c
---- linux-2.6.9-rc1-mm3/fs/gfs/glops.c 1969-12-31 18:00:00.000000000 -0600
-+++ linux/fs/gfs/glops.c 2004-09-07 16:26:15.746551489 -0500
-@@ -0,0 +1,526 @@
+diff -urN linux-orig/fs/gfs/glops.c linux-patched/fs/gfs/glops.c
+--- linux-orig/fs/gfs/glops.c 1969-12-31 18:00:00.000000000 -0600
++++ linux-patched/fs/gfs/glops.c 2004-10-27 15:27:10.881630529 -0500
+@@ -0,0 +1,563 @@
+/******************************************************************************
+*******************************************************************************
+**
+}
+
+/**
++ * inode_greedy -
++ * @gl: the glock
++ *
++ */
++
++static void
++inode_greedy(struct gfs_glock *gl)
++{
++ struct gfs_inode *ip = (struct gfs_inode *)gl->gl_object;
++ struct gfs_sbd *sdp = ip->i_sbd;
++ unsigned int new_time;
++
++ GFS_ASSERT_GLOCK(ip, gl,);
++
++ spin_lock(&ip->i_lock);
++
++ if (time_after(ip->i_last_pfault +
++ sdp->sd_tune.gt_greedy_quantum,
++ jiffies)) {
++ new_time = ip->i_greedy + sdp->sd_tune.gt_greedy_quantum;
++ if (new_time > sdp->sd_tune.gt_greedy_max)
++ new_time = sdp->sd_tune.gt_greedy_max;
++ } else {
++ new_time = ip->i_greedy - sdp->sd_tune.gt_greedy_quantum;
++ if (!new_time || new_time > sdp->sd_tune.gt_greedy_max)
++ new_time = 1;
++ }
++
++ ip->i_greedy = new_time;
++
++ spin_unlock(&ip->i_lock);
++
++ gfs_inode_put(ip);
++}
++
++/**
+ * rgrp_go_xmote_th - promote/demote a glock
+ * @gl: the glock
+ * @state: the requested state
+ .go_demote_ok = inode_go_demote_ok,
+ .go_lock = inode_go_lock,
+ .go_unlock = inode_go_unlock,
++ .go_greedy = inode_greedy,
+ .go_type = LM_TYPE_INODE
+};
+
+ .go_demote_ok = quota_go_demote_ok,
+ .go_type = LM_TYPE_QUOTA
+};
-diff -urN linux-2.6.9-rc1-mm3/fs/gfs/glops.h linux/fs/gfs/glops.h
---- linux-2.6.9-rc1-mm3/fs/gfs/glops.h 1969-12-31 18:00:00.000000000 -0600
-+++ linux/fs/gfs/glops.h 2004-09-07 16:26:15.746551489 -0500
+diff -urN linux-orig/fs/gfs/glops.h linux-patched/fs/gfs/glops.h
+--- linux-orig/fs/gfs/glops.h 1969-12-31 18:00:00.000000000 -0600
++++ linux-patched/fs/gfs/glops.h 2004-10-27 15:27:10.881630529 -0500
@@ -0,0 +1,26 @@
+/******************************************************************************
+*******************************************************************************
+extern struct gfs_glock_operations gfs_quota_glops;
+
+#endif /* __GLOPS_DOT_H__ */
-diff -urN linux-2.6.9-rc1-mm3/fs/gfs/incore.h linux/fs/gfs/incore.h
---- linux-2.6.9-rc1-mm3/fs/gfs/incore.h 1969-12-31 18:00:00.000000000 -0600
-+++ linux/fs/gfs/incore.h 2004-09-07 16:26:15.747551268 -0500
-@@ -0,0 +1,726 @@
+diff -urN linux-orig/fs/gfs/incore.h linux-patched/fs/gfs/incore.h
+--- linux-orig/fs/gfs/incore.h 1969-12-31 18:00:00.000000000 -0600
++++ linux-patched/fs/gfs/incore.h 2004-10-27 15:27:10.881630529 -0500
+@@ -0,0 +1,943 @@
+/******************************************************************************
+*******************************************************************************
+**
+*******************************************************************************
+******************************************************************************/
+
++/*
++ * In-core (memory/RAM) structures.
++ * These do not appear on-disk. See gfs_ondisk.h for on-disk structures.
++ */
++
+#ifndef __INCORE_DOT_H__
+#define __INCORE_DOT_H__
+
-+#define DIO_NEW (0x00000001)
-+#define DIO_FORCE (0x00000002)
-+#define DIO_CLEAN (0x00000004)
-+#define DIO_DIRTY (0x00000008)
-+#define DIO_START (0x00000010)
-+#define DIO_WAIT (0x00000020)
-+#define DIO_METADATA (0x00000040)
-+#define DIO_DATA (0x00000080)
-+#define DIO_INVISIBLE (0x00000100)
-+#define DIO_CHECK (0x00000200)
-+#define DIO_ALL (0x00000400)
++/* flags used in function call parameters */
++
++#define DIO_NEW (0x00000001) /* Newly allocated metadata */
++#define DIO_FORCE (0x00000002) /* Force read of block from disk */
++#define DIO_CLEAN (0x00000004) /* Don't write to disk */
++#define DIO_DIRTY (0x00000008) /* Data changed, must write to disk */
++#define DIO_START (0x00000010) /* Start disk read or write */
++#define DIO_WAIT (0x00000020) /* Wait for disk r/w to complete */
++
++#define DIO_METADATA (0x00000040) /* Process glock's protected metadata */
++#define DIO_DATA (0x00000080) /* Process glock's protected filedata */
++#define DIO_INVISIBLE (0x00000100) /* Don't monkey with glock's dirty bit */
++#define DIO_CHECK (0x00000200) /* Make sure all metadata has been synced */
++#define DIO_ALL (0x00000400) /* Flush all AIL transactions to disk */
+
+/* Structure prototypes */
+
+ * Structure of operations that are associated with each
+ * type of element in the log.
+ */
-+
+struct gfs_log_operations {
+ /* Operations specific to a given log element */
+
+ unsigned int *blocks, unsigned int *bmem);
+ void (*lo_build_dump) (struct gfs_sbd * sdp, struct gfs_trans * tr);
+
-+ /* Operations that happen at recovery time */
++ /* Operations that happen at recovery time */
+
+ void (*lo_before_scan) (struct gfs_sbd * sdp, unsigned int jid,
+ struct gfs_log_header * head,
+ void (*lo_after_scan) (struct gfs_sbd * sdp, unsigned int jid,
+ unsigned int pass);
+
++ /* Type of element (glock/buf/unlinked/quota) */
+ char *lo_name;
+};
+
+ * Structure that gets added to struct gfs_trans->tr_elements. They
+ * make up the "stuff" in each transaction.
+ */
-+
+struct gfs_log_element {
-+ struct gfs_log_operations *le_ops;
++ struct gfs_log_operations *le_ops; /* Vector of functions */
+
-+ struct gfs_trans *le_trans;
-+ struct list_head le_list;
++ struct gfs_trans *le_trans; /* We're part of this transaction */
++ struct list_head le_list; /* Link to transaction's element list */
+};
+
++/*
++ * Meta-header cache structure.
++ * One for each metadata block that we've read from disk, and are still using.
++ * In-core superblock structure hosts the actual cache.
++ * Also, each resource group keeps a list of cached blocks within its scope.
++ */
+struct gfs_meta_header_cache {
-+ struct list_head mc_list_hash;
-+ struct list_head mc_list_single;
-+ struct list_head mc_list_rgd;
++ /* Links to various lists */
++ struct list_head mc_list_hash; /* Superblock's hashed list */
++ struct list_head mc_list_single; /* Superblock's single list */
++ struct list_head mc_list_rgd; /* Resource group's list */
+
-+ uint64_t mc_block;
-+ struct gfs_meta_header mc_mh;
++ uint64_t mc_block; /* Block # (in-place address) */
++ struct gfs_meta_header mc_mh; /* Payload: the block's meta-header */
+};
+
++/*
++ * Dependency cache structure.
++ * In-core superblock structure hosts the actual cache.
++ * Also, each resource group keeps a list of dependency blocks within its scope.
++ */
+struct gfs_depend {
-+ struct list_head gd_list_hash;
-+ struct list_head gd_list_rgd;
++ /* Links to various lists */
++ struct list_head gd_list_hash; /* Superblock's hashed list */
++ struct list_head gd_list_rgd; /* Resource group's list */
+
-+ struct gfs_rgrpd *gd_rgd;
-+ uint64_t gd_formal_ino;
-+ unsigned long gd_time;
++ struct gfs_rgrpd *gd_rgd; /* Resource group descriptor */
++ uint64_t gd_formal_ino; /* Inode ID */
++ unsigned long gd_time; /* Time (jiffies) when put on list */
+};
+
+/*
-+ * Structure containing information about the allocation bitmaps.
-+ * There are one of these for each fs block that the bitmap for
-+ * the resource group header covers.
++ * Block allocation bitmap descriptor structure.
++ * One of these for each FS block that contains bitmap data
++ * (i.e. the resource group header blocks and their following bitmap blocks).
++ * Each allocatable FS data block is represented by 2 bits (4 alloc states).
+ */
-+
+struct gfs_bitmap {
-+ uint32_t bi_offset; /* The offset in the buffer of the first byte */
-+ uint32_t bi_start; /* The position of the first byte in this block */
-+ uint32_t bi_len; /* The number of bytes in this block */
++ uint32_t bi_offset; /* Byte offset of bitmap within this bit block
++ (non-zero only for an rgrp header block) */
++ uint32_t bi_start; /* Data block (rgrp scope, 32-bit) represented
++ by the first bit-pair in this bit block */
++ uint32_t bi_len; /* The number of bitmap bytes in this bit block */
+};
+
+/*
-+ * Structure containing information Resource Groups
++ * Resource Group (Rgrp) descriptor structure.
++ * There is one of these for each resource (block) group in the FS.
++ * The filesystem is divided into a number of resource groups to allow
++ * simultaneous block alloc operations by a number of nodes.
+ */
-+
+struct gfs_rgrpd {
-+ struct list_head rd_list; /* Link with superblock */
-+ struct list_head rd_list_mru;
-+ struct list_head rd_recent; /* Recently used rgrps */
-+
-+ struct gfs_glock *rd_gl; /* Glock for rgrp */
++ /* Links to superblock lists */
++ struct list_head rd_list; /* On-disk-order list of all rgrps */
++ struct list_head rd_list_mru; /* Most Recently Used list of all rgs */
++ struct list_head rd_recent; /* recently used rgrps */
+
-+ unsigned long rd_flags;
++ struct gfs_glock *rd_gl; /* Glock for this rgrp */
+
-+ struct gfs_rindex rd_ri; /* Resource Index structure */
-+ struct gfs_rgrp rd_rg; /* Resource Group structure */
-+ uint64_t rd_rg_vn;
++ struct gfs_rindex rd_ri; /* Resource Index (on-disk) structure */
++ struct gfs_rgrp rd_rg; /* Resource Group (on-disk) structure */
++ uint64_t rd_rg_vn; /* Version #: if != glock's gl_vn,
++ we need to read rgrp fm disk */
+
-+ struct gfs_bitmap *rd_bits;
-+ struct buffer_head **rd_bh;
++ /* Block alloc bitmap cache */
++ struct gfs_bitmap *rd_bits; /* Array of block bitmap descriptors */
++ struct buffer_head **rd_bh; /* Array of ptrs to block bitmap bh's */
+
-+ uint32_t rd_last_alloc_data;
-+ uint32_t rd_last_alloc_meta;
++ /* Block allocation strategy, rgrp scope. Start at these blocks when
++ searching for next data/meta block to alloc */
++ uint32_t rd_last_alloc_data; /* Most recent data block allocated */
++ uint32_t rd_last_alloc_meta; /* Most recent meta block allocated */
+
-+ struct list_head rd_mhc;
-+ struct list_head rd_depend;
++ struct list_head rd_mhc; /* Cached meta-headers for this rgrp */
++ struct list_head rd_depend; /* Dependency elements */
+
-+ struct gfs_sbd *rd_sbd;
++ struct gfs_sbd *rd_sbd; /* FS incore superblock (fs instance) */
+};
+
+/*
+ * Per-buffer data
++ * One of these is attached as GFS private data to each FS block's buffer_head.
++ * These also link into the Active Items Lists (AIL) (buffers flushed to
++ * on-disk log, but not yet flushed to on-disk in-place locations) attached
++ * to transactions and glocks.
+ */
-+
+struct gfs_bufdata {
-+ struct buffer_head *bd_bh; /* struct buffer_head which this struct belongs to */
-+ struct gfs_glock *bd_gl; /* Pointer to Glock struct for this bh */
++ struct buffer_head *bd_bh; /* We belong to this Linux buffer_head */
++ struct gfs_glock *bd_gl; /* This glock protects buffer's payload */
+
+ struct gfs_log_element bd_new_le;
+ struct gfs_log_element bd_incore_le;
+
-+ char *bd_frozen;
-+ struct semaphore bd_lock;
++ char *bd_frozen; /* "Frozen" copy of buffer's data */
++ struct semaphore bd_lock; /* Protects access to this structure */
+
-+ unsigned int bd_pinned; /* Pin count */
-+ struct list_head bd_ail_tr_list; /* List of buffers hanging off tr_ail_bufs */
-+ struct list_head bd_ail_gl_list; /* List of buffers hanging off gl_ail_bufs */
++ /* "Pin" means keep buffer in RAM, don't write to disk (yet) */
++ unsigned int bd_pinned; /* Recursive pin count */
++ struct list_head bd_ail_tr_list; /* Link to transaction's AIL list */
++ struct list_head bd_ail_gl_list; /* Link to glock's AIL list */
+};
+
+/*
+ * Glock operations
++ * One set of operations for each glock, the set selected by type of glock.
++ * These functions get called at various points in a glock's lifetime.
++ * "xmote" = promote (lock) a glock at inter-node level.
++ * "th" = top half, "bh" = bottom half
+ */
-+
+struct gfs_glock_operations {
++
++ /* Before acquiring a lock at inter-node level */
+ void (*go_xmote_th) (struct gfs_glock * gl, unsigned int state,
+ int flags);
++
++ /* After acquiring a lock at inter-node level */
+ void (*go_xmote_bh) (struct gfs_glock * gl);
++
++ /* Before releasing a lock at inter-node level, calls go_sync */
+ void (*go_drop_th) (struct gfs_glock * gl);
++
++ /* After releasing a lock at inter-node level, calls go_inval */
+ void (*go_drop_bh) (struct gfs_glock * gl);
++
++ /* Sync dirty data to disk before releasing an inter-node lock
++ (another node needs to read the updated data from disk) */
+ void (*go_sync) (struct gfs_glock * gl, int flags);
++
++ /* Invalidate locally cached data just after releasing an inter-node lock
++ (another node may change the on-disk data, so it's no good to us) */
+ void (*go_inval) (struct gfs_glock * gl, int flags);
++
++ /* Lock-type-specific check to see if it's okay to unlock a glock */
+ int (*go_demote_ok) (struct gfs_glock * gl);
++
++ /* After locking at local process level */
+ int (*go_lock) (struct gfs_glock * gl, int flags);
++
++ /* Before unlocking at local process level */
+ void (*go_unlock) (struct gfs_glock * gl, int flags);
++
++ /* After receiving a callback: another node needs the lock */
+ void (*go_callback) (struct gfs_glock * gl, unsigned int state);
-+ int go_type;
++
++ /* Called when the glock layer marks a lock as being not greedy
++ anymore */
++ void (*go_greedy) (struct gfs_glock * gl);
++
++ /* Lock type: locks with same lock # (usually an FS block #),
++ but different types, are different locks */
++ int go_type; /* glock type */
+};
+
-+/* Actions */
-+#define HIF_MUTEX (0)
-+#define HIF_PROMOTE (1)
-+#define HIF_DEMOTE (2)
++/*
++ * Glock holder structure
++ * These coordinate the use, within this node, of an acquired inter-node lock.
++ * One for each holder of a glock. A glock may be shared within a node by
++ * several processes, or even by several recursive requests from the same
++ * process. Each is a separate "holder". To be shared locally, the glock
++ * must be in "SHARED" or "DEFERRED" state at inter-node level, which means
++ * that processes on other nodes might also read the protected entity.
++ * When a process needs to manipulate a lock, it requests it via one of
++ * these holder structures. If the request cannot be satisfied immediately,
++ * the holder structure gets queued on one of these glock lists:
++ * 1) waiters1, for gaining exclusive (local) access to the glock structure.
++ * 2) waiters2, for unlocking (demoting) a lock or waiting for a lock
++ * to be unlocked.
++ * 3) waiters3, for locking (promoting) a lock. This may require
++ * changing lock state at inter-node level.
++ * When holding a lock, gfs_holder struct stays on glock's holder list.
++ * See gfs-kernel/src/harness/lm_interface.h for gh_state (LM_ST_...)
++ * and gh_flags (LM_FLAG...) fields.
++ * Also see glock.h for gh_flags field (GL_...) flags.
++ */
++
++/* Action requests */
++#define HIF_MUTEX (0) /* Exclusive (local) access to glock struct */
++#define HIF_PROMOTE (1) /* Change lock to more restrictive state */
++#define HIF_DEMOTE (2) /* Change lock to less restrictive state */
++#define HIF_GREEDY (3) /* Wait for the glock to be unlocked */
+
+/* States */
-+#define HIF_ALLOCED (3)
-+#define HIF_DEALLOC (4)
-+#define HIF_HOLDER (5)
-+#define HIF_FIRST (6)
-+#define HIF_WAKEUP (7)
-+#define HIF_RECURSE (8)
++#define HIF_ALLOCED (4) /* Holder structure is or was in use */
++#define HIF_DEALLOC (5) /* Holder structure no longer in use */
++#define HIF_HOLDER (6) /* We have been granted a hold on the lock */
++#define HIF_FIRST (7) /* We are first on glock's holder list */
++#define HIF_WAKEUP (8) /* Wake us up when request is satisfied */
++#define HIF_RECURSE (9) /* Recursive locks on same glock by same process */
+
+struct gfs_holder {
-+ struct list_head gh_list;
++ struct list_head gh_list; /* Link to one of glock's holder lists */
++
++ struct gfs_glock *gh_gl; /* Glock that we're holding */
++ struct task_struct *gh_owner; /* Linux process that is the holder */
+
-+ struct gfs_glock *gh_gl;
-+ struct task_struct *gh_owner;
-+ unsigned int gh_state;
-+ int gh_flags;
++ /* request to change lock state */
++ unsigned int gh_state; /* LM_ST_... requested lock state */
++ int gh_flags; /* GL_... or LM_FLAG_... req modifiers */
+
-+ int gh_error;
-+ unsigned long gh_iflags;
-+ struct completion gh_wait;
++ int gh_error; /* GLR_... CANCELLED or TRYFAILED or -errno */
++ unsigned long gh_iflags; /* HIF_... see above */
++ struct completion gh_wait; /* Wait for the hold request to be satisfied */
++ unsigned long gh_iflags; /* HIF_... see above */
++ struct completion gh_wait; /* Wait for completion of ... */
+};
+
+/*
+ * Glock Structure
-+ */
-+
-+#define GLF_PLUG (0)
-+#define GLF_LOCK (1)
-+#define GLF_STICKY (2)
-+#define GLF_PREFETCH (3)
-+#define GLF_SYNC (4)
-+#define GLF_DIRTY (5)
-+#define GLF_LVB_INVALID (6)
++ * One for each inter-node lock held by this node.
++ * A glock is a local representation/abstraction of an inter-node lock.
++ * Inter-node locks are managed by a "lock module" which plugs in to the
++ * lock harness / glock interface (see gfs-kernel/harness). Different
++ * lock modules support different lock protocols (e.g. GULM, GDLM, no_lock).
++ * A glock may have one or more holders within a node. See gfs_holder above.
++ * Glocks are managed within a hash table hosted by the in-core superblock.
++ * After all holders have released a glock, it will stay in the hash table
++ * cache for a time (depending on lock type), during which the inter-node
++ * lock will not be released unless another node needs the lock. This
++ * provides better performance in case this node needs the glock again soon.
++ * Each glock has an associated vector of lock-type-specific "glops" functions
++ * which are called at important times during the life of a glock, and
++ * which define the type of lock (e.g. dinode, rgrp, non-disk, etc).
++ * See gfs_glock_operations above.
++ * A glock, at inter-node scope, is identified by the following dimensions:
++ * 1) lock number (usually a block # for on-disk protected entities,
++ * or a fixed assigned number for non-disk locks, e.g. MOUNT).
++ * 2) lock type (actually, the type of entity protected by the lock).
++ * 3) lock namespace, to support multiple GFS filesystems simultaneously.
++ * Namespace (usually cluster:filesystem) is specified when mounting.
++ * See man page for gfs_mount.
++ * Glocks require support of Lock Value Blocks (LVBs) by the inter-node lock
++ * manager. LVBs are small (32-byte) chunks of data associated with a given
++ * lock, that can be quickly shared between cluster nodes. Used for certain
++ * purposes such as sharing an rgroup's block usage statistics without
++ * requiring the overhead of:
++ * -- sync-to-disk by one node, then a
++ * -- read from disk by another node.
++ *
++ */
++
++#define GLF_PLUG (0) /* Dummy */
++#define GLF_LOCK (1) /* Exclusive (local) access to glock structure */
++#define GLF_STICKY (2) /* Don't unlock this lock unless someone explicitly asks */
++#define GLF_PREFETCH (3) /* This lock has been prefetched, demote if not used soon */
++#define GLF_SYNC (4) /* This lock should be synced */
++#define GLF_DIRTY (5) /* There is dirty data for this lock */
++#define GLF_LVB_INVALID (6) /* LVB does not contain valid data */
++#define GLF_SKIP_WAITERS2 (7) /* Make run_queue() ignore gl_waiters2 holders */
++#define GLF_GREEDY (8) /* This lock is ignoring callbacks for now */
+
+struct gfs_glock {
-+ struct list_head gl_list;
-+ unsigned long gl_flags;
-+ struct lm_lockname gl_name;
-+ atomic_t gl_count;
++ struct list_head gl_list; /* Link to superblock's hash table */
++ unsigned long gl_flags; /* GLF_... see above */
++ struct lm_lockname gl_name; /* Lock number and lock type */
++ atomic_t gl_count; /* Usage count */
+
-+ spinlock_t gl_spin;
++ spinlock_t gl_spin; /* Protects some members of this struct */
+
-+ unsigned int gl_state;
-+ struct list_head gl_holders;
-+ struct list_head gl_waiters1; /* HIF_MUTEX */
-+ struct list_head gl_waiters2; /* HIF_DEMOTE, HIF_PROMOTE */
++ /* Lock state reflects inter-node manager's lock state */
++ unsigned int gl_state; /* LM_ST_... see harness/lm_interface.h */
+
-+ struct gfs_glock_operations *gl_ops;
++ /* Lists of gfs_holders */
++ struct list_head gl_holders; /* all current holders of the glock */
++ struct list_head gl_waiters1; /* HIF_MUTEX */
++ struct list_head gl_waiters2; /* HIF_DEMOTE, HIF_GREEDY */
++ struct list_head gl_waiters3; /* HIF_PROMOTE */
+
-+ struct gfs_holder *gl_req_gh;
-+ gfs_glop_bh_t gl_req_bh;
++ struct gfs_glock_operations *gl_ops; /* function vector, defines type */
+
-+ lm_lock_t *gl_lock;
-+ char *gl_lvb;
-+ atomic_t gl_lvb_count;
++ /* State to remember for async lock requests */
++ struct gfs_holder *gl_req_gh; /* The holder that generated the request */
++ gfs_glop_bh_t gl_req_bh; /* The bottom half to execute */
+
-+ uint64_t gl_vn;
-+ unsigned long gl_stamp;
-+ void *gl_object;
++ lm_lock_t *gl_lock; /* Lock module's private lock data */
++ char *gl_lvb; /* Lock Value Block */
++ atomic_t gl_lvb_count; /* LVB recursive usage (hold/unhold) count */
+
-+ struct gfs_log_element gl_new_le;
-+ struct gfs_log_element gl_incore_le;
++ uint64_t gl_vn; /* Incremented when protected data changes */
++ unsigned long gl_stamp; /* Glock cache retention timer */
++ void *gl_object; /* The protected entity (e.g. a dinode) */
+
-+ struct gfs_gl_hash_bucket *gl_bucket;
-+ struct list_head gl_reclaim;
++ /* Incore transaction stuff */
++ struct gfs_log_element gl_new_le; /* Incomplete transaction */
++ struct gfs_log_element gl_incore_le; /* Complete transaction */
+
-+ struct gfs_sbd *gl_sbd;
++ struct gfs_gl_hash_bucket *gl_bucket; /* Our bucket in hash table */
++ struct list_head gl_reclaim; /* Link to "reclaim" list */
+
-+ struct inode *gl_aspace;
-+ struct list_head gl_dirty_buffers;
-+ struct list_head gl_ail_bufs;
++ struct gfs_sbd *gl_sbd; /* Superblock (FS instance) */
++
++ struct inode *gl_aspace; /* The buffers protected by this lock */
++ struct list_head gl_ail_bufs; /* AIL buffers protected by us */
+};
+
+/*
+ * In-Place Reservation structure
++ * Coordinates allocation of "in-place" (as opposed to journal) FS blocks,
++ * which contain persistent inode/file/directory data and metadata.
++ * These blocks are the allocatable blocks within resource groups (i.e.
++ * not including rgrp header and block alloc bitmap blocks).
++ * gfs_inplace_reserve() calculates a fulfillment plan for allocating blocks,
++ * based on block statistics in the resource group headers.
++ * Then, gfs_blkalloc() or gfs_metaalloc() walks the block alloc bitmaps
++ * to do the actual allocation.
+ */
-+
+struct gfs_alloc {
-+ /* Quota stuff */
++ /* Up to 4 quotas (including an inode's user and group quotas)
++ can track changes in block allocation */
+
-+ unsigned int al_qd_num;
-+ struct gfs_quota_data *al_qd[4];
-+ struct gfs_holder al_qd_ghs[4];
++ unsigned int al_qd_num; /* # of quotas tracking changes */
++ struct gfs_quota_data *al_qd[4]; /* Ptrs to quota structures */
++ struct gfs_holder al_qd_ghs[4]; /* Holders for quota glocks */
+
-+ /* Filled in by the caller to gfs_inplace_reserve() */
++ /* Request, filled in by the caller to gfs_inplace_reserve() */
+
-+ uint32_t al_requested_di;
-+ uint32_t al_requested_meta;
-+ uint32_t al_requested_data;
++ uint32_t al_requested_di; /* Number of dinodes to reserve */
++ uint32_t al_requested_meta; /* Number of metadata blocks to reserve */
++ uint32_t al_requested_data; /* Number of data blocks to reserve */
+
-+ /* Filled in by gfs_inplace_reserve() */
++ /* Fulfillment plan, filled in by gfs_inplace_reserve() */
+
-+ char *al_file;
-+ unsigned int al_line;
-+ struct gfs_holder al_ri_gh;
-+ struct gfs_holder al_rgd_gh;
-+ struct gfs_rgrpd *al_rgd;
-+ uint32_t al_reserved_meta;
-+ uint32_t al_reserved_data;
++ char *al_file; /* Debug info, .c file making request */
++ unsigned int al_line; /* Debug info, line of code making req */
++ struct gfs_holder al_ri_gh; /* Glock holder for resource grp index */
++ struct gfs_holder al_rgd_gh; /* Glock holder for al_rgd rgrp */
++ struct gfs_rgrpd *al_rgd; /* Resource group from which to alloc */
++ uint32_t al_reserved_meta; /* Alloc up to this # meta blocks from al_rgd */
++ uint32_t al_reserved_data; /* Alloc up to this # data blocks from al_rgd */
+
-+ /* Filled in by gfs_blkalloc() */
++ /* Actual alloc, filled in by gfs_blkalloc()/gfs_metaalloc(), etc. */
+
-+ uint32_t al_alloced_di;
-+ uint32_t al_alloced_meta;
-+ uint32_t al_alloced_data;
++ uint32_t al_alloced_di; /* # dinode blocks allocated */
++ uint32_t al_alloced_meta; /* # meta blocks allocated */
++ uint32_t al_alloced_data; /* # data blocks allocated */
+
+ /* Dinode allocation crap */
+
-+ struct gfs_unlinked *al_ul;
++ struct gfs_unlinked *al_ul; /* Unlinked dinode log entry */
+};
+
+/*
+#define GIF_SW_PAGED (2)
+
+struct gfs_inode {
-+ struct gfs_inum i_num;
++ struct gfs_inum i_num; /* Formal inode # and block address */
+
-+ atomic_t i_count;
-+ unsigned long i_flags;
++ atomic_t i_count; /* Usage count */
++ unsigned long i_flags; /* GIF_... see above */
+
-+ uint64_t i_vn;
-+ struct gfs_dinode i_di;
++ uint64_t i_vn; /* Version #: if different from glock's vn,
++ we need to read inode from disk */
++ struct gfs_dinode i_di; /* Dinode (on-disk) structure */
+
-+ struct gfs_glock *i_gl;
-+ struct gfs_sbd *i_sbd;
-+ struct inode *i_vnode;
++ struct gfs_glock *i_gl; /* This glock protects this inode */
++ struct gfs_sbd *i_sbd; /* Superblock (fs instance structure) */
++ struct inode *i_vnode; /* Linux VFS inode structure */
+
-+ struct gfs_holder i_iopen_gh;
++ struct gfs_holder i_iopen_gh; /* Glock holder for Inode Open lock */
+
-+ struct gfs_alloc *i_alloc;
-+ uint64_t i_last_rg_alloc;
++ /* Block allocation strategy, inode scope */
++ struct gfs_alloc *i_alloc; /* In-place block reservation structure */
++ uint64_t i_last_rg_alloc; /* Most recent block alloc was fm this rgrp */
+
-+ struct task_struct *i_creat_task;
-+ pid_t i_creat_pid;
++ spinlock_t i_lock; /* Protects this structure */
+
-+ spinlock_t i_lock;
++ /* Cache of most-recently used buffers in indirect addressing chain */
+ struct buffer_head *i_cache[GFS_MAX_META_HEIGHT];
++
++ unsigned int i_greedy; /* The amount of time to be greedy */
++ unsigned long i_last_pfault; /* The time of the last page fault */
+};
+
+/*
+#define GFF_DID_DIRECT_ALLOC (0)
+
+struct gfs_file {
-+ unsigned long f_flags;
++ unsigned long f_flags; /* GFF_... see above */
+
-+ struct semaphore f_fl_lock;
-+ struct gfs_holder f_fl_gh;
++ struct semaphore f_fl_lock; /* Lock to protect flock operations */
++ struct gfs_holder f_fl_gh; /* Holder for this f_vfile's flock */
+
-+ struct gfs_inode *f_inode;
-+ struct file *f_vfile;
++ struct gfs_inode *f_inode; /* Incore GFS inode */
++ struct file *f_vfile; /* Linux file struct */
+};
+
+/*
+#define ULF_LOCK (4)
+
+struct gfs_unlinked {
-+ struct list_head ul_list;
-+ unsigned int ul_count;
++ struct list_head ul_list; /* Link to superblock's sd_unlinked_list */
++ unsigned int ul_count; /* Usage count */
+
-+ struct gfs_inum ul_inum;
-+ unsigned long ul_flags;
++ struct gfs_inum ul_inum; /* Formal inode #, block addr */
++ unsigned long ul_flags; /* ULF_... */
+
-+ struct gfs_log_element ul_new_le;
-+ struct gfs_log_element ul_incore_le;
-+ struct gfs_log_element ul_ondisk_le;
++ struct gfs_log_element ul_new_le; /* New, not yet committed */
++ struct gfs_log_element ul_incore_le; /* Committed to incore log */
++ struct gfs_log_element ul_ondisk_le; /* Committed to ondisk log */
+};
+
+/*
+ * Quota log element
++ * One for each logged change in a block alloc value affecting a given quota.
++ * Only one of these for a given quota within a given transaction;
++ * multiple changes, within one transaction, for a given quota will be
++ * combined into one log element.
+ */
-+
+struct gfs_quota_le {
-+ struct gfs_log_element ql_le;
++ /* Log element maps us to a particular set of log operations functions,
++ and to a particular transaction */
++ struct gfs_log_element ql_le; /* Generic log element structure */
+
-+ struct gfs_quota_data *ql_data;
-+ struct list_head ql_data_list;
++ struct gfs_quota_data *ql_data; /* The quota we're changing */
++ struct list_head ql_data_list; /* Link to quota's log element list */
+
-+ int64_t ql_change;
++ int64_t ql_change; /* # of blocks alloc'd (+) or freed (-) */
+};
+
-+#define QDF_USER (0)
-+#define QDF_OD_LIST (1)
-+#define QDF_LOCK (2)
++/*
++ * Quota structure
++ * One for each user or group quota.
++ * Summarizes all block allocation activity for a given quota, and supports
++ * recording updates of current block alloc values in GFS' special quota
++ * file, including the journaling of these updates, encompassing
++ * multiple transactions and log dumps.
++ */
++
++#define QDF_USER (0) /* User (1) vs. group (0) quota */
++#define QDF_OD_LIST (1) /* Waiting for sync to quota file */
++#define QDF_LOCK (2) /* Protects access to this structure */
+
+struct gfs_quota_data {
-+ struct list_head qd_list;
-+ unsigned int qd_count;
++ struct list_head qd_list; /* Link to superblock's sd_quota_list */
++ unsigned int qd_count; /* Usage count */
+
-+ uint32_t qd_id;
-+ unsigned long qd_flags;
++ uint32_t qd_id; /* User or group ID number */
++ unsigned long qd_flags; /* QDF_... */
+
-+ struct list_head qd_le_list;
++ /* This list is for non-log-dump transactions */
++ struct list_head qd_le_list; /* List of gfs_quota_le log elements */
+
-+ int64_t qd_change_new;
-+ int64_t qd_change_ic;
-+ int64_t qd_change_od;
-+ int64_t qd_change_sync;
++ /* Summary of block alloc changes affecting this quota, in various
++ stages of logging & syncing changes to the special quota file */
++ int64_t qd_change_new; /* New, not yet committed to in-core log*/
++ int64_t qd_change_ic; /* Committed to in-core log */
++ int64_t qd_change_od; /* Committed to on-disk log */
++ int64_t qd_change_sync; /* Being synced to the in-place quota file */
+
-+ struct gfs_quota_le qd_ondisk_ql;
-+ uint64_t qd_sync_gen;
++ struct gfs_quota_le qd_ondisk_ql; /* Log element for log dump */
++ uint64_t qd_sync_gen; /* Sync-to-quota-file generation # */
+
-+ struct gfs_glock *qd_gl;
-+ struct gfs_quota_lvb qd_qb;
++ /* Glock provides protection for quota, *and* provides
++ lock value block (LVB) communication, between nodes, of current
++ quota values. Shared lock -> LVB read. EX lock -> LVB write. */
++ struct gfs_glock *qd_gl; /* glock for this quota */
++ struct gfs_quota_lvb qd_qb; /* LVB (limit/warn/value) */
+
-+ unsigned long qd_last_warn;
++ unsigned long qd_last_warn; /* Jiffies of last warning to user */
+};
+
++/*
++ * Log Buffer descriptor structure
++ * One for each FS block buffer recorded in the log
++ */
+struct gfs_log_buf {
-+ struct list_head lb_list;
++ /* Link to one of the transaction structure's lists */
++ struct list_head lb_list; /* Link to tr_free_bufs or tr_list */
+
-+ struct buffer_head lb_bh;
-+ struct buffer_head *lb_unlock;
++ struct buffer_head lb_bh; /* Buffer Head describing the journal block */
++ struct buffer_head *lb_unlock; /* Buffer head to unlock after commit */
+};
+
+/*
-+ * Transaction structures
++ * Transaction structure
++ * One for each transaction
++ * This coordinates the logging and flushing of written metadata.
+ */
+
+#define TRF_LOG_DUMP (0x00000001)
+
+struct gfs_trans {
-+ struct list_head tr_list;
++
++ /* Link to various lists */
++ struct list_head tr_list; /* Superblk's incore trans or AIL list*/
+
+ /* Initial creation stuff */
+
-+ char *tr_file;
-+ unsigned int tr_line;
++ char *tr_file; /* Debug info: .c file creating trans */
++ unsigned int tr_line; /* Debug info: codeline creating trans */
+
-+ unsigned int tr_mblks_asked; /* Number of log blocks asked to be reserved */
-+ unsigned int tr_eblks_asked;
-+ unsigned int tr_seg_reserved; /* Number of segments reserved */
++ /* Reservations for on-disk space in journal */
++ unsigned int tr_mblks_asked; /* # of meta log blocks requested */
++ unsigned int tr_eblks_asked; /* # of extra log blocks requested */
++ unsigned int tr_seg_reserved; /* # of segments actually reserved */
+
-+ struct gfs_holder *tr_t_gh;
++ struct gfs_holder *tr_t_gh; /* Glock holder for this transaction */
+
+ /* Stuff filled in during creation */
+
-+ unsigned int tr_flags;
-+ struct list_head tr_elements;
++ unsigned int tr_flags; /* TRF_... */
++ struct list_head tr_elements; /* List of this trans' log elements */
+
+ /* Stuff modified during the commit */
+
-+ unsigned int tr_num_free_bufs;
++ unsigned int tr_num_free_bufs; /* List of free gfs_log_buf structs */
+ struct list_head tr_free_bufs;
-+ unsigned int tr_num_free_bmem;
++ unsigned int tr_num_free_bmem; /* List of free fs-block-size buffers */
+ struct list_head tr_free_bmem;
+
-+ uint64_t tr_log_head; /* The current log head */
-+ uint64_t tr_first_head; /* First header block */
++ uint64_t tr_log_head; /* The current log head */
++ uint64_t tr_first_head; /* First header block */
+
-+ struct list_head tr_bufs; /* List of buffers going to the log */
++ struct list_head tr_bufs; /* List of buffers going to the log */
+
-+ /* Stuff that's part of the AIL */
++ /* Stuff that's part of the Active Items List (AIL) */
+
-+ struct list_head tr_ail_bufs;
++ struct list_head tr_ail_bufs; /* List of buffers on AIL list */
+
-+ /* Private data for different log element types */
++ /* # log elements of various types on tr_elements list */
+
-+ unsigned int tr_num_gl;
-+ unsigned int tr_num_buf;
-+ unsigned int tr_num_iul;
-+ unsigned int tr_num_ida;
-+ unsigned int tr_num_q;
++ unsigned int tr_num_gl; /* Glocks */
++ unsigned int tr_num_buf; /* Buffers */
++ unsigned int tr_num_iul; /* Unlinked inodes */
++ unsigned int tr_num_ida; /* De-allocated inodes */
++ unsigned int tr_num_q; /* Quotas */
+};
+
+/*
+ * One bucket of the glock hash table.
+ */
-+
+struct gfs_gl_hash_bucket {
+ rwlock_t hb_lock;
+ struct list_head hb_list;
-+} __attribute__ ((__aligned__(SMP_CACHE_BYTES)));
++};
+
+/*
-+ * Super Block Data Structure (One per filesystem)
-+ */
-+
-+#define SDF_JOURNAL_LIVE (0)
-+#define SDF_SCAND_RUN (1)
-+#define SDF_GLOCKD_RUN (2)
-+#define SDF_RECOVERD_RUN (3)
-+#define SDF_LOGD_RUN (4)
-+#define SDF_QUOTAD_RUN (5)
-+#define SDF_INODED_RUN (6)
-+#define SDF_NOATIME (7)
-+#define SDF_ROFS (8)
-+#define SDF_NEED_LOG_DUMP (9)
-+#define SDF_FOUND_UL_DUMP (10)
-+#define SDF_FOUND_Q_DUMP (11)
-+#define SDF_IN_LOG_DUMP (12)
-+
-+#define GFS_GL_HASH_SHIFT (13)
++ * "Super Block" Data Structure
++ * One per mounted filesystem.
++ * This is the big instance structure that ties everything together for
++ * a given mounted filesystem. Each GFS mount has its own, supporting
++ * mounts of multiple GFS filesystems on each node.
++ * Pointer to this is usually seen as "sdp" throughout code.
++ * This is a very large structure, as structures go, in part because it
++ * contains arrays of hash buckets for various in-core caches.
++ */
++
++#define SDF_JOURNAL_LIVE (0) /* Journaling is active (journal is writeable)*/
++
++/* Daemon run (1) / stop (0) flags */
++#define SDF_SCAND_RUN (1) /* Put unused glocks on reclaim queue */
++#define SDF_GLOCKD_RUN (2) /* Reclaim (dealloc) unused glocks */
++#define SDF_RECOVERD_RUN (3) /* Recover journal of a crashed node */
++#define SDF_LOGD_RUN (4) /* Update log tail after AIL flushed */
++#define SDF_QUOTAD_RUN (5) /* Sync quota changes to file, cleanup */
++#define SDF_INODED_RUN (6) /* Deallocate unlinked inodes */
++
++/* (Re)mount options from Linux VFS */
++#define SDF_NOATIME (7) /* Don't change access time */
++#define SDF_ROFS (8) /* Read-only mode */
++
++/* Journal log dump support */
++#define SDF_NEED_LOG_DUMP (9) /* Need to rewrite unlink and quota tags */
++#define SDF_FOUND_UL_DUMP (10) /* Recovery found unlinked tags */
++#define SDF_FOUND_Q_DUMP (11) /* Recovery found quota tags */
++#define SDF_IN_LOG_DUMP (12) /* Serializes log dumps */
++
++/* Glock cache */
++#define GFS_GL_HASH_SHIFT (13) /* # hash buckets = 8K */
+#define GFS_GL_HASH_SIZE (1 << GFS_GL_HASH_SHIFT)
+#define GFS_GL_HASH_MASK (GFS_GL_HASH_SIZE - 1)
+
-+#define GFS_MHC_HASH_SHIFT (10)
++/* Meta header cache */
++#define GFS_MHC_HASH_SHIFT (10) /* # hash buckets = 1K */
+#define GFS_MHC_HASH_SIZE (1 << GFS_MHC_HASH_SHIFT)
+#define GFS_MHC_HASH_MASK (GFS_MHC_HASH_SIZE - 1)
+
-+#define GFS_DEPEND_HASH_SHIFT (10)
++/* Dependency cache */
++#define GFS_DEPEND_HASH_SHIFT (10) /* # hash buckets = 1K */
+#define GFS_DEPEND_HASH_SIZE (1 << GFS_DEPEND_HASH_SHIFT)
+#define GFS_DEPEND_HASH_MASK (GFS_DEPEND_HASH_SIZE - 1)
+
+struct gfs_sbd {
-+ struct gfs_sb sd_sb; /* Super Block */
++ struct gfs_sb sd_sb; /* GFS on-disk Super Block image */
+
-+ struct super_block *sd_vfs; /* FS's device independent sb */
++ struct super_block *sd_vfs; /* Linux VFS device independent sb */
+
-+ struct gfs_args sd_args;
-+ unsigned long sd_flags;
++ struct gfs_args sd_args; /* Mount arguments */
++ unsigned long sd_flags; /* SDF_... see above */
+
-+ struct gfs_tune sd_tune; /* FS tuning structure */
++ struct gfs_tune sd_tune; /* Filesystem tuning structure */
+
+ /* Resource group stuff */
+
-+ struct gfs_inode *sd_riinode; /* rindex inode */
-+ uint64_t sd_riinode_vn; /* Version number of the resource index inode */
-+
-+ struct list_head sd_rglist; /* List of resource groups */
-+ struct semaphore sd_rindex_lock;
-+
-+ struct list_head sd_rg_mru_list; /* List of resource groups in MRU order */
-+ spinlock_t sd_rg_mru_lock; /* Lock for MRU list */
-+ struct list_head sd_rg_recent; /* Recently used rgrps */
-+ spinlock_t sd_rg_recent_lock;
-+ struct gfs_rgrpd *sd_rg_forward; /* Next new rgrp to try for allocation */
-+ spinlock_t sd_rg_forward_lock;
-+
-+ unsigned int sd_rgcount; /* Count of resource groups */
++ struct gfs_inode *sd_riinode; /* Resource Index (rindex) inode */
++ uint64_t sd_riinode_vn; /* Resource Index version # (detects
++ whether new rgrps have been added) */
++
++ struct list_head sd_rglist; /* List of all resource groups,
++ on-disk order */
++ struct semaphore sd_rindex_lock;/* Serializes RIndex rereads */
++ struct list_head sd_rg_mru_list;/* List of resource groups,
++ most-recently-used (MRU) order */
++ spinlock_t sd_rg_mru_lock; /* Protect mru list */
++ struct list_head sd_rg_recent; /* List of rgrps from which blocks
++ were recently allocated */
++ spinlock_t sd_rg_recent_lock; /* Protect recent list */
++ struct gfs_rgrpd *sd_rg_forward;/* Next rgrp from which to attempt
++ a block alloc */
++ spinlock_t sd_rg_forward_lock; /* Protect forward pointer */
++
++ unsigned int sd_rgcount; /* Total # of resource groups */
+
+ /* Constants computed on mount */
+
-+ uint32_t sd_fsb2bb;
-+ uint32_t sd_fsb2bb_shift; /* Shift FS Block numbers to the left by
-+ this to get buffer cache blocks */
-+ uint32_t sd_diptrs; /* Number of pointers in a dinode */
-+ uint32_t sd_inptrs; /* Number of pointers in a indirect block */
-+ uint32_t sd_jbsize; /* Size of a journaled data block */
-+ uint32_t sd_hash_bsize; /* sizeof(exhash block) */
++ /* "bb" == "basic block" == 512Byte sector */
++ uint32_t sd_fsb2bb; /* # 512B basic blocks in a FS block */
++ uint32_t sd_fsb2bb_shift; /* Shift sector # to the right by
++ this to get FileSystem block addr */
++ uint32_t sd_diptrs; /* Max # of block pointers in a dinode */
++ uint32_t sd_inptrs; /* Max # of block pointers in an indirect blk */
++ uint32_t sd_jbsize; /* Payload size (bytes) of a journaled metadata
++ block (GFS journals all meta blocks) */
++ uint32_t sd_hash_bsize; /* sizeof(exhash hash block) */
+ uint32_t sd_hash_bsize_shift;
-+ uint32_t sd_hash_ptrs; /* Number of points in a hash block */
-+ uint32_t sd_max_dirres; /* Maximum space needed to add a directory entry */
-+ uint32_t sd_max_height; /* Maximum height of a file's metadata tree */
++ uint32_t sd_hash_ptrs; /* Number of pointers in a hash block */
++ uint32_t sd_max_dirres; /* Max blocks needed to add a directory entry */
++ uint32_t sd_max_height; /* Max height of a file's tree */
+ uint64_t sd_heightsize[GFS_MAX_META_HEIGHT];
-+ uint32_t sd_max_jheight; /* Maximum height of a journaled file's metadata tree */
++ uint32_t sd_max_jheight; /* Max height, journaled file's tree */
+ uint64_t sd_jheightsize[GFS_MAX_META_HEIGHT];
+
+ /* Lock Stuff */
+
++ /* Glock cache (all glocks currently held by this node for this FS) */
+ struct gfs_gl_hash_bucket sd_gl_hash[GFS_GL_HASH_SIZE];
+
-+ struct list_head sd_reclaim_list;
++ /* Glock reclaim support for scand and glockd */
++ struct list_head sd_reclaim_list; /* List of glocks to reclaim */
+ spinlock_t sd_reclaim_lock;
+ wait_queue_head_t sd_reclaim_wchan;
-+ atomic_t sd_reclaim_count;
++ atomic_t sd_reclaim_count; /* # glocks on reclaim list */
+
-+ struct lm_lockstruct sd_lockstruct;
++ /* Lock module tells us if we're first-to-mount,
++ which journal to use, etc. */
++ struct lm_lockstruct sd_lockstruct; /* Info provided by lock module */
+
-+ struct list_head sd_mhc[GFS_MHC_HASH_SIZE];
-+ struct list_head sd_mhc_single;
++ /* Other caches */
++
++ /* Meta-header cache (incore copies of on-disk meta headers) */
++ struct list_head sd_mhc[GFS_MHC_HASH_SIZE]; /* hash buckets */
++ struct list_head sd_mhc_single; /* Non-hashed list of all MHCs */
+ spinlock_t sd_mhc_lock;
-+ atomic_t sd_mhc_count;
++ atomic_t sd_mhc_count; /* # MHCs in cache */
+
-+ struct list_head sd_depend[GFS_DEPEND_HASH_SIZE];
++ /* Dependency cache */
++ struct list_head sd_depend[GFS_DEPEND_HASH_SIZE]; /* Hash buckets */
+ spinlock_t sd_depend_lock;
-+ atomic_t sd_depend_count;
++ atomic_t sd_depend_count; /* # dependencies in cache */
+
-+ struct gfs_holder sd_live_gh;
++ /* LIVE inter-node lock indicates that FS is mounted on at least
++ one node */
++ struct gfs_holder sd_live_gh; /* Glock holder for LIVE lock */
+
++ /* For quiescing the filesystem */
+ struct gfs_holder sd_freeze_gh;
+ struct semaphore sd_freeze_lock;
+ unsigned int sd_freeze_count;
+
+ /* Inode Stuff */
+
-+ struct gfs_inode *sd_rooti; /* FS's root inode */
++ struct gfs_inode *sd_rooti; /* FS's root inode */
+
-+ struct gfs_glock *sd_rename_gl; /* rename glock */
++ /* Only 1 node at a time may rename (e.g. mv) directory from
++ one directory to another. */
++ struct gfs_glock *sd_rename_gl; /* Rename glock */
+
+ /* Daemon stuff */
+
-+ struct task_struct *sd_scand_process;
-+ unsigned int sd_glockd_num;
++ /* Scan for glocks and inodes to toss from memory */
++ struct task_struct *sd_scand_process; /* Scand places on reclaim list*/
++ unsigned int sd_glockd_num; /* # of glockd procs to do reclaiming*/
++
++ /* Recover journal of a crashed node */
+ struct task_struct *sd_recoverd_process;
++
++ /* Update log tail as AIL gets flushed to in-place on-disk blocks */
+ struct task_struct *sd_logd_process;
++
++ /* Sync quota updates to disk, and clean up unused quota structs */
+ struct task_struct *sd_quotad_process;
++
++ /* Clean up unused inode structures */
+ struct task_struct *sd_inoded_process;
+
++ /* Support for starting/stopping daemons */
+ struct semaphore sd_thread_lock;
+ struct completion sd_thread_completion;
+
+ /* Log stuff */
+
-+ struct gfs_glock *sd_trans_gl; /* transaction glock */
++ /* Transaction lock protects journal replay (recovery) */
++ struct gfs_glock *sd_trans_gl; /* Transaction glock structure */
+
-+ struct gfs_inode *sd_jiinode; /* jindex inode */
-+ uint64_t sd_jiinode_vn; /* Version number of the journal index inode */
++ struct gfs_inode *sd_jiinode; /* Journal index inode */
++ uint64_t sd_jiinode_vn; /* Journal index version # (detects
++ if new journals have been added) */
+
+ unsigned int sd_journals; /* Number of journals in the FS */
-+ struct gfs_jindex *sd_jindex; /* Array of Jindex structures describing this FS's journals */
++ struct gfs_jindex *sd_jindex; /* Array of journal descriptors */
+ struct semaphore sd_jindex_lock;
-+ unsigned long sd_jindex_refresh_time;
++ unsigned long sd_jindex_refresh_time; /* Poll for new journals (secs) */
+
-+ struct gfs_jindex sd_jdesc; /* Jindex structure describing this machine's journal */
-+ struct gfs_holder sd_journal_gh; /* the glock for this machine's journal */
++ struct gfs_jindex sd_jdesc; /* This machine's journal descriptor */
++ struct gfs_holder sd_journal_gh; /* This machine's journal glock */
+
+ uint64_t sd_sequence; /* Assigned to xactions in order they commit */
+ uint64_t sd_log_head; /* Block number of next journal write */
+ uint64_t sd_log_wrap;
+
+ spinlock_t sd_log_seg_lock;
-+ unsigned int sd_log_seg_free; /* Free segments in the log */
++ unsigned int sd_log_seg_free; /* # of free segments in the log */
+ struct list_head sd_log_seg_list;
+ wait_queue_head_t sd_log_seg_wait;
+
-+ struct list_head sd_log_ail; /* struct gfs_trans structures that form the Active Items List
-+ "next" is the head, "prev" is the tail */
++ /* "Active Items List" of transactions that have been flushed to
++ on-disk log, and are waiting for flush to in-place on-disk blocks */
++ struct list_head sd_log_ail; /* "next" is head, "prev" is tail */
+
-+ struct list_head sd_log_incore; /* transactions that have been commited incore (but not ondisk)
-+ "next" is the newest, "prev" is the oldest */
-+ unsigned int sd_log_buffers; /* Number of buffers in the incore log */
++ /* Transactions committed incore, but not yet flushed to on-disk log */
++ struct list_head sd_log_incore; /* "Next" is newest, "prev" is oldest */
++ unsigned int sd_log_buffers; /* # of buffers in the incore log */
+
+ struct semaphore sd_log_lock; /* Lock for access to log values */
+
+ uint64_t sd_log_dump_last;
+ uint64_t sd_log_dump_last_wrap;
+
-+ /* unlinked crap */
++ /* Unlinked crap */
+
+ struct list_head sd_unlinked_list;
+ spinlock_t sd_unlinked_lock;
+ atomic_t sd_unlinked_ic_count;
+ atomic_t sd_unlinked_od_count;
+
-+ /* quota crap */
++ /* Quota crap */
+
-+ struct list_head sd_quota_list;
++ struct list_head sd_quota_list; /* List of all gfs_quota_data structs */
+ spinlock_t sd_quota_lock;
+
-+ atomic_t sd_quota_count;
-+ atomic_t sd_quota_od_count;
++ atomic_t sd_quota_count; /* # quotas on sd_quota_list */
++ atomic_t sd_quota_od_count; /* # quotas waiting for sync to
++ special on-disk quota file */
+
-+ struct gfs_inode *sd_qinode;
++ struct gfs_inode *sd_qinode; /* Special on-disk quota file */
+
-+ uint64_t sd_quota_sync_gen;
-+ unsigned long sd_quota_sync_time;
++ uint64_t sd_quota_sync_gen; /* Generation, incr when sync to file */
++ unsigned long sd_quota_sync_time; /* Jiffies, last sync to quota file */
+
-+ /* license crap */
++ /* License crap */
+
+ struct gfs_inode *sd_linode;
+
-+ /* Recovery stuff */
++ /* Recovery stuff */
+
+ struct list_head sd_dirty_j;
+ spinlock_t sd_dirty_j_lock;
+ unsigned int sd_recovery_skips;
+ unsigned int sd_recovery_sames;
+
-+ /* Counters */
++ /* Counters */
+
+ atomic_t sd_glock_count;
+ atomic_t sd_glock_held_count;
+
+ char sd_fsname[256];
+
-+ /* Debugging crud */
++ /* Debugging crud */
+
+ unsigned long sd_last_readdirplus;
+ unsigned long sd_last_unlocked_aop;
+
+ spinlock_t sd_ail_lock;
+ struct list_head sd_recovery_bufs;
++
++ struct list_head sd_list;
+};
+
+#endif /* __INCORE_DOT_H__ */
-diff -urN linux-2.6.9-rc1-mm3/fs/gfs/inode.c linux/fs/gfs/inode.c
---- linux-2.6.9-rc1-mm3/fs/gfs/inode.c 1969-12-31 18:00:00.000000000 -0600
-+++ linux/fs/gfs/inode.c 2004-09-07 16:26:15.749550827 -0500
-@@ -0,0 +1,2001 @@
+diff -urN linux-orig/fs/gfs/inode.c linux-patched/fs/gfs/inode.c
+--- linux-orig/fs/gfs/inode.c 1969-12-31 18:00:00.000000000 -0600
++++ linux-patched/fs/gfs/inode.c 2004-10-27 15:27:10.881630529 -0500
+@@ -0,0 +1,2053 @@
+/******************************************************************************
+*******************************************************************************
+**
+#include <asm/semaphore.h>
+#include <linux/completion.h>
+#include <linux/buffer_head.h>
-+#include <linux/xattr_acl.h>
++#include <linux/posix_acl.h>
+
+#include "gfs.h"
+#include "acl.h"
+ */
+
+static void
-+inode_attr_in(struct gfs_inode *ip, struct inode *ino)
++inode_attr_in(struct gfs_inode *ip, struct inode *inode)
+{
+ unsigned int mode;
+
-+ ino->i_ino = ip->i_num.no_formal_ino;
++ inode->i_ino = ip->i_num.no_formal_ino;
+
+ switch (ip->i_di.di_type) {
+ case GFS_FILE_REG:
+ mode = S_IFREG;
-+ ino->i_rdev = 0;
++ inode->i_rdev = 0;
+ break;
+ case GFS_FILE_DIR:
+ mode = S_IFDIR;
-+ ino->i_rdev = 0;
++ inode->i_rdev = 0;
+ break;
+ case GFS_FILE_LNK:
+ mode = S_IFLNK;
-+ ino->i_rdev = 0;
++ inode->i_rdev = 0;
+ break;
+ case GFS_FILE_BLK:
+ mode = S_IFBLK;
-+ ino->i_rdev = MKDEV(ip->i_di.di_major, ip->i_di.di_minor);
++ inode->i_rdev = MKDEV(ip->i_di.di_major, ip->i_di.di_minor);
+ break;
+ case GFS_FILE_CHR:
+ mode = S_IFCHR;
-+ ino->i_rdev = MKDEV(ip->i_di.di_major, ip->i_di.di_minor);
++ inode->i_rdev = MKDEV(ip->i_di.di_major, ip->i_di.di_minor);
+ break;
+ case GFS_FILE_FIFO:
+ mode = S_IFIFO;
-+ ino->i_rdev = 0;
++ inode->i_rdev = 0;
+ break;
+ case GFS_FILE_SOCK:
+ mode = S_IFSOCK;
-+ ino->i_rdev = 0;
++ inode->i_rdev = 0;
+ break;
+ default:
+ GFS_ASSERT_INODE(FALSE, ip,
+ break;
+ };
+
-+ ino->i_mode = mode | (ip->i_di.di_mode & S_IALLUGO);
-+ ino->i_nlink = ip->i_di.di_nlink;
-+ ino->i_uid = ip->i_di.di_uid;
-+ ino->i_gid = ip->i_di.di_gid;
-+ i_size_write(ino, ip->i_di.di_size);
-+ ino->i_atime.tv_sec = ip->i_di.di_atime;
-+ ino->i_mtime.tv_sec = ip->i_di.di_mtime;
-+ ino->i_ctime.tv_sec = ip->i_di.di_ctime;
-+ ino->i_atime.tv_nsec = ino->i_mtime.tv_nsec = ino->i_ctime.tv_nsec = 0;
-+ ino->i_blksize = PAGE_SIZE;
-+ ino->i_blocks = ip->i_di.di_blocks <<
++ inode->i_mode = mode | (ip->i_di.di_mode & S_IALLUGO);
++ inode->i_nlink = ip->i_di.di_nlink;
++ inode->i_uid = ip->i_di.di_uid;
++ inode->i_gid = ip->i_di.di_gid;
++ i_size_write(inode, ip->i_di.di_size);
++ inode->i_atime.tv_sec = ip->i_di.di_atime;
++ inode->i_mtime.tv_sec = ip->i_di.di_mtime;
++ inode->i_ctime.tv_sec = ip->i_di.di_ctime;
++ inode->i_atime.tv_nsec = inode->i_mtime.tv_nsec = inode->i_ctime.tv_nsec = 0;
++ inode->i_blksize = PAGE_SIZE;
++ inode->i_blocks = ip->i_di.di_blocks <<
+ (ip->i_sbd->sd_sb.sb_bsize_shift - GFS_BASIC_BLOCK_SHIFT);
-+ ino->i_generation = ip->i_di.di_header.mh_incarn;
++ inode->i_generation = ip->i_di.di_header.mh_incarn;
++
++ if (ip->i_di.di_flags & GFS_DIF_IMMUTABLE)
++ inode->i_flags |= S_IMMUTABLE;
++ else
++ inode->i_flags &= ~S_IMMUTABLE;
++
++ if (ip->i_di.di_flags & GFS_DIF_APPENDONLY)
++ inode->i_flags |= S_APPEND;
++ else
++ inode->i_flags &= ~S_APPEND;
+}
+
+/**
+void
+gfs_inode_attr_out(struct gfs_inode *ip)
+{
-+ struct inode *inode;
++ struct inode *inode = ip->i_vnode;
+
-+ inode = gfs_iget(ip, NO_CREATE);
-+ if (inode) {
-+ ip->i_di.di_mode = inode->i_mode & S_IALLUGO;
-+ ip->i_di.di_uid = inode->i_uid;
-+ ip->i_di.di_gid = inode->i_gid;
-+ ip->i_di.di_atime = inode->i_atime.tv_sec;
-+ ip->i_di.di_mtime = inode->i_mtime.tv_sec;
-+ ip->i_di.di_ctime = inode->i_ctime.tv_sec;
-+ iput(inode);
-+ }
++ ip->i_di.di_mode = inode->i_mode & S_IALLUGO;
++ ip->i_di.di_uid = inode->i_uid;
++ ip->i_di.di_gid = inode->i_gid;
++ ip->i_di.di_atime = inode->i_atime.tv_sec;
++ ip->i_di.di_mtime = inode->i_mtime.tv_sec;
++ ip->i_di.di_ctime = inode->i_ctime.tv_sec;
+}
+
+/**
+
+static int
+inode_create(struct gfs_glock *i_gl, struct gfs_inum *inum,
-+ struct gfs_glock *io_gl, unsigned int io_state,
-+ struct gfs_inode **ipp)
++ struct gfs_glock *io_gl, unsigned int io_state,
++ struct gfs_inode **ipp)
+{
+ struct gfs_sbd *sdp = i_gl->gl_sbd;
+ struct gfs_inode *ip;
+
+ spin_lock_init(&ip->i_lock);
+
++ ip->i_greedy = sdp->sd_tune.gt_greedy_default;
++
+ error = gfs_glock_nq_init(io_gl,
+ io_state, GL_LOCAL_EXCL | GL_EXACT,
+ &ip->i_iopen_gh);
+ CREATE, &io_gl);
+ if (!error) {
+ error = inode_create(i_gl, inum, io_gl,
-+ LM_ST_SHARED, ipp);
++ LM_ST_SHARED, ipp);
+ gfs_glock_put(io_gl);
+ }
+ }
+void
+gfs_inode_hold(struct gfs_inode *ip)
+{
-+ GFS_ASSERT_INODE(atomic_read(&ip->i_count), ip,);
++ GFS_ASSERT_INODE(atomic_read(&ip->i_count) > 0, ip,);
+ atomic_inc(&ip->i_count);
+}
+
+dinode_dealloc(struct gfs_inode *ip)
+{
+ struct gfs_sbd *sdp = ip->i_sbd;
++ struct gfs_alloc *al;
+ struct gfs_rgrpd *rgd;
-+ struct gfs_holder ri_gh, rgd_gh;
+ int error;
+
-+ gfs_alloc_get(ip);
++ al = gfs_alloc_get(ip);
+
+ error = gfs_quota_hold_m(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
+ if (error)
-+ goto fail;
++ goto out;
+
-+ error = gfs_rindex_hold(sdp, &ri_gh);
++ error = gfs_rindex_hold(sdp, &al->al_ri_gh);
+ if (error)
-+ goto fail_qs;
++ goto out_qs;
+
+ rgd = gfs_blk2rgrpd(sdp, ip->i_num.no_addr);
+ GFS_ASSERT_INODE(rgd, ip,);
+
-+ error = gfs_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, &rgd_gh);
++ error = gfs_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, &al->al_rgd_gh);
+ if (error)
-+ goto fail_rindex_relse;
++ goto out_rindex_relse;
+
+ GFS_ASSERT_INODE(ip->i_di.di_blocks == 1, ip,
+ gfs_dinode_print(&ip->i_di););
+
+ error = gfs_trans_begin(sdp, 3, 2);
+ if (error)
-+ goto fail_rg_gunlock;
++ goto out_rg_gunlock;
+
+ error = dinode_mark_unused(ip);
+ if (error)
-+ goto fail_end_trans;
++ goto out_end_trans;
+
+ gfs_difree(rgd, ip);
+
+ gfs_trans_add_unlinked(sdp, GFS_LOG_DESC_IDA, &ip->i_num);
+ clear_bit(GLF_STICKY, &ip->i_gl->gl_flags);
+
++ out_end_trans:
+ gfs_trans_end(sdp);
+
-+ gfs_glock_dq_uninit(&rgd_gh);
-+ gfs_glock_dq_uninit(&ri_gh);
-+
-+ gfs_quota_unhold_m(ip);
-+ gfs_alloc_put(ip);
-+
-+ return 0;
-+
-+ fail_end_trans:
-+ gfs_trans_end(sdp);
-+
-+ fail_rg_gunlock:
-+ gfs_glock_dq_uninit(&rgd_gh);
++ out_rg_gunlock:
++ gfs_glock_dq_uninit(&al->al_rgd_gh);
+
-+ fail_rindex_relse:
-+ gfs_glock_dq_uninit(&ri_gh);
++ out_rindex_relse:
++ gfs_glock_dq_uninit(&al->al_ri_gh);
+
-+ fail_qs:
++ out_qs:
+ gfs_quota_unhold_m(ip);
+
-+ fail:
++ out:
+ gfs_alloc_put(ip);
+
+ return error;
+ * @is_root: If TRUE, ignore the caller's permissions
+ * @i_gh: An uninitialized holder for the new inode glock
+ *
++ * There will always be a vnode for the d_gh inode unless @is_root
++ * is true.
++ *
+ * Returns: 0 on success, -EXXXX on failure
+ */
+
+ return error;
+
+ if (!is_root) {
-+ struct inode *dir = gfs_iget(dip, NO_CREATE);
-+ if (dir) {
-+ error = permission(dir, MAY_EXEC, NULL);
-+ iput(dir);
-+ if (error) {
-+ gfs_glock_dq(d_gh);
-+ return error;
-+ }
++ error = permission(dip->i_vnode, MAY_EXEC, NULL);
++ if (error) {
++ gfs_glock_dq(d_gh);
++ return error;
+ }
+ }
+
+ }
+
+ if (!is_root) {
-+ struct inode *dir = gfs_iget(dip, NO_CREATE);
-+ if (dir) {
-+ error = permission(dir, MAY_EXEC, NULL);
-+ iput(dir);
-+ if (error) {
-+ gfs_glock_dq(d_gh);
-+ gfs_glock_dq_uninit(i_gh);
-+ goto out;
-+ }
++ error = permission(dip->i_vnode, MAY_EXEC, NULL);
++ if (error) {
++ gfs_glock_dq(d_gh);
++ gfs_glock_dq_uninit(i_gh);
++ goto out;
+ }
+ }
+
+{
+ int error;
+
-+ {
-+ struct inode *dir = gfs_iget(dip, NO_CREATE);
-+ if (dir) {
-+ error = permission(dir, MAY_WRITE | MAY_EXEC, NULL);
-+ iput(dir);
-+ if (error)
-+ return error;
-+ }
-+ }
++ error = permission(dip->i_vnode, MAY_WRITE | MAY_EXEC, NULL);
++ if (error)
++ return error;
+
+ /* Don't create entries in an unlinked directory */
+
+static int
+inode_init_and_link(struct gfs_inode *dip, struct qstr *name,
+ struct gfs_inum *inum, struct gfs_glock *gl,
-+ unsigned int type, unsigned int mode)
++ unsigned int type, mode_t mode)
+{
+ struct gfs_sbd *sdp = dip->i_sbd;
-+ struct posix_acl *acl = NULL;
+ struct gfs_alloc *al;
+ struct gfs_inode *ip;
+ unsigned int uid, gid;
+ int alloc_required;
++ void *acl_a_data = NULL, *acl_d_data = NULL;
++ unsigned int acl_size = 0, acl_blocks = 0;
+ int error;
+
+ if (sdp->sd_args.ar_suiddir &&
+ } else
+ gid = current->fsgid;
+
-+ error = gfs_setup_new_acl(dip, type, &mode, &acl);
++ error = gfs_acl_new_prep(dip, type, &mode,
++ &acl_a_data, &acl_d_data,
++ &acl_size, &acl_blocks);
+ if (error)
+ return error;
+
+ if (error)
+ goto fail_gunlock_q;
+
-+ if (acl)
++ if (acl_blocks)
+ alloc_required = TRUE;
+ else {
+ error = gfs_diradd_alloc_required(dip, name, &alloc_required);
+ if (error)
+ goto fail_gunlock_q;
+
-+ al->al_requested_meta = sdp->sd_max_dirres + GFS_MAX_EA_ACL_BLKS;
++ al->al_requested_meta = sdp->sd_max_dirres + acl_blocks;
+
+ error = gfs_inplace_reserve(dip);
+ if (error)
+ one block for an unlinked tag. */
+
+ error = gfs_trans_begin(sdp,
-+ 2 + sdp->sd_max_dirres +
-+ al->al_rgd->rd_ri.ri_length +
-+ GFS_MAX_EA_ACL_BLKS, 2);
++ 2 + sdp->sd_max_dirres + acl_blocks +
++ al->al_rgd->rd_ri.ri_length, 2);
+ if (error)
+ goto fail_inplace;
+ } else {
++ error = gfs_rindex_hold(sdp, &al->al_ri_gh);
++ if (error)
++ goto fail_gunlock_q;
++
+ /* Trans may require:
+ blocks for two dinodes, a leaf block,
+ and one block for a quota change and
+ &(struct gfs_inum){0, inum->no_addr});
+ gfs_trans_add_quota(sdp, +1, uid, gid);
+
-+ /* Gfs_inode_get() can't fail here. But then again, it shouldn't be
-+ here (it should be in gfs_createi()). Gfs_init_acl() has no
-+ business needing a memory-resident inode. */
-+
++ /* gfs_inode_get() can't fail here. */
+ gfs_inode_get(gl, inum, CREATE, &ip);
+
-+ if (acl) {
-+ error = gfs_init_acl(dip, ip, type, acl);
-+ GFS_ASSERT(!error, ); /* Sigh. */
-+ }
++ if (acl_blocks)
++ error = gfs_acl_new_init(dip, ip,
++ acl_a_data, acl_d_data,
++ acl_size);
+
-+ return 0;
++ if (!alloc_required)
++ gfs_glock_dq_uninit(&al->al_ri_gh);
++
++ return error;
+
+ fail_end_trans:
+ gfs_trans_end(sdp);
+ fail_inplace:
+ if (alloc_required)
+ gfs_inplace_release(dip);
++ else
++ gfs_glock_dq_uninit(&al->al_ri_gh);
+
+ fail_gunlock_q:
+ gfs_quota_unlock_m(dip);
+
+ fail:
+ gfs_alloc_put(dip);
-+ if (acl)
-+ posix_acl_release(acl);
++ if (acl_a_data)
++ kfree(acl_a_data);
++ else if (acl_d_data)
++ kfree(acl_d_data);
+
+ return error;
+}
+}
+
+/*
-+ * gfs_revalidate - check to see that a inode is still in a directory
++ * gfs_unlink_ok - check to see that an inode is still in a directory
+ * @dip: the directory
+ * @name: the name of the file
+ * @ip: the inode
+ *
+ * Assumes that the lock on (at least) @dip is held.
+ *
-+ * Returns: 0 if the parent/child relationship is correct, -ENOENT if it isn't
++ * Returns: 0 if the parent/child relationship is correct, errno if it isn't
+ */
+
+int
-+gfs_revalidate(struct gfs_inode *dip, struct qstr *name, struct gfs_inode *ip)
++gfs_unlink_ok(struct gfs_inode *dip, struct qstr *name, struct gfs_inode *ip)
+{
+ struct gfs_inum inum;
+ unsigned int type;
+ int error;
+
++ if (IS_IMMUTABLE(ip->i_vnode) || IS_APPEND(ip->i_vnode))
++ return -EPERM;
++
++ if ((dip->i_di.di_mode & S_ISVTX) &&
++ dip->i_di.di_uid != current->fsuid &&
++ ip->i_di.di_uid != current->fsuid &&
++ !capable(CAP_FOWNER))
++ return -EPERM;
++
++ if (IS_APPEND(dip->i_vnode))
++ return -EPERM;
++
++ error = permission(dip->i_vnode, MAY_WRITE | MAY_EXEC, NULL);
++ if (error)
++ return error;
++
+ error = gfs_dir_search(dip, name, &inum, &type);
-+ if (!error) {
-+ if (inum.no_formal_ino == ip->i_num.no_formal_ino)
-+ GFS_ASSERT_INODE(ip->i_di.di_type == type, ip,);
-+ else
-+ error = -ENOENT;
-+ }
++ if (error)
++ return error;
+
-+ return error;
++ if (inum.no_formal_ino != ip->i_num.no_formal_ino)
++ return -ENOENT;
++
++ GFS_ASSERT_INODE(ip->i_di.di_type == type, ip,);
++
++ return 0;
+}
+
+/*
+}
+
+/**
++ * gfs_setattr_simple -
++ * @ip:
++ * @attr:
++ *
++ * Called with a reference on the vnode.
++ *
++ * Returns: errno
++ */
++
++int
++gfs_setattr_simple(struct gfs_inode *ip, struct iattr *attr)
++{
++ struct buffer_head *dibh;
++ int error;
++
++ /* Trans may require:
++ one dinode block. */
++
++ error = gfs_trans_begin(ip->i_sbd, 1, 0);
++ if (error)
++ return error;
++
++ error = gfs_get_inode_buffer(ip, &dibh);
++ if (!error) {
++ inode_setattr(ip->i_vnode, attr);
++ gfs_inode_attr_out(ip);
++
++ gfs_trans_add_bh(ip->i_gl, dibh);
++ gfs_dinode_out(&ip->i_di, dibh->b_data);
++ brelse(dibh);
++ }
++
++ gfs_trans_end(ip->i_sbd);
++
++ return error;
++}
++
++/**
+ * iah_make_jdata -
+ * @gl:
+ * @inum:
+{
+ return inode_alloc_hidden(sdp, &sdp->sd_sb.sb_license_di);
+}
-diff -urN linux-2.6.9-rc1-mm3/fs/gfs/inode.h linux/fs/gfs/inode.h
---- linux-2.6.9-rc1-mm3/fs/gfs/inode.h 1969-12-31 18:00:00.000000000 -0600
-+++ linux/fs/gfs/inode.h 2004-09-07 16:26:15.750550606 -0500
-@@ -0,0 +1,68 @@
+diff -urN linux-orig/fs/gfs/inode.h linux-patched/fs/gfs/inode.h
+--- linux-orig/fs/gfs/inode.h 1969-12-31 18:00:00.000000000 -0600
++++ linux-patched/fs/gfs/inode.h 2004-10-27 15:27:10.881630529 -0500
+@@ -0,0 +1,70 @@
+/******************************************************************************
+*******************************************************************************
+**
+ struct gfs_holder *i_gh);
+int gfs_unlinki(struct gfs_inode *dip, struct qstr *name, struct gfs_inode *ip);
+int gfs_rmdiri(struct gfs_inode *dip, struct qstr *name, struct gfs_inode *ip);
-+int gfs_revalidate(struct gfs_inode *dip, struct qstr *name,
-+ struct gfs_inode *ip);
++int gfs_unlink_ok(struct gfs_inode *dip, struct qstr *name,
++ struct gfs_inode *ip);
+int gfs_ok_to_move(struct gfs_inode *this, struct gfs_inode *to);
+int gfs_readlinki(struct gfs_inode *ip, char **buf, unsigned int *len);
+
+
+void gfs_try_toss_vnode(struct gfs_inode *ip);
+
++int gfs_setattr_simple(struct gfs_inode *ip, struct iattr *attr);
++
+/* Backwards compatibility functions */
+
+int gfs_alloc_qinode(struct gfs_sbd *sdp);
+}
+
+#endif /* __INODE_DOT_H__ */
-diff -urN linux-2.6.9-rc1-mm3/fs/gfs/ioctl.c linux/fs/gfs/ioctl.c
---- linux-2.6.9-rc1-mm3/fs/gfs/ioctl.c 1969-12-31 18:00:00.000000000 -0600
-+++ linux/fs/gfs/ioctl.c 2004-09-07 16:26:15.751550386 -0500
-@@ -0,0 +1,983 @@
+diff -urN linux-orig/fs/gfs/ioctl.c linux-patched/fs/gfs/ioctl.c
+--- linux-orig/fs/gfs/ioctl.c 1969-12-31 18:00:00.000000000 -0600
++++ linux-patched/fs/gfs/ioctl.c 2004-10-27 15:27:10.881630529 -0500
+@@ -0,0 +1,974 @@
+/******************************************************************************
+*******************************************************************************
+**
+
+ case GFS_DIF_IMMUTABLE:
+ case GFS_DIF_APPENDONLY:
++ /* The IMMUTABLE and APPENDONLY flags can only be changed by
++ the relevant capability. */
++ if (((ip->i_di.di_flags ^ flag) & (GFS_DIF_IMMUTABLE | GFS_DIF_APPENDONLY)) &&
++ !capable(CAP_LINUX_IMMUTABLE)) {
++ error = -EPERM;
++ goto out;
++ }
++ break;
++
+ case GFS_DIF_NOATIME:
+ case GFS_DIF_SYNC:
+ /* FixMe!!! */
+ error = stat_gfs_ioctl(sdp, arg);
+ break;
+
-+ case GFS_FREEZE:
-+ if (capable(CAP_SYS_ADMIN))
-+ error = gfs_freeze_fs(sdp);
-+ else
-+ error = -EACCES;
-+ break;
-+
-+ case GFS_UNFREEZE:
-+ if (capable(CAP_SYS_ADMIN))
-+ gfs_unfreeze_fs(sdp);
-+ else
-+ error = -EACCES;
-+ break;
-+
+ case GFS_RECLAIM_METADATA:
+ if (capable(CAP_SYS_ADMIN))
+ error = reclaim_ioctl(sdp, arg);
+ error = -EACCES;
+ break;
+
-+ case GFS_EATTR_GET:
-+ /* Permissions handled later */
-+ error = gfs_get_eattr_ioctl(sdp, ip, arg);
-+ break;
-+
-+ case GFS_EATTR_SET:
-+ /* Permissions handled later */
-+ error = gfs_set_eattr_ioctl(sdp, ip, arg);
++ case GFS_WHERE_ARE_YOU: {
++ unsigned int x = GFS_MAGIC;
++ if (copy_to_user(arg, &x, sizeof(unsigned int)))
++ error = -EFAULT;
+ break;
++ }
+
-+ case GFS_WHERE_ARE_YOU:
-+ {
-+ unsigned int x = GFS_MAGIC;
-+ if (copy_to_user(arg, &x, sizeof(unsigned int)))
-+ error = -EFAULT;
-+ }
++ case GFS_COOKIE: {
++ unsigned long x = (unsigned long)sdp;
++ if (copy_to_user(arg, &x, sizeof(unsigned long)))
++ error = -EFAULT;
+ break;
++ }
+
+ case GFS_SET_FLAG:
+ case GFS_CLEAR_FLAG:
+
+ return error;
+}
-diff -urN linux-2.6.9-rc1-mm3/fs/gfs/ioctl.h linux/fs/gfs/ioctl.h
---- linux-2.6.9-rc1-mm3/fs/gfs/ioctl.h 1969-12-31 18:00:00.000000000 -0600
-+++ linux/fs/gfs/ioctl.h 2004-09-07 16:26:15.751550386 -0500
+diff -urN linux-orig/fs/gfs/ioctl.h linux-patched/fs/gfs/ioctl.h
+--- linux-orig/fs/gfs/ioctl.h 1969-12-31 18:00:00.000000000 -0600
++++ linux-patched/fs/gfs/ioctl.h 2004-10-27 15:27:10.882630297 -0500
@@ -0,0 +1,21 @@
+/******************************************************************************
+*******************************************************************************
+int gfs_ioctli(struct gfs_inode *ip, unsigned int cmd, void *arg);
+
+#endif /* __IOCTL_DOT_H__ */
-diff -urN linux-2.6.9-rc1-mm3/fs/gfs/locking.c linux/fs/gfs/locking.c
---- linux-2.6.9-rc1-mm3/fs/gfs/locking.c 1969-12-31 18:00:00.000000000 -0600
-+++ linux/fs/gfs/locking.c 2004-09-07 16:26:15.752550165 -0500
-@@ -0,0 +1,114 @@
-+/******************************************************************************
-+*******************************************************************************
-+**
-+** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
-+** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
-+**
-+** This copyrighted material is made available to anyone wishing to use,
-+** modify, copy, or redistribute it subject to the terms and conditions
-+** of the GNU General Public License v.2.
-+**
-+*******************************************************************************
-+******************************************************************************/
-+
-+#include <linux/sched.h>
-+#include <linux/slab.h>
-+#include <linux/smp_lock.h>
-+#include <linux/spinlock.h>
-+#include <asm/semaphore.h>
-+#include <linux/completion.h>
-+#include <linux/buffer_head.h>
-+
-+#include "gfs.h"
-+#include "dio.h"
-+#include "glock.h"
-+#include "locking.h"
-+#include "super.h"
-+
-+/**
-+ * gfs_mount_lockproto - mount a locking protocol
-+ * @sdp: the filesystem
-+ * @args: mount arguements
-+ * @silent: if TRUE, don't complain if the FS isn't a GFS fs
-+ *
-+ * Returns: 0 on success, -EXXX on failure
-+ */
-+
-+int
-+gfs_mount_lockproto(struct gfs_sbd *sdp, int silent)
-+{
-+ struct gfs_sb *sb = NULL;
-+ struct buffer_head *bh;
-+ char *proto, *table, *p = NULL;
-+ int error = 0;
-+
-+ proto = sdp->sd_args.ar_lockproto;
-+ table = sdp->sd_args.ar_locktable;
-+
-+ /* Try to autodetect */
-+
-+ if (!proto[0] || !table[0]) {
-+ error = gfs_dread(sdp, GFS_SB_ADDR >> sdp->sd_fsb2bb_shift, NULL,
-+ DIO_FORCE | DIO_START | DIO_WAIT, &bh);
-+ if (error)
-+ goto out;
-+
-+ sb = gmalloc(sizeof(struct gfs_sb));
-+ gfs_sb_in(sb, bh->b_data);
-+ brelse(bh);
-+
-+ error = gfs_check_sb(sdp, sb, silent);
-+ if (error)
-+ goto out;
-+
-+ if (!proto[0])
-+ proto = sb->sb_lockproto;
-+
-+ if (!table[0])
-+ table = sb->sb_locktable;
-+ }
-+
-+ error = lm_mount(proto, table, sdp->sd_args.ar_hostdata,
-+ gfs_glock_cb, sdp,
-+ GFS_MIN_LVB_SIZE, &sdp->sd_lockstruct);
-+ if (error) {
-+ printk("GFS: can't mount proto = %s, table = %s, hostdata = %s\n",
-+ proto, table, sdp->sd_args.ar_hostdata);
-+ goto out;
-+ }
-+
-+ GFS_ASSERT_SBD(sdp->sd_lockstruct.ls_lockspace, sdp,);
-+ GFS_ASSERT_SBD(sdp->sd_lockstruct.ls_ops, sdp,);
-+ GFS_ASSERT_SBD(sdp->sd_lockstruct.ls_lvb_size >= GFS_MIN_LVB_SIZE,
-+ sdp,);
-+
-+ if (!*table) {
-+ table = p = gmalloc(sizeof(sdp->sd_vfs->s_id) + 1);
-+ strncpy(table, sdp->sd_vfs->s_id, sizeof(sdp->sd_vfs->s_id));
-+ table[sizeof(sdp->sd_vfs->s_id)] = 0;
-+ }
-+
-+ snprintf(sdp->sd_fsname, 256, "%s.%u", table,
-+ sdp->sd_lockstruct.ls_jid);
-+
-+ if (p)
-+ kfree(p);
-+
-+ out:
-+ if (sb)
-+ kfree(sb);
-+
-+ return error;
-+}
-+
-+/**
-+ * gfs_unmount_lockproto - Unmount lock protocol
-+ * @sdp: The GFS superblock
-+ *
-+ */
-+
-+void
-+gfs_unmount_lockproto(struct gfs_sbd *sdp)
-+{
-+ lm_unmount(&sdp->sd_lockstruct);
-+}
-diff -urN linux-2.6.9-rc1-mm3/fs/gfs/locking.h linux/fs/gfs/locking.h
---- linux-2.6.9-rc1-mm3/fs/gfs/locking.h 1969-12-31 18:00:00.000000000 -0600
-+++ linux/fs/gfs/locking.h 2004-09-07 16:26:15.752550165 -0500
-@@ -0,0 +1,20 @@
-+/******************************************************************************
-+*******************************************************************************
-+**
-+** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
-+** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
-+**
-+** This copyrighted material is made available to anyone wishing to use,
-+** modify, copy, or redistribute it subject to the terms and conditions
-+** of the GNU General Public License v.2.
-+**
-+*******************************************************************************
-+******************************************************************************/
-+
-+#ifndef __LOCKING_DOT_H__
-+#define __LOCKING_DOT_H__
-+
-+int gfs_mount_lockproto(struct gfs_sbd *sdp, int silent);
-+void gfs_unmount_lockproto(struct gfs_sbd *sdp);
-+
-+#endif /* __LOCKING_DOT_H__ */
-diff -urN linux-2.6.9-rc1-mm3/fs/gfs/log.c linux/fs/gfs/log.c
---- linux-2.6.9-rc1-mm3/fs/gfs/log.c 1969-12-31 18:00:00.000000000 -0600
-+++ linux/fs/gfs/log.c 2004-09-07 16:26:15.753549945 -0500
-@@ -0,0 +1,1318 @@
+diff -urN linux-orig/fs/gfs/log.c linux-patched/fs/gfs/log.c
+--- linux-orig/fs/gfs/log.c 1969-12-31 18:00:00.000000000 -0600
++++ linux-patched/fs/gfs/log.c 2004-10-27 15:27:10.882630297 -0500
+@@ -0,0 +1,1319 @@
+/******************************************************************************
+*******************************************************************************
+**
+/**
+ * gfs_ail_start - Start I/O on the AIL
+ * @sdp: the filesystem
-+ * @flags:
++ * @flags: DIO_ALL -- flush *all* AIL transactions to disk
++ * default -- flush first-on-list AIL transaction to disk
+ *
+ */
+
+ LO_CLEAN_DUMP(sdp, le);
+ }
+
-+ /* If there isn't anything the AIL, we won't get back the log
++ /* If there isn't anything in the AIL, we won't get back the log
+ space we reserved unless we do it ourselves. */
+
+ if (list_empty(&sdp->sd_log_ail)) {
+
+ return error;
+}
-diff -urN linux-2.6.9-rc1-mm3/fs/gfs/log.h linux/fs/gfs/log.h
---- linux-2.6.9-rc1-mm3/fs/gfs/log.h 1969-12-31 18:00:00.000000000 -0600
-+++ linux/fs/gfs/log.h 2004-09-07 16:26:15.754549724 -0500
+diff -urN linux-orig/fs/gfs/log.h linux-patched/fs/gfs/log.h
+--- linux-orig/fs/gfs/log.h 1969-12-31 18:00:00.000000000 -0600
++++ linux-patched/fs/gfs/log.h 2004-10-27 15:27:10.882630297 -0500
@@ -0,0 +1,79 @@
+/******************************************************************************
+*******************************************************************************
+ struct buffer_head *unlock);
+
+#endif /* __LOG_DOT_H__ */
-diff -urN linux-2.6.9-rc1-mm3/fs/gfs/lops.c linux/fs/gfs/lops.c
---- linux-2.6.9-rc1-mm3/fs/gfs/lops.c 1969-12-31 18:00:00.000000000 -0600
-+++ linux/fs/gfs/lops.c 2004-09-07 16:26:15.776544872 -0500
-@@ -0,0 +1,1563 @@
+diff -urN linux-orig/fs/gfs/lops.c linux-patched/fs/gfs/lops.c
+--- linux-orig/fs/gfs/lops.c 1969-12-31 18:00:00.000000000 -0600
++++ linux-patched/fs/gfs/lops.c 2004-10-27 15:27:10.882630297 -0500
+@@ -0,0 +1,1570 @@
+/******************************************************************************
+*******************************************************************************
+**
+ * @blkno: the location of the log's copy of the block
+ *
+ * Returns: 0 on success, -EXXX on failure
++ *
++ * Read in-place block from disk
++ * Read log (journal) block from disk
++ * Compare generation numbers
++ * Copy log block to in-place block on-disk if:
++ * log generation # > in-place generation #
++ * OR generation #s are ==, but data contained in block is different (corrupt)
+ */
+
+static int
+ &gfs_quota_lops,
+ NULL
+};
-diff -urN linux-2.6.9-rc1-mm3/fs/gfs/lops.h linux/fs/gfs/lops.h
---- linux-2.6.9-rc1-mm3/fs/gfs/lops.h 1969-12-31 18:00:00.000000000 -0600
-+++ linux/fs/gfs/lops.h 2004-09-07 16:26:15.777544652 -0500
+diff -urN linux-orig/fs/gfs/lops.h linux-patched/fs/gfs/lops.h
+--- linux-orig/fs/gfs/lops.h 1969-12-31 18:00:00.000000000 -0600
++++ linux-patched/fs/gfs/lops.h 2004-10-27 15:27:10.882630297 -0500
@@ -0,0 +1,179 @@
+/******************************************************************************
+*******************************************************************************
+while (0)
+
+#endif /* __LOPS_DOT_H__ */
-diff -urN linux-2.6.9-rc1-mm3/fs/gfs/lvb.c linux/fs/gfs/lvb.c
---- linux-2.6.9-rc1-mm3/fs/gfs/lvb.c 1969-12-31 18:00:00.000000000 -0600
-+++ linux/fs/gfs/lvb.c 2004-09-07 16:26:15.777544652 -0500
+diff -urN linux-orig/fs/gfs/lvb.c linux-patched/fs/gfs/lvb.c
+--- linux-orig/fs/gfs/lvb.c 1969-12-31 18:00:00.000000000 -0600
++++ linux-patched/fs/gfs/lvb.c 2004-10-27 15:27:10.882630297 -0500
@@ -0,0 +1,148 @@
+/******************************************************************************
+*******************************************************************************
+ pv(qb, qb_warn, "%"PRIu64);
+ pv(qb, qb_value, "%"PRId64);
+}
-diff -urN linux-2.6.9-rc1-mm3/fs/gfs/lvb.h linux/fs/gfs/lvb.h
---- linux-2.6.9-rc1-mm3/fs/gfs/lvb.h 1969-12-31 18:00:00.000000000 -0600
-+++ linux/fs/gfs/lvb.h 2004-09-07 16:26:15.777544652 -0500
-@@ -0,0 +1,48 @@
+diff -urN linux-orig/fs/gfs/lvb.h linux-patched/fs/gfs/lvb.h
+--- linux-orig/fs/gfs/lvb.h 1969-12-31 18:00:00.000000000 -0600
++++ linux-patched/fs/gfs/lvb.h 2004-10-27 15:27:10.882630297 -0500
+@@ -0,0 +1,66 @@
+/******************************************************************************
+*******************************************************************************
+**
+*******************************************************************************
+******************************************************************************/
+
++/*
++ * Formats of Lock Value Blocks (LVBs) for various types of locks.
++ * These 32-bit data chunks can be shared quickly between nodes
++ * via the inter-node lock manager (via LAN instead of on-disk).
++ */
++
+#ifndef __LVB_DOT_H__
+#define __LVB_DOT_H__
+
+#define GFS_MIN_LVB_SIZE (32)
+
++/*
++ * Resource Group block allocation statistics
++ * Each resource group lock contains one of these in its LVB.
++ * Used for sharing approximate current statistics for statfs.
++ * Not used for actual block allocation.
++ */
+struct gfs_rgrp_lvb {
-+ uint32_t rb_magic;
-+ uint32_t rb_free;
-+ uint32_t rb_useddi;
-+ uint32_t rb_freedi;
-+ uint32_t rb_usedmeta;
-+ uint32_t rb_freemeta;
++ uint32_t rb_magic; /* GFS_MAGIC sanity check value */
++ uint32_t rb_free; /* # free data blocks */
++ uint32_t rb_useddi; /* # used dinode blocks */
++ uint32_t rb_freedi; /* # free dinode blocks */
++ uint32_t rb_usedmeta; /* # used metadata blocks */
++ uint32_t rb_freemeta; /* # free metadata blocks */
+};
+
++/*
++ * Quota
++ * Each quota lock contains one of these in its LVB.
++ * Keeps track of block allocation limits and current block allocation
++ * for either a cluster-wide user or a cluster-wide group.
++ */
+struct gfs_quota_lvb {
-+ uint32_t qb_magic;
++ uint32_t qb_magic; /* GFS_MAGIC sanity check value */
+ uint32_t qb_pad;
-+ uint64_t qb_limit;
-+ uint64_t qb_warn;
-+ int64_t qb_value;
++ uint64_t qb_limit; /* Hard limit of # blocks to alloc */
++ uint64_t qb_warn; /* Warn user when alloc is above this # */
++ int64_t qb_value; /* Current # blocks allocated */
+};
+
+/* Translation functions */
+void gfs_quota_lvb_print(struct gfs_quota_lvb *qb);
+
+#endif /* __LVB_DOT_H__ */
-diff -urN linux-2.6.9-rc1-mm3/fs/gfs/main.c linux/fs/gfs/main.c
---- linux-2.6.9-rc1-mm3/fs/gfs/main.c 1969-12-31 18:00:00.000000000 -0600
-+++ linux/fs/gfs/main.c 2004-09-07 16:26:15.778544431 -0500
-@@ -0,0 +1,142 @@
+diff -urN linux-orig/fs/gfs/main.c linux-patched/fs/gfs/main.c
+--- linux-orig/fs/gfs/main.c 1969-12-31 18:00:00.000000000 -0600
++++ linux-patched/fs/gfs/main.c 2004-10-27 15:27:10.883630065 -0500
+@@ -0,0 +1,123 @@
+/******************************************************************************
+*******************************************************************************
+**
+#include <linux/init.h>
+
+#include "gfs.h"
-+#include "mount.h"
+#include "ops_fstype.h"
-+
-+struct proc_dir_entry *gfs_proc_entry = NULL;
++#include "proc.h"
+
+/**
+ * init_gfs_fs - Register GFS as a filesystem
+int __init
+init_gfs_fs(void)
+{
-+ int error = 0;
-+
-+ init_MUTEX(&gfs_mount_args_lock);
++ int error;
+
-+ gfs_proc_entry = create_proc_read_entry("fs/gfs", S_IFREG | 0200, NULL, NULL, NULL);
-+ if (!gfs_proc_entry) {
-+ printk("GFS: can't register /proc/fs/gfs\n");
-+ error = -EINVAL;
-+ goto fail;
-+ }
-+ gfs_proc_entry->write_proc = gfs_proc_write;
++ gfs_proc_init();
+
+ gfs_random_number = xtime.tv_nsec;
+
+ gfs_glock_cachep = kmem_cache_create("gfs_glock", sizeof(struct gfs_glock),
+ 0, 0,
+ NULL, NULL);
++ gfs_inode_cachep = NULL;
++ gfs_bufdata_cachep = NULL;
++ gfs_mhc_cachep = NULL;
++ error = -ENOMEM;
+ if (!gfs_glock_cachep)
-+ goto fail2;
++ goto fail;
+
+ gfs_inode_cachep = kmem_cache_create("gfs_inode", sizeof(struct gfs_inode),
+ 0, 0,
+ NULL, NULL);
+ if (!gfs_inode_cachep)
-+ goto fail2;
++ goto fail;
+
+ gfs_bufdata_cachep = kmem_cache_create("gfs_bufdata", sizeof(struct gfs_bufdata),
+ 0, 0,
+ NULL, NULL);
+ if (!gfs_bufdata_cachep)
-+ goto fail2;
++ goto fail;
+
+ gfs_mhc_cachep = kmem_cache_create("gfs_meta_header_cache", sizeof(struct gfs_meta_header_cache),
+ 0, 0,
+ NULL, NULL);
+ if (!gfs_mhc_cachep)
-+ goto fail2;
++ goto fail;
+
+ error = register_filesystem(&gfs_fs_type);
+ if (error)
-+ goto fail2;
++ goto fail;
+
+ printk("GFS %s (built %s %s) installed\n",
+ GFS_RELEASE_NAME, __DATE__, __TIME__);
+
+ return 0;
+
-+ fail2:
++ fail:
+ if (gfs_mhc_cachep)
+ kmem_cache_destroy(gfs_mhc_cachep);
+
+ if (gfs_glock_cachep)
+ kmem_cache_destroy(gfs_glock_cachep);
+
-+ down(&gfs_mount_args_lock);
-+ if (gfs_mount_args) {
-+ kfree(gfs_mount_args);
-+ gfs_mount_args = NULL;
-+ }
-+ up(&gfs_mount_args_lock);
-+ remove_proc_entry("fs/gfs", NULL);
++ gfs_proc_uninit();
+
-+ fail:
+ return error;
+}
+
+ kmem_cache_destroy(gfs_inode_cachep);
+ kmem_cache_destroy(gfs_glock_cachep);
+
-+ down(&gfs_mount_args_lock);
-+ if (gfs_mount_args) {
-+ kfree(gfs_mount_args);
-+ gfs_mount_args = NULL;
-+ }
-+ up(&gfs_mount_args_lock);
-+ remove_proc_entry("fs/gfs", NULL);
++ gfs_proc_uninit();
+}
+
+MODULE_DESCRIPTION("Global File System " GFS_RELEASE_NAME);
+module_init(init_gfs_fs);
+module_exit(exit_gfs_fs);
+
-diff -urN linux-2.6.9-rc1-mm3/fs/gfs/mount.c linux/fs/gfs/mount.c
---- linux-2.6.9-rc1-mm3/fs/gfs/mount.c 1969-12-31 18:00:00.000000000 -0600
-+++ linux/fs/gfs/mount.c 2004-09-07 16:26:15.778544431 -0500
-@@ -0,0 +1,215 @@
+diff -urN linux-orig/fs/gfs/mount.c linux-patched/fs/gfs/mount.c
+--- linux-orig/fs/gfs/mount.c 1969-12-31 18:00:00.000000000 -0600
++++ linux-patched/fs/gfs/mount.c 2004-10-27 15:27:10.883630065 -0500
+@@ -0,0 +1,239 @@
+/******************************************************************************
+*******************************************************************************
+**
+#include <asm/semaphore.h>
+#include <linux/completion.h>
+#include <linux/buffer_head.h>
-+#include <linux/module.h>
-+#include <asm/uaccess.h>
+
+#include "gfs.h"
++#include "dio.h"
++#include "glock.h"
+#include "mount.h"
-+
-+char *gfs_mount_args = NULL;
-+struct semaphore gfs_mount_args_lock;
++#include "proc.h"
++#include "super.h"
+
+/**
+ * gfs_make_args - Parse mount arguments
+ */
+
+int
-+gfs_make_args(char *data, struct gfs_args *args)
++gfs_make_args(char *data_arg, struct gfs_args *args)
+{
++ char *data = data_arg;
+ char *options, *x, *y;
-+ int do_free = FALSE;
+ int error = 0;
+
+ /* If someone preloaded options, use those instead */
+
-+ down(&gfs_mount_args_lock);
-+ if (gfs_mount_args) {
-+ data = gfs_mount_args;
-+ gfs_mount_args = NULL;
-+ do_free = TRUE;
++ spin_lock(&gfs_proc_margs_lock);
++ if (gfs_proc_margs) {
++ data = gfs_proc_margs;
++ gfs_proc_margs = NULL;
+ }
-+ up(&gfs_mount_args_lock);
++ spin_unlock(&gfs_proc_margs_lock);
+
+ /* Set some defaults */
+
+ }
+
+ else if (!strcmp(x, "acl"))
-+ args->ar_posixacls = TRUE;
++ args->ar_posix_acls = TRUE;
+
+ else if (!strcmp(x, "suiddir"))
+ args->ar_suiddir = TRUE;
+ if (error)
+ printk("GFS: invalid mount option(s)\n");
+
-+ if (do_free)
++ if (data != data_arg)
+ kfree(data);
+
+ return error;
+}
+
+/**
-+ * gfs_proc_write - Read in some mount options
-+ * @file: unused
-+ * @buffer: a buffer of mount options
-+ * @count: the length of the mount options
-+ * @data: unused
-+ *
-+ * Called when someone writes to /proc/fs/gfs.
-+ * It allows you to specify mount options when you can't do it
-+ * from mount. i.e. from a inital ramdisk
++ * gfs_mount_lockproto - mount a locking protocol
++ * @sdp: the filesystem
++ * @args: mount arguments
++ * @silent: if TRUE, don't complain if the FS isn't a GFS fs
+ *
+ * Returns: 0 on success, -EXXX on failure
+ */
+
+int
-+gfs_proc_write(struct file *file,
-+ const char *buffer, unsigned long count,
-+ void *data)
++gfs_mount_lockproto(struct gfs_sbd *sdp, int silent)
+{
++ struct gfs_sb *sb = NULL;
++ char *proto, *table;
+ int error;
-+ char *p;
+
-+ if (!try_module_get(THIS_MODULE))
-+ return -EAGAIN; /* Huh!?! */
-+ down(&gfs_mount_args_lock);
++ proto = sdp->sd_args.ar_lockproto;
++ table = sdp->sd_args.ar_locktable;
+
-+ if (gfs_mount_args) {
-+ kfree(gfs_mount_args);
-+ gfs_mount_args = NULL;
-+ }
++ /* Try to autodetect */
+
-+ if (!count) {
-+ error = 0;
-+ goto fail;
-+ }
++ if (!proto[0] || !table[0]) {
++ struct buffer_head *bh;
+
-+ gfs_mount_args = gmalloc(count + 1);
++ error = gfs_dread(sdp, GFS_SB_ADDR >> sdp->sd_fsb2bb_shift, NULL,
++ DIO_FORCE | DIO_START | DIO_WAIT, &bh);
++ if (error)
++ return error;
+
-+ error = -EFAULT;
-+ if (copy_from_user(gfs_mount_args, buffer, count))
-+ goto fail_free;
++ sb = kmalloc(sizeof(struct gfs_sb), GFP_KERNEL);
++ if (!sb) {
++ brelse(bh);
++ return -ENOMEM;
++ }
++ gfs_sb_in(sb, bh->b_data);
++ brelse(bh);
+
-+ gfs_mount_args[count] = 0;
++ error = gfs_check_sb(sdp, sb, silent);
++ if (error)
++ goto out;
+
-+ /* Get rid of extra newlines */
++ if (!proto[0])
++ proto = sb->sb_lockproto;
+
-+ for (p = gfs_mount_args; *p; p++)
-+ if (*p == '\n')
-+ *p = 0;
++ if (!table[0])
++ table = sb->sb_locktable;
++ }
+
-+ up(&gfs_mount_args_lock);
-+ module_put(THIS_MODULE);
++ printk("GFS: Trying to join cluster \"%s\", \"%s\"\n",
++ proto, table);
+
-+ return count;
++ error = lm_mount(proto, table, sdp->sd_args.ar_hostdata,
++ gfs_glock_cb, sdp,
++ GFS_MIN_LVB_SIZE, &sdp->sd_lockstruct);
++ if (error) {
++ printk("GFS: can't mount proto = %s, table = %s, hostdata = %s\n",
++ proto, table, sdp->sd_args.ar_hostdata);
++ goto out;
++ }
++
++ GFS_ASSERT_SBD(sdp->sd_lockstruct.ls_lockspace, sdp,);
++ GFS_ASSERT_SBD(sdp->sd_lockstruct.ls_ops, sdp,);
++ GFS_ASSERT_SBD(sdp->sd_lockstruct.ls_lvb_size >= GFS_MIN_LVB_SIZE,
++ sdp,);
+
-+ fail_free:
-+ kfree(gfs_mount_args);
-+ gfs_mount_args = NULL;
++ snprintf(sdp->sd_fsname, 256, "%s.%u",
++ (*table) ? table : sdp->sd_vfs->s_id,
++ sdp->sd_lockstruct.ls_jid);
++
++ printk("GFS: fsid=%s: Joined cluster. Now mounting FS...\n",
++ sdp->sd_fsname);
++
++ out:
++ if (sb)
++ kfree(sb);
+
-+ fail:
-+ up(&gfs_mount_args_lock);
-+ module_put(THIS_MODULE);
+ return error;
+}
-diff -urN linux-2.6.9-rc1-mm3/fs/gfs/mount.h linux/fs/gfs/mount.h
---- linux-2.6.9-rc1-mm3/fs/gfs/mount.h 1969-12-31 18:00:00.000000000 -0600
-+++ linux/fs/gfs/mount.h 2004-09-07 16:26:15.778544431 -0500
-@@ -0,0 +1,27 @@
++
++/**
++ * gfs_unmount_lockproto - Unmount lock protocol
++ * @sdp: The GFS superblock
++ *
++ */
++
++void
++gfs_unmount_lockproto(struct gfs_sbd *sdp)
++{
++ lm_unmount(&sdp->sd_lockstruct);
++}
+diff -urN linux-orig/fs/gfs/mount.h linux-patched/fs/gfs/mount.h
+--- linux-orig/fs/gfs/mount.h 1969-12-31 18:00:00.000000000 -0600
++++ linux-patched/fs/gfs/mount.h 2004-10-27 15:27:10.883630065 -0500
+@@ -0,0 +1,22 @@
+/******************************************************************************
+*******************************************************************************
+**
+
+int gfs_make_args(char *data, struct gfs_args *args);
+
-+/* Allow args to be passed to GFS when using an initial ram disk */
-+
-+extern char *gfs_mount_args;
-+extern struct semaphore gfs_mount_args_lock;
-+
-+int gfs_proc_write(struct file *file, const char *buffer,
-+ unsigned long count, void *data);
++int gfs_mount_lockproto(struct gfs_sbd *sdp, int silent);
++void gfs_unmount_lockproto(struct gfs_sbd *sdp);
+
+#endif /* __MOUNT_DOT_H__ */
-diff -urN linux-2.6.9-rc1-mm3/fs/gfs/ondisk.c linux/fs/gfs/ondisk.c
---- linux-2.6.9-rc1-mm3/fs/gfs/ondisk.c 1969-12-31 18:00:00.000000000 -0600
-+++ linux/fs/gfs/ondisk.c 2004-09-07 16:26:15.779544211 -0500
+diff -urN linux-orig/fs/gfs/ondisk.c linux-patched/fs/gfs/ondisk.c
+--- linux-orig/fs/gfs/ondisk.c 1969-12-31 18:00:00.000000000 -0600
++++ linux-patched/fs/gfs/ondisk.c 2004-10-27 15:27:10.883630065 -0500
@@ -0,0 +1,28 @@
+/******************************************************************************
+*******************************************************************************
+#define WANT_GFS_CONVERSION_FUNCTIONS
+#include <linux/gfs_ondisk.h>
+
-diff -urN linux-2.6.9-rc1-mm3/fs/gfs/ops_address.c linux/fs/gfs/ops_address.c
---- linux-2.6.9-rc1-mm3/fs/gfs/ops_address.c 1969-12-31 18:00:00.000000000 -0600
-+++ linux/fs/gfs/ops_address.c 2004-09-07 16:26:15.779544211 -0500
+diff -urN linux-orig/fs/gfs/ops_address.c linux-patched/fs/gfs/ops_address.c
+--- linux-orig/fs/gfs/ops_address.c 1969-12-31 18:00:00.000000000 -0600
++++ linux-patched/fs/gfs/ops_address.c 2004-10-27 15:27:10.883630065 -0500
@@ -0,0 +1,476 @@
+/******************************************************************************
+*******************************************************************************
+
+ atomic_inc(&ip->i_sbd->sd_ops_address);
+
-+ GFS_ASSERT_INODE(gfs_glock_is_held_excl(ip->i_gl) &&
-+ !gfs_is_stuffed(ip), ip,);
++ GFS_ASSERT_INODE(gfs_glock_is_held_excl(ip->i_gl), ip,);
++ GFS_ASSERT_INODE(!gfs_is_stuffed(ip), ip,);
+
+ error = block_write_full_page(page, get_block_noalloc, wbc);
+
+ .bmap = gfs_bmap,
+ .direct_IO = gfs_direct_IO,
+};
-diff -urN linux-2.6.9-rc1-mm3/fs/gfs/ops_address.h linux/fs/gfs/ops_address.h
---- linux-2.6.9-rc1-mm3/fs/gfs/ops_address.h 1969-12-31 18:00:00.000000000 -0600
-+++ linux/fs/gfs/ops_address.h 2004-09-07 16:26:15.780543990 -0500
+diff -urN linux-orig/fs/gfs/ops_address.h linux-patched/fs/gfs/ops_address.h
+--- linux-orig/fs/gfs/ops_address.h 1969-12-31 18:00:00.000000000 -0600
++++ linux-patched/fs/gfs/ops_address.h 2004-10-27 15:27:10.883630065 -0500
@@ -0,0 +1,19 @@
+/******************************************************************************
+*******************************************************************************
+extern struct address_space_operations gfs_file_aops;
+
+#endif /* __OPS_ADDRESS_DOT_H__ */
-diff -urN linux-2.6.9-rc1-mm3/fs/gfs/ops_dentry.c linux/fs/gfs/ops_dentry.c
---- linux-2.6.9-rc1-mm3/fs/gfs/ops_dentry.c 1969-12-31 18:00:00.000000000 -0600
-+++ linux/fs/gfs/ops_dentry.c 2004-09-07 16:26:15.780543990 -0500
+diff -urN linux-orig/fs/gfs/ops_dentry.c linux-patched/fs/gfs/ops_dentry.c
+--- linux-orig/fs/gfs/ops_dentry.c 1969-12-31 18:00:00.000000000 -0600
++++ linux-patched/fs/gfs/ops_dentry.c 2004-10-27 15:27:10.883630065 -0500
@@ -0,0 +1,124 @@
+/******************************************************************************
+*******************************************************************************
+struct dentry_operations gfs_dops = {
+ .d_revalidate = gfs_drevalidate,
+};
-diff -urN linux-2.6.9-rc1-mm3/fs/gfs/ops_dentry.h linux/fs/gfs/ops_dentry.h
---- linux-2.6.9-rc1-mm3/fs/gfs/ops_dentry.h 1969-12-31 18:00:00.000000000 -0600
-+++ linux/fs/gfs/ops_dentry.h 2004-09-07 16:26:15.780543990 -0500
+diff -urN linux-orig/fs/gfs/ops_dentry.h linux-patched/fs/gfs/ops_dentry.h
+--- linux-orig/fs/gfs/ops_dentry.h 1969-12-31 18:00:00.000000000 -0600
++++ linux-patched/fs/gfs/ops_dentry.h 2004-10-27 15:27:10.883630065 -0500
@@ -0,0 +1,19 @@
+/******************************************************************************
+*******************************************************************************
+extern struct dentry_operations gfs_dops;
+
+#endif /* __OPS_DENTRY_DOT_H__ */
-diff -urN linux-2.6.9-rc1-mm3/fs/gfs/ops_export.c linux/fs/gfs/ops_export.c
---- linux-2.6.9-rc1-mm3/fs/gfs/ops_export.c 1969-12-31 18:00:00.000000000 -0600
-+++ linux/fs/gfs/ops_export.c 2004-09-07 16:26:15.781543770 -0500
+diff -urN linux-orig/fs/gfs/ops_export.c linux-patched/fs/gfs/ops_export.c
+--- linux-orig/fs/gfs/ops_export.c 1969-12-31 18:00:00.000000000 -0600
++++ linux-patched/fs/gfs/ops_export.c 2004-10-27 15:27:10.883630065 -0500
@@ -0,0 +1,415 @@
+/******************************************************************************
+*******************************************************************************
+ .get_dentry = gfs_get_dentry,
+};
+
-diff -urN linux-2.6.9-rc1-mm3/fs/gfs/ops_export.h linux/fs/gfs/ops_export.h
---- linux-2.6.9-rc1-mm3/fs/gfs/ops_export.h 1969-12-31 18:00:00.000000000 -0600
-+++ linux/fs/gfs/ops_export.h 2004-09-07 16:26:15.781543770 -0500
+diff -urN linux-orig/fs/gfs/ops_export.h linux-patched/fs/gfs/ops_export.h
+--- linux-orig/fs/gfs/ops_export.h 1969-12-31 18:00:00.000000000 -0600
++++ linux-patched/fs/gfs/ops_export.h 2004-10-27 15:27:10.883630065 -0500
@@ -0,0 +1,19 @@
+/******************************************************************************
+*******************************************************************************
+extern struct export_operations gfs_export_ops;
+
+#endif /* __OPS_EXPORT_DOT_H__ */
-diff -urN linux-2.6.9-rc1-mm3/fs/gfs/ops_file.c linux/fs/gfs/ops_file.c
---- linux-2.6.9-rc1-mm3/fs/gfs/ops_file.c 1969-12-31 18:00:00.000000000 -0600
-+++ linux/fs/gfs/ops_file.c 2004-09-07 16:26:39.160387807 -0500
-@@ -0,0 +1,1646 @@
+diff -urN linux-orig/fs/gfs/ops_file.c linux-patched/fs/gfs/ops_file.c
+--- linux-orig/fs/gfs/ops_file.c 1969-12-31 18:00:00.000000000 -0600
++++ linux-patched/fs/gfs/ops_file.c 2004-10-27 15:27:10.884629833 -0500
+@@ -0,0 +1,1631 @@
+/******************************************************************************
+*******************************************************************************
+**
+ goto fail_gunlock;
+ }
+
-+ /* If this is an exclusive create, make sure our gfs_create()
-+ says we created the file. The O_EXCL flag isn't passed
-+ to gfs_create(), so we have to check it here. */
-+
-+ if (file->f_flags & O_CREAT) {
-+ if (ip->i_creat_task == current &&
-+ ip->i_creat_pid == current->pid) {
-+ ip->i_creat_task = NULL;
-+ ip->i_creat_pid = 0;
-+ } else if (file->f_flags & O_EXCL) {
-+ error = -EEXIST;
-+ goto fail_gunlock;
-+ }
-+ }
-+
+ /* Listen to the Direct I/O flag */
+
+ if (ip->i_di.di_flags & GFS_DIF_DIRECTIO)
+ .lock = gfs_lock,
+ .flock = gfs_flock,
+};
-diff -urN linux-2.6.9-rc1-mm3/fs/gfs/ops_file.h linux/fs/gfs/ops_file.h
---- linux-2.6.9-rc1-mm3/fs/gfs/ops_file.h 1969-12-31 18:00:00.000000000 -0600
-+++ linux/fs/gfs/ops_file.h 2004-09-07 16:26:15.783543329 -0500
+diff -urN linux-orig/fs/gfs/ops_file.h linux-patched/fs/gfs/ops_file.h
+--- linux-orig/fs/gfs/ops_file.h 1969-12-31 18:00:00.000000000 -0600
++++ linux-patched/fs/gfs/ops_file.h 2004-10-27 15:27:10.884629833 -0500
@@ -0,0 +1,20 @@
+/******************************************************************************
+*******************************************************************************
+extern struct file_operations gfs_dir_fops;
+
+#endif /* __OPS_FILE_DOT_H__ */
-diff -urN linux-2.6.9-rc1-mm3/fs/gfs/ops_fstype.c linux/fs/gfs/ops_fstype.c
---- linux-2.6.9-rc1-mm3/fs/gfs/ops_fstype.c 1969-12-31 18:00:00.000000000 -0600
-+++ linux/fs/gfs/ops_fstype.c 2004-09-07 16:26:15.784543108 -0500
-@@ -0,0 +1,607 @@
+diff -urN linux-orig/fs/gfs/ops_fstype.c linux-patched/fs/gfs/ops_fstype.c
+--- linux-orig/fs/gfs/ops_fstype.c 1969-12-31 18:00:00.000000000 -0600
++++ linux-patched/fs/gfs/ops_fstype.c 2004-10-27 15:27:10.884629833 -0500
+@@ -0,0 +1,608 @@
+/******************************************************************************
+*******************************************************************************
+**
+#include "glock.h"
+#include "glops.h"
+#include "inode.h"
-+#include "locking.h"
+#include "mount.h"
+#include "ops_export.h"
+#include "ops_fstype.h"
+#include "ops_super.h"
++#include "proc.h"
+#include "quota.h"
+#include "recovery.h"
+#include "rgrp.h"
+ * @data: Mount options
+ * @silent: Don't complain if its not a GFS filesystem
+ *
-+ * Returns: The VFS superblock, or NULL on error
++ * Returns: errno
+ */
+
+static int
+ unsigned int x;
+ int error;
+
-+ error = -ENOMEM;
+ sdp = vmalloc(sizeof(struct gfs_sbd));
-+ if (!sdp)
++ if (!sdp) {
++ printk("GFS: can't alloc struct gfs_sbd\n");
++ error = -ENOMEM;
+ goto fail;
++ }
+
+ memset(sdp, 0, sizeof(struct gfs_sbd));
+
+ sb->s_op = &gfs_super_ops;
+ sb->s_export_op = &gfs_export_ops;
+ sb->s_flags |= MS_NOATIME | MS_NODIRATIME;
-+ sb->s_maxbytes = ~0ULL;
++ sb->s_maxbytes = MAX_LFS_FILESIZE;
+
-+ if (sdp->sd_args.ar_posixacls)
++ if (sdp->sd_args.ar_posix_acls)
+ sb->s_flags |= MS_POSIXACL;
+
+ /* Set up the buffer cache and fill in some fake values
+ if (error)
+ goto fail_vfree;
+
-+ printk("GFS: fsid=%s: Joined cluster. Now mounting FS...\n",
-+ sdp->sd_fsname);
-+
+ if ((sdp->sd_lockstruct.ls_flags & LM_LSFLAG_LOCAL) &&
+ !sdp->sd_args.ar_ignore_local_fs) {
+ /* Force local [p|f]locks */
+ if (error)
+ goto fail_inoded;
+
++ gfs_proc_fs_add(sdp);
++
+ gfs_glock_dq_uninit(&mount_gh);
+
+ return 0;
+
-+ fail_inoded:
++ fail_inoded:
+ down(&sdp->sd_thread_lock);
+ clear_bit(SDF_INODED_RUN, &sdp->sd_flags);
+ wake_up_process(sdp->sd_inoded_process);
+ up(&sdp->sd_thread_lock);
+ wait_for_completion(&sdp->sd_thread_completion);
+
-+ fail_quotad:
++ fail_quotad:
+ down(&sdp->sd_thread_lock);
+ clear_bit(SDF_QUOTAD_RUN, &sdp->sd_flags);
+ wake_up_process(sdp->sd_quotad_process);
+ up(&sdp->sd_thread_lock);
+ wait_for_completion(&sdp->sd_thread_completion);
+
-+ fail_logd:
++ fail_logd:
+ down(&sdp->sd_thread_lock);
+ clear_bit(SDF_LOGD_RUN, &sdp->sd_flags);
+ wake_up_process(sdp->sd_logd_process);
+ up(&sdp->sd_thread_lock);
+ wait_for_completion(&sdp->sd_thread_completion);
+
-+ fail_dput:
++ fail_dput:
+ dput(sb->s_root);
+
-+ fail_li_free:
++ fail_li_free:
+ gfs_inode_put(sdp->sd_linode);
+
-+ fail_qi_free:
++ fail_qi_free:
+ gfs_inode_put(sdp->sd_qinode);
+
-+ fail_root_free:
++ fail_root_free:
+ gfs_inode_put(sdp->sd_rooti);
+
-+ fail_ri_free:
++ fail_ri_free:
+ gfs_inode_put(sdp->sd_riinode);
+ gfs_clear_rgrpd(sdp);
+
-+ fail_recoverd:
++ fail_recoverd:
+ down(&sdp->sd_thread_lock);
+ clear_bit(SDF_RECOVERD_RUN, &sdp->sd_flags);
+ wake_up_process(sdp->sd_recoverd_process);
+ up(&sdp->sd_thread_lock);
+ wait_for_completion(&sdp->sd_thread_completion);
+
-+ fail_recover_dump:
++ fail_recover_dump:
+ clear_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags);
+ gfs_unlinked_cleanup(sdp);
+ gfs_quota_cleanup(sdp);
+
-+ fail_gunlock_journal:
++ fail_gunlock_journal:
+ gfs_glock_dq_uninit(&sdp->sd_journal_gh);
+
-+ fail_gunlock_ji:
++ fail_gunlock_ji:
+ if (jindex)
+ gfs_glock_dq_uninit(&ji_gh);
+
-+ fail_trans_gl:
++ fail_trans_gl:
+ gfs_glock_put(sdp->sd_trans_gl);
+
-+ fail_ji_free:
++ fail_ji_free:
+ gfs_inode_put(sdp->sd_jiinode);
+ gfs_clear_journals(sdp);
+
-+ fail_gunlock_sb:
++ fail_gunlock_sb:
+ if (super)
+ gfs_glock_dq_uninit(&sb_gh);
+
-+ fail_gunlock_live:
++ fail_gunlock_live:
+ gfs_glock_dq_uninit(&sdp->sd_live_gh);
+
-+ fail_gunlock_mount:
++ fail_gunlock_mount:
+ gfs_glock_dq_uninit(&mount_gh);
+
-+ fail_glockd:
++ fail_glockd:
+ clear_bit(SDF_GLOCKD_RUN, &sdp->sd_flags);
+ wake_up(&sdp->sd_reclaim_wchan);
+ while (sdp->sd_glockd_num--)
+ up(&sdp->sd_thread_lock);
+ wait_for_completion(&sdp->sd_thread_completion);
+
-+ fail_lockproto:
++ fail_lockproto:
+ gfs_gl_hash_clear(sdp, TRUE);
+ gfs_unmount_lockproto(sdp);
+ gfs_clear_dirty_j(sdp);
+ while (invalidate_inodes(sb))
+ yield();
+
-+ fail_vfree:
++ fail_vfree:
+ vfree(sdp);
+
-+ fail:
++ fail:
+ vfs2sdp(sb) = NULL;
+ return error;
+}
+ .kill_sb = kill_block_super,
+ .owner = THIS_MODULE,
+};
-diff -urN linux-2.6.9-rc1-mm3/fs/gfs/ops_fstype.h linux/fs/gfs/ops_fstype.h
---- linux-2.6.9-rc1-mm3/fs/gfs/ops_fstype.h 1969-12-31 18:00:00.000000000 -0600
-+++ linux/fs/gfs/ops_fstype.h 2004-09-07 16:26:15.785542887 -0500
+diff -urN linux-orig/fs/gfs/ops_fstype.h linux-patched/fs/gfs/ops_fstype.h
+--- linux-orig/fs/gfs/ops_fstype.h 1969-12-31 18:00:00.000000000 -0600
++++ linux-patched/fs/gfs/ops_fstype.h 2004-10-27 15:27:10.884629833 -0500
@@ -0,0 +1,19 @@
+/******************************************************************************
+*******************************************************************************
+extern struct file_system_type gfs_fs_type;
+
+#endif /* __OPS_FSTYPE_DOT_H__ */
-diff -urN linux-2.6.9-rc1-mm3/fs/gfs/ops_inode.c linux/fs/gfs/ops_inode.c
---- linux-2.6.9-rc1-mm3/fs/gfs/ops_inode.c 1969-12-31 18:00:00.000000000 -0600
-+++ linux/fs/gfs/ops_inode.c 2004-09-07 16:26:15.787542446 -0500
-@@ -0,0 +1,1723 @@
+diff -urN linux-orig/fs/gfs/ops_inode.c linux-patched/fs/gfs/ops_inode.c
+--- linux-orig/fs/gfs/ops_inode.c 1969-12-31 18:00:00.000000000 -0600
++++ linux-patched/fs/gfs/ops_inode.c 2004-10-27 15:27:10.884629833 -0500
+@@ -0,0 +1,1632 @@
+/******************************************************************************
+*******************************************************************************
+**
+#include <linux/namei.h>
+#include <linux/utsname.h>
+#include <asm/uaccess.h>
-+#include <linux/xattr.h>
+#include <linux/mm.h>
++#include <linux/xattr.h>
+#include <linux/posix_acl.h>
+
+#include "gfs.h"
+#include "bmap.h"
+#include "dio.h"
+#include "dir.h"
++#include "eaops.h"
+#include "eattr.h"
+#include "glock.h"
+#include "inode.h"
+ &i_gh);
+ if (!error)
+ break;
-+ else if (error != -EEXIST) {
++ else if (error != -EEXIST ||
++ (nd->intent.open.flags & O_EXCL)) {
+ gfs_holder_uninit(&d_gh);
+ return error;
+ }
+ gfs_quota_unlock_m(dip);
+ gfs_unlinked_unlock(sdp, dip->i_alloc->al_ul);
+ gfs_alloc_put(dip);
-+
-+ ip->i_creat_task = current;
-+ ip->i_creat_pid = current->pid;
+ }
+
+ gfs_glock_dq_uninit(&d_gh);
+ error = -EFBIG;
+ goto fail_gunlock;
+ }
++ if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) {
++ error = -EPERM;
++ goto fail_gunlock;
++ }
+ if (!ip->i_di.di_nlink) {
+ error = -EINVAL;
+ goto fail_gunlock;
+ if (error)
+ goto fail;
+
-+ error = permission(dir, MAY_WRITE | MAY_EXEC, NULL);
-+ if (error)
-+ goto fail_gunlock;
-+
-+ if ((dip->i_di.di_mode & S_ISVTX) &&
-+ dip->i_di.di_uid != current->fsuid &&
-+ ip->i_di.di_uid != current->fsuid &&
-+ !capable(CAP_FOWNER)) {
-+ error = -EPERM;
-+ goto fail_gunlock;
-+ }
-+
-+ error = gfs_revalidate(dip, &dentry->d_name, ip);
++ error = gfs_unlink_ok(dip, &dentry->d_name, ip);
+ if (error)
+ goto fail_gunlock;
+
+ gfs_holder_init(dip->i_gl, 0, 0, &d_gh);
+
+ error = gfs_createi(&d_gh, &dentry->d_name,
-+ GFS_FILE_LNK, 0777,
++ GFS_FILE_LNK, S_IFLNK | S_IRWXUGO,
+ &i_gh);
+ if (error) {
+ gfs_holder_uninit(&d_gh);
+ gfs_holder_init(dip->i_gl, 0, 0, &d_gh);
+
+ error = gfs_createi(&d_gh, &dentry->d_name,
-+ GFS_FILE_DIR, mode,
++ GFS_FILE_DIR, S_IFDIR | mode,
+ &i_gh);
+ if (error) {
+ gfs_holder_uninit(&d_gh);
+ if (error)
+ goto fail;
+
-+ error = permission(dir, MAY_WRITE | MAY_EXEC, NULL);
-+ if (error)
-+ goto fail_gunlock;
-+
-+ if ((dip->i_di.di_mode & S_ISVTX) &&
-+ dip->i_di.di_uid != current->fsuid &&
-+ ip->i_di.di_uid != current->fsuid &&
-+ !capable(CAP_FOWNER)) {
-+ error = -EPERM;
-+ goto fail_gunlock;
-+ }
-+
-+ error = gfs_revalidate(dip, &dentry->d_name, ip);
++ error = gfs_unlink_ok(dip, &dentry->d_name, ip);
+ if (error)
+ goto fail_gunlock;
+
+
+ /* Check out the old directory */
+
-+ error = permission(odir, MAY_WRITE | MAY_EXEC, NULL);
-+ if (error)
-+ goto fail_gunlock;
-+
-+ if ((odip->i_di.di_mode & S_ISVTX) &&
-+ odip->i_di.di_uid != current->fsuid &&
-+ ip->i_di.di_uid != current->fsuid &&
-+ !capable(CAP_FOWNER)) {
-+ error = -EPERM;
-+ goto fail_gunlock;
-+ }
-+
-+ error = gfs_revalidate(odip, &odentry->d_name, ip);
++ error = gfs_unlink_ok(odip, &odentry->d_name, ip);
+ if (error)
+ goto fail_gunlock;
+
+ /* Check out the new directory */
+
-+ error = permission(ndir, MAY_WRITE | MAY_EXEC, NULL);
-+ if (error)
-+ goto fail_gunlock;
-+
+ if (nip) {
-+ if ((ndip->i_di.di_mode & S_ISVTX) &&
-+ ndip->i_di.di_uid != current->fsuid &&
-+ nip->i_di.di_uid != current->fsuid &&
-+ !capable(CAP_FOWNER)) {
-+ error = -EPERM;
-+ goto fail_gunlock;
-+ }
-+
-+ error = gfs_revalidate(ndip, &ndentry->d_name, nip);
++ error = gfs_unlink_ok(ndip, &ndentry->d_name, nip);
+ if (error)
+ goto fail_gunlock;
+
+ }
+ }
+ } else {
++ error = permission(ndir, MAY_WRITE | MAY_EXEC, NULL);
++ if (error)
++ goto fail_gunlock;
++
+ error = gfs_dir_search(ndip, &ndentry->d_name, NULL, NULL);
+ switch (error) {
+ case -ENOENT:
+}
+
+/**
++ * gfs_permission_i -
++ * @inode:
++ * @mask:
++ * @nd:
++ *
++ * Shamelessly ripped from ext3
++ *
++ * Returns: errno
++ */
++
++static int
++gfs_permission_i(struct inode *inode, int mask, struct nameidata *nd)
++{
++ int mode = inode->i_mode;
++
++ /* Nobody gets write access to a read-only fs */
++ if ((mask & MAY_WRITE) &&
++ IS_RDONLY(inode) &&
++ (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)))
++ return -EROFS;
++
++ /* Nobody gets write access to an immutable file */
++ if ((mask & MAY_WRITE) && IS_IMMUTABLE(inode))
++ return -EACCES;
++
++ if (current->fsuid == inode->i_uid)
++ mode >>= 6;
++ else if (IS_POSIXACL(inode)) {
++ struct posix_acl *acl = NULL;
++ int error;
++
++ /* The access ACL cannot grant access if the group class
++ permission bits don't contain all requested permissions. */
++ if (((mode >> 3) & mask & S_IRWXO) != mask)
++ goto check_groups;
++
++ error = gfs_acl_get(vn2ip(inode), TRUE, &acl);
++ if (error)
++ return error;
++
++ if (acl) {
++ int error = posix_acl_permission(inode, acl, mask);
++ posix_acl_release(acl);
++ if (error == -EACCES)
++ goto check_capabilities;
++ return error;
++ } else
++ goto check_groups;
++ } else {
++ check_groups:
++ if (in_group_p(inode->i_gid))
++ mode >>= 3;
++ }
++
++ if ((mode & mask & S_IRWXO) == mask)
++ return 0;
++
++ check_capabilities:
++ /* Allowed to override Discretionary Access Control? */
++ if (!(mask & MAY_EXEC) ||
++ (inode->i_mode & S_IXUGO) ||
++ S_ISDIR(inode->i_mode))
++ if (capable(CAP_DAC_OVERRIDE))
++ return 0;
++
++ /* Read and search granted if capable(CAP_DAC_READ_SEARCH) */
++ if (capable(CAP_DAC_READ_SEARCH) &&
++ ((mask == MAY_READ) ||
++ (S_ISDIR(inode->i_mode) && !(mask & MAY_WRITE))))
++ return 0;
++
++ return -EACCES;
++}
++
++/**
+ * gfs_permission -
+ * @inode:
+ * @mask:
+{
+ struct gfs_inode *ip = vn2ip(inode);
+ struct gfs_holder i_gh;
-+ struct posix_acl *acl;
-+ umode_t mode = inode->i_mode;
+ int error;
+
+ atomic_inc(&ip->i_sbd->sd_ops_inode);
+ if (error)
+ return error;
+
-+ if (mask & MAY_WRITE) {
-+ if (IS_RDONLY(inode) &&
-+ (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode))) {
-+ error = -EROFS;
-+ goto out;
-+ }
-+ if (IS_IMMUTABLE(inode)) {
-+ error = -EACCES;
-+ goto out;
-+ }
-+ }
-+
-+ if (capable(CAP_DAC_OVERRIDE))
-+ if (!(mask & MAY_EXEC) || (mode & S_IXUGO))
-+ goto out;
-+
-+ if (capable(CAP_DAC_READ_SEARCH) &&
-+ (mask == MAY_READ ||
-+ (!(mask & MAY_WRITE) && S_ISDIR(mode))))
-+ goto out;
-+
-+ if (inode->i_uid == current->fsuid) {
-+ if ((mask & (mode >> 6)) != mask)
-+ error = -EACCES;
-+ goto out;
-+ }
-+
-+ if ((mask & (mode >> 3)) == mask) {
-+ error = gfs_getacl(inode, TRUE, &acl);
-+ if (acl) {
-+ error = posix_acl_permission(inode, acl, mask);
-+ goto out;
-+ } else if (error && error != -ENODATA)
-+ goto out;
-+ error = 0;
-+ if (in_group_p(inode->i_gid)) {
-+ error = 0;
-+ goto out;
-+ }
-+ } else if (in_group_p(inode->i_gid)) {
-+ error = -EACCES;
-+ goto out;
-+ }
-+
-+ if ((mask & mode) == mask)
-+ goto out;
++ error = gfs_permission_i(inode, mask, nd);
+
-+ error = -EACCES;
-+
-+ out:
+ gfs_glock_dq_uninit(&i_gh);
+
+ return error;
+ struct gfs_inode *ip = vn2ip(inode);
+ struct gfs_sbd *sdp = ip->i_sbd;
+ struct gfs_holder i_gh;
-+ struct gfs_alloc *al;
-+ struct buffer_head *dibh;
-+ uint32_t ouid, ogid, nuid, ngid;
-+ int error = 0;
++ int error;
+
+ atomic_inc(&sdp->sd_ops_inode);
+
+ if (error)
+ return error;
+
++ if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) {
++ error = -EPERM;
++ goto fail;
++ }
++
+ error = inode_change_ok(inode, attr);
+ if (error)
+ goto fail;
+ }
+
+ else if (attr->ia_valid & (ATTR_UID | ATTR_GID)) {
++ struct gfs_alloc *al;
++ struct buffer_head *dibh;
++ uint32_t ouid, ogid, nuid, ngid;
++
+ ouid = ip->i_di.di_uid;
+ ogid = ip->i_di.di_gid;
+ nuid = attr->ia_uid;
+ gfs_alloc_put(ip);
+ }
+
-+ else {
-+ /* Trans may require:
-+ one dinode block plus changes for acl. */
-+
-+ error = gfs_trans_begin(sdp,
-+ 1 + GFS_MAX_EA_ACL_BLKS, 0);
++ else if ((attr->ia_valid & ATTR_MODE) && IS_POSIXACL(inode)) {
++ error = gfs_acl_chmod(ip, attr);
+ if (error)
+ goto fail;
++ }
+
-+ error = gfs_get_inode_buffer(ip, &dibh);
-+ if (!error) {
-+ inode_setattr(inode, attr);
-+ gfs_inode_attr_out(ip);
-+
-+ if (attr->ia_valid & ATTR_MODE)
-+ error = gfs_acl_setattr(inode);
-+
-+ gfs_trans_add_bh(ip->i_gl, dibh);
-+ gfs_dinode_out(&ip->i_di, dibh->b_data);
-+ brelse(dibh);
-+ }
-+
-+ gfs_trans_end(sdp);
++ else {
++ error = gfs_setattr_simple(ip, attr);
++ if (error)
++ goto fail;
+ }
+
+ gfs_glock_dq_uninit(&i_gh);
+}
+
+/**
-+ * get_eatype - get the type of the ea, and trucate the type from the name
-+ * @namep: ea name, possibly with type appended
-+ *
-+ * Returns: GFS_EATYPE_XXX
-+ */
-+
-+int
-+get_eatype(const char *name, char **truncated_name)
-+{
-+ int type;
-+
-+ if (strncmp(name, "system.", 7) == 0) {
-+ type = GFS_EATYPE_SYS;
-+ *truncated_name = strchr(name, '.') + 1;
-+ } else if (strncmp(name, "user.", 5) == 0) {
-+ type = GFS_EATYPE_USR;
-+ *truncated_name = strchr(name, '.') + 1;
-+ } else {
-+ type = GFS_EATYPE_UNUSED;
-+ *truncated_name = NULL;
-+ }
-+
-+ return type;
-+}
-+
-+/**
+ * gfs_setxattr - Set (or create or replace) an inode's extended attribute
-+ * @dentry: inode's dentry
-+ * @name: name of the extended attribute
-+ * @data: the value of the extended attribute
-+ * @size: the size of data
-+ * @flags: used to specify create or replace actions
++ * @dentry:
++ * @name:
++ * @data:
++ * @size:
++ * @flags:
+ *
-+ * Returns: 0 on success, -EXXX on error
++ * Returns: errno
+ */
+
+int
+ const void *data, size_t size,
+ int flags)
+{
-+ struct inode *inode = dentry->d_inode;
-+ struct gfs_inode *ip = vn2ip(inode);
-+ struct gfs_sbd *sdp = ip->i_sbd;
-+ struct gfs_easet_io req;
-+ char *truncated_name;
-+ int error = 0;
-+
-+ atomic_inc(&sdp->sd_ops_inode);
++ struct gfs_ea_request er;
+
-+ req.es_type = get_eatype(name, &truncated_name);
++ atomic_inc(&vfs2sdp(dentry->d_inode->i_sb)->sd_ops_inode);
+
-+ if (req.es_type == GFS_EATYPE_UNUSED)
-+ error = -EOPNOTSUPP;
-+ else {
-+ req.es_data = data;
-+ req.es_name = truncated_name;
-+ req.es_data_len = size;
-+ req.es_name_len = strlen(truncated_name);
-+ if (flags & XATTR_CREATE)
-+ req.es_cmd = GFS_EACMD_CREATE;
-+ else if (flags & XATTR_REPLACE)
-+ req.es_cmd = GFS_EACMD_REPLACE;
-+ else
-+ req.es_cmd = GFS_EACMD_SET;
-+ error = gfs_set_eattr(sdp, ip, &req);
-+ }
++ memset(&er, 0, sizeof(struct gfs_ea_request));
++ er.er_type = gfs_ea_name2type(name, &er.er_name);
++ if (er.er_type == GFS_EATYPE_UNUSED)
++ return -EOPNOTSUPP;
++ er.er_data = (char *)data;
++ er.er_name_len = strlen(er.er_name);
++ er.er_data_len = size;
++ er.er_flags = flags;
+
-+ return error;
++ return gfs_ea_set(vn2ip(dentry->d_inode), &er);
+}
+
+/**
+ * @data:
+ * @size:
+ *
-+ * Returns: 0 on success, -EXXX on error
++ * Returns: The number of bytes put into data, or -errno
+ */
+
+ssize_t
+gfs_getxattr(struct dentry *dentry, const char *name,
+ void *data, size_t size)
+{
-+ struct inode *inode = dentry->d_inode;
-+ struct gfs_inode *ip = vn2ip(inode);
-+ struct gfs_sbd *sdp = ip->i_sbd;
-+ struct gfs_eaget_io req;
-+ char *truncated_name;
-+ int error = 0;
-+
-+ atomic_inc(&sdp->sd_ops_inode);
++ struct gfs_ea_request er;
+
-+ req.eg_type = get_eatype(name, &truncated_name);
++ atomic_inc(&vfs2sdp(dentry->d_inode->i_sb)->sd_ops_inode);
+
-+ if (req.eg_type == GFS_EATYPE_UNUSED)
-+ error = -EOPNOTSUPP;
-+ else {
-+ req.eg_name = truncated_name;
-+ req.eg_name_len = strlen(truncated_name);
-+ req.eg_data = data;
-+ req.eg_data_len = size;
-+ req.eg_len = NULL;
-+ error = gfs_get_eattr(sdp, ip, &req, gfs_ea_memcpy);
-+ }
++ memset(&er, 0, sizeof(struct gfs_ea_request));
++ er.er_type = gfs_ea_name2type(name, &er.er_name);
++ if (er.er_type == GFS_EATYPE_UNUSED)
++ return -EOPNOTSUPP;
++ er.er_data = data;
++ er.er_name_len = strlen(er.er_name);
++ er.er_data_len = size;
+
-+ return error;
++ return gfs_ea_get(vn2ip(dentry->d_inode), &er);
+}
+
+/**
+ * @buffer:
+ * @size:
+ *
-+ * Returns: 0 on success, -EXXX on error
++ * Returns: The number of bytes put into data, or -errno
+ */
+
+ssize_t
+gfs_listxattr(struct dentry *dentry, char *buffer, size_t size)
+{
-+ struct inode *inode = dentry->d_inode;
-+ struct gfs_inode *ip = vn2ip(inode);
-+ struct gfs_sbd *sdp = ip->i_sbd;
-+ struct gfs_eaget_io req;
++ struct gfs_ea_request er;
+
-+ atomic_inc(&sdp->sd_ops_inode);
++ atomic_inc(&vfs2sdp(dentry->d_inode->i_sb)->sd_ops_inode);
+
-+ req.eg_type = 0;
-+ req.eg_name = NULL;
-+ req.eg_name_len = 0;
-+ req.eg_data = buffer;
-+ req.eg_data_len = size;
-+ req.eg_len = NULL;
++ memset(&er, 0, sizeof(struct gfs_ea_request));
++ er.er_data = (size) ? buffer : NULL;
++ er.er_data_len = size;
+
-+ return gfs_get_eattr(sdp, ip, &req, gfs_ea_memcpy);
++ return gfs_ea_list(vn2ip(dentry->d_inode), &er);
+}
+
+/**
+ * @dentry:
+ * @name:
+ *
-+ * Returns: 0 on success, -EXXX on error
++ * Returns: errno
+ */
+
+int
+gfs_removexattr(struct dentry *dentry, const char *name)
+{
-+ struct inode *inode = dentry->d_inode;
-+ struct gfs_inode *ip = vn2ip(inode);
-+ struct gfs_sbd *sdp = ip->i_sbd;
-+ struct gfs_easet_io req;
-+ char *truncated_name;
-+ int error = 0;
++ struct gfs_ea_request er;
+
-+ atomic_inc(&sdp->sd_ops_inode);
++ atomic_inc(&vfs2sdp(dentry->d_inode->i_sb)->sd_ops_inode);
+
-+ req.es_type = get_eatype(name, &truncated_name);
-+
-+ if (req.es_type == GFS_EATYPE_UNUSED)
-+ error = -EOPNOTSUPP;
-+ else {
-+ req.es_name = truncated_name;
-+ req.es_data = NULL;
-+ req.es_data_len = 0;
-+ req.es_name_len = strlen(truncated_name);
-+ req.es_cmd = GFS_EACMD_REMOVE;
-+ error = gfs_set_eattr(sdp, ip, &req);
-+ }
++ memset(&er, 0, sizeof(struct gfs_ea_request));
++ er.er_type = gfs_ea_name2type(name, &er.er_name);
++ if (er.er_type == GFS_EATYPE_UNUSED)
++ return -EOPNOTSUPP;
++ er.er_name_len = strlen(er.er_name);
+
-+ return error;
++ return gfs_ea_remove(vn2ip(dentry->d_inode), &er);
+}
+
+struct inode_operations gfs_file_iops = {
+ .removexattr = gfs_removexattr,
+};
+
-diff -urN linux-2.6.9-rc1-mm3/fs/gfs/ops_inode.h linux/fs/gfs/ops_inode.h
---- linux-2.6.9-rc1-mm3/fs/gfs/ops_inode.h 1969-12-31 18:00:00.000000000 -0600
-+++ linux/fs/gfs/ops_inode.h 2004-09-07 16:26:15.787542446 -0500
+diff -urN linux-orig/fs/gfs/ops_inode.h linux-patched/fs/gfs/ops_inode.h
+--- linux-orig/fs/gfs/ops_inode.h 1969-12-31 18:00:00.000000000 -0600
++++ linux-patched/fs/gfs/ops_inode.h 2004-10-27 15:27:10.884629833 -0500
@@ -0,0 +1,22 @@
+/******************************************************************************
+*******************************************************************************
+extern struct inode_operations gfs_dev_iops;
+
+#endif /* __OPS_INODE_DOT_H__ */
-diff -urN linux-2.6.9-rc1-mm3/fs/gfs/ops_super.c linux/fs/gfs/ops_super.c
---- linux-2.6.9-rc1-mm3/fs/gfs/ops_super.c 1969-12-31 18:00:00.000000000 -0600
-+++ linux/fs/gfs/ops_super.c 2004-09-07 16:26:15.788542226 -0500
-@@ -0,0 +1,418 @@
+diff -urN linux-orig/fs/gfs/ops_super.c linux-patched/fs/gfs/ops_super.c
+--- linux-orig/fs/gfs/ops_super.c 1969-12-31 18:00:00.000000000 -0600
++++ linux-patched/fs/gfs/ops_super.c 2004-10-27 15:27:10.884629833 -0500
+@@ -0,0 +1,424 @@
+/******************************************************************************
+*******************************************************************************
+**
+#include "dio.h"
+#include "glock.h"
+#include "inode.h"
-+#include "locking.h"
+#include "log.h"
++#include "mount.h"
+#include "ops_super.h"
+#include "page.h"
++#include "proc.h"
+#include "quota.h"
+#include "recovery.h"
+#include "rgrp.h"
+ * @inode: The inode
+ * @sync: synchronous write flag
+ *
++ * Returns: errno
+ */
+
-+static void
++static int
+gfs_write_inode(struct inode *inode, int sync)
+{
+ struct gfs_inode *ip = vn2ip(inode);
+
+ if (ip && sync && !gfs_in_panic)
+ gfs_log_flush_glock(ip->i_gl);
++
++ return 0;
+}
+
+/**
+
+ atomic_inc(&sdp->sd_ops_super);
+
++ gfs_proc_fs_del(sdp);
++
+ /* Unfreeze the filesystem, if we need to */
+
+ down(&sdp->sd_freeze_lock);
+ seq_printf(s, ",upgrade");
+ if (args->ar_num_glockd != GFS_GLOCKD_DEFAULT)
+ seq_printf(s, ",num_glockd=%u", args->ar_num_glockd);
-+ if (args->ar_posixacls)
++ if (args->ar_posix_acls)
+ seq_printf(s, ",acl");
+ if (args->ar_suiddir)
+ seq_printf(s, ",suiddir");
+ .clear_inode = gfs_clear_inode,
+ .show_options = gfs_show_options,
+};
-diff -urN linux-2.6.9-rc1-mm3/fs/gfs/ops_super.h linux/fs/gfs/ops_super.h
---- linux-2.6.9-rc1-mm3/fs/gfs/ops_super.h 1969-12-31 18:00:00.000000000 -0600
-+++ linux/fs/gfs/ops_super.h 2004-09-07 16:26:15.788542226 -0500
+diff -urN linux-orig/fs/gfs/ops_super.h linux-patched/fs/gfs/ops_super.h
+--- linux-orig/fs/gfs/ops_super.h 1969-12-31 18:00:00.000000000 -0600
++++ linux-patched/fs/gfs/ops_super.h 2004-10-27 15:27:10.885629601 -0500
@@ -0,0 +1,19 @@
+/******************************************************************************
+*******************************************************************************
+extern struct super_operations gfs_super_ops;
+
+#endif /* __OPS_SUPER_DOT_H__ */
-diff -urN linux-2.6.9-rc1-mm3/fs/gfs/ops_vm.c linux/fs/gfs/ops_vm.c
---- linux-2.6.9-rc1-mm3/fs/gfs/ops_vm.c 1969-12-31 18:00:00.000000000 -0600
-+++ linux/fs/gfs/ops_vm.c 2004-09-07 16:26:15.789542005 -0500
-@@ -0,0 +1,212 @@
+diff -urN linux-orig/fs/gfs/ops_vm.c linux-patched/fs/gfs/ops_vm.c
+--- linux-orig/fs/gfs/ops_vm.c 1969-12-31 18:00:00.000000000 -0600
++++ linux-patched/fs/gfs/ops_vm.c 2004-10-27 15:27:10.885629601 -0500
+@@ -0,0 +1,239 @@
+/******************************************************************************
+*******************************************************************************
+**
+#include "trans.h"
+
+/**
++ * pfault_be_greedy -
++ * @ip:
++ *
++ */
++
++static void
++pfault_be_greedy(struct gfs_inode *ip)
++{
++ unsigned int time;
++
++ spin_lock(&ip->i_lock);
++ time = ip->i_greedy;
++ ip->i_last_pfault = jiffies;
++ spin_unlock(&ip->i_lock);
++
++ gfs_inode_hold(ip);
++ if (gfs_glock_be_greedy(ip->i_gl, time))
++ gfs_inode_put(ip);
++}
++
++/**
+ * gfs_private_nopage -
+ * @area:
+ * @address:
+
+ result = filemap_nopage(area, address, type);
+
++ if (result && result != NOPAGE_OOM)
++ pfault_be_greedy(ip);
++
+ gfs_glock_dq_uninit(&i_gh);
+
+ return result;
+ if (error) {
+ page_cache_release(result);
+ result = NULL;
++ goto out;
+ }
+ set_page_dirty(result);
+ }
+
++ pfault_be_greedy(ip);
++
+ out:
+ gfs_glock_dq_uninit(&i_gh);
+
+ .nopage = gfs_sharewrite_nopage,
+};
+
-diff -urN linux-2.6.9-rc1-mm3/fs/gfs/ops_vm.h linux/fs/gfs/ops_vm.h
---- linux-2.6.9-rc1-mm3/fs/gfs/ops_vm.h 1969-12-31 18:00:00.000000000 -0600
-+++ linux/fs/gfs/ops_vm.h 2004-09-07 16:26:15.789542005 -0500
+diff -urN linux-orig/fs/gfs/ops_vm.h linux-patched/fs/gfs/ops_vm.h
+--- linux-orig/fs/gfs/ops_vm.h 1969-12-31 18:00:00.000000000 -0600
++++ linux-patched/fs/gfs/ops_vm.h 2004-10-27 15:27:10.885629601 -0500
@@ -0,0 +1,20 @@
+/******************************************************************************
+*******************************************************************************
+extern struct vm_operations_struct gfs_vm_ops_sharewrite;
+
+#endif /* __OPS_VM_DOT_H__ */
-diff -urN linux-2.6.9-rc1-mm3/fs/gfs/page.c linux/fs/gfs/page.c
---- linux-2.6.9-rc1-mm3/fs/gfs/page.c 1969-12-31 18:00:00.000000000 -0600
-+++ linux/fs/gfs/page.c 2004-09-07 16:26:15.790541785 -0500
-@@ -0,0 +1,276 @@
+diff -urN linux-orig/fs/gfs/page.c linux-patched/fs/gfs/page.c
+--- linux-orig/fs/gfs/page.c 1969-12-31 18:00:00.000000000 -0600
++++ linux-patched/fs/gfs/page.c 2004-10-27 15:27:10.885629601 -0500
+@@ -0,0 +1,278 @@
+/******************************************************************************
+*******************************************************************************
+**
+ int release = FALSE;
+
+ if (!page || page->index) {
-+ RETRY_MALLOC(page = grab_cache_page(inode->i_mapping, 0), page);
++ page = grab_cache_page(inode->i_mapping, 0);
++ if (!page)
++ return -ENOMEM;
+ release = TRUE;
+ }
+
+
+ return error;
+}
-diff -urN linux-2.6.9-rc1-mm3/fs/gfs/page.h linux/fs/gfs/page.h
---- linux-2.6.9-rc1-mm3/fs/gfs/page.h 1969-12-31 18:00:00.000000000 -0600
-+++ linux/fs/gfs/page.h 2004-09-07 16:26:15.790541785 -0500
+diff -urN linux-orig/fs/gfs/page.h linux-patched/fs/gfs/page.h
+--- linux-orig/fs/gfs/page.h 1969-12-31 18:00:00.000000000 -0600
++++ linux-patched/fs/gfs/page.h 2004-10-27 15:27:10.885629601 -0500
@@ -0,0 +1,26 @@
+/******************************************************************************
+*******************************************************************************
+int gfs_truncator_page(struct gfs_inode *ip, uint64_t size);
+
+#endif /* __PAGE_DOT_H__ */
-diff -urN linux-2.6.9-rc1-mm3/fs/gfs/quota.c linux/fs/gfs/quota.c
---- linux-2.6.9-rc1-mm3/fs/gfs/quota.c 1969-12-31 18:00:00.000000000 -0600
-+++ linux/fs/gfs/quota.c 2004-09-07 16:26:15.791541564 -0500
+diff -urN linux-orig/fs/gfs/proc.c linux-patched/fs/gfs/proc.c
+--- linux-orig/fs/gfs/proc.c 1969-12-31 18:00:00.000000000 -0600
++++ linux-patched/fs/gfs/proc.c 2004-10-27 15:27:10.885629601 -0500
+@@ -0,0 +1,402 @@
++/******************************************************************************
++*******************************************************************************
++**
++** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
++** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
++**
++** This copyrighted material is made available to anyone wishing to use,
++** modify, copy, or redistribute it subject to the terms and conditions
++** of the GNU General Public License v.2.
++**
++*******************************************************************************
++******************************************************************************/
++
++#include <linux/sched.h>
++#include <linux/slab.h>
++#include <linux/smp_lock.h>
++#include <linux/spinlock.h>
++#include <asm/semaphore.h>
++#include <linux/completion.h>
++#include <linux/buffer_head.h>
++#include <linux/proc_fs.h>
++#include <linux/module.h>
++#include <asm/uaccess.h>
++
++#include "gfs.h"
++#include "proc.h"
++#include "super.h"
++
++struct list_head gfs_fs_list;
++struct semaphore gfs_fs_lock;
++char *gfs_proc_margs;
++spinlock_t gfs_proc_margs_lock;
++spinlock_t req_lock;
++
++/**
++ * gfs_proc_fs_add - Add a FS to the list of mounted FSs
++ * @sdp:
++ *
++ */
++
++void
++gfs_proc_fs_add(struct gfs_sbd *sdp)
++{
++ down(&gfs_fs_lock);
++ list_add(&sdp->sd_list, &gfs_fs_list);
++ up(&gfs_fs_lock);
++}
++
++/**
++ * gfs_proc_fs_del - Remove a FS from the list of mounted FSs
++ * @sdp:
++ *
++ */
++
++void
++gfs_proc_fs_del(struct gfs_sbd *sdp)
++{
++ down(&gfs_fs_lock);
++ list_del(&sdp->sd_list);
++ up(&gfs_fs_lock);
++}
++
++/**
++ * do_list - Copy the list of mounted FSs to userspace
++ * @user_buf:
++ * @size:
++ *
++ * Returns: -errno, or the number of bytes copied to userspace
++ */
++
++static ssize_t
++do_list(char *user_buf, size_t size)
++{
++ struct list_head *tmp;
++ struct gfs_sbd *sdp;
++ unsigned int s = 0, o = 0;
++ char num[21];
++ char *buf;
++ int error = 0;
++
++ down(&gfs_fs_lock);
++
++ for (tmp = gfs_fs_list.next; tmp != &gfs_fs_list; tmp = tmp->next) {
++ sdp = list_entry(tmp, struct gfs_sbd, sd_list);
++ s += sprintf(num, "%lu", (unsigned long)sdp) +
++ strlen(sdp->sd_vfs->s_id) +
++ strlen(sdp->sd_fsname) + 3;
++ }
++
++ if (!s)
++ goto out;
++
++ error = -EFBIG;
++ if (s > size)
++ goto out;
++
++ error = -ENOMEM;
++ buf = kmalloc(s + 1, GFP_KERNEL);
++ if (!buf)
++ goto out;
++
++ for (tmp = gfs_fs_list.next; tmp != &gfs_fs_list; tmp = tmp->next) {
++ sdp = list_entry(tmp, struct gfs_sbd, sd_list);
++ o += sprintf(buf + o, "%lu %s %s\n",
++ (unsigned long)sdp, sdp->sd_vfs->s_id, sdp->sd_fsname);
++ }
++
++ GFS_ASSERT(o <= s,);
++
++ if (copy_to_user(user_buf, buf, o))
++ error = -EFAULT;
++ else
++ error = o;
++
++ kfree(buf);
++
++ out:
++ up(&gfs_fs_lock);
++
++ return error;
++}
++
++/**
++ * find_argument -
++ * @p:
++ *
++ * Returns:
++ */
++
++static char *
++find_argument(char *p)
++{
++ char *p2;
++
++ while (*p == ' ' || *p == '\n')
++ p++;
++ if (!*p)
++ return NULL;
++ for (p2 = p; *p2; p2++) /* do nothing */;
++ p2--;
++ while (*p2 == ' ' || *p2 == '\n')
++ *p2-- = 0;
++
++ return p;
++}
++
++/**
++ * do_freeze - freeze a filesystem
++ * @p: the freeze command
++ *
++ * Returns: errno
++ */
++
++static int
++do_freeze(char *p)
++{
++ struct list_head *tmp;
++ struct gfs_sbd *sdp;
++ char num[21];
++ int error = 0;
++
++ p = find_argument(p + 6);
++ if (!p)
++ return -ENOENT;
++
++ down(&gfs_fs_lock);
++
++ for (tmp = gfs_fs_list.next; tmp != &gfs_fs_list; tmp = tmp->next) {
++ sdp = list_entry(tmp, struct gfs_sbd, sd_list);
++ sprintf(num, "%lu", (unsigned long)sdp);
++ if (strcmp(num, p) == 0)
++ break;
++ }
++
++ if (tmp == &gfs_fs_list)
++ error = -ENOENT;
++ else
++ error = gfs_freeze_fs(sdp);
++
++ up(&gfs_fs_lock);
++
++ return error;
++}
++
++/**
++ * do_unfreeze - unfreeze a filesystem
++ * @p: the unfreeze command
++ *
++ * Returns: errno
++ */
++
++static int
++do_unfreeze(char *p)
++{
++ struct list_head *tmp;
++ struct gfs_sbd *sdp;
++ char num[21];
++ int error = 0;
++
++ p = find_argument(p + 8);
++ if (!p)
++ return -ENOENT;
++
++ down(&gfs_fs_lock);
++
++ for (tmp = gfs_fs_list.next; tmp != &gfs_fs_list; tmp = tmp->next) {
++ sdp = list_entry(tmp, struct gfs_sbd, sd_list);
++ sprintf(num, "%lu", (unsigned long)sdp);
++ if (strcmp(num, p) == 0)
++ break;
++ }
++
++ if (tmp == &gfs_fs_list)
++ error = -ENOENT;
++ else
++ gfs_unfreeze_fs(sdp);
++
++ up(&gfs_fs_lock);
++
++ return error;
++}
++
++/**
++ * do_margs - Pass in mount arguments
++ * @p: the margs command
++ *
++ * Returns: errno
++ */
++
++static int
++do_margs(char *p)
++{
++ char *new_buf, *old_buf;
++
++ p = find_argument(p + 5);
++ if (!p)
++ return -ENOENT;
++
++ new_buf = kmalloc(strlen(p) + 1, GFP_KERNEL);
++ if (!new_buf)
++ return -ENOMEM;
++ strcpy(new_buf, p);
++
++ spin_lock(&gfs_proc_margs_lock);
++ old_buf = gfs_proc_margs;
++ gfs_proc_margs = new_buf;
++ spin_unlock(&gfs_proc_margs_lock);
++
++ if (old_buf)
++ kfree(old_buf);
++
++ return 0;
++}
++
++/**
++ * gfs_proc_write - take a command from userspace
++ * @file:
++ * @buf:
++ * @size:
++ * @offset:
++ *
++ * Returns: -errno or the number of bytes taken
++ */
++
++static ssize_t
++gfs_proc_write(struct file *file, const char *buf, size_t size, loff_t *offset)
++{
++ char *p;
++
++ spin_lock(&req_lock);
++ p = file->private_data;
++ file->private_data = NULL;
++ spin_unlock(&req_lock);
++
++ if (p)
++ kfree(p);
++
++ if (!size)
++ return -EINVAL;
++
++ p = kmalloc(size + 1, GFP_KERNEL);
++ if (!p)
++ return -ENOMEM;
++ p[size] = 0;
++
++ if (copy_from_user(p, buf, size)) {
++ kfree(p);
++ return -EFAULT;
++ }
++
++ spin_lock(&req_lock);
++ file->private_data = p;
++ spin_unlock(&req_lock);
++
++ return size;
++}
++
++/**
++ * gfs_proc_read - return the results of a command
++ * @file:
++ * @buf:
++ * @size:
++ * @offset:
++ *
++ * Returns: -errno or the number of bytes returned
++ */
++
++static ssize_t
++gfs_proc_read(struct file *file, char *buf, size_t size, loff_t *offset)
++{
++ char *p;
++ int error;
++
++ spin_lock(&req_lock);
++ p = file->private_data;
++ file->private_data = NULL;
++ spin_unlock(&req_lock);
++
++ if (!p)
++ return -ENOENT;
++
++ if (!size) {
++ kfree(p);
++ return -EINVAL;
++ }
++
++ if (strncmp(p, "list", 4) == 0)
++ error = do_list(buf, size);
++ else if (strncmp(p, "freeze", 6) == 0)
++ error = do_freeze(p);
++ else if (strncmp(p, "unfreeze", 8) == 0)
++ error = do_unfreeze(p);
++ else if (strncmp(p, "margs", 5) == 0)
++ error = do_margs(p);
++ else
++ error = -ENOSYS;
++
++ kfree(p);
++
++ return error;
++}
++
++/**
++ * gfs_proc_close - free any mismatched writes
++ * @inode:
++ * @file:
++ *
++ * Returns: 0
++ */
++
++static int
++gfs_proc_close(struct inode *inode, struct file *file)
++{
++ if (file->private_data)
++ kfree(file->private_data);
++ return 0;
++}
++
++static struct file_operations gfs_proc_fops =
++{
++ .owner = THIS_MODULE,
++ .write = gfs_proc_write,
++ .read = gfs_proc_read,
++ .release = gfs_proc_close,
++};
++
++/**
++ * gfs_proc_init - initialize GFS' proc interface
++ *
++ */
++
++void
++gfs_proc_init(void)
++{
++ struct proc_dir_entry *pde;
++
++ INIT_LIST_HEAD(&gfs_fs_list);
++ init_MUTEX(&gfs_fs_lock);
++ gfs_proc_margs = NULL;
++ spin_lock_init(&gfs_proc_margs_lock);
++ spin_lock_init(&req_lock);
++
++ pde = create_proc_entry("fs/gfs", S_IFREG | 0200, NULL);
++ if (pde) {
++ pde->owner = THIS_MODULE;
++ pde->proc_fops = &gfs_proc_fops;
++ }
++}
++
++/**
++ * gfs_proc_uninit - uninitialize GFS' proc interface
++ *
++ */
++
++void
++gfs_proc_uninit(void)
++{
++ if (gfs_proc_margs)
++ kfree(gfs_proc_margs);
++ remove_proc_entry("fs/gfs", NULL);
++}
++
+diff -urN linux-orig/fs/gfs/proc.h linux-patched/fs/gfs/proc.h
+--- linux-orig/fs/gfs/proc.h 1969-12-31 18:00:00.000000000 -0600
++++ linux-patched/fs/gfs/proc.h 2004-10-27 15:27:10.885629601 -0500
+@@ -0,0 +1,27 @@
++/******************************************************************************
++*******************************************************************************
++**
++** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
++** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
++**
++** This copyrighted material is made available to anyone wishing to use,
++** modify, copy, or redistribute it subject to the terms and conditions
++** of the GNU General Public License v.2.
++**
++*******************************************************************************
++******************************************************************************/
++
++#ifndef __PROC_DOT_H__
++#define __PROC_DOT_H__
++
++/* Allow args to be passed to GFS when using an initial ram disk */
++extern char *gfs_proc_margs;
++extern spinlock_t gfs_proc_margs_lock;
++
++void gfs_proc_fs_add(struct gfs_sbd *sdp);
++void gfs_proc_fs_del(struct gfs_sbd *sdp);
++
++void gfs_proc_init(void);
++void gfs_proc_uninit(void);
++
++#endif /* __PROC_DOT_H__ */
+diff -urN linux-orig/fs/gfs/quota.c linux-patched/fs/gfs/quota.c
+--- linux-orig/fs/gfs/quota.c 1969-12-31 18:00:00.000000000 -0600
++++ linux-patched/fs/gfs/quota.c 2004-10-27 15:27:10.885629601 -0500
@@ -0,0 +1,1146 @@
+/******************************************************************************
+*******************************************************************************
+ if (current->signal) {
+ tty = current->signal->tty;
+ if (tty && tty->driver->write)
-+ tty->driver->write(tty, line, len);
++ tty->driver->write(tty, 0, line, len);
+ }
+
+ kfree(line);
+
+ return error;
+}
-diff -urN linux-2.6.9-rc1-mm3/fs/gfs/quota.h linux/fs/gfs/quota.h
---- linux-2.6.9-rc1-mm3/fs/gfs/quota.h 1969-12-31 18:00:00.000000000 -0600
-+++ linux/fs/gfs/quota.h 2004-09-07 16:26:15.792541344 -0500
+diff -urN linux-orig/fs/gfs/quota.h linux-patched/fs/gfs/quota.h
+--- linux-orig/fs/gfs/quota.h 1969-12-31 18:00:00.000000000 -0600
++++ linux-patched/fs/gfs/quota.h 2004-10-27 15:27:10.885629601 -0500
@@ -0,0 +1,40 @@
+/******************************************************************************
+*******************************************************************************
+int gfs_quota_read(struct gfs_sbd *sdp, void *arg);
+
+#endif /* __QUOTA_DOT_H__ */
-diff -urN linux-2.6.9-rc1-mm3/fs/gfs/recovery.c linux/fs/gfs/recovery.c
---- linux-2.6.9-rc1-mm3/fs/gfs/recovery.c 1969-12-31 18:00:00.000000000 -0600
-+++ linux/fs/gfs/recovery.c 2004-09-07 16:26:15.793541123 -0500
+diff -urN linux-orig/fs/gfs/recovery.c linux-patched/fs/gfs/recovery.c
+--- linux-orig/fs/gfs/recovery.c 1969-12-31 18:00:00.000000000 -0600
++++ linux-patched/fs/gfs/recovery.c 2004-10-27 15:27:10.886629369 -0500
@@ -0,0 +1,749 @@
+/******************************************************************************
+*******************************************************************************
+
+ return error;
+}
-diff -urN linux-2.6.9-rc1-mm3/fs/gfs/recovery.h linux/fs/gfs/recovery.h
---- linux-2.6.9-rc1-mm3/fs/gfs/recovery.h 1969-12-31 18:00:00.000000000 -0600
-+++ linux/fs/gfs/recovery.h 2004-09-07 16:26:15.793541123 -0500
+diff -urN linux-orig/fs/gfs/recovery.h linux-patched/fs/gfs/recovery.h
+--- linux-orig/fs/gfs/recovery.h 1969-12-31 18:00:00.000000000 -0600
++++ linux-patched/fs/gfs/recovery.h 2004-10-27 15:27:10.886629369 -0500
@@ -0,0 +1,36 @@
+/******************************************************************************
+*******************************************************************************
+int gfs_recover_dump(struct gfs_sbd *sdp);
+
+#endif /* __RECOVERY_DOT_H__ */
-diff -urN linux-2.6.9-rc1-mm3/fs/gfs/rgrp.c linux/fs/gfs/rgrp.c
---- linux-2.6.9-rc1-mm3/fs/gfs/rgrp.c 1969-12-31 18:00:00.000000000 -0600
-+++ linux/fs/gfs/rgrp.c 2004-09-07 16:26:15.795540682 -0500
-@@ -0,0 +1,1932 @@
+diff -urN linux-orig/fs/gfs/rgrp.c linux-patched/fs/gfs/rgrp.c
+--- linux-orig/fs/gfs/rgrp.c 1969-12-31 18:00:00.000000000 -0600
++++ linux-patched/fs/gfs/rgrp.c 2004-10-27 15:27:10.886629369 -0500
+@@ -0,0 +1,1947 @@
+/******************************************************************************
+*******************************************************************************
+**
+ for (x = 0; x < num; x++) {
+ gfs_meta_check(sdp, bh[x]);
+
-+ RETRY_MALLOC(mc = kmem_cache_alloc(gfs_mhc_cachep, GFP_KERNEL), mc);
++ mc = kmem_cache_alloc(gfs_mhc_cachep, GFP_KERNEL);
++ if (!mc)
++ return;
+ memset(mc, 0, sizeof(struct gfs_meta_header_cache));
+
+ mc->mc_block = bh[x]->b_blocknr;
+
+ memset(count, 0, 4 * sizeof(uint32_t));
+
++ /* Count # blocks in each of 4 possible allocation states */
+ for (buf = 0; buf < length; buf++) {
+ bits = &rgd->rd_bits[buf];
+ for (x = 0; x < 4; x++)
+ * gfs_compute_bitstructs - Compute the bitmap sizes
+ * @rgd: The resource group descriptor
+ *
++ * Calculates bitmap descriptors, one for each block that contains bitmap data
+ */
+
+static void
+{
+ struct gfs_sbd *sdp = rgd->rd_sbd;
+ struct gfs_bitmap *bits;
-+ uint32_t length = rgd->rd_ri.ri_length;
++ uint32_t length = rgd->rd_ri.ri_length; /* # blocks in hdr & bitmap */
+ uint32_t bytes_left, bytes;
+ int x;
+
+ for (x = 0; x < length; x++) {
+ bits = &rgd->rd_bits[x];
+
++ /* small rgrp; bitmap stored completely in header block */
+ if (length == 1) {
+ bytes = bytes_left;
+ bits->bi_offset = sizeof(struct gfs_rgrp);
+ bits->bi_start = 0;
+ bits->bi_len = bytes;
++ /* header block */
+ } else if (x == 0) {
+ bytes = sdp->sd_sb.sb_bsize - sizeof(struct gfs_rgrp);
+ bits->bi_offset = sizeof(struct gfs_rgrp);
+ bits->bi_start = 0;
+ bits->bi_len = bytes;
++ /* last block */
+ } else if (x + 1 == length) {
+ bytes = bytes_left;
+ bits->bi_offset = sizeof(struct gfs_meta_header);
+ bits->bi_start = rgd->rd_ri.ri_bitbytes - bytes_left;
+ bits->bi_len = bytes;
++ /* other blocks */
+ } else {
+ bytes = sdp->sd_sb.sb_bsize - sizeof(struct gfs_meta_header);
+ bits->bi_offset = sizeof(struct gfs_meta_header);
+ * @rgd: the RG data
+ * @al: the struct gfs_alloc structure describing the reservation
+ *
-+ * Sets the $ir_datares field in @res.
-+ * Sets the $ir_metares field in @res.
++ * If there's room for the requested blocks to be allocated from the RG:
++ * Sets the $al_reserved_data field in @al.
++ * Sets the $al_reserved_meta field in @al.
++ * Sets the $al_rgd field in @al.
+ *
-+ * Returns: 1 on success, 0 on failure
++ * Returns: 1 on success (it fits), 0 on failure (it doesn't fit)
+ */
+
+static int
+}
+
+/**
-+ * recent_rgrp_first - get first RG from recent list
++ * recent_rgrp_first - get first RG from "recent" list
+ * @sdp: The GFS superblock
+ * @rglast: address of the rgrp used last
+ *
+}
+
+/**
-+ * recent_rgrp_next - get next RG from recent list
++ * recent_rgrp_next - get next RG from "recent" list
+ * @cur_rgd: current rgrp
+ *
+ * Returns: The next rgrp in the recent list
+}
+
+/**
-+ * recent_rgrp_remove - remove an RG from recent list
++ * recent_rgrp_remove - remove an RG from "recent" list
+ * @rgd: The rgrp to remove
+ *
+ */
+}
+
+/**
-+ * recent_rgrp_add - add an RG to recent list
++ * recent_rgrp_add - add an RG to tail of "recent" list
+ * @new_rgd: The rgrp to add
+ *
++ * Before adding, make sure that:
++ * 1) it's not already on the list
++ * 2) there's still room for more entries
++ * The capacity limit imposed on the "recent" list is basically a node's "share"
++ * of rgrps within a cluster, i.e. (total # rgrps) / (# nodes (journals))
+ */
+
+static void
+ fail:
+ return error;
+}
-diff -urN linux-2.6.9-rc1-mm3/fs/gfs/rgrp.h linux/fs/gfs/rgrp.h
---- linux-2.6.9-rc1-mm3/fs/gfs/rgrp.h 1969-12-31 18:00:00.000000000 -0600
-+++ linux/fs/gfs/rgrp.h 2004-09-07 16:26:15.796540461 -0500
+diff -urN linux-orig/fs/gfs/rgrp.h linux-patched/fs/gfs/rgrp.h
+--- linux-orig/fs/gfs/rgrp.h 1969-12-31 18:00:00.000000000 -0600
++++ linux-patched/fs/gfs/rgrp.h 2004-10-27 15:27:10.886629369 -0500
@@ -0,0 +1,75 @@
+/******************************************************************************
+*******************************************************************************
+int gfs_reclaim_metadata(struct gfs_sbd *sdp, struct gfs_reclaim_stats *stats);
+
+#endif /* __RGRP_DOT_H__ */
-diff -urN linux-2.6.9-rc1-mm3/fs/gfs/super.c linux/fs/gfs/super.c
---- linux-2.6.9-rc1-mm3/fs/gfs/super.c 1969-12-31 18:00:00.000000000 -0600
-+++ linux/fs/gfs/super.c 2004-09-07 16:26:15.797540241 -0500
-@@ -0,0 +1,1035 @@
+diff -urN linux-orig/fs/gfs/super.c linux-patched/fs/gfs/super.c
+--- linux-orig/fs/gfs/super.c 1969-12-31 18:00:00.000000000 -0600
++++ linux-patched/fs/gfs/super.c 2004-10-27 15:27:10.886629369 -0500
+@@ -0,0 +1,1039 @@
+/******************************************************************************
+*******************************************************************************
+**
+ gt->gt_prefetch_secs = 10;
+ gt->gt_statfs_slots = 64;
+ gt->gt_max_mhc = 10000;
++ gt->gt_greedy_default = HZ / 10;
++ gt->gt_greedy_quantum = HZ / 40;
++ gt->gt_greedy_max = HZ / 4;
+}
+
+/**
+ struct buffer_head *bh;
+ uint32_t hash_blocks, ind_blocks, leaf_blocks;
+ uint32_t tmp_blocks;
-+ uint64_t space = 0;
+ unsigned int x;
+ int error;
+
+ sdp->sd_heightsize[0] = sdp->sd_sb.sb_bsize - sizeof(struct gfs_dinode);
+ sdp->sd_heightsize[1] = sdp->sd_sb.sb_bsize * sdp->sd_diptrs;
+ for (x = 2;; x++) {
-+ uint64_t d;
++ uint64_t space, d;
+ uint32_t m;
++
+ space = sdp->sd_heightsize[x - 1] * sdp->sd_inptrs;
+ d = space;
+ m = do_div(d, sdp->sd_inptrs);
+ sdp->sd_jheightsize[0] = sdp->sd_sb.sb_bsize - sizeof(struct gfs_dinode);
+ sdp->sd_jheightsize[1] = sdp->sd_jbsize * sdp->sd_diptrs;
+ for (x = 2;; x++) {
-+ uint64_t d;
++ uint64_t space, d;
+ uint32_t m;
++
+ space = sdp->sd_jheightsize[x - 1] * sdp->sd_inptrs;
+ d = space;
+ m = do_div(d, sdp->sd_inptrs);
+ * gfs_make_fs_rw - Turn a RO FS into a RW one
+ * @sdp: the filesystem
+ *
-+ * Returns: 0 on success, -EXXX on failure
++ * Returns: errno
+ */
+
+int
+ * gfs_make_fs_ro - Turn a RW FS into a RO one
+ * @sdp: the filesystem
+ *
-+ * Returns: 0 on success, -EXXX on failure
++ * Returns: errno
+ */
+
+int
+ *
+ * This really shouldn't busy wait like this.
+ *
-+ * Returns: 0 on success, -EXXX on failure
++ * Returns: errno
+ */
+
+static int
+ * @usage: the usage structure
+ * @interruptible: Stop if there is a signal pending
+ *
-+ * Returns: 0 on success, -EXXX on failure
++ * Returns: errno
+ */
+
+int
+ * @state: the state to put the transaction lock into
+ * @t_gh: the hold on the transaction lock
+ *
-+ * Returns: 0 on success, -EXXX on error
++ * Returns: errno
+ */
+
+int
+ * aquiring the transaction log exclusively. All journals are
+ * ensured to be in a clean state as well.
+ *
-+ * Returns: 0 on success, -EXXX on error
++ * Returns: errno
+ */
+
+int
+
+ up(&sdp->sd_freeze_lock);
+}
-diff -urN linux-2.6.9-rc1-mm3/fs/gfs/super.h linux/fs/gfs/super.h
---- linux-2.6.9-rc1-mm3/fs/gfs/super.h 1969-12-31 18:00:00.000000000 -0600
-+++ linux/fs/gfs/super.h 2004-09-07 16:26:15.797540241 -0500
+diff -urN linux-orig/fs/gfs/super.h linux-patched/fs/gfs/super.h
+--- linux-orig/fs/gfs/super.h 1969-12-31 18:00:00.000000000 -0600
++++ linux-patched/fs/gfs/super.h 2004-10-27 15:27:10.886629369 -0500
@@ -0,0 +1,53 @@
+/******************************************************************************
+*******************************************************************************
+void gfs_unfreeze_fs(struct gfs_sbd *sdp);
+
+#endif /* __SUPER_DOT_H__ */
-diff -urN linux-2.6.9-rc1-mm3/fs/gfs/trans.c linux/fs/gfs/trans.c
---- linux-2.6.9-rc1-mm3/fs/gfs/trans.c 1969-12-31 18:00:00.000000000 -0600
-+++ linux/fs/gfs/trans.c 2004-09-07 16:26:15.798540020 -0500
+diff -urN linux-orig/fs/gfs/trans.c linux-patched/fs/gfs/trans.c
+--- linux-orig/fs/gfs/trans.c 1969-12-31 18:00:00.000000000 -0600
++++ linux-patched/fs/gfs/trans.c 2004-10-27 15:27:10.886629369 -0500
@@ -0,0 +1,410 @@
+/******************************************************************************
+*******************************************************************************
+ tr->tr_num_q++;
+ }
+}
-diff -urN linux-2.6.9-rc1-mm3/fs/gfs/trans.h linux/fs/gfs/trans.h
---- linux-2.6.9-rc1-mm3/fs/gfs/trans.h 1969-12-31 18:00:00.000000000 -0600
-+++ linux/fs/gfs/trans.h 2004-09-07 16:26:15.798540020 -0500
+diff -urN linux-orig/fs/gfs/trans.h linux-patched/fs/gfs/trans.h
+--- linux-orig/fs/gfs/trans.h 1969-12-31 18:00:00.000000000 -0600
++++ linux-patched/fs/gfs/trans.h 2004-10-27 15:27:10.887629137 -0500
@@ -0,0 +1,37 @@
+/******************************************************************************
+*******************************************************************************
+ uint32_t gid);
+
+#endif /* __TRANS_DOT_H__ */
-diff -urN linux-2.6.9-rc1-mm3/fs/gfs/unlinked.c linux/fs/gfs/unlinked.c
---- linux-2.6.9-rc1-mm3/fs/gfs/unlinked.c 1969-12-31 18:00:00.000000000 -0600
-+++ linux/fs/gfs/unlinked.c 2004-09-07 16:26:15.799539800 -0500
+diff -urN linux-orig/fs/gfs/unlinked.c linux-patched/fs/gfs/unlinked.c
+--- linux-orig/fs/gfs/unlinked.c 1969-12-31 18:00:00.000000000 -0600
++++ linux-patched/fs/gfs/unlinked.c 2004-10-27 15:27:10.887629137 -0500
@@ -0,0 +1,427 @@
+/******************************************************************************
+*******************************************************************************
+ printk("GFS: fsid=%s: error deallocating inodes: %d\n",
+ sdp->sd_fsname, error);
+}
-diff -urN linux-2.6.9-rc1-mm3/fs/gfs/unlinked.h linux/fs/gfs/unlinked.h
---- linux-2.6.9-rc1-mm3/fs/gfs/unlinked.h 1969-12-31 18:00:00.000000000 -0600
-+++ linux/fs/gfs/unlinked.h 2004-09-07 16:26:15.799539800 -0500
+diff -urN linux-orig/fs/gfs/unlinked.h linux-patched/fs/gfs/unlinked.h
+--- linux-orig/fs/gfs/unlinked.h 1969-12-31 18:00:00.000000000 -0600
++++ linux-patched/fs/gfs/unlinked.h 2004-10-27 15:27:10.887629137 -0500
@@ -0,0 +1,32 @@
+/******************************************************************************
+*******************************************************************************
+void gfs_unlinked_dealloc(struct gfs_sbd *sdp);
+
+#endif /* __UNLINKED_DOT_H__ */
-diff -urN linux-2.6.9-rc1-mm3/fs/gfs/util.c linux/fs/gfs/util.c
---- linux-2.6.9-rc1-mm3/fs/gfs/util.c 1969-12-31 18:00:00.000000000 -0600
-+++ linux/fs/gfs/util.c 2004-09-07 16:26:15.800539579 -0500
-@@ -0,0 +1,324 @@
+diff -urN linux-orig/fs/gfs/util.c linux-patched/fs/gfs/util.c
+--- linux-orig/fs/gfs/util.c 1969-12-31 18:00:00.000000000 -0600
++++ linux-patched/fs/gfs/util.c 2004-10-27 15:27:10.887629137 -0500
+@@ -0,0 +1,326 @@
+/******************************************************************************
+*******************************************************************************
+**
+{
+ gfs_in_panic = TRUE;
+
-+ printk("\nGFS: Assertion failed on line %d of file %s\n"
-+ "GFS: assertion: \"%s\"\n"
-+ "GFS: time = %lu\n",
-+ line, file, assertion, get_seconds());
-+
+ switch (type) {
-+ case GFS_ASSERT_TYPE_SBD:
-+ {
++ case GFS_ASSERT_TYPE_SBD: {
+ struct gfs_sbd *sdp = (struct gfs_sbd *)ptr;
-+ printk("GFS: fsid=%s\n", sdp->sd_fsname);
++ panic("GFS: Assertion failed on line %d of file %s\n"
++ "GFS: assertion: \"%s\"\n"
++ "GFS: time = %lu\n"
++ "GFS: fsid=%s\n",
++ line, file, assertion, get_seconds(),
++ sdp->sd_fsname);
+ }
-+ break;
+
-+ case GFS_ASSERT_TYPE_GLOCK:
-+ {
++ case GFS_ASSERT_TYPE_GLOCK: {
+ struct gfs_glock *gl = (struct gfs_glock *)ptr;
+ struct gfs_sbd *sdp = gl->gl_sbd;
-+ printk("GFS: fsid=%s: glock = (%u, %"PRIu64")\n",
-+ sdp->sd_fsname,
-+ gl->gl_name.ln_type,
-+ gl->gl_name.ln_number);
++ panic("GFS: Assertion failed on line %d of file %s\n"
++ "GFS: assertion: \"%s\"\n"
++ "GFS: time = %lu\n"
++ "GFS: fsid=%s: glock = (%u, %"PRIu64")\n",
++ line, file, assertion, get_seconds(),
++ sdp->sd_fsname,
++ gl->gl_name.ln_type,
++ gl->gl_name.ln_number);
+ }
-+ break;
+
-+ case GFS_ASSERT_TYPE_INODE:
-+ {
++ case GFS_ASSERT_TYPE_INODE: {
+ struct gfs_inode *ip = (struct gfs_inode *)ptr;
+ struct gfs_sbd *sdp = ip->i_sbd;
-+ printk("GFS: fsid=%s: inode = %"PRIu64"/%"PRIu64"\n",
-+ sdp->sd_fsname,
-+ ip->i_num.no_formal_ino, ip->i_num.no_addr);
++ panic("GFS: Assertion failed on line %d of file %s\n"
++ "GFS: assertion: \"%s\"\n"
++ "GFS: time = %lu\n"
++ "GFS: fsid=%s: inode = %"PRIu64"/%"PRIu64"\n",
++ line, file, assertion, get_seconds(),
++ sdp->sd_fsname,
++ ip->i_num.no_formal_ino, ip->i_num.no_addr);
+ }
-+ break;
+
-+ case GFS_ASSERT_TYPE_RGRPD:
-+ {
++ case GFS_ASSERT_TYPE_RGRPD: {
+ struct gfs_rgrpd *rgd = (struct gfs_rgrpd *)ptr;
+ struct gfs_sbd *sdp = rgd->rd_sbd;
-+ printk("GFS: fsid=%s: rgroup = %"PRIu64"\n",
-+ sdp->sd_fsname, rgd->rd_ri.ri_addr);
-+ }
-+ break;
++ panic("GFS: Assertion failed on line %d of file %s\n"
++ "GFS: assertion: \"%s\"\n"
++ "GFS: time = %lu\n"
++ "GFS: fsid=%s: RG = %"PRIu64"\n",
++ line, file, assertion, get_seconds(),
++ sdp->sd_fsname,
++ rgd->rd_ri.ri_addr);
+ }
+
-+ printk("\n");
-+#if 0
-+ printk("GFS: Record message above and reboot.\n");
-+ BUG();
-+#endif
-+ panic("GFS: Record message above and reboot.\n");
++ default:
++ panic("GFS: Assertion failed on line %d of file %s\n"
++ "GFS: assertion: \"%s\"\n"
++ "GFS: time = %lu\n",
++ line, file, assertion, get_seconds());
++ }
+}
+
+/**
+ char *file, unsigned int line)
+{
+ switch (type) {
-+ case GFS_IO_ERROR_TYPE_BH:
-+ {
++ case GFS_IO_ERROR_TYPE_BH: {
+ struct buffer_head *bh = (struct buffer_head *)ptr;
+ printk("GFS: fsid=%s: I/O error on block %"PRIu64"\n",
+ sdp->sd_fsname, (uint64_t)bh->b_blocknr);
++ break;
+ }
-+ break;
+
-+ case GFS_IO_ERROR_TYPE_INODE:
-+ {
++ case GFS_IO_ERROR_TYPE_INODE: {
+ struct gfs_inode *ip = (struct gfs_inode *)ptr;
+ printk("GFS: fsid=%s: I/O error in inode %"PRIu64"/%"PRIu64"\n",
+ sdp->sd_fsname,
+ ip->i_num.no_formal_ino, ip->i_num.no_addr);
++ break;
+ }
-+ break;
+
+ default:
-+ printk("GFS: fsid=%s: I/O error\n", sdp->sd_fsname);
-+ break;
++ printk("GFS: fsid=%s: I/O error\n", sdp->sd_fsname);
++ break;
+ }
+
+ GFS_ASSERT_SBD(FALSE, sdp,);
+ return p;
+}
+
-diff -urN linux-2.6.9-rc1-mm3/fs/gfs/util.h linux/fs/gfs/util.h
---- linux-2.6.9-rc1-mm3/fs/gfs/util.h 1969-12-31 18:00:00.000000000 -0600
-+++ linux/fs/gfs/util.h 2004-09-07 16:26:15.801539359 -0500
+diff -urN linux-orig/fs/gfs/util.h linux-patched/fs/gfs/util.h
+--- linux-orig/fs/gfs/util.h 1969-12-31 18:00:00.000000000 -0600
++++ linux-patched/fs/gfs/util.h 2004-10-27 15:27:10.887629137 -0500
@@ -0,0 +1,156 @@
+/******************************************************************************
+*******************************************************************************
+#define GFS_ASSERT_TYPE_INODE (21)
+#define GFS_ASSERT_TYPE_RGRPD (22)
+
-+#define GFS_ASSERT(x, todo) \
++#define GFS_ASSERT(assertion, todo) \
+do \
+{ \
-+ if (!(x)) \
++ if (!(assertion)) \
+ { \
+ {todo} \
-+ gfs_assert_i(#x, GFS_ASSERT_TYPE_NONE, NULL, __FILE__, __LINE__); \
++ gfs_assert_i(#assertion, GFS_ASSERT_TYPE_NONE, NULL, __FILE__, __LINE__); \
+ } \
+} \
+while (0)
+
-+#define GFS_ASSERT_SBD(x, sdp, todo) \
++#define GFS_ASSERT_SBD(assertion, sdp, todo) \
+do \
+{ \
-+ if (!(x)) \
++ if (!(assertion)) \
+ { \
+ struct gfs_sbd *gfs_assert_sbd = (sdp); \
+ {todo} \
-+ gfs_assert_i(#x, GFS_ASSERT_TYPE_SBD, gfs_assert_sbd, __FILE__, __LINE__); \
++ gfs_assert_i(#assertion, GFS_ASSERT_TYPE_SBD, gfs_assert_sbd, __FILE__, __LINE__); \
+ } \
+} \
+while (0)
+
-+#define GFS_ASSERT_GLOCK(x, gl, todo) \
++#define GFS_ASSERT_GLOCK(assertion, gl, todo) \
+do \
+{ \
-+ if (!(x)) \
++ if (!(assertion)) \
+ { \
+ struct gfs_glock *gfs_assert_glock = (gl); \
+ {todo} \
-+ gfs_assert_i(#x, GFS_ASSERT_TYPE_GLOCK, gfs_assert_glock, __FILE__, __LINE__); \
++ gfs_assert_i(#assertion, GFS_ASSERT_TYPE_GLOCK, gfs_assert_glock, __FILE__, __LINE__); \
+ } \
+} \
+while (0)
+
-+#define GFS_ASSERT_INODE(x, ip, todo) \
++#define GFS_ASSERT_INODE(assertion, ip, todo) \
+do \
+{ \
-+ if (!(x)) \
++ if (!(assertion)) \
+ { \
+ struct gfs_inode *gfs_assert_inode = (ip); \
+ {todo} \
-+ gfs_assert_i(#x, GFS_ASSERT_TYPE_INODE, gfs_assert_inode, __FILE__, __LINE__); \
++ gfs_assert_i(#assertion, GFS_ASSERT_TYPE_INODE, gfs_assert_inode, __FILE__, __LINE__); \
+ } \
+} \
+while (0)
+
-+#define GFS_ASSERT_RGRPD(x, rgd, todo) \
++#define GFS_ASSERT_RGRPD(assertion, rgd, todo) \
+do \
+{ \
-+ if (!(x)) \
++ if (!(assertion)) \
+ { \
+ struct gfs_rgrpd *gfs_assert_rgrpd = (rgd); \
+ {todo} \
-+ gfs_assert_i(#x, GFS_ASSERT_TYPE_RGRPD, gfs_assert_rgrpd, __FILE__, __LINE__); \
++ gfs_assert_i(#assertion, GFS_ASSERT_TYPE_RGRPD, gfs_assert_rgrpd, __FILE__, __LINE__); \
+ } \
+} \
+while (0)
+
+
+#endif /* __UTIL_DOT_H__ */
-diff -urN linux-2.6.9-rc1-mm3/include/linux/gfs_ioctl.h linux/include/linux/gfs_ioctl.h
---- linux-2.6.9-rc1-mm3/include/linux/gfs_ioctl.h 1969-12-31 18:00:00.000000000 -0600
-+++ linux/include/linux/gfs_ioctl.h 2004-09-07 16:26:15.801539359 -0500
-@@ -0,0 +1,219 @@
+diff -urN linux-orig/include/linux/gfs_ioctl.h linux-patched/include/linux/gfs_ioctl.h
+--- linux-orig/include/linux/gfs_ioctl.h 1969-12-31 18:00:00.000000000 -0600
++++ linux-patched/include/linux/gfs_ioctl.h 2004-10-27 15:27:10.880630761 -0500
+@@ -0,0 +1,183 @@
+/******************************************************************************
+*******************************************************************************
+**
+ Ioctls implemented
+
+ Reserved Ioctls: 3, 7, 8, 9, 10, 4, 13
-+ Next Ioctl: 44
++ Next Ioctl: 45
+ */
+
+#define GFS_STACK_PRINT _GFSC_(40)
+
+#define GFS_STATGFS _GFSC_(12)
+
-+#define GFS_FREEZE _GFSC_(14)
-+#define GFS_UNFREEZE _GFSC_(15)
-+
+#define GFS_RECLAIM_METADATA _GFSC_(16)
+
+#define GFS_QUOTA_SYNC _GFSC_(17)
+#define GFS_GET_TUNE _GFSC_(21)
+#define GFS_SET_TUNE _GFSC_(22)
+
-+#define GFS_EATTR_GET _GFSC_(26)
-+#define GFS_EATTR_SET _GFSC_(27)
-+
+#define GFS_WHERE_ARE_YOU _GFSC_(35)
++#define GFS_COOKIE _GFSC_(44)
+
+#define GFS_SET_FLAG _GFSC_(36)
+#define GFS_CLEAR_FLAG _GFSC_(37)
+ * You can tune a filesystem, but you can't tune a yak.
+ */
+
-+#define GFS_TUNE_VERSION ((GFS_IOCTL_VERSION << 16) | (138))
++#define GFS_TUNE_VERSION ((GFS_IOCTL_VERSION << 16) | (139))
+
+struct gfs_tune {
+ unsigned int gt_tune_version;
+ unsigned int gt_incore_log_blocks;
+ unsigned int gt_jindex_refresh_secs;
+ unsigned int gt_depend_secs;
-+ unsigned int gt_scand_secs;
-+ unsigned int gt_recoverd_secs;
-+ unsigned int gt_logd_secs;
-+ unsigned int gt_quotad_secs;
-+ unsigned int gt_inoded_secs;
-+ unsigned int gt_quota_simul_sync;
-+ unsigned int gt_quota_warn_period;
++
++ /* how often various daemons run (seconds) */
++ unsigned int gt_scand_secs; /* find unused glocks and inodes */
++ unsigned int gt_recoverd_secs; /* recover journal of crashed node */
++ unsigned int gt_logd_secs; /* update log tail as AIL flushes */
++ unsigned int gt_quotad_secs; /* sync changes to quota file, clean */
++ unsigned int gt_inoded_secs; /* toss unused inodes */
++
++ unsigned int gt_quota_simul_sync; /* max # quotavals to sync at once */
++ unsigned int gt_quota_warn_period; /* secs between quota warn msgs */
+ unsigned int gt_atime_quantum;
-+ unsigned int gt_quota_quantum;
-+ unsigned int gt_quota_scale_num;
-+ unsigned int gt_quota_scale_den;
++ unsigned int gt_quota_quantum; /* secs between syncs to quota file */
++ unsigned int gt_quota_scale_num; /* numerator */
++ unsigned int gt_quota_scale_den; /* denominator */
+ unsigned int gt_quota_enforce;
+ unsigned int gt_quota_account;
+ unsigned int gt_new_files_jdata;
+ unsigned int gt_prefetch_secs;
+ unsigned int gt_statfs_slots;
+ unsigned int gt_max_mhc;
-+};
-+
-+/*
-+ * Extended Attribute Ioctl structures
-+ *
-+ * Note: The name_len does not include a null character.
-+ *
-+ * Getting and setting EAs return the following errors that aren't
-+ * what they seem
-+ *
-+ * ENODATA - No such extended attribute
-+ * ERANGE - Extended attribute data is too large for the buffer
-+ * ENOSPC - No space left for extended attributes
-+ * EEXIST - Extended attribute already exists
-+ */
-+
-+#define GFS_EACMD_SET (0)
-+#define GFS_EACMD_CREATE (1)
-+#define GFS_EACMD_REPLACE (2)
-+#define GFS_EACMD_REMOVE (3)
-+
-+struct gfs_eaget_io {
-+ char *eg_data;
-+ char *eg_name;
-+ char *eg_len;
-+ uint32_t eg_data_len;
-+ uint8_t eg_name_len;
-+ uint8_t eg_type; /* GFS_EATYPE_... */
-+};
-+
-+struct gfs_easet_io {
-+ const char *es_data;
-+ char *es_name;
-+ uint16_t es_data_len;
-+ uint8_t es_name_len; /* not counting the NULL */
-+ uint8_t es_cmd; /* GFS_EACMD_... */
-+ uint8_t es_type; /* GFS_EATYPE_... */
++ unsigned int gt_greedy_default;
++ unsigned int gt_greedy_quantum;
++ unsigned int gt_greedy_max;
+};
+
+#define GFS_GLOCKD_DEFAULT (1)
+
+ unsigned int ar_num_glockd;
+
-+ int ar_posixacls; /* Enable posix acls */
++ int ar_posix_acls; /* Enable posix acls */
+ int ar_suiddir; /* suiddir support */
+};
+
+#endif /* ___GFS_IOCTL_DOT_H__ */
-diff -urN linux-2.6.9-rc1-mm3/include/linux/gfs_ondisk.h linux/include/linux/gfs_ondisk.h
---- linux-2.6.9-rc1-mm3/include/linux/gfs_ondisk.h 1969-12-31 18:00:00.000000000 -0600
-+++ linux/include/linux/gfs_ondisk.h 2004-09-07 16:26:15.804538697 -0500
-@@ -0,0 +1,1720 @@
+diff -urN linux-orig/include/linux/gfs_ondisk.h linux-patched/include/linux/gfs_ondisk.h
+--- linux-orig/include/linux/gfs_ondisk.h 1969-12-31 18:00:00.000000000 -0600
++++ linux-patched/include/linux/gfs_ondisk.h 2004-10-27 15:27:10.880630761 -0500
+@@ -0,0 +1,1793 @@
+/******************************************************************************
+*******************************************************************************
+**
+******************************************************************************/
+
+/*
-+* NOTE:
-+* If you add 8 byte fields to these structures, they must be 8 byte
-+* aligned. 4 byte field must be 4 byte aligned, etc...
-+*
-+* All structures must be a multiple of 8 bytes long.
-+*
-+* GRIPES:
-+* We should have forgetten about supporting 512B FS block sizes
-+* and made the di_reserved field in the struct gfs_dinode structure
-+* much bigger.
-+*
-+* de_rec_len in struct gfs_dirent should really have been a 32-bit value
-+* as it now limits us to a 64k FS block size (with the current code
-+* in dir.c).
-+*/
++ * On-disk structures.
++ *
++ * NOTE:
++ * If you add 8 byte fields to these structures, they must be 8 byte
++ * aligned. 4 byte field must be 4 byte aligned, etc...
++ *
++ * All structures must be a multiple of 8 bytes long.
++ *
++ * GRIPES:
++ * We should have forgotten about supporting 512B FS block sizes
++ * and made the di_reserved field in the struct gfs_dinode structure
++ * much bigger.
++ *
++ * de_rec_len in struct gfs_dirent should really have been a 32-bit value
++ * as it now limits us to a 64k FS block size (with the current code
++ * in dir.c).
++ */
+
+#ifndef __GFS_ONDISK_DOT_H__
+#define __GFS_ONDISK_DOT_H__
+
-+#define GFS_MAGIC (0x01161970)
-+#define GFS_BASIC_BLOCK (512)
++#define GFS_MAGIC (0x01161970) /* for all on-disk headers */
++#define GFS_BASIC_BLOCK (512) /* "basic block" = "sector" = 512B */
+#define GFS_BASIC_BLOCK_SHIFT (9)
-+#define GFS_DUMPS_PER_LOG (4)
-+
-+/* Lock numbers of the LM_TYPE_NONDISK type */
-+
-+#define GFS_MOUNT_LOCK (0)
-+#define GFS_LIVE_LOCK (1)
-+#define GFS_TRANS_LOCK (2)
-+#define GFS_RENAME_LOCK (3)
-+
-+/* Format numbers for various metadata types */
-+
-+#define GFS_FORMAT_SB (100)
-+#define GFS_FORMAT_RG (200)
-+#define GFS_FORMAT_RB (300)
-+#define GFS_FORMAT_DI (400)
-+#define GFS_FORMAT_IN (500)
-+#define GFS_FORMAT_LF (600)
-+#define GFS_FORMAT_JD (700)
-+#define GFS_FORMAT_LH (800)
-+#define GFS_FORMAT_LD (900)
-+/* These don't have actual struct gfs_meta_header structures to go with them */
-+#define GFS_FORMAT_JI (1000)
-+#define GFS_FORMAT_RI (1100)
-+#define GFS_FORMAT_DE (1200)
-+#define GFS_FORMAT_QU (1500)
-+#define GFS_FORMAT_EA (1600)
-+/* These are part of the superblock */
-+#define GFS_FORMAT_FS (1309)
-+#define GFS_FORMAT_MULTI (1401)
++
++/* Controls how much data can be logged in-core before dumping log to disk */
++
++#define GFS_DUMPS_PER_LOG (4) /* 1/4 of on-disk journal size*/
++
++/* Lock numbers of the LM_TYPE_NONDISK type. These protect certain
++ * cluster-wide operations (rather than on-disk entities).
++ * Currently, the LIVE lock is not used for any real purpose. */
++
++#define GFS_MOUNT_LOCK (0) /* only one node can Mount at a time */
++#define GFS_LIVE_LOCK (1) /* shared by all mounted nodes */
++#define GFS_TRANS_LOCK (2) /* Transaction, protects jrnl recovery */
++#define GFS_RENAME_LOCK (3) /* only one node can Rename at a time */
++
++/* On-disk format (version) numbers for various metadata types,
++ * used in gfs_meta_header */
++
++#define GFS_FORMAT_SB (100) /* Super-Block */
++#define GFS_FORMAT_RG (200) /* Resource Group Header */
++#define GFS_FORMAT_RB (300) /* Resource Group Block Alloc BitBlock */
++#define GFS_FORMAT_DI (400) /* "Disk" inode (dinode) */
++#define GFS_FORMAT_IN (500) /* Indirect dinode block list */
++#define GFS_FORMAT_LF (600) /* Leaf dinode block list */
++#define GFS_FORMAT_JD (700) /* Journal Data */
++#define GFS_FORMAT_LH (800) /* Log Header */
++#define GFS_FORMAT_LD (900) /* Log Descriptor */
++/* These don't have actual struct gfs_meta_header structures to go with them */
++#define GFS_FORMAT_JI (1000) /* Journal Index */
++#define GFS_FORMAT_RI (1100) /* Resource Group Index */
++#define GFS_FORMAT_DE (1200) /* Directory Entry */
++#define GFS_FORMAT_QU (1500) /* Quota */
++#define GFS_FORMAT_EA (1600) /* Extended Attribute */
++#define GFS_FORMAT_ED (1700) /* Extended Attribute data */
++/* These version #s are embedded in the superblock */
++#define GFS_FORMAT_FS (1309) /* Filesystem (all-encompassing) */
++#define GFS_FORMAT_MULTI (1401) /* Multi-Host */
+
+/*
+ * An on-disk inode number
++ * Initially, the on-disk block address of the inode block is assigned as the
++ * formal (permanent) ID as well. Block address can change (to move inode
++ * on-disk), but formal ID must stay unchanged once assigned.
+ */
+
+#define gfs_inum_equal(ino1, ino2) \
+ ((ino1)->no_addr == (ino2)->no_addr))
+
+struct gfs_inum {
-+ uint64_t no_formal_ino;
-+ uint64_t no_addr;
++ uint64_t no_formal_ino; /* inode identifier */
++ uint64_t no_addr; /* block # of dinode block */
+};
+
+/*
+ * Generic metadata head structure
+ *
-+ * Every inplace buffer logged in the journal must start with this.
++ * Every inplace buffer logged in the journal must start
++ * with a struct gfs_meta_header.
+ */
+
+#define GFS_METATYPE_NONE (0)
-+#define GFS_METATYPE_SB (1)
-+#define GFS_METATYPE_RG (2)
-+#define GFS_METATYPE_RB (3)
-+#define GFS_METATYPE_DI (4)
-+#define GFS_METATYPE_IN (5)
-+#define GFS_METATYPE_LF (6)
-+#define GFS_METATYPE_JD (7)
-+#define GFS_METATYPE_LH (8)
-+#define GFS_METATYPE_LD (9)
-+#define GFS_METATYPE_EA (10)
-+
-+#define GFS_META_CLUMP (64)
++#define GFS_METATYPE_SB (1) /* Super-Block */
++#define GFS_METATYPE_RG (2) /* Resource Group Header */
++#define GFS_METATYPE_RB (3) /* Resource Group Block Alloc BitBlock */
++#define GFS_METATYPE_DI (4) /* "Disk" inode (dinode) */
++#define GFS_METATYPE_IN (5) /* Indirect dinode block list */
++#define GFS_METATYPE_LF (6) /* Leaf dinode block list */
++#define GFS_METATYPE_JD (7) /* Journal Data */
++#define GFS_METATYPE_LH (8) /* Log Header (gfs_log_header) */
++#define GFS_METATYPE_LD (9) /* Log Descriptor (gfs_log_descriptor) */
++#define GFS_METATYPE_EA (10) /* Extended Attribute */
++#define GFS_METATYPE_ED (11) /* Extended Attribute data */
++
++#define GFS_META_CLUMP (64) /* # blocks to convert from data to meta */
+
+struct gfs_meta_header {
-+ uint32_t mh_magic; /* Magic number */
-+ uint32_t mh_type; /* GFS_METATYPE_XX */
-+ uint64_t mh_generation; /* Generation number */
-+ uint32_t mh_format; /* GFS_FORMAT_XX */
-+ uint32_t mh_incarn;
++ uint32_t mh_magic; /* GFS_MAGIC sanity check magic number */
++ uint32_t mh_type; /* GFS_METATYPE_XX type of metadata block */
++ uint64_t mh_generation; /* increment before writing to journal */
++ uint32_t mh_format; /* GFS_FORMAT_XX (version # for this type) */
++ uint32_t mh_incarn; /* increment when marking dinode "unused" */
+};
+
+/*
+ * super-block structure
+ *
-+ * It's probably good if SIZEOF_SB <= GFS_BASIC_BLOCK
++ * One of these is at beginning of filesystem.
++ * It's probably good if SIZEOF_SB <= GFS_BASIC_BLOCK (512 bytes)
+ */
+
-+/* Address of SuperBlock in GFS basic blocks */
++/* Address of SuperBlock in GFS basic blocks. 1st 64K of filesystem is empty
++ for safety against getting clobbered by wayward volume managers, etc.
++ 64k was chosen because it's the largest GFS-supported fs block size. */
+#define GFS_SB_ADDR (128)
++
+/* The lock number for the superblock (must be zero) */
+#define GFS_SB_LOCK (0)
+#define GFS_CRAP_LOCK (1)
+#define GFS_LOCKNAME_LEN (64)
+
+struct gfs_sb {
-+ /* Order is important */
++ /* Order is important; need to be able to read old superblocks
++ in order to support on-disk version upgrades */
+ struct gfs_meta_header sb_header;
+
-+ uint32_t sb_fs_format;
-+ uint32_t sb_multihost_format;
-+ uint32_t sb_flags;
++ uint32_t sb_fs_format; /* GFS_FORMAT_FS (on-disk version) */
++ uint32_t sb_multihost_format; /* GFS_FORMAT_MULTI */
++ uint32_t sb_flags; /* ?? */
+
-+ /* Important information */
-+ uint32_t sb_bsize; /* fundamental fs block size in bytes */
-+ uint32_t sb_bsize_shift; /* log2(sb_bsize) */
-+ uint32_t sb_seg_size; /* Journal segment size in FS blocks */
++ uint32_t sb_bsize; /* fundamental FS block size in bytes */
++ uint32_t sb_bsize_shift; /* log2(sb_bsize) */
++ uint32_t sb_seg_size; /* Journal segment size in FS blocks */
+
-+ struct gfs_inum sb_jindex_di; /* journal index inode number (GFS_SB_LOCK) */
-+ struct gfs_inum sb_rindex_di; /* resource index inode number (GFS_SB_LOCK) */
-+ struct gfs_inum sb_root_di; /* root directory inode number (GFS_ROOT_LOCK) */
++ /* These special inodes do not appear in any on-disk directory. */
++ struct gfs_inum sb_jindex_di; /* journal index inode */
++ struct gfs_inum sb_rindex_di; /* resource group index inode */
++ struct gfs_inum sb_root_di; /* root directory inode */
+
-+ char sb_lockproto[GFS_LOCKNAME_LEN]; /* Type of locking this FS uses */
-+ char sb_locktable[GFS_LOCKNAME_LEN]; /* Name of lock table for this FS */
++ /* Default inter-node locking protocol (lock module) and namespace */
++ char sb_lockproto[GFS_LOCKNAME_LEN]; /* lock protocol name */
++ char sb_locktable[GFS_LOCKNAME_LEN]; /* unique name for this FS */
+
-+ struct gfs_inum sb_quota_di;
-+ struct gfs_inum sb_license_di;
++ /* More special inodes */
++ struct gfs_inum sb_quota_di; /* quota inode */
++ struct gfs_inum sb_license_di; /* license inode */
+
+ char sb_reserved[96];
+};
+
+/*
+ * journal index structure
++ *
++ * One for each journal used by the filesystem.
++ * These descriptors are packed contiguously within the jindex inode (file).
+ */
+
+struct gfs_jindex {
-+ uint64_t ji_addr; /* starting block of the journal */
-+ uint32_t ji_nsegment; /* number of segments in journal */
++ uint64_t ji_addr; /* starting block of the journal */
++ uint32_t ji_nsegment; /* number of segments in journal */
+ uint32_t ji_pad;
+
+ char ji_reserved[64];
+
+/*
+ * resource index structure
++ *
++ * One of these for each resource group in the filesystem.
++ * These descriptors are packed contiguously within the rindex inode (file).
+ */
+
+struct gfs_rindex {
-+ uint64_t ri_addr; /* rgrp block disk address */
-+ uint32_t ri_length; /* length of rgrp header in fs blocks */
++ uint64_t ri_addr; /* block # of 1st block (header) in rgrp */
++ uint32_t ri_length; /* # fs blocks containing rgrp header & bitmap */
+ uint32_t ri_pad;
+
-+ uint64_t ri_data1; /* first data location */
-+ uint32_t ri_data; /* num of data blocks in rgrp */
++ uint64_t ri_data1; /* block # of first data block in rgrp */
++ uint32_t ri_data; /* number of data blocks in rgrp */
+
-+ uint32_t ri_bitbytes; /* number of bytes in data bitmaps */
++ uint32_t ri_bitbytes; /* total # bytes used by block alloc bitmap */
+
+ char ri_reserved[64];
+};
+/*
+ * resource group header structure
+ *
-+ */
-+
-+/* Number of blocks per byte in rgrp */
-+#define GFS_NBBY (4)
++ * One of these at beginning of the first block of an rgrp,
++ * followed by block alloc bitmap data in remainder of first block.
++ * Each resource group contains:
++ * Header block, including block allocation statistics (struct gfs_rgrp)
++ * and first part of block alloc bitmap.
++ * Bitmap block(s), continuing block alloc bitmap started in header block.
++ * Data blocks, containing file data and metadata.
++ *
++ * In older versions, now-unused (but previously allocated) dinodes were
++ * saved for re-use in an on-disk linked list (chain). This is no longer
++ * done, but support still exists for reclaiming dinodes from this list,
++ * to support upgrades from older on-disk formats.
++ */
++
++/* Each data block within rgrp is represented by 2 bits in the alloc bitmap */
++#define GFS_NBBY (4) /* # blocks represented by 1 bitmap byte */
+#define GFS_BIT_SIZE (2)
+#define GFS_BIT_MASK (0x00000003)
+
++/* 4 possible block allocation states */
+#define GFS_BLKST_FREE (0)
+#define GFS_BLKST_USED (1)
+#define GFS_BLKST_FREEMETA (2)
+struct gfs_rgrp {
+ struct gfs_meta_header rg_header;
+
-+ uint32_t rg_flags; /* flags */
++ uint32_t rg_flags; /* ?? */
+
-+ uint32_t rg_free; /* number of free data blocks */
++ uint32_t rg_free; /* number of free data blocks */
+
-+ uint32_t rg_useddi; /* number of dinodes */
-+ uint32_t rg_freedi; /* number of unused dinodes */
-+ struct gfs_inum rg_freedi_list; /* list of free dinodes */
++ /* dinodes are USEDMETA, but are handled separately from other METAs */
++ uint32_t rg_useddi; /* number of dinodes (used or free) */
++ uint32_t rg_freedi; /* number of unused (free) dinodes */
++ struct gfs_inum rg_freedi_list; /* 1st block in chain of free dinodes */
+
-+ uint32_t rg_usedmeta; /* number of used metadata blocks (not including dinodes) */
-+ uint32_t rg_freemeta; /* number of unused metadata blocks */
++ /* these META statistics do not include dinodes (used or free) */
++ uint32_t rg_usedmeta; /* number of used metadata blocks */
++ uint32_t rg_freemeta; /* number of unused metadata blocks */
+
+ char rg_reserved[64];
+};
+
+/*
-+ * Quota Structures
++ * quota structure
+ */
+
+struct gfs_quota {
+};
+
+/*
-+ * dinode structure
++ * dinode (disk inode) structure
++ * The ondisk representation of inodes
++ * One for each file, directory, etc.
++ * GFS does not put more than one inode in a single block.
++ * The inode may be "stuffed", carrying file data along with metadata,
++ * if the file data is small enough.
++ * Otherwise, the inode block contains pointers to other blocks that contain
++ * either file data or other pointers to other blocks (indirect addressing
++ * via a metadata tree).
+ */
+
+#define GFS_MAX_META_HEIGHT (10)
+
+/* Dinode types */
+#define GFS_FILE_NON (0)
-+#define GFS_FILE_REG (1)
-+#define GFS_FILE_DIR (2)
-+#define GFS_FILE_LNK (5)
-+#define GFS_FILE_BLK (7)
-+#define GFS_FILE_CHR (8)
-+#define GFS_FILE_FIFO (101)
-+#define GFS_FILE_SOCK (102)
++#define GFS_FILE_REG (1) /* regular file */
++#define GFS_FILE_DIR (2) /* directory */
++#define GFS_FILE_LNK (5) /* link */
++#define GFS_FILE_BLK (7) /* block device node */
++#define GFS_FILE_CHR (8) /* character device node */
++#define GFS_FILE_FIFO (101) /* fifo/pipe */
++#define GFS_FILE_SOCK (102) /* socket */
+
+/* Dinode flags */
-+#define GFS_DIF_JDATA (0x00000001)
-+#define GFS_DIF_EXHASH (0x00000002)
-+#define GFS_DIF_UNUSED (0x00000004)
-+#define GFS_DIF_EA_INDIRECT (0x00000008)
-+#define GFS_DIF_DIRECTIO (0x00000010)
-+#define GFS_DIF_IMMUTABLE (0x00000020)
-+#define GFS_DIF_APPENDONLY (0x00000040)
-+#define GFS_DIF_NOATIME (0x00000080)
-+#define GFS_DIF_SYNC (0x00000100)
-+#define GFS_DIF_INHERIT_DIRECTIO (0x40000000)
-+#define GFS_DIF_INHERIT_JDATA (0x80000000)
++#define GFS_DIF_JDATA (0x00000001) /* journal this (meta)data blk */
++#define GFS_DIF_EXHASH (0x00000002) /* hashed directory */
++#define GFS_DIF_UNUSED (0x00000004) /* unused dinode */
++#define GFS_DIF_EA_INDIRECT (0x00000008) /* extended attribute, indirect*/
++#define GFS_DIF_DIRECTIO (0x00000010)
++#define GFS_DIF_IMMUTABLE (0x00000020)
++#define GFS_DIF_APPENDONLY (0x00000040)
++#define GFS_DIF_NOATIME (0x00000080)
++#define GFS_DIF_SYNC (0x00000100)
++#define GFS_DIF_INHERIT_DIRECTIO (0x40000000)
++#define GFS_DIF_INHERIT_JDATA (0x80000000)
+
+struct gfs_dinode {
+ struct gfs_meta_header di_header;
+
-+ struct gfs_inum di_num;
++ struct gfs_inum di_num; /* formal inode # and block address */
+
+ uint32_t di_mode; /* mode of file */
+ uint32_t di_uid; /* owner's user id */
+ int64_t di_atime; /* time last accessed */
+ int64_t di_mtime; /* time last modified */
+ int64_t di_ctime; /* time last changed */
++
++ /* Non-zero only for character or block device nodes */
+ uint32_t di_major; /* device major number */
+ uint32_t di_minor; /* device minor number */
+
++ /* Block allocation strategy */
+ uint64_t di_rgrp; /* dinode rgrp block number */
+ uint64_t di_goal_rgrp; /* rgrp to alloc from next */
+ uint32_t di_goal_dblk; /* data block goal */
+ uint32_t di_goal_mblk; /* metadata block goal */
-+ uint32_t di_flags; /* flags */
-+ uint32_t di_payload_format; /* struct gfs_rindex, struct gfs_jindex, or struct gfs_dirent */
-+ uint16_t di_type; /* type of file */
-+ uint16_t di_height; /* height of metadata */
-+ uint32_t di_incarn; /* incarnation number */
++
++ uint32_t di_flags; /* GFS_DIF_... */
++
++ /* struct gfs_rindex, struct gfs_jindex, or struct gfs_dirent */
++ uint32_t di_payload_format; /* GFS_FORMAT_... */
++ uint16_t di_type; /* GFS_FILE_... type of file */
++ uint16_t di_height; /* height of metadata (0 == stuffed) */
++ uint32_t di_incarn; /* incarnation number (unused) */
+ uint16_t di_pad;
+
+ /* These only apply to directories */
+ uint16_t di_depth; /* Number of bits in the table */
+ uint32_t di_entries; /* The number of entries in the directory */
+
-+ /* This only applies to unused inodes */
-+ struct gfs_inum di_next_unused;
++ /* This list formed a chain of unused inodes */
++ struct gfs_inum di_next_unused; /* used in old versions only */
+
+ uint64_t di_eattr; /* extended attribute block number */
+
+
+/*
+ * indirect block header
++ *
++ * A component of a dinode's indirect addressing metadata tree.
++ * These are pointed to by pointers in dinodes or other indirect blocks.
+ */
+
+struct gfs_indirect {
+#define GFS_DIRENT_SIZE(name_len) ((sizeof(struct gfs_dirent) + (name_len) + 7) & ~7)
+
+struct gfs_dirent {
-+ struct gfs_inum de_inum; /* Inode number */
-+ uint32_t de_hash; /* hash of the filename */
-+ uint16_t de_rec_len; /* the length of the dirent */
-+ uint16_t de_name_len; /* the length of the name */
-+ uint16_t de_type; /* type of dinode this points to */
++ struct gfs_inum de_inum; /* formal inode number and block address */
++ uint32_t de_hash; /* hash of the filename */
++ uint16_t de_rec_len; /* the length of the dirent */
++ uint16_t de_name_len; /* the length of the name */
++ uint16_t de_type; /* type of dinode this points to */
+
+ char de_reserved[14];
+};
+struct gfs_leaf {
+ struct gfs_meta_header lf_header;
+
-+ uint16_t lf_depth; /* Depth of leaf */
-+ uint16_t lf_entries; /* Number of dirents in leaf */
-+ uint32_t lf_dirent_format; /* Format of the dirents */
-+ uint64_t lf_next; /* Next leaf, if overflow */
++ uint16_t lf_depth; /* Depth of leaf */
++ uint16_t lf_entries; /* Number of dirents in leaf */
++ uint32_t lf_dirent_format; /* GFS_FORMAT_DE (version #) */
++ uint64_t lf_next; /* Next leaf, if overflow */
+
+ char lf_reserved[64];
+};
+
+/*
+ * Log header structure
++ *
++ * Two of these are in the first block of a transaction log:
++ * 1) at beginning of block
++ * 2) at end of first 512-byte sector within block
+ */
+
-+#define GFS_LOG_HEAD_UNMOUNT (0x00000001)
++#define GFS_LOG_HEAD_UNMOUNT (0x00000001) /* log is clean, can unmount fs */
+
+struct gfs_log_header {
+ struct gfs_meta_header lh_header;
+
-+ uint32_t lh_flags; /* Flags */
++ uint32_t lh_flags; /* GFS_LOG_HEAD_... */
+ uint32_t lh_pad;
+
+ uint64_t lh_first; /* Block number of first header in this trans */
+
+/*
+ * Log type descriptor
++ *
++ * One of these for each chunk in a transaction
+ */
+
-+#define GFS_LOG_DESC_METADATA (300)
++#define GFS_LOG_DESC_METADATA (300) /* metadata */
+/* ld_data1 is the number of metadata blocks in the descriptor.
+ ld_data2 is unused.
+ */
+
-+#define GFS_LOG_DESC_IUL (400)
++#define GFS_LOG_DESC_IUL (400) /* unlinked inode */
+/* ld_data1 is TRUE if this is a dump.
+ ld_data2 is unused.
+ FixMe!!! ld_data1 should be the number of entries.
+ ld_data2 should be "TRUE if this is a dump".
+ */
+
-+#define GFS_LOG_DESC_IDA (401)
++#define GFS_LOG_DESC_IDA (401) /* de-allocated inode */
+/* ld_data1 is unused.
+ ld_data2 is unused.
+ FixMe!!! ld_data1 should be the number of entries.
+ */
+
-+#define GFS_LOG_DESC_Q (402)
++#define GFS_LOG_DESC_Q (402) /* quota */
+/* ld_data1 is the number of quota changes in the descriptor.
+ ld_data2 is TRUE if this is a dump.
+ */
+
-+#define GFS_LOG_DESC_LAST (500)
++#define GFS_LOG_DESC_LAST (500) /* final in a logged transaction */
+/* ld_data1 is unused.
+ ld_data2 is unused.
+ */
+struct gfs_log_descriptor {
+ struct gfs_meta_header ld_header;
+
-+ uint32_t ld_type; /* Type of data in this log chunk */
++ uint32_t ld_type; /* GFS_LOG_DESC_... Type of this log chunk */
+ uint32_t ld_length; /* Number of buffers in this chunk */
-+ uint32_t ld_data1; /* descriptor specific field */
-+ uint32_t ld_data2; /* descriptor specific field */
++ uint32_t ld_data1; /* descriptor-specific field */
++ uint32_t ld_data2; /* descriptor-specific field */
+
+ char ld_reserved[64];
+};
+
+/*
+ * Metadata block tags
++ *
++ * One for each logged block. Tells where block really belongs on-disk.
++ * These descriptor tags are packed contiguously after a gfs_log_descriptor.
+ */
+
+struct gfs_block_tag {
+ uint64_t bt_blkno; /* inplace block number */
-+ uint32_t bt_flags; /* flags */
++ uint32_t bt_flags; /* ?? */
+ uint32_t bt_pad;
+};
+
+
+struct gfs_quota_tag {
+ int64_t qt_change;
-+ uint32_t qt_flags;
++ uint32_t qt_flags; /* GFS_QTF_... */
+ uint32_t qt_id;
+};
+
+ */
+
+#define GFS_EA_MAX_NAME_LEN (255)
-+#define GFS_EA_MAX_DATA_LEN (65535)
-+
-+#define GFS_EATYPE_LAST (2)
++#define GFS_EA_MAX_DATA_LEN (65536)
+
+#define GFS_EATYPE_UNUSED (0)
-+#define GFS_EATYPE_USR (1)
-+#define GFS_EATYPE_SYS (2)
-+#define GFS_EATYPE_VALID(x) ((x) && (x) <= GFS_EATYPE_LAST) /* this is only
-+ for requests */
++#define GFS_EATYPE_USR (1) /* user attribute */
++#define GFS_EATYPE_SYS (2) /* system attribute */
++
++#define GFS_EATYPE_LAST (2)
++#define GFS_EATYPE_VALID(x) ((x) <= GFS_EATYPE_LAST)
+
+#define GFS_EAFLAG_LAST (0x01) /* last ea in block */
+
+struct gfs_ea_header {
-+ uint32_t ea_rec_len;
-+ uint32_t ea_data_len;
-+ uint8_t ea_name_len; /* no NULL pointer after the string */
-+ uint8_t ea_type; /* GFS_EATYPE_... */
-+ uint8_t ea_flags;
-+ uint8_t ea_num_ptrs;
++ uint32_t ea_rec_len; /* total record length: hdr + name + data */
++ uint32_t ea_data_len; /* data length, in bytes */
++ uint8_t ea_name_len; /* no NULL pointer after the string */
++ uint8_t ea_type; /* GFS_EATYPE_... */
++ uint8_t ea_flags; /* GFS_EAFLAG_... */
++ uint8_t ea_num_ptrs; /* # fs blocks needed for EA */
+ uint32_t ea_pad;
+};
+
+void gfs_desc_print(struct gfs_log_descriptor *desc);
+void gfs_block_tag_print(struct gfs_block_tag *tag);
+void gfs_quota_tag_print(struct gfs_quota_tag *tag);
-+void gfs_ea_header_print(struct gfs_ea_header *tag);
++void gfs_ea_header_print(struct gfs_ea_header *ea, char *name);
+
+/* The hash function for ExHash directories */
+
+ */
+
+void
-+gfs_ea_header_print(struct gfs_ea_header *ea)
++gfs_ea_header_print(struct gfs_ea_header *ea, char *name)
+{
++ char buf[GFS_EA_MAX_NAME_LEN + 1];
++
+ pv(ea, ea_rec_len, "%u");
+ pv(ea, ea_data_len, "%u");
+ pv(ea, ea_name_len, "%u");
+ pv(ea, ea_flags, "%u");
+ pv(ea, ea_num_ptrs, "%u");
+ pv(ea, ea_pad, "%u");
++
++ memset(buf, 0, GFS_EA_MAX_NAME_LEN + 1);
++ memcpy(buf, name, ea->ea_name_len);
++ printk(" name = %s\n", buf);
+}
+
+static const uint32_t crc_32_tab[] =
+#endif /* WANT_GFS_CONVERSION_FUNCTIONS */
+
diff -urN linux-orig/fs/gfs_locking/lock_dlm/group.c linux-patched/fs/gfs_locking/lock_dlm/group.c
---- linux-orig/fs/gfs_locking/lock_dlm/group.c 1969-12-31 18:00:00.000000000 -0600
-+++ linux-patched/fs/gfs_locking/lock_dlm/group.c 2004-08-27 12:42:19.597199439 -0500
-@@ -0,0 +1,814 @@
+--- linux-orig/fs/gfs_locking/lock_dlm/group.c 1970-01-01 07:30:00.000000000 +0730
++++ linux-patched/fs/gfs_locking/lock_dlm/group.c 2004-10-22 21:20:30.000000000 +0800
+@@ -0,0 +1,818 @@
+/******************************************************************************
+*******************************************************************************
+**
+ if (error)
+ goto fail;
+
-+ error = lm_dlm_hold_lvb(lock, &lvb);
++ lp = (dlm_lock_t *) lock;
++
++ error = dlm_add_lvb(lp);
+ if (error)
+ goto fail_put;
+
-+ lp = (dlm_lock_t *) lock;
++ lvb = lp->lvb;
+ set_bit(LFL_INLOCK, &lp->flags);
+ set_bit(LFL_NOBAST, &lp->flags);
+
+ goto retry;
+ }
+ if (error)
-+ goto fail_unhold;
++ goto fail_lvb;
+
+ memcpy(&beval, lvb, sizeof(beval));
+ exist_val = be32_to_cpu(beval);
+ * This id is already used. It has a non-zero nodeid in the lvb
+ */
+ lm_dlm_unlock_sync(lock, LM_ST_SHARED);
-+ lm_dlm_unhold_lvb(lock, lvb);
++ dlm_del_lvb(lp);
+ lm_dlm_put_lock(lock);
+ error = exist_val;
+ }
+ fail_unlock:
+ lm_dlm_unlock_sync(lock, LM_ST_SHARED);
+
-+ fail_unhold:
-+ lm_dlm_unhold_lvb(lock, lvb);
++ fail_lvb:
++ dlm_del_lvb(lp);
+
+ fail_put:
+ lm_dlm_put_lock(lock);
+ lm_dlm_unlock_sync(lock, LM_ST_EXCLUSIVE);
+
+ end:
-+ lm_dlm_unhold_lvb(lock, lp->lvb);
++ dlm_del_lvb(lp);
+ lm_dlm_put_lock(lock);
+}
+
+ if (error)
+ goto out;
+
-+ error = lm_dlm_hold_lvb(lock, &lvb);
++ lp = (dlm_lock_t *) lock;
++
++ error = dlm_add_lvb(lp);
+ if (error)
+ goto out_put;
+
-+ lp = (dlm_lock_t *) lock;
++ lvb = lp->lvb;
+ set_bit(LFL_INLOCK, &lp->flags);
+ set_bit(LFL_NOBAST, &lp->flags);
+
+ goto retry;
+ }
+ if (error)
-+ goto out_unhold;
++ goto out_lvb;
+
+ memcpy(&beval, lvb, sizeof(beval));
+ *val = be32_to_cpu(beval);
+
+ error = 0;
+
-+ out_unhold:
-+ lm_dlm_unhold_lvb(lock, lvb);
++ out_lvb:
++ dlm_del_lvb(lp);
+
+ out_put:
+ lm_dlm_put_lock(lock);
+ .finish = mg_finish
+};
diff -urN linux-orig/fs/gfs_locking/lock_dlm/lock.c linux-patched/fs/gfs_locking/lock_dlm/lock.c
---- linux-orig/fs/gfs_locking/lock_dlm/lock.c 1969-12-31 18:00:00.000000000 -0600
-+++ linux-patched/fs/gfs_locking/lock_dlm/lock.c 2004-08-27 12:42:19.598199208 -0500
-@@ -0,0 +1,611 @@
+--- linux-orig/fs/gfs_locking/lock_dlm/lock.c 1970-01-01 07:30:00.000000000 +0730
++++ linux-patched/fs/gfs_locking/lock_dlm/lock.c 2004-10-22 21:20:30.000000000 +0800
+@@ -0,0 +1,689 @@
+/******************************************************************************
+*******************************************************************************
+**
+
+ clear_bit(LFL_WAIT_COMPLETE, &lp->flags);
+
-+ log_debug("qc %x,%"PRIx64" %d,%d id %x sts %d",
++ log_debug("qc %x,%"PRIx64" %d,%d id %x sts %d %x",
+ lp->lockname.ln_type, lp->lockname.ln_number,
-+ lp->cur, lp->req, lp->lksb.sb_lkid, lp->lksb.sb_status);
++ lp->cur, lp->req, lp->lksb.sb_lkid, lp->lksb.sb_status,
++ lp->lksb.sb_flags);
+
+ spin_lock(&dlm->async_lock);
+ list_add_tail(&lp->clist, &dlm->complete);
+ wake_up(&dlm->wait);
+}
+
-+static __inline__ void lock_ast(void *astargs)
++static __inline__ void lock_ast(void *astarg)
+{
-+ dlm_lock_t *lp = (dlm_lock_t *) astargs;
-+ queue_complete(lp);
++ queue_complete((dlm_lock_t *) astarg);
+}
+
-+static __inline__ void lock_bast(void *astargs, int mode)
++static __inline__ void lock_bast(void *astarg, int mode)
+{
-+ dlm_lock_t *lp = (dlm_lock_t *) astargs;
-+ queue_blocking(lp, mode);
++ queue_blocking((dlm_lock_t *) astarg, mode);
+}
+
+/*
+ lkf |= DLM_LKF_NOQUEUEBAST;
+ }
+
++ if (gfs_flags & LM_FLAG_PRIORITY) {
++ lkf |= DLM_LKF_NOORDER;
++ lkf |= DLM_LKF_HEADQUE;
++ }
++
+ if (lp->lksb.sb_lkid != 0) {
+ lkf |= DLM_LKF_CONVERT;
+
-+ if (gfs_flags & LM_FLAG_PRIORITY)
-+ lkf |= DLM_LKF_EXPEDITE;
-+ else if (req > cur)
-+ lkf |= DLM_LKF_QUECVT;
-+
+ /* Conversion deadlock avoidance by DLM */
+
+ if (!test_bit(LFL_FORCE_PROMOTE, &lp->flags) &&
++ !(lkf & DLM_LKF_NOQUEUE) &&
+ cur > DLM_LOCK_NL && req > DLM_LOCK_NL && cur != req)
+ lkf |= DLM_LKF_CONVDEADLK;
+ }
+ lp->dlm = dlm;
+ lp->cur = DLM_LOCK_IV;
+ lp->lvb = NULL;
++ lp->hold_null = NULL;
+ init_completion(&lp->uast_wait);
+ *lpp = lp;
+ return 0;
+ if (lp->lvb)
+ lkf = DLM_LKF_VALBLK;
+
-+ log_debug("un %x,%"PRIx64" id %x cur %d %x", lp->lockname.ln_type,
++ log_debug("un %x,%"PRIx64" %x %d %x", lp->lockname.ln_type,
+ lp->lockname.ln_number, lp->lksb.sb_lkid, lp->cur, lkf);
+
-+ error = dlm_unlock(lp->dlm->gdlm_lsp, lp->lksb.sb_lkid, lkf, &lp->lksb,
-+ (void *) lp);
++ error = dlm_unlock(lp->dlm->gdlm_lsp, lp->lksb.sb_lkid, lkf, NULL, lp);
+
+ DLM_ASSERT(!error, printk("%s: error=%d num=%x,%"PRIx64"\n",
+ lp->dlm->fsname, error, lp->lockname.ln_type,
+{
+ dlm_lock_t *lp = (dlm_lock_t *) lock;
+
-+ if (lp->lvb) {
-+ check_cur_state(lp, cur_state);
-+ lp->req = DLM_LOCK_NL;
-+ lp->lkf = make_flags(lp, 0, lp->cur, lp->req);
-+ do_dlm_lock(lp, NULL);
-+ } else {
-+ if (lp->cur == DLM_LOCK_IV)
-+ return 0;
-+ do_dlm_unlock(lp);
-+ }
++ if (lp->cur == DLM_LOCK_IV)
++ return 0;
++ do_dlm_unlock(lp);
+ return LM_OUT_ASYNC;
+}
+
+}
+
+/**
-+ * dlm_hold_lvb - hold on to a lock value block
-+ * @lock: the lock the LVB is associated with
-+ * @lvbp: return the lvb memory here
++ * hold_null_lock - add a NL lock to the resource
++ * @lp: represents the resource
++ *
++ * This can do a synchronous dlm request (requiring a lock_dlm thread to
++ * get the completion) because gfs won't call hold_lvb() during a
++ * callback (from the context of a lock_dlm thread).
+ *
+ * Returns: 0 on success, -EXXX on failure
+ */
+
-+int lm_dlm_hold_lvb(lm_lock_t *lock, char **lvbp)
++static int hold_null_lock(dlm_lock_t *lp)
++{
++ dlm_lock_t *lpn;
++ int error;
++
++ if (lp->hold_null) {
++ printk("lock_dlm: lvb already held\n");
++ return 0;
++ }
++
++ error = create_lp(lp->dlm, &lp->lockname, &lpn);
++ if (error)
++ return error;
++
++ lpn->req = DLM_LOCK_NL;
++ set_bit(LFL_NOBAST, &lpn->flags);
++ set_bit(LFL_INLOCK, &lpn->flags);
++
++ error = do_dlm_lock_sync(lpn, NULL);
++ if (error) {
++ delete_lp(lpn);
++ lpn = NULL;
++ }
++
++ lp->hold_null = lpn;
++ return error;
++}
++
++/**
++ * unhold_null_lock - remove the NL lock from the resource
++ * @lp: represents the resource
++ *
++ * This cannot do a synchronous dlm request (requiring a lock_dlm thread to
++ * get the completion) because gfs may call unhold_lvb() during a
++ * callback (from the context of a lock_dlm thread) which could cause a
++ * deadlock since the other lock_dlm thread could be engaged in recovery.
++ *
++ * Returns: 0 on success, -EXXX on failure
++ */
++
++static void unhold_null_lock(dlm_lock_t *lp)
++{
++ dlm_lock_t *lpn = lp->hold_null;
++ set_bit(LFL_UNLOCK_DELETE, &lpn->flags);
++ do_dlm_unlock(lpn);
++ lp->hold_null = NULL;
++}
++
++int dlm_add_lvb(dlm_lock_t *lp)
+{
-+ dlm_lock_t *lp = (dlm_lock_t *) lock;
+ char *lvb;
+
+ lvb = kmalloc(DLM_LVB_SIZE, GFP_KERNEL);
+
+ lp->lksb.sb_lvbptr = lvb;
+ lp->lvb = lvb;
-+ *lvbp = lvb;
-+
+ return 0;
+}
+
+/**
++ * dlm_hold_lvb - hold on to a lock value block
++ * @lock: the lock the LVB is associated with
++ * @lvbp: return the lvb memory here
++ *
++ * Returns: 0 on success, -EXXX on failure
++ */
++
++int lm_dlm_hold_lvb(lm_lock_t *lock, char **lvbp)
++{
++ dlm_lock_t *lp = (dlm_lock_t *) lock;
++ int error;
++
++ error = dlm_add_lvb(lp);
++ if (error)
++ return error;
++
++ *lvbp = lp->lvb;
++
++ /* Acquire a NL lock because gfs requires the value block to remain
++ intact on the resource while the lvb is "held" even if it's holding
++ no locks on the resource. */
++
++ error = hold_null_lock(lp);
++ if (error) {
++ kfree(lp->lvb);
++ lp->lvb = NULL;
++ lp->lksb.sb_lvbptr = NULL;
++ }
++
++ return error;
++}
++
++void dlm_del_lvb(dlm_lock_t *lp)
++{
++ kfree(lp->lvb);
++ lp->lvb = NULL;
++ lp->lksb.sb_lvbptr = NULL;
++}
++
++/**
+ * dlm_unhold_lvb - release a LVB
+ * @lock: the lock the LVB is associated with
+ * @lvb: the lock value block
+{
+ dlm_lock_t *lp = (dlm_lock_t *) lock;
+
-+ if (lp->cur == DLM_LOCK_NL)
-+ do_dlm_unlock_sync(lp);
-+ kfree(lvb);
-+ lp->lvb = NULL;
-+ lp->lksb.sb_lvbptr = NULL;
++ unhold_null_lock(lp);
++ dlm_del_lvb(lp);
+}
+
+/**
+ do_dlm_lock(lp, r);
+}
diff -urN linux-orig/fs/gfs_locking/lock_dlm/lock_dlm.h linux-patched/fs/gfs_locking/lock_dlm/lock_dlm.h
---- linux-orig/fs/gfs_locking/lock_dlm/lock_dlm.h 1969-12-31 18:00:00.000000000 -0600
-+++ linux-patched/fs/gfs_locking/lock_dlm/lock_dlm.h 2004-08-27 12:42:19.598199208 -0500
-@@ -0,0 +1,358 @@
+--- linux-orig/fs/gfs_locking/lock_dlm/lock_dlm.h 1970-01-01 07:30:00.000000000 +0730
++++ linux-patched/fs/gfs_locking/lock_dlm/lock_dlm.h 2004-10-22 21:20:30.000000000 +0800
+@@ -0,0 +1,374 @@
+/******************************************************************************
+*******************************************************************************
+**
+#include <linux/string.h>
+#include <linux/list.h>
+#include <linux/socket.h>
++#include <linux/kthread.h>
+#include <net/sock.h>
+#include <linux/lm_interface.h>
+#include <cluster/cnxman.h>
+
+#define LOCK_DLM_MAX_NODES (128)
+
-+#define DROP_LOCKS_COUNT (10000)
-+#define DROP_LOCKS_TIME (60)
++#define DROP_LOCKS_COUNT (50000)
++#define DROP_LOCKS_PERIOD (60)
++#define SHRINK_CACHE_COUNT (100)
++#define SHRINK_CACHE_MAX (1000)
++#define SHRINK_CACHE_TIME (30)
+
+struct dlm;
+struct dlm_lock;
+typedef struct strname strname_t;
+
+#define DFL_FIRST_MOUNT 0
-+#define DFL_THREAD_STOP 1
-+#define DFL_GOT_NODEID 2
-+#define DFL_MG_FINISH 3
-+#define DFL_HAVE_JID 4
-+#define DFL_BLOCK_LOCKS 5
-+#define DFL_START_ERROR 6
-+#define DFL_MOUNT 7
-+#define DFL_UMOUNT 8
-+#define DFL_NEED_STARTDONE 9
-+#define DFL_RECOVER 10
++#define DFL_GOT_NODEID 1
++#define DFL_MG_FINISH 2
++#define DFL_HAVE_JID 3
++#define DFL_BLOCK_LOCKS 4
++#define DFL_START_ERROR 5
++#define DFL_MOUNT 6
++#define DFL_UMOUNT 7
++#define DFL_NEED_STARTDONE 8
++#define DFL_RECOVER 9
+
+struct dlm {
+ uint32_t jid;
+ struct list_head starts;
+
+ wait_queue_head_t wait;
-+ atomic_t threads;
++ struct task_struct * thread1;
++ struct task_struct * thread2;
+ atomic_t lock_count;
+ unsigned long drop_time;
++ unsigned long shrink_time;
++
++ int drop_locks_count;
++ int drop_locks_period;
+
+ int mg_local_id;
+ int mg_last_start;
+
+ struct list_head resources;
+ struct semaphore res_lock;
++ struct list_head null_cache;
++ spinlock_t null_cache_spin;
++ uint32_t null_count;
+};
+
+struct dlm_resource {
+ dlm_lock_t * update;
+ struct list_head async_locks;
+ spinlock_t async_spin;
++ wait_queue_head_t waiters;
+};
+
+struct posix_lock {
+#define LFL_CANCEL 13
+#define LFL_UNLOCK_SYNC 14
+#define LFL_NOBAST 15
++#define LFL_HEADQUE 16
++#define LFL_UNLOCK_DELETE 17
+
+struct dlm_lock {
+ dlm_t * dlm;
+ struct list_head dlist; /* delayed */
+ struct list_head slist; /* submit */
+
++ struct dlm_lock * hold_null; /* NL lock for hold_lvb */
+ struct posix_lock * posix;
++ struct list_head null_list; /* NL lock cache for plocks */
+};
+
+#define NFL_SENT_CB 0
+void process_submit(dlm_lock_t *lp);
+int create_lp(dlm_t *dlm, struct lm_lockname *name, dlm_lock_t **lpp);
+void delete_lp(dlm_lock_t *lp);
++int dlm_add_lvb(dlm_lock_t *lp);
++void dlm_del_lvb(dlm_lock_t *lp);
+
+int lm_dlm_get_lock(lm_lockspace_t *lockspace, struct lm_lockname *name,
+ lm_lock_t **lockp);
+
+/* plock.c */
+
++int lm_dlm_plock_get(lm_lockspace_t *lockspace, struct lm_lockname *name,
++ struct file *file, struct file_lock *fl);
+int lm_dlm_plock(lm_lockspace_t *lockspace, struct lm_lockname *name,
-+ unsigned long owner, int wait, int ex, uint64_t start,
-+ uint64_t end);
-+
++ struct file *file, int cmd, struct file_lock *fl);
+int lm_dlm_punlock(lm_lockspace_t *lockspace, struct lm_lockname *name,
-+ unsigned long owner, uint64_t start, uint64_t end);
-+
-+int lm_dlm_plock_get(lm_lockspace_t *lockspace, struct lm_lockname *name,
-+ unsigned long owner, uint64_t *start, uint64_t *end,
-+ int *ex, unsigned long *rowner);
++ struct file *file, struct file_lock *fl);
++void clear_null_cache(dlm_t *dlm);
++void shrink_null_cache(dlm_t *dlm);
+
+/* main.c */
+
+
+#endif
diff -urN linux-orig/fs/gfs_locking/lock_dlm/main.c linux-patched/fs/gfs_locking/lock_dlm/main.c
---- linux-orig/fs/gfs_locking/lock_dlm/main.c 1969-12-31 18:00:00.000000000 -0600
-+++ linux-patched/fs/gfs_locking/lock_dlm/main.c 2004-08-27 12:42:19.598199208 -0500
-@@ -0,0 +1,194 @@
+--- linux-orig/fs/gfs_locking/lock_dlm/main.c 1970-01-01 07:30:00.000000000 +0730
++++ linux-patched/fs/gfs_locking/lock_dlm/main.c 2004-10-22 21:20:30.000000000 +0800
+@@ -0,0 +1,302 @@
+/******************************************************************************
+*******************************************************************************
+**
+
+#if defined(LOCK_DLM_DEBUG)
+#define LOCK_DLM_DEBUG_SIZE (4096)
-+#define MAX_DEBUG_MSG_LEN (64)
++#define MAX_DEBUG_MSG_LEN (80)
+#else
+#define LOCK_DLM_DEBUG_SIZE (0)
+#define MAX_DEBUG_MSG_LEN (0)
+#endif
++#define MAX_PROC_STRING (16)
++
++int lock_dlm_drop_count;
++int lock_dlm_drop_period;
+
+static char * debug_buf;
+static unsigned int debug_size;
+static unsigned int debug_point;
+static int debug_wrap;
+static spinlock_t debug_lock;
-+static struct proc_dir_entry * debug_proc_entry = NULL;
++static struct proc_dir_entry * proc_dir = NULL;
++static char proc_str[MAX_PROC_STRING + 1];
+
+
+void lock_dlm_debug_log(const char *fmt, ...)
+
+ n = 0;
+ /* n = snprintf(buf, size, "%s ", dlm->fsname); */
++ n = snprintf(buf, size, "%u ", current->pid);
+ size -= n;
+
+ va_start(va, fmt);
+EXPORT_SYMBOL(lock_dlm_debug_dump);
+
+#ifdef CONFIG_PROC_FS
-+int lock_dlm_debug_info(char *b, char **start, off_t offset, int length)
++static int debug_info(char *b, char **start, off_t offset, int length)
+{
+ int i, n = 0;
+
+
+ return n;
+}
++
++static int drop_count_info(char *b, char **start, off_t offset, int length)
++{
++ return sprintf(b, "%d\n", lock_dlm_drop_count);
++}
++
++static int drop_period_info(char *b, char **start, off_t offset, int length)
++{
++ return sprintf(b, "%d\n", lock_dlm_drop_period);
++}
++
++static int copy_string(const char *buffer, unsigned long count)
++{
++ int len;
++
++ if (count > MAX_PROC_STRING)
++ len = MAX_PROC_STRING;
++ else
++ len = count;
++
++ if (copy_from_user(proc_str, buffer, len))
++ return -EFAULT;
++ proc_str[len] = '\0';
++ return len;
++}
++
++static int drop_count_write(struct file *file, const char *buffer,
++ unsigned long count, void *data)
++{
++ int rv = copy_string(buffer, count);
++ if (rv < 0)
++ return rv;
++ lock_dlm_drop_count = (int) simple_strtol(proc_str, NULL, 0);
++ return rv;
++}
++
++static int drop_period_write(struct file *file, const char *buffer,
++ unsigned long count, void *data)
++{
++ int rv = copy_string(buffer, count);
++ if (rv < 0)
++ return rv;
++ lock_dlm_drop_period = (int) simple_strtol(proc_str, NULL, 0);
++ return rv;
++}
++
++static void create_proc_entries(void)
++{
++ struct proc_dir_entry *p, *debug, *drop_count, *drop_period;
++
++ debug = drop_count = drop_period = NULL;
++
++ proc_dir = proc_mkdir("cluster/lock_dlm", 0);
++ if (!proc_dir)
++ return;
++ proc_dir->owner = THIS_MODULE;
++
++ p = create_proc_entry("debug", 0444, proc_dir);
++ if (!p)
++ goto out;
++ p->get_info = debug_info;
++ p->owner = THIS_MODULE;
++ debug = p;
++
++ p = create_proc_entry("drop_count", 0666, proc_dir);
++ if (!p)
++ goto out;
++ p->owner = THIS_MODULE;
++ p->get_info = drop_count_info;
++ p->write_proc = drop_count_write;
++ drop_count = p;
++
++ p = create_proc_entry("drop_period", 0666, proc_dir);
++ if (!p)
++ goto out;
++ p->owner = THIS_MODULE;
++ p->get_info = drop_period_info;
++ p->write_proc = drop_period_write;
++ drop_period = p;
++
++ return;
++
++ out:
++ if (drop_period)
++ remove_proc_entry("drop_period", proc_dir);
++ if (drop_count)
++ remove_proc_entry("drop_count", proc_dir);
++ if (debug)
++ remove_proc_entry("debug", proc_dir);
++
++ remove_proc_entry("cluster/lock_dlm", NULL);
++ proc_dir = NULL;
++}
++
++static void remove_proc_entries(void)
++{
++ if (proc_dir) {
++ remove_proc_entry("debug", proc_dir);
++ remove_proc_entry("drop_period", proc_dir);
++ remove_proc_entry("drop_count", proc_dir);
++ remove_proc_entry("cluster/lock_dlm", NULL);
++ proc_dir = NULL;
++ }
++}
+#endif
+
+/**
+ return error;
+ }
+
++ lock_dlm_drop_count = DROP_LOCKS_COUNT;
++ lock_dlm_drop_period = DROP_LOCKS_PERIOD;
++
+#ifdef CONFIG_PROC_FS
-+ debug_proc_entry = create_proc_entry("cluster/lock_dlm_debug", S_IRUGO,
-+ NULL);
-+ if (debug_proc_entry)
-+ debug_proc_entry->get_info = &lock_dlm_debug_info;
++ create_proc_entries();
+#endif
+ debug_init();
+
+void __exit exit_lock_dlm(void)
+{
+ lm_unregister_proto(&lock_dlm_ops);
-+
+#ifdef CONFIG_PROC_FS
-+ if (debug_proc_entry)
-+ remove_proc_entry("cluster/lock_dlm_debug", NULL);
++ remove_proc_entries();
+#endif
+ debug_setup(0);
+}
+MODULE_AUTHOR("Red Hat, Inc.");
+MODULE_LICENSE("GPL");
diff -urN linux-orig/fs/gfs_locking/lock_dlm/mount.c linux-patched/fs/gfs_locking/lock_dlm/mount.c
---- linux-orig/fs/gfs_locking/lock_dlm/mount.c 1969-12-31 18:00:00.000000000 -0600
-+++ linux-patched/fs/gfs_locking/lock_dlm/mount.c 2004-08-27 12:42:19.599198976 -0500
-@@ -0,0 +1,337 @@
+--- linux-orig/fs/gfs_locking/lock_dlm/mount.c 1970-01-01 07:30:00.000000000 +0730
++++ linux-patched/fs/gfs_locking/lock_dlm/mount.c 2004-10-22 21:20:30.000000000 +0800
+@@ -0,0 +1,352 @@
+/******************************************************************************
+*******************************************************************************
+**
+#include <cluster/cnxman.h>
+#include <cluster/service.h>
+
++extern int lock_dlm_drop_count;
++extern int lock_dlm_drop_period;
++
++
+static int init_cman(dlm_t *dlm)
+{
+ int error = -1;
+
+ memset(dlm, 0, sizeof(dlm_t));
+
++ dlm->drop_locks_count = lock_dlm_drop_count;
++ dlm->drop_locks_period = lock_dlm_drop_period;
++
+ dlm->fscb = cb;
+ dlm->fsdata = fsdata;
+
+ INIT_LIST_HEAD(&dlm->submit);
+ INIT_LIST_HEAD(&dlm->starts);
+ INIT_LIST_HEAD(&dlm->resources);
++ INIT_LIST_HEAD(&dlm->null_cache);
+
+ init_waitqueue_head(&dlm->wait);
++ dlm->thread1 = NULL;
++ dlm->thread2 = NULL;
+ atomic_set(&dlm->lock_count, 0);
+ dlm->drop_time = jiffies;
++ dlm->shrink_time = jiffies;
+
+ INIT_LIST_HEAD(&dlm->mg_nodes);
+ init_MUTEX(&dlm->mg_nodes_lock);
+ init_MUTEX(&dlm->res_lock);
+
++ dlm->null_count = 0;
++ spin_lock_init(&dlm->null_cache_spin);
++
+ return dlm;
+}
+
+ release_gdlm(dlm);
+ release_fence(dlm);
+ release_cluster(dlm);
++ clear_null_cache(dlm);
+ kfree(dlm);
+}
+
+ lm_owner:THIS_MODULE,
+};
diff -urN linux-orig/fs/gfs_locking/lock_dlm/plock.c linux-patched/fs/gfs_locking/lock_dlm/plock.c
---- linux-orig/fs/gfs_locking/lock_dlm/plock.c 1969-12-31 18:00:00.000000000 -0600
-+++ linux-patched/fs/gfs_locking/lock_dlm/plock.c 2004-08-27 12:42:19.599198976 -0500
-@@ -0,0 +1,1034 @@
+--- linux-orig/fs/gfs_locking/lock_dlm/plock.c 1970-01-01 07:30:00.000000000 +0730
++++ linux-patched/fs/gfs_locking/lock_dlm/plock.c 2004-10-22 21:20:30.000000000 +0800
+@@ -0,0 +1,1238 @@
+/******************************************************************************
+*******************************************************************************
+**
+******************************************************************************/
+
+#include "lock_dlm.h"
++#include <linux/fcntl.h>
+
+#define MIN(a,b) ((a) <= (b)) ? (a) : (b)
+#define MAX(a,b) ((a) >= (b)) ? (a) : (b)
+#define NO_WAIT 0
+#define X_WAIT -1
+
-+#define EX 1
++#define EX 1
+#define NO_EX 0
+#define SH NO_EX
+
++#define HEAD 1
+
-+static int check_conflict(dlm_t *dlm, struct dlm_resource *r,
++static int local_conflict(dlm_t *dlm, struct dlm_resource *r,
+ struct lm_lockname *name, unsigned long owner,
+ uint64_t start, uint64_t end, int ex);
+
++static int global_conflict(dlm_t *dlm, struct lm_lockname *name,
++ unsigned long owner, uint64_t start, uint64_t end,
++ int ex);
++
++/* remove lru lp from end of list, null_cache_spin must be held */
++
++static dlm_lock_t *lru_null(dlm_t *dlm)
++{
++ dlm_lock_t *lp;
++
++ lp = list_entry(dlm->null_cache.next, dlm_lock_t, null_list);
++ list_del(&lp->null_list);
++ dlm->null_count--;
++
++ return lp;
++}
++
++/* It's important that the lock_dlm thread not block doing any synchronous
++ dlm operations because a recovery event (which makes sync requests) can
++ happen during this. If both lock_dlm threads do sync requests they deadlock
++ since one is required to process asts. We also break out early if there's a
++ recovery so it doesn't have to wait. */
++
++void shrink_null_cache(dlm_t *dlm)
++{
++ dlm_lock_t *lp;
++
++ while (1) {
++ spin_lock(&dlm->null_cache_spin);
++ if (dlm->null_count <= SHRINK_CACHE_COUNT ||
++ test_bit(DFL_RECOVER, &dlm->flags)) {
++ spin_unlock(&dlm->null_cache_spin);
++ break;
++ }
++
++ lp = lru_null(dlm);
++ spin_unlock(&dlm->null_cache_spin);
++
++ set_bit(LFL_UNLOCK_DELETE, &lp->flags);
++ do_dlm_unlock(lp);
++ delete_lp(lp);
++ schedule();
++ }
++}
++
++void clear_null_cache(dlm_t *dlm)
++{
++ dlm_lock_t *lp, *safe;
++
++ spin_lock(&dlm->null_cache_spin);
++ list_for_each_entry_safe(lp, safe, &dlm->null_cache, null_list) {
++ list_del(&lp->null_list);
++ dlm->null_count--;
++ delete_lp(lp);
++ }
++ spin_unlock(&dlm->null_cache_spin);
++}
++
++static void keep_null_lock(dlm_t *dlm, dlm_lock_t *lp)
++{
++ dlm_lock_t *lp2 = NULL;
++
++ spin_lock(&dlm->null_cache_spin);
++ /* add to front of list wrt list_add_tail/list_for_each */
++ list_add_tail(&lp->null_list, &dlm->null_cache);
++ dlm->null_count++;
++
++ /* help to shrink cache if too many null locks are piling up */
++ if (dlm->null_count > SHRINK_CACHE_MAX)
++ lp2 = lru_null(dlm);
++ spin_unlock(&dlm->null_cache_spin);
++
++ if (lp2) {
++ set_bit(LFL_UNLOCK_DELETE, &lp2->flags);
++ do_dlm_unlock(lp2);
++ delete_lp(lp2);
++ }
++}
++
++static dlm_lock_t *find_null_lock(dlm_t *dlm, struct lm_lockname *name)
++{
++ dlm_lock_t *lp;
++ int found = FALSE;
++
++ spin_lock(&dlm->null_cache_spin);
++ list_for_each_entry(lp, &dlm->null_cache, null_list) {
++ if (lm_name_equal(&lp->lockname, name)) {
++ list_del(&lp->null_list);
++ dlm->null_count--;
++ found = TRUE;
++ break;
++ }
++ }
++ spin_unlock(&dlm->null_cache_spin);
++
++ if (!found)
++ lp = NULL;
++ return lp;
++}
+
+static int lock_resource(struct dlm_resource *r)
+{
+ name.ln_type = LM_TYPE_PLOCK_UPDATE;
+ name.ln_number = r->name.ln_number;
+
-+ error = create_lp(r->dlm, &name, &lp);
-+ if (error)
-+ return error;
++ lp = find_null_lock(r->dlm, &name);
++ if (!lp) {
++ error = create_lp(r->dlm, &name, &lp);
++ if (error)
++ return error;
++ set_bit(LFL_NOBAST, &lp->flags);
++ set_bit(LFL_INLOCK, &lp->flags);
++ } else
++ lp->lkf = DLM_LKF_CONVERT;
+
-+ set_bit(LFL_NOBAST, &lp->flags);
-+ set_bit(LFL_INLOCK, &lp->flags);
+ lp->req = DLM_LOCK_EX;
+ error = do_dlm_lock_sync(lp, NULL);
+ if (error) {
+
+static void unlock_resource(struct dlm_resource *r)
+{
-+ do_dlm_unlock_sync(r->update);
-+ delete_lp(r->update);
++ dlm_lock_t *lp = r->update;
++
++ set_bit(LFL_NOBAST, &lp->flags);
++ set_bit(LFL_INLOCK, &lp->flags);
++ lp->req = DLM_LOCK_NL;
++ lp->lkf = DLM_LKF_CONVERT;
++ do_dlm_lock_sync(lp, NULL);
++ keep_null_lock(r->dlm, lp);
++ r->update = NULL;
+}
+
+static struct dlm_resource *search_resource(dlm_t *dlm, struct lm_lockname *name)
+ INIT_LIST_HEAD(&r->async_locks);
+ init_MUTEX(&r->sema);
+ spin_lock_init(&r->async_spin);
++ init_waitqueue_head(&r->waiters);
+
+ down(&dlm->res_lock);
+ r2 = search_resource(dlm, name);
+
+static unsigned int make_flags_posix(dlm_lock_t *lp, int wait)
+{
-+ unsigned int lkf = 0;
++ unsigned int lkf = DLM_LKF_NOORDER;
++
++ if (test_and_clear_bit(LFL_HEADQUE, &lp->flags))
++ lkf |= DLM_LKF_HEADQUE;
+
+ if (wait == NO_WAIT || wait == X_WAIT)
+ lkf |= DLM_LKF_NOQUEUE;
+
-+ if (lp->lksb.sb_lkid != 0) {
++ if (lp->lksb.sb_lkid != 0)
+ lkf |= DLM_LKF_CONVERT;
-+ if (wait == WAIT)
-+ lkf |= DLM_LKF_EXPEDITE;
-+ }
++
+ return lkf;
+}
+
+
+static void request_lock(dlm_lock_t *lp, int wait)
+{
-+ log_debug("req %x,%"PRIx64" %s %"PRIx64"-%"PRIx64" %u w %u",
-+ lp->lockname.ln_type, lp->lockname.ln_number,
-+ lp->posix->ex ? "ex" : "sh", lp->posix->start,
-+ lp->posix->end, current->pid, wait);
-+
+ set_bit(LFL_INLOCK, &lp->flags);
+ lp->req = lp->posix->ex ? DLM_LOCK_EX : DLM_LOCK_PR;
+ lp->lkf = make_flags_posix(lp, wait);
+
++ log_debug("req %x,%"PRIx64" %s %"PRIx64"-%"PRIx64" lkf %x wait %u",
++ lp->lockname.ln_type, lp->lockname.ln_number,
++ lp->posix->ex ? "ex" : "sh", lp->posix->start,
++ lp->posix->end, lp->lkf, wait);
++
+ do_range_lock(lp);
+}
+
+}
+
+static void add_lock(struct dlm_resource *r, unsigned long owner, int wait,
-+ int ex, uint64_t start, uint64_t end)
++ int ex, uint64_t start, uint64_t end, int head)
+{
+ dlm_lock_t *lp;
+ int error;
+
+ error = create_lock(r, owner, ex, start, end, &lp);
+ DLM_ASSERT(!error, );
++ if (head == HEAD)
++ set_bit(LFL_HEADQUE, &lp->flags);
+
+ hold_resource(r);
+ update_lock(lp, wait);
+{
+ struct dlm_resource *r = lp->posix->resource;
+
-+ log_debug("remove %x,%"PRIx64" %u",
-+ r->name.ln_type, r->name.ln_number, current->pid);
++ log_debug("remove %x,%"PRIx64"", r->name.ln_type, r->name.ln_number);
+
+ do_dlm_unlock_sync(lp);
+ put_lock(lp);
+ po->ex = ex;
+
+ if (ex) {
-+ add_lock(r, owner, X_WAIT, SH, start2, end2);
++ add_lock(r, owner, X_WAIT, SH, start2, end2, HEAD);
+ update_lock(po->lp, wait);
+ } else {
-+ add_lock(r, owner, WAIT, EX, start2, end2);
++ add_lock(r, owner, WAIT, EX, start2, end2, HEAD);
+ update_lock(po->lp, X_WAIT);
+ }
+ return 0;
+ uint64_t end)
+{
+ if (ex) {
-+ add_lock(r, owner, X_WAIT, SH, po->start, start-1);
-+ add_lock(r, owner, X_WAIT, SH, end+1, po->end);
++ add_lock(r, owner, X_WAIT, SH, po->start, start-1, HEAD);
++ add_lock(r, owner, X_WAIT, SH, end+1, po->end, HEAD);
+
+ po->start = start;
+ po->end = end;
+
+ update_lock(po->lp, wait);
+ } else {
-+ add_lock(r, owner, WAIT, EX, po->start, start-1);
-+ add_lock(r, owner, WAIT, EX, end+1, po->end);
++ add_lock(r, owner, WAIT, EX, po->start, start-1, HEAD);
++ add_lock(r, owner, WAIT, EX, end+1, po->end, HEAD);
+
+ po->start = start;
+ po->end = end;
+ if (next_exist(exist, &exist_start, &exist_end))
+ break;
+ if (start < exist_start)
-+ add_lock(r, owner, wait, ex, start, exist_start-1);
++ add_lock(r, owner, wait, ex, start, exist_start-1, 0);
+ start = exist_end + 1;
+ }
+
+ /* cover gap after last existing lock */
+ if (exist_end < end)
-+ add_lock(r, owner, wait, ex, exist_end+1, end);
++ add_lock(r, owner, wait, ex, exist_end+1, end, 0);
+
+ return 0;
+}
+ /* 1. add a shared lock in the non-overlap range
+ 2. convert RE to overlap range and requested mode */
+
-+ add_lock(r, owner, X_WAIT, SH, frag_start, frag_end);
++ add_lock(r, owner, X_WAIT, SH, frag_start, frag_end, HEAD);
+
+ opo->start = over_start;
+ opo->end = over_end;
+ 2. convert RE to non-overlap range
+ 3. wait for shared lock to complete */
+
-+ add_lock(r, owner, WAIT, SH, over_start, over_end);
++ add_lock(r, owner, WAIT, SH, over_start, over_end, HEAD);
+
+ opo->start = frag_start;
+ opo->end = frag_end;
+ else if (!list_empty(&exist))
+ error = lock_case3(&exist, r, owner, wait, ex, start, end);
+ else
-+ add_lock(r, owner, wait, ex, start, end);
++ add_lock(r, owner, wait, ex, start, end, 0);
+
+ out:
+ return error;
+ * fragment, and add a new lock for back fragment */
+
+ add_lock(r, owner, po->ex ? WAIT : X_WAIT, po->ex,
-+ end+1, po->end);
++ end+1, po->end, HEAD);
+
+ po->end = start - 1;
+ update_lock(po->lp, X_WAIT);
+ return error;
+}
+
++static int wait_local(struct dlm_resource *r, unsigned long owner,
++ uint64_t start, uint64_t end, int ex)
++{
++ DECLARE_WAITQUEUE(wait, current);
++ int error = 0;
++
++ add_wait_queue(&r->waiters, &wait);
++
++ for (;;) {
++ set_current_state(TASK_INTERRUPTIBLE);
++
++ if (!local_conflict(r->dlm, r, &r->name, owner, start, end, ex))
++ break;
++
++ if (signal_pending(current)) {
++ up(&r->sema);
++ error = -EINTR;
++ break;
++ }
++
++ up(&r->sema);
++ schedule();
++ down(&r->sema);
++ }
++
++ remove_wait_queue(&r->waiters, &wait);
++ set_current_state(TASK_RUNNING);
++ return error;
++}
++
+int lm_dlm_plock(lm_lockspace_t *lockspace, struct lm_lockname *name,
-+ unsigned long owner, int wait, int ex, uint64_t start,
-+ uint64_t end)
++ struct file *file, int cmd, struct file_lock *fl)
+{
+ dlm_t *dlm = (dlm_t *) lockspace;
++ unsigned long owner = (unsigned long) fl->fl_owner;
++ int wait = IS_SETLKW(cmd);
++ int ex = (fl->fl_type == F_WRLCK);
++ uint64_t start = fl->fl_start, end = fl->fl_end;
+ struct dlm_resource *r;
+ int error;
+
-+ log_debug("en plock %u %x,%"PRIx64"", current->pid,
-+ name->ln_type, name->ln_number);
++ log_debug("en plock %x,%"PRIx64"", name->ln_type, name->ln_number);
+
+ error = get_resource(dlm, name, CREATE, &r);
+ if (error)
+ goto out;
+
-+#if 0
-+ /* Wait, without holding any locks, until this plock request is not
-+ blocked by plocks of *other* *local* processes. Then, none of the
-+ dlm requests below will wait on a lock from a local process.
-+
-+ This should not be necessary since we wait for completion after
-+ up(). This means a local process p1 can unlock lkb X while local p2
-+ is waiting for X (in wait_async_list). */
-+ error = wait_local(r, owner, wait, ex, start, end);
++ error = down_interruptible(&r->sema);
+ if (error)
+ goto out_put;
-+#endif
+
-+ down(&r->sema);
++ /* We wait here until we aren't blocked by any other local locks.
++ Then we can request the lock from the dlm, request the vfs lock
++ (without it blocking) and release r->sema before waiting on the dlm
++ request. [We can't release the semaphore between the dlm and vfs
++ requests, but we must release it before waiting for the ast.] */
++
++ error = local_conflict(dlm, r, name, owner, start, end, ex);
++ if (error) {
++ if (!wait) {
++ error = -EAGAIN;
++ goto out_up;
++ }
++ error = wait_local(r, owner, start, end, ex);
++ if (error)
++ goto out_put;
++ /* wait_local returns with r->sema held if no error */
++ }
++
+ error = lock_resource(r);
+ if (error)
+ goto out_up;
+
-+ /* check_conflict() checks for conflicts with plocks from other local
-+ processes and other nodes. */
-+
-+ if (!wait && check_conflict(dlm, r, name, owner, start, end, ex)) {
-+ error = -1;
++ if (!wait && global_conflict(dlm, name, owner, start, end, ex)) {
++ error = -EAGAIN;
+ unlock_resource(r);
+ goto out_up;
+ }
+ /* If NO_WAIT all requests should return immediately.
+ If WAIT all requests go on r->async_locks which we wait on in
+ wait_async_locks(). This means DLM should not return -EAGAIN and we
-+ should never block waiting for a plock to be released (by a local or
-+ remote process) until we call wait_async_list(). */
++ should never block waiting for a plock to be released until we call
++ wait_async_list(). */
+
+ error = plock_internal(r, owner, wait, ex, start, end);
+ unlock_resource(r);
+
-+ /* wait_async_list() must follow the up() because we must be able
-+ to punlock a range on this resource while there's a blocked plock
-+ request to prevent deadlock between nodes (and processes). */
++ if (!error) {
++ /* this won't block due to wait_local above and not yet
++ having released r->sema */
++ if (posix_lock_file_wait(file, fl) < 0)
++ log_error("lm_dlm_plock: vfs lock error %x,%"PRIx64"",
++ name->ln_type, name->ln_number);
++ }
+
+ out_up:
+ up(&r->sema);
+ wait_async_list(r, owner);
++ wake_up_all(&r->waiters);
++ out_put:
+ put_resource(r);
+ out:
-+ log_debug("ex plock %u error %d", current->pid, error);
++ log_debug("ex plock %d", error);
+ return error;
+}
+
+int lm_dlm_punlock(lm_lockspace_t *lockspace, struct lm_lockname *name,
-+ unsigned long owner, uint64_t start, uint64_t end)
++ struct file *file, struct file_lock *fl)
+{
+ dlm_t *dlm = (dlm_t *) lockspace;
++ unsigned long owner = (unsigned long) fl->fl_owner;
++ uint64_t start = fl->fl_start, end = fl->fl_end;
+ struct dlm_resource *r;
+ int error;
+
-+ log_debug("en punlock %u %x,%"PRIx64"", current->pid,
-+ name->ln_type, name->ln_number);
++ log_debug("en punlock %x,%"PRIx64"", name->ln_type, name->ln_number);
+
+ error = get_resource(dlm, name, NO_CREATE, &r);
+ if (error)
+ goto out;
+
+ down(&r->sema);
++
++ if (posix_lock_file_wait(file, fl) < 0)
++ log_error("lm_dlm_punlock: vfs unlock error %x,%"PRIx64"",
++ name->ln_type, name->ln_number);
++
+ error = lock_resource(r);
+ if (error)
+ goto out_up;
+ out_up:
+ up(&r->sema);
+ wait_async_list(r, owner);
++ wake_up_all(&r->waiters);
+ put_resource(r);
+ out:
-+ log_debug("ex punlock %u error %d", current->pid, error);
++ log_debug("ex punlock %d", error);
+ return error;
+}
+
+ complete(&lp->uast_wait);
+}
+
-+static int get_conflict_global(dlm_t *dlm, struct lm_lockname *name,
++static int get_global_conflict(dlm_t *dlm, struct lm_lockname *name,
+ unsigned long owner, uint64_t *start,
+ uint64_t *end, int *ex, unsigned long *rowner)
+{
+ struct dlm_lockinfo *lki;
+ int query = 0, s, error;
+
-+ /* acquire a null lock on which base the query */
++ /* acquire a null lock on which to base the query */
+
-+ error = create_lp(dlm, name, &lp);
-+ if (error)
-+ goto ret;
++ lp = find_null_lock(dlm, name);
++ if (!lp) {
++ error = create_lp(dlm, name, &lp);
++ if (error)
++ goto ret;
+
-+ lp->req = DLM_LOCK_NL;
-+ set_bit(LFL_INLOCK, &lp->flags);
-+ do_dlm_lock_sync(lp, NULL);
++ lp->req = DLM_LOCK_NL;
++ lp->lkf = DLM_LKF_EXPEDITE;
++ set_bit(LFL_INLOCK, &lp->flags);
++ do_dlm_lock_sync(lp, NULL);
++ }
+
+ /* do query, repeating if insufficient space */
+
+
+ /* check query results for blocking locks */
+
++ error = 0;
++
+ for (s = 0; s < qinfo.gqi_lockcount; s++) {
+
+ lki = &qinfo.gqi_lockinfo[s];
+
+ kfree(qinfo.gqi_lockinfo);
+
++ log_debug("global conflict %d %"PRIx64"-%"PRIx64" ex %d own %lu",
++ error, *start, *end, *ex, *rowner);
+ out:
-+ do_dlm_unlock_sync(lp);
-+ kfree(lp);
++ keep_null_lock(dlm, lp);
+ ret:
+ return error;
+}
+
-+static int get_conflict_local(dlm_t *dlm, struct dlm_resource *r,
++static int get_local_conflict(dlm_t *dlm, struct dlm_resource *r,
+ struct lm_lockname *name, unsigned long owner,
+ uint64_t *start, uint64_t *end, int *ex,
+ unsigned long *rowner)
+ return found;
+}
+
-+int lm_dlm_plock_get(lm_lockspace_t *lockspace, struct lm_lockname *name,
-+ unsigned long owner, uint64_t *start, uint64_t *end,
-+ int *ex, unsigned long *rowner)
++static int do_plock_get(dlm_t *dlm, struct lm_lockname *name,
++ unsigned long owner, uint64_t *start, uint64_t *end,
++ int *ex, unsigned long *rowner)
+{
-+ dlm_t *dlm = (dlm_t *) lockspace;
+ struct dlm_resource *r;
+ int error, found;
+
+ error = get_resource(dlm, name, NO_CREATE, &r);
+ if (!error) {
-+ down(&r->sema);
-+ found = get_conflict_local(dlm, r, name, owner, start, end, ex,
++ error = down_interruptible(&r->sema);
++ if (error) {
++ put_resource(r);
++ goto out;
++ }
++
++ found = get_local_conflict(dlm, r, name, owner, start, end, ex,
+ rowner);
+ up(&r->sema);
+ put_resource(r);
+ goto out;
+ }
+
-+ error = get_conflict_global(dlm, name, owner, start, end, ex, rowner);
++ error = get_global_conflict(dlm, name, owner, start, end, ex, rowner);
++ if (error == -EAGAIN) {
++ log_debug("pl get global conflict %"PRIx64"-%"PRIx64" %d %lu",
++ *start, *end, *ex, *rowner);
++ error = 1;
++ }
+ out:
+ return error;
+}
+
-+static int check_conflict(dlm_t *dlm, struct dlm_resource *r,
++static int local_conflict(dlm_t *dlm, struct dlm_resource *r,
+ struct lm_lockname *name, unsigned long owner,
+ uint64_t start, uint64_t end, int ex)
+{
+ uint64_t get_start = start, get_end = end;
+ unsigned long get_owner = 0;
-+ int get_ex = ex, error;
++ int get_ex = ex;
+
-+ error = get_conflict_local(dlm, r, name, owner,
-+ &get_start, &get_end, &get_ex, &get_owner);
-+ if (error)
-+ goto out;
++ return get_local_conflict(dlm, r, name, owner,
++ &get_start, &get_end, &get_ex, &get_owner);
++}
+
-+ error = get_conflict_global(dlm, name, owner,
-+ &get_start, &get_end, &get_ex, &get_owner);
-+ out:
-+ log_debug("check_conflict %d %"PRIx64"-%"PRIx64" %"PRIx64"-%"PRIx64" "
-+ "ex %d %d own %lu %lu pid %u", error, start, end,
-+ get_start, get_end, ex, get_ex, owner, get_owner,
-+ current->pid);
-+ return error;
++static int global_conflict(dlm_t *dlm, struct lm_lockname *name,
++ unsigned long owner, uint64_t start, uint64_t end,
++ int ex)
++{
++ uint64_t get_start = start, get_end = end;
++ unsigned long get_owner = 0;
++ int get_ex = ex;
++
++ return get_global_conflict(dlm, name, owner,
++ &get_start, &get_end, &get_ex, &get_owner);
+}
+
++int lm_dlm_plock_get(lm_lockspace_t *lockspace, struct lm_lockname *name,
++ struct file *file, struct file_lock *fl)
++{
++ dlm_t *dlm = (dlm_t *) lockspace;
++ unsigned long pid;
++ int ex, error;
++
++ ex = (fl->fl_type == F_WRLCK) ? 1 : 0;
++
++ error = do_plock_get(dlm, name, fl->fl_pid, &fl->fl_start,
++ &fl->fl_end, &ex, &pid);
++ if (error < 0)
++ return error;
++ if (error == 0)
++ fl->fl_type = F_UNLCK;
++ else {
++ fl->fl_type = (ex) ? F_WRLCK : F_RDLCK;
++ fl->fl_pid = pid;
++ }
++
++ return 0;
++}
diff -urN linux-orig/fs/gfs_locking/lock_dlm/thread.c linux-patched/fs/gfs_locking/lock_dlm/thread.c
---- linux-orig/fs/gfs_locking/lock_dlm/thread.c 1969-12-31 18:00:00.000000000 -0600
-+++ linux-patched/fs/gfs_locking/lock_dlm/thread.c 2004-08-27 12:42:19.599198976 -0500
-@@ -0,0 +1,400 @@
+--- linux-orig/fs/gfs_locking/lock_dlm/thread.c 1970-01-01 07:30:00.000000000 +0730
++++ linux-patched/fs/gfs_locking/lock_dlm/thread.c 2004-10-22 21:20:30.000000000 +0800
+@@ -0,0 +1,406 @@
+/******************************************************************************
+*******************************************************************************
+**
+ atomic_dec(&dlm->lock_count);
+ }
+
-+ if (!test_and_clear_bit(LFL_UNLOCK_SYNC, &lp->flags))
-+ goto out;
++ if (test_and_clear_bit(LFL_UNLOCK_SYNC, &lp->flags)) {
++ complete(&lp->uast_wait);
++ return;
++ }
+
-+ complete(&lp->uast_wait);
-+ return;
++ if (test_and_clear_bit(LFL_UNLOCK_DELETE, &lp->flags)) {
++ delete_lp(lp);
++ return;
++ }
++
++ goto out;
+ }
+
+ /*
+ lp->req = lp->prev_req;
+ lp->prev_req = DLM_LOCK_IV;
+ lp->lkf &= ~DLM_LKF_CONVDEADLK;
-+ if (!(lp->lkf & DLM_LKF_EXPEDITE))
-+ lp->lkf |= DLM_LKF_QUECVT;
+
+ set_bit(LFL_NOCACHE, &lp->flags);
+
+ lp->req = DLM_LOCK_NL;
+ lp->lkf |= DLM_LKF_CONVERT;
+ lp->lkf &= ~DLM_LKF_CONVDEADLK;
-+ lp->lkf &= ~DLM_LKF_QUECVT;
+
+ set_bit(LFL_REREQUEST, &lp->flags);
+ queue_submit(lp);
+ * told it cannot cache data for this lock.
+ */
+
-+ if (lp->lksb.sb_flags == DLM_SBF_DEMOTED)
++ if (lp->lksb.sb_flags & DLM_SBF_DEMOTED)
+ set_bit(LFL_NOCACHE, &lp->flags);
+
+ out:
+ dlm_t *dlm = (dlm_t *) data;
+ dlm_lock_t *lp = NULL;
+ dlm_start_t *ds = NULL;
-+ uint8_t complete, blocking, submit, start, finish, drop;
++ uint8_t complete, blocking, submit, start, finish, drop, shrink;
+ DECLARE_WAITQUEUE(wait, current);
+
-+ daemonize("lock_dlm");
-+ atomic_inc(&dlm->threads);
-+
-+ do {
-+ current->state = TASK_INTERRUPTIBLE;
++ while (!kthread_should_stop()) {
++ set_current_state(TASK_INTERRUPTIBLE);
+ add_wait_queue(&dlm->wait, &wait);
+ if (no_work(dlm))
+ schedule();
+ remove_wait_queue(&dlm->wait, &wait);
-+ current->state = TASK_RUNNING;
++ set_current_state(TASK_RUNNING);
+
-+ complete = blocking = submit = start = finish = drop = 0;
++ complete = blocking = submit = start = finish = 0;
++ drop = shrink = 0;
+
+ spin_lock(&dlm->async_lock);
+
+ finish = 1;
+ }
+
-+ if (check_timeout(dlm->drop_time, DROP_LOCKS_TIME)) {
-+ dlm->drop_time = jiffies;
-+ if (atomic_read(&dlm->lock_count) >= DROP_LOCKS_COUNT)
-+ drop = 1;
++ /* Don't get busy doing this stuff during recovery. */
++ if (!test_bit(DFL_RECOVER, &dlm->flags)) {
++ if (check_timeout(dlm->drop_time,
++ dlm->drop_locks_period)) {
++ dlm->drop_time = jiffies;
++ if (atomic_read(&dlm->lock_count) >=
++ dlm->drop_locks_count)
++ drop = 1;
++ }
++
++ if (check_timeout(dlm->shrink_time, SHRINK_CACHE_TIME)){
++ dlm->shrink_time = jiffies;
++ shrink = 1;
++ }
+ }
+ spin_unlock(&dlm->async_lock);
+
+
+ if (drop)
+ dlm->fscb(dlm->fsdata, LM_CB_DROPLOCKS, NULL);
++ if (shrink)
++ shrink_null_cache(dlm);
+
+ schedule();
+ }
-+ while (!test_bit(DFL_THREAD_STOP, &dlm->flags));
+
-+ atomic_dec(&dlm->threads);
+ return 0;
+}
+
+ * Returns: 0 on success, -EXXX on failure
+ */
+
-+int init_async_thread(dlm_t * dlm)
++int init_async_thread(dlm_t *dlm)
+{
++ struct task_struct *p;
+ int error;
+
-+ clear_bit(DFL_THREAD_STOP, &dlm->flags);
-+ atomic_set(&dlm->threads, 0);
-+
-+ error = kernel_thread(dlm_async, dlm, 0);
-+ if (error < 0)
-+ goto out;
-+
-+ error = kernel_thread(dlm_async, dlm, 0);
-+ if (error < 0) {
-+ release_async_thread(dlm);
-+ goto out;
++ p = kthread_run(dlm_async, dlm, "lock_dlm1");
++ error = IS_ERR(p);
++ if (error) {
++ log_all("can't start lock_dlm1 daemon %d", error);
++ return error;
+ }
++ dlm->thread1 = p;
+
-+ while (atomic_read(&dlm->threads) != 2)
-+ schedule();
-+ error = 0;
++ p = kthread_run(dlm_async, dlm, "lock_dlm2");
++ error = IS_ERR(p);
++ if (error) {
++ log_all("can't start lock_dlm2 daemon %d", error);
++ kthread_stop(dlm->thread1);
++ return error;
++ }
++ dlm->thread2 = p;
+
-+ out:
-+ if (error)
-+ printk("lock_dlm: can't start async thread %d\n", error);
-+ return error;
++ return 0;
+}
+
+/**
+ *
+ */
+
-+void release_async_thread(dlm_t * dlm)
++void release_async_thread(dlm_t *dlm)
+{
-+ set_bit(DFL_THREAD_STOP, &dlm->flags);
-+ while (atomic_read(&dlm->threads)) {
-+ wake_up(&dlm->wait);
-+ schedule();
-+ }
++ kthread_stop(dlm->thread1);
++ kthread_stop(dlm->thread2);
+}
-diff -urN linux-2.6.9-rc1-mm3/fs/gfs_locking/lock_gulm/gio_wiretypes.h linux/fs/gfs_locking/lock_gulm/gio_wiretypes.h
---- linux-2.6.9-rc1-mm3/fs/gfs_locking/lock_gulm/gio_wiretypes.h 1969-12-31 18:00:00.000000000 -0600
-+++ linux/fs/gfs_locking/lock_gulm/gio_wiretypes.h 2004-09-07 16:17:31.772505390 -0500
-@@ -0,0 +1,404 @@
+diff -urN linux-orig/fs/gfs_locking/lock_gulm/gio_wiretypes.h linux-patched/fs/gfs_locking/lock_gulm/gio_wiretypes.h
+--- linux-orig/fs/gfs_locking/lock_gulm/gio_wiretypes.h 1969-12-31 18:00:00.000000000 -0600
++++ linux-patched/fs/gfs_locking/lock_gulm/gio_wiretypes.h 2004-10-22 11:54:55.000000000 -0500
+@@ -0,0 +1,465 @@
+/******************************************************************************
+*******************************************************************************
+**
+ * the wires.
+ * If I was really cute, this would be effectivily a checksum of this file.
+ */
-+#define GIO_WIREPROT_VERS (0x67000010)
++#define GIO_WIREPROT_VERS (0x67000013)
+
+/*****************Error codes.
+ * everyone uses these same error codes.
+#define gio_Err_BadStateChg (1008)
+#define gio_Err_MemoryIssues (1009)
+
-+#define gio_Err_PushQu (1010) /* client should never see this one */
++#define gio_Err_PushQu (1010) /* client should never see this one */
+#define gio_Err_TryFailed (1011)
+#define gio_Err_AlreadyPend (1013)
+#define gio_Err_Canceled (1015)
+ * uint32: opcode that this is in reply to. (can be zeros)
+ * uint32: error code
+ */
-+#define gulm_err_reply (0x67455252) /* gERR */
++#define gulm_err_reply (0x67455252) /* gERR */
+
-+#define gulm_nop (0x674e4f50) /* gNOP */
++#define gulm_nop (0x674e4f50) /* gNOP */
+
+/********************* Core *****************/
+/*
+ * Core state changes:
+ * uint32: gCSC
+ * uint8: state (slave, pending, arbitrating, master)
++ * uint8: quorate (true/false)
+ * If state == Slave, then the next two will follow.
+ * IPv6: MasterIP
+ * string: MasterName
+ *
++ * Quorum Change:
++ * uint32: gCQC
++ * uint8: quorate (true/false)
++ *
+ * Core shutdown req:
+ * uint32: gCSD
+ *
+ * uint32: gCSP
+ *
+ */
-+#define gulm_core_login_req (0x67434c00) /* gCL0 */
-+#define gulm_core_login_rpl (0x67434c01) /* gCL1 */
-+#define gulm_core_logout_req (0x67434c02) /* gCL2 */
-+#define gulm_core_logout_rpl (0x67434c03) /* gCL3 */
-+#define gulm_core_reslgn_req (0x67434c04) /* gCL4 */
-+#define gulm_core_beat_req (0x67434200) /* gCB0 */
-+#define gulm_core_beat_rpl (0x67434201) /* gCB1 */
-+#define gulm_core_mbr_req (0x67434d41) /* gCMA */
-+#define gulm_core_mbr_updt (0x67434d55) /* gCMU */
-+#define gulm_core_mbr_lstreq (0x67434d6c) /* gCMl */
-+#define gulm_core_mbr_lstrpl (0x67434d4c) /* gCML */
-+#define gulm_core_mbr_force (0x67434645) /* gCFE */
-+#define gulm_core_res_req (0x67435200) /* gCR0 */
-+#define gulm_core_res_list (0x67435201) /* gCR1 */
-+#define gulm_core_state_req (0x67435352) /* gCSR */
-+#define gulm_core_state_chgs (0x67435343) /* gCSC */
-+#define gulm_core_shutdown (0x67435344) /* gCSD */
-+#define gulm_core_forcepend (0x67435350) /* gCSP */
++#define gulm_core_login_req (0x67434c00) /* gCL0 */
++#define gulm_core_login_rpl (0x67434c01) /* gCL1 */
++#define gulm_core_logout_req (0x67434c02) /* gCL2 */
++#define gulm_core_logout_rpl (0x67434c03) /* gCL3 */
++#define gulm_core_reslgn_req (0x67434c04) /* gCL4 */
++#define gulm_core_beat_req (0x67434200) /* gCB0 */
++#define gulm_core_beat_rpl (0x67434201) /* gCB1 */
++#define gulm_core_mbr_req (0x67434d41) /* gCMA */
++#define gulm_core_mbr_updt (0x67434d55) /* gCMU */
++#define gulm_core_mbr_lstreq (0x67434d6c) /* gCMl */
++#define gulm_core_mbr_lstrpl (0x67434d4c) /* gCML */
++#define gulm_core_mbr_force (0x67434645) /* gCFE */
++#define gulm_core_res_req (0x67435200) /* gCR0 */
++#define gulm_core_res_list (0x67435201) /* gCR1 */
++#define gulm_core_state_req (0x67435352) /* gCSR */
++#define gulm_core_state_chgs (0x67435343) /* gCSC */
++#define gulm_core_quorm_chgs	(0x67435143)	/* gCQC */
++#define gulm_core_shutdown (0x67435344) /* gCSD */
++#define gulm_core_forcepend (0x67435350) /* gCSP */
+
+/* in the st field */
+#define gio_Mbr_Logged_in (0x05)
+ * uint32: poller idx
+ * list stop:
+ */
-+#define gulm_info_stats_req (0x67495300) /* gIS0 */
-+#define gulm_info_stats_rpl (0x67495301) /* gIS1 */
-+#define gulm_info_set_verbosity (0x67495600) /* gIV0 */
-+#define gulm_socket_close (0x67534300) /* gSC0 */
-+#define gulm_info_slave_list_req (0x67494c00) /* gIL0 */
-+#define gulm_info_slave_list_rpl (0x67494c01) /* gIL1 */
++#define gulm_info_stats_req (0x67495300) /* gIS0 */
++#define gulm_info_stats_rpl (0x67495301) /* gIS1 */
++#define gulm_info_set_verbosity (0x67495600) /* gIV0 */
++#define gulm_socket_close (0x67534300) /* gSC0 */
++#define gulm_info_slave_list_req (0x67494c00) /* gIL0 */
++#define gulm_info_slave_list_rpl (0x67494c01) /* gIL1 */
+
+/********************* Lock Traffic *****************
+ * All lock traffic.
+ * uint8: Slave/Master
+ * xdr of current lock state if no errors and master sending reply
+ * and you're a slave.
++ * uh, i think i assume that it is only four bytes in some places.
++ * Need to look into this...
+ *
+ * logout req:
+ * uint32: gLL2
+ * lock req:
+ * uint32: gLR0
+ * raw: key
++ * uint64: sub id
++ * uint64: start
++ * uint64: stop
+ * uint8: state
+ * uint32: flags
+ * raw: lvb -- Only exists if hasLVB flag is true.
+ * lock rpl:
+ * uint32: gLR1
+ * raw: key
++ * uint64: sub id
++ * uint64: start
++ * uint64: stop
+ * uint8: state
+ * uint32: flags
+ * uint32: error code
+ * lock state update:
+ * uint32: gLRU
+ * string: node name
++ * uint64: sub id
++ * uint64: start
++ * uint64: stop
+ * raw: key
+ * uint8: state
+ * uint32: flags
+ * Action req:
+ * uint32: gLA0
+ * raw: key
++ * uint64: sub id
+ * uint8: action
+ * raw: lvb -- Only exists if action is SyncLVB
+ * Action Rpl:
+ * uint32: gLA1
+ * raw: key
++ * uint64: sub id
+ * uint8: action
+ * uint32: error code
+ *
+ * Action update:
+ * uint32: gLAU
+ * string: node name
++ * uint64: sub id
+ * raw: key
+ * uint8: action
+ * raw: lvb -- Only exists if action is SyncLVB
+ * uint32: gLUR
+ * raw: key
+ *
++ * Query Lock request:
++ * uint32: gLQ0
++ * raw: key
++ * uint64: subid
++ * uint64: start
++ * uint64: stop
++ * uint8: state
++ *
++ * Query Lock Reply:
++ * uint32: gLQ1
++ * raw: key
++ * uint64: subid
++ * uint64: start
++ * uint64: stop
++ * uint8: state
++ * uint32: error
++ * list start mark
++ * string: node
++ * uint64: subid
++ * uint64: start
++ * uint64: stop
++ * uint8: state
++ * list stop mark
++ *
+ * Drop lock Callback:
+ * uint32: gLC0
+ * raw: key
++ * uint64: subid
+ * uint8: state
+ *
+ * Drop all locks callback: This is the highwater locks thing
+ *
+ * Drop expired locks:
+ * uint32: gLEO
-+ * string: node name if NULL, then drap all exp for mask.
++ * string: node name if NULL, then drop all exp for mask.
+ * raw: keymask if keymask & key == key, then dropexp on this lock.
+ *
+ * Lock list req:
+ * list start mark
+ * uint8: key length
+ * raw: key
-+ * uint8: state
+ * uint8: lvb length
+ * if lvb length > 0, raw: LVB
+ * uint32: Holder count
+ * list start mark
+ * string: holders
++ * uint64: subid
++ * uint8: state
++ * uint64: start
++ * uint64: stop
+ * list stop mark
+ * uint32: LVB holder count
+ * list start mark
+ * string: LVB Holders
++ * uint64: subid
+ * list stop mark
+ * uint32: Expired holder count
+ * list start mark
+ * string: ExpHolders
++ * uint64: subid
+ * list stop mark
+ * list stop mark
+ *
+ */
-+#define gulm_lock_login_req (0x674C4C00) /* gLL0 */
-+#define gulm_lock_login_rpl (0x674C4C01) /* gLL1 */
-+#define gulm_lock_logout_req (0x674C4C02) /* gLL2 */
-+#define gulm_lock_logout_rpl (0x674C4C03) /* gLL3 */
-+#define gulm_lock_sel_lckspc (0x674C5300) /* gLS0 */
-+#define gulm_lock_state_req (0x674C5200) /* gLR0 */
-+#define gulm_lock_state_rpl (0x674C5201) /* gLR1 */
-+#define gulm_lock_state_updt (0x674C5255) /* gLRU */
-+#define gulm_lock_action_req (0x674C4100) /* gLA0 */
-+#define gulm_lock_action_rpl (0x674C4101) /* gLA1 */
-+#define gulm_lock_action_updt (0x674C4155) /* gLAU */
-+#define gulm_lock_update_rpl (0x674c5552) /* gLUR */
-+#define gulm_lock_cb_state (0x674C4300) /* gLC0 */
-+#define gulm_lock_cb_dropall (0x674C4302) /* gLC2 */
-+#define gulm_lock_drop_exp (0x674C454F) /* gLEO */
-+#define gulm_lock_dump_req (0x674c4400) /* gLD0 */
-+#define gulm_lock_dump_rpl (0x674c4401) /* gLD1 */
-+#define gulm_lock_rerunqueues (0x674c5152) /* gLQR */
++#define gulm_lock_login_req (0x674C4C00) /* gLL0 */
++#define gulm_lock_login_rpl (0x674C4C01) /* gLL1 */
++#define gulm_lock_logout_req (0x674C4C02) /* gLL2 */
++#define gulm_lock_logout_rpl (0x674C4C03) /* gLL3 */
++#define gulm_lock_sel_lckspc (0x674C5300) /* gLS0 */
++#define gulm_lock_state_req (0x674C5200) /* gLR0 */
++#define gulm_lock_state_rpl (0x674C5201) /* gLR1 */
++#define gulm_lock_state_updt (0x674C5255) /* gLRU */
++#define gulm_lock_action_req (0x674C4100) /* gLA0 */
++#define gulm_lock_action_rpl (0x674C4101) /* gLA1 */
++#define gulm_lock_action_updt (0x674C4155) /* gLAU */
++#define gulm_lock_update_rpl (0x674c5552) /* gLUR */
++#define gulm_lock_query_req (0x674c5100) /* gLQ0 */
++#define gulm_lock_query_rpl (0x674c5101) /* gLQ1 */
++#define gulm_lock_cb_state (0x674C4300) /* gLC0 */
++#define gulm_lock_cb_dropall (0x674C4302) /* gLC2 */
++#define gulm_lock_drop_exp (0x674C454F) /* gLEO */
++#define gulm_lock_dump_req (0x674c4400) /* gLD0 */
++#define gulm_lock_dump_rpl (0x674c4401) /* gLD1 */
++#define gulm_lock_rerunqueues (0x674c5251) /* gLRQ */
+
+/* marks for the login */
+#define gio_lck_st_Slave (0x00)
+#define gio_lck_fg_hasLVB (0x00000010)
+#define gio_lck_fg_Cachable (0x00000020)
+#define gio_lck_fg_Piority (0x00000040)
++ /* this is just an idea, but it might be useful. Basically just says to
++ * not keep the exp hold, just drop this hold like a shared would be.
++ */
++#define gio_lck_fg_DropOnExp (0x00000080)
++ /* this is saved on each holder, basically, you are gonna ignore any
++ * callbacks about this lock, so tell the server not to even bother
++ * sending them. A tiny performance boost by lowering the network load.
++ */
++#define gio_lck_fg_NoCallBacks (0x00000100)
+
+#endif /*__gio_wiretypes_h__*/
+/* vim: set ai cin et sw=3 ts=3 : */
-diff -urN linux-2.6.9-rc1-mm3/fs/gfs_locking/lock_gulm/gulm.h linux/fs/gfs_locking/lock_gulm/gulm.h
---- linux-2.6.9-rc1-mm3/fs/gfs_locking/lock_gulm/gulm.h 1969-12-31 18:00:00.000000000 -0600
-+++ linux/fs/gfs_locking/lock_gulm/gulm.h 2004-09-07 16:17:31.773505171 -0500
-@@ -0,0 +1,291 @@
+diff -urN linux-orig/fs/gfs_locking/lock_gulm/gulm.h linux-patched/fs/gfs_locking/lock_gulm/gulm.h
+--- linux-orig/fs/gfs_locking/lock_gulm/gulm.h 1969-12-31 18:00:00.000000000 -0600
++++ linux-patched/fs/gfs_locking/lock_gulm/gulm.h 2004-10-22 11:54:55.000000000 -0500
+@@ -0,0 +1,237 @@
+/******************************************************************************
+*******************************************************************************
+**
+#ifndef GULM_DOT_H
+#define GULM_DOT_H
+
-+#define GULM_RELEASE_NAME "v6.0.0"
++#define GULM_RELEASE_NAME "<CVS>"
+
++/* uh, do I need all of these headers? */
+#ifdef MODVERSIONS
+#include <linux/modversions.h>
+#endif /* MODVERSIONS */
+#include <linux/smp_lock.h>
+#include <linux/ctype.h>
+#include <linux/string.h>
++#include <linux/list.h>
++#include <linux/init.h>
++#include <linux/types.h>
+#include <linux/fs.h>
+
+#ifndef TRUE
+#define SCNX64 "LX"
+#endif
+
-+#include <linux/list.h>
+
+#undef MAX
+#define MAX(a,b) ((a>b)?a:b)
+#define GIO_LVB_SIZE (32)
+#define GIO_NAME_SIZE (32)
+#define GIO_NAME_LEN (GIO_NAME_SIZE-1)
++#define GULM_CRC_INIT (0x6d696b65)
++#define gulm_gfs_lmSize (1<<13) /* map size is a power of 2 */
++#define gulm_gfs_lmBits (0x1FFF) /* & is faster than % */
+
+/* What we know about this filesytem */
+struct gulm_fs_s {
+ uint32_t fsJID;
+ uint32_t lvb_size;
+
-+ struct semaphore get_lock; /* I am not 100% sure this is needed.
-+ * But it only hurts performance,
-+ * not correctness if it is
-+ * useless. Sometime post52, need
-+ * to investigate.
-+ */
-+
+ /* Stuff for the first mounter lock and state */
+ int firstmounting;
+ /* the recovery done func needs to behave slightly differnt when we are
+ * the first node in an fs.
+ */
+
-+ void *mountlock; /* this lock holds the Firstmounter state of the FS */
-+ /* this is because all lock traffic is async, and really at this point
-+ * in time we want a sync behavor, so I'm left with doing something to
-+ * achive that.
-+ *
-+ * this works, but it is crufty, but I don't want to build a huge
-+ * queuing system for one lock that we touch twice at the beginning and
-+ * once on the end.
-+ *
-+ * I should change the firstmounter lock to work like the journal locks
-+ * and the node locks do. Things are a lot cleaner now with the libgulm
-+ * interface than before. (when the firstmounter lock code was written)
-+ */
-+ struct completion sleep;
-+
+ /* Stuff for JID mapping locks */
+ uint32_t JIDcount; /* how many JID locks are there. */
++ struct semaphore headerlock;
+};
+typedef struct gulm_fs_s gulm_fs_t;
+
-+/* What we know about each locktable.
-+ * only one now-a-days. (the LTPX)
-+ * */
-+typedef struct lock_table_s {
-+ uint32_t magic_one;
-+
-+ int running;
-+ struct task_struct *recver_task;
-+ struct completion startup;
-+ struct semaphore sender;
-+
-+ struct task_struct *sender_task;
-+ wait_queue_head_t send_wchan;
-+ spinlock_t queue_sender;
-+ struct list_head to_be_sent;
-+
-+ int hashbuckets;
-+ spinlock_t *hshlk;
-+ struct list_head *lkhsh;
-+
-+ /* stats
-+ * it may be wise to make some of these into atomic numbers.
-+ * or something. or not.
-+ * */
-+ uint32_t locks_total;
-+ uint32_t locks_unl;
-+ uint32_t locks_exl;
-+ uint32_t locks_shd;
-+ uint32_t locks_dfr;
-+ uint32_t locks_lvbs;
-+ atomic_t locks_pending;
-+ /* cannot count expired here. clients don't know this */
-+
-+ uint32_t lops; /* just incr on each op */
-+
-+} lock_table_t;
-+
+typedef struct gulm_cm_s {
+ uint8_t myName[64];
+ uint8_t clusterID[256]; /* doesn't need to be 256. */
-+ uint8_t loaded; /* True|False whether we grabbed the config data */
+ uint8_t starts;
+
+ uint32_t handler_threads; /* howmany to have */
+
+ uint64_t GenerationID;
+
-+ lock_table_t ltpx;
++	/* lm interface pretty much requires that we maintain a table of
++	 * locks.  The way lvbs work is a perfect example of why.  As is
++ * the panic you get if you send a cb up about a lock that has been
++ * put away.
++ */
++ struct list_head *gfs_lockmap;
++ spinlock_t *gfs_locklock;
+
+ gulm_interface_p hookup;
+
+
+/* things about each lock. */
+typedef struct gulm_lock_s {
-+ struct list_head gl_list;
-+ atomic_t count;
-+
-+ uint32_t magic_one;
-+ gulm_fs_t *fs; /* which filesystem we belong to. */
-+ uint8_t key[GIO_KEY_SIZE];
-+ uint16_t keylen;
-+ uint8_t last_suc_state; /* last state we succesfully got. */
-+ char *lvb;
-+
-+ /* this is true when there is a lock request sent out for this lock.
-+ * All it really means is that if we've lost the master, and reconnect
-+ * to another, this lock needs to have it's request resent.
-+ *
-+ * This now has two stages. Since a lock could be pending, but still in
-+ * the send queue. So we don't want to resend requests that haven't
-+ * been sent yet.
-+ *
-+ * we don't handle the master losses here any more. LTPX does that for
-+ * us. Should consider removing the dupicated code then.
-+ */
-+ int actuallypending; /* may need to be atomic */
-+ int in_to_be_sent;
-+
-+ enum { glck_nothing, glck_action, glck_state } req_type;
-+ /* these three for the lock req. We save them here so we can rebuild
-+ * the lock request if there was a server failover. (?still needed?)
-+ */
-+ unsigned int cur_state;
-+ unsigned int req_state;
-+ unsigned int flags;
-+
-+ /* these three for actions. First is the action, next is result, last is
-+ * what threads wait on for the reply.
-+ */
-+ int action;
-+ int result; /* ok, both are using this. */
-+ struct completion actsleep;
-+
++ struct list_head gl_list;
++ atomic_t count; /* gfs can call multiple gets and puts for same lock. */
++
++ uint8_t *key;
++ uint16_t keylen;
++ gulm_fs_t *fs; /* which fs we belong to */
++ char *lvb;
++	int cur_state; /* for figuring out what reply to tell gfs. */
+} gulm_lock_t;
+
++
+/*****************************************************************************/
+/* cross pollenate prototypes */
+
-+/* from gulm_lt.c */
-+int pack_lock_key(uint8_t *key, uint16_t keylen, uint8_t type,
-+ uint8_t *fsname, uint8_t *pk, uint8_t pklen);
-+void lt_logout (void);
-+int lt_login (void);
++/* from gulm_firstlock.c */
+int get_mount_lock (gulm_fs_t * fs, int *first);
+int downgrade_mount_lock (gulm_fs_t * fs);
+int drop_mount_lock (gulm_fs_t * fs);
-+int send_drop_all_exp (lock_table_t * lt);
-+int send_drop_exp (gulm_fs_t * fs, lock_table_t * lt, char *name);
++
++/* from gulm_lt.c */
++int gulm_lt_init (void);
++void gulm_lt_release(void);
++int pack_lock_key(uint8_t *key, uint16_t keylen, uint8_t type,
++ uint8_t *fsname, uint8_t *pk, uint8_t pklen);
++int pack_drop_mask(uint8_t *mask, uint16_t mlen, uint8_t *fsname);
++void do_drop_lock_req (uint8_t *key, uint16_t keylen, uint8_t state);
++int gulm_get_lock (lm_lockspace_t * lockspace, struct lm_lockname *name,
++ lm_lock_t ** lockp);
++void gulm_put_lock (lm_lock_t * lock);
++unsigned int gulm_lock (lm_lock_t * lock, unsigned int cur_state,
++ unsigned int req_state, unsigned int flags);
++unsigned int gulm_unlock (lm_lock_t * lock, unsigned int cur_state);
++void gulm_cancel (lm_lock_t * lock);
++int gulm_hold_lvb (lm_lock_t * lock, char **lvbp);
++void gulm_unhold_lvb (lm_lock_t * lock, char *lvb);
++void gulm_sync_lvb (lm_lock_t * lock, char *lvb);
++
++/* from gulm_plock.c */
++int gulm_punlock (lm_lockspace_t * lockspace, struct lm_lockname *name,
++ struct file *file, struct file_lock *fl);
++int gulm_plock (lm_lockspace_t *lockspace, struct lm_lockname *name,
++ struct file *file, int cmd, struct file_lock *fl);
++int gulm_plock_get (lm_lockspace_t * lockspace, struct lm_lockname *name,
++ struct file *file, struct file_lock *fl);
+
+/*from gulm_core.c */
+void cm_logout (void);
+ lm_callback_t cb, lm_fsdata_t * fsdata,
+ unsigned int min_lvb_size, struct lm_lockstruct *lockstruct);
+
++/* from gulm_jid.c */
++void jid_header_lock_drop (uint8_t * key, uint16_t keylen);
++
+extern struct lm_lockops gulm_ops;
+
+#endif /* GULM_DOT_H */
+/* vim: set ai cin noet sw=8 ts=8 : */
-diff -urN linux-2.6.9-rc1-mm3/fs/gfs_locking/lock_gulm/gulm_core.c linux/fs/gfs_locking/lock_gulm/gulm_core.c
---- linux-2.6.9-rc1-mm3/fs/gfs_locking/lock_gulm/gulm_core.c 1969-12-31 18:00:00.000000000 -0600
-+++ linux/fs/gfs_locking/lock_gulm/gulm_core.c 2004-09-07 16:17:31.773505171 -0500
-@@ -0,0 +1,255 @@
+diff -urN linux-orig/fs/gfs_locking/lock_gulm/gulm_core.c linux-patched/fs/gfs_locking/lock_gulm/gulm_core.c
+--- linux-orig/fs/gfs_locking/lock_gulm/gulm_core.c 1969-12-31 18:00:00.000000000 -0600
++++ linux-patched/fs/gfs_locking/lock_gulm/gulm_core.c 2004-10-22 11:54:55.000000000 -0500
+@@ -0,0 +1,250 @@
+/******************************************************************************
+*******************************************************************************
+**
+#define __KERNEL_SYSCALLS__
+#include <linux/unistd.h>
+
-+#include "util.h"
++#include "gulm_lock_queue.h"
+#include "utils_tostr.h"
+
+extern gulm_cm_t gulm_cm;
+ }
+ gulm_cm.GenerationID = gen;
+
-+ error = lt_login ();
-+ if (error != 0) {
-+ log_err ("lt_login failed. %d\n", error);
-+ lg_core_logout (gulm_cm.hookup); /* XXX is this safe? */
-+ return error;
-+ }
+
+ log_msg (lgm_Network2, "Logged into local core.\n");
+
+ */
+ if (gulm_cm.starts && nodestate == lg_core_Logged_out &&
+ strcmp(gulm_cm.myName, nodename) == 0 ) {
-+ lt_logout();
++ glq_shutdown ();
+ cm_thd_running = FALSE;
+ lg_core_logout (gulm_cm.hookup);
+ return -1;
+ return 0;
+}
+
-+int gulm_core_statechange (void *misc, uint8_t corestate,
++int gulm_core_statechange (void *misc, uint8_t corestate, uint8_t quorate,
+ struct in6_addr *masterip, char *mastername)
+{
+ int *cst = (int *)misc;
+
+ err = 0;
+ exit:
++ if (err > 0) err = - err;
+ return err;
+}
+/* vim: set ai cin noet sw=8 ts=8 : */
-diff -urN linux-2.6.9-rc1-mm3/fs/gfs_locking/lock_gulm/gulm_fs.c linux/fs/gfs_locking/lock_gulm/gulm_fs.c
---- linux-2.6.9-rc1-mm3/fs/gfs_locking/lock_gulm/gulm_fs.c 1969-12-31 18:00:00.000000000 -0600
-+++ linux/fs/gfs_locking/lock_gulm/gulm_fs.c 2004-09-07 16:17:31.774504952 -0500
-@@ -0,0 +1,620 @@
+diff -urN linux-orig/fs/gfs_locking/lock_gulm/gulm_firstlock.c linux-patched/fs/gfs_locking/lock_gulm/gulm_firstlock.c
+--- linux-orig/fs/gfs_locking/lock_gulm/gulm_firstlock.c 1969-12-31 18:00:00.000000000 -0600
++++ linux-patched/fs/gfs_locking/lock_gulm/gulm_firstlock.c 2004-10-22 11:54:55.000000000 -0500
+@@ -0,0 +1,310 @@
+/******************************************************************************
+*******************************************************************************
+**
+#include <linux/fs.h>
+#include <linux/slab.h>
+#include <linux/file.h>
++#include <linux/crc32.h>
+#define __KERNEL_SYSCALLS__
+#include <linux/unistd.h>
+
-+#include "util.h"
-+#include "load_info.h"
+#include "handler.h"
-+#include "gulm_procinfo.h"
++#include "gulm_lock_queue.h"
++
++extern gulm_cm_t gulm_cm;
++
++/****************************************************************************/
++struct gulm_flck_return_s {
++ int error;
++ struct completion sleep;
++};
++
++/**
++ * gulm_firstlock_finish -
++ * @item:
++ *
++ *
++ * Returns: void
++ */
++void gulm_firstlock_finish (struct glck_req *item)
++{
++ struct gulm_flck_return_s *g = (struct gulm_flck_return_s *)item->misc;
++ g->error = item->error;
++ complete (&g->sleep);
++}
++
++/**
++ * gulm_cancel_firstlock -
++ * @misc:
++ *
++ */
++void gulm_cancel_firstlock (void *misc)
++{
++ gulm_fs_t *fs = (gulm_fs_t *)misc;
++ glckr_t *item;
++
++ item = glq_get_new_req();
++ if( item == NULL ) {
++ log_err ("Out of memory, Cannot cancel Firstlock request.\n");
++ return;
++ }
++
++ /* after cancel is processed, glq will call kfree on item->key. */
++ item->key = kmalloc(GIO_KEY_SIZE, GFP_KERNEL);
++ if (item->key == NULL) {
++ glq_recycle_req(item);
++ log_err ("Out of memory, Cannot cancel Firstlock request.\n");
++ return;
++ }
++ item->keylen = pack_lock_key(item->key, GIO_KEY_SIZE, 'F',
++ fs->fs_name, "irstMount", 9);
++ item->subid = 0;
++ item->start = 0;
++ item->stop = ~((uint64_t)0);
++ item->type = glq_req_type_cancel;
++ item->finish = NULL;
++
++ glq_cancel(item);
++}
++
++/**
++ * do_lock_time_out -
++ * @d:
++ *
++ * after timeout, set cancel request on the handler queue. (since we cannot
++ * call it from within the timer code. (socket io within interrupt space is
++ * bad.))
++ *
++ */
++static void
++do_lock_time_out (unsigned long d)
++{
++ gulm_fs_t *fs = (gulm_fs_t *)d;
++ qu_function_call (&fs->cq, gulm_cancel_firstlock, fs);
++}
++
++/**
++ * get_mount_lock -
++ * @fs:
++ * @first:
++ *
++ * Get the Firstmount lock.
++ * We try to grab it Exl. IF we get that, then we are the first client
++ * mounting this fs. Otherwise we grab it shared to show that there are
++ * clients using this fs.
++ *
++ * Returns: int
++ */
++int
++get_mount_lock (gulm_fs_t * fs, int *first)
++{
++ int err, keylen;
++ struct timer_list locktimeout;
++ struct gulm_flck_return_s gret;
++ uint8_t key[GIO_KEY_SIZE];
++ glckr_t *item;
++
++ keylen = pack_lock_key(key, GIO_KEY_SIZE, 'F', fs->fs_name, "irstMount", 9);
++ if( keylen <= 0 ) return keylen;
++
++
++ try_it_again:
++ *first = FALSE; /* assume we're not first */
++
++ item = glq_get_new_req();
++ if (item == NULL) {
++ err = -ENOMEM;
++ goto fail;
++ }
++
++ /* glq does not try to free the key for state or action requests. */
++ item->key = key;
++ item->keylen = keylen;
++ item->subid = 0;
++ item->start = 0;
++ item->stop = ~((uint64_t)0);
++ item->type = glq_req_type_state;
++ item->state = lg_lock_state_Exclusive;
++ item->flags = lg_lock_flag_Try|lg_lock_flag_IgnoreExp|lg_lock_flag_NoCallBacks;
++ item->error = gret.error = 0;
++
++ init_completion (&gret.sleep);
++
++ item->misc = &gret;
++ item->finish = gulm_firstlock_finish;
++
++ glq_queue (item);
++ wait_for_completion (&gret.sleep);
++
++ if (gret.error == 0) {
++ /* we got the lock, we're the first mounter. */
++ *first = TRUE;
++ log_msg (lgm_locking, "fsid=%s: Got mount lock Exclusive.\n",
++ fs->fs_name);
++ return 0;
++ } else {
++ log_msg (lgm_locking,
++ "fsid=%s: Didn't get mount lock Exl, someone else "
++ "was first, trying for shared.\n", fs->fs_name);
++
++ /* the try failed, pick it up shared.
++ * If it takes too long, start over.
++ * */
++ init_timer (&locktimeout);
++ locktimeout.function = do_lock_time_out;
++ locktimeout.data = (unsigned long)fs;
++ mod_timer (&locktimeout, jiffies + (120 * HZ));
++
++ item = glq_get_new_req();
++ if (item == NULL) {
++ err = -ENOMEM;
++ goto fail;
++ }
++
++ item->key = key;
++ item->keylen = keylen;
++ item->subid = 0;
++ item->start = 0;
++ item->stop = ~((uint64_t)0);
++ item->type = glq_req_type_state;
++ item->state = lg_lock_state_Shared;
++ item->flags = lg_lock_flag_NoCallBacks;
++ item->error = gret.error = 0;
++
++ init_completion (&gret.sleep);
++
++ item->misc = &gret;
++ item->finish = gulm_firstlock_finish;
++
++ glq_queue (item);
++ wait_for_completion (&gret.sleep);
++
++ del_timer (&locktimeout);
++
++ if (gret.error == 0) {
++ /* kewl we got it. */
++ log_msg (lgm_locking,
++ "fsid=%s: Got mount lock shared.\n",
++ fs->fs_name);
++ return 0;
++ }
++
++ log_msg (lgm_locking,
++ "fsid=%s: Shared req timed out, trying Exl again.\n",
++ fs->fs_name);
++ goto try_it_again;
++ }
++ fail:
++ log_err ("Exit get_mount_lock err=%d\n", err);
++ return err;
++}
++
++/**
++ * downgrade_mount_lock -
++ * @fs:
++ *
++ * drop the Firstmount lock down to shared. This lets others mount.
++ *
++ * Returns: int
++ */
++int
++downgrade_mount_lock (gulm_fs_t * fs)
++{
++ int keylen;
++ struct gulm_flck_return_s gret;
++ uint8_t key[GIO_KEY_SIZE];
++ glckr_t *item;
++
++ keylen = pack_lock_key(key, GIO_KEY_SIZE, 'F',
++ fs->fs_name, "irstMount", 9);
++ if( keylen <= 0 ) return keylen;
++
++ item = glq_get_new_req();
++ if (item == NULL) {
++ return -ENOMEM;
++ }
++
++ item->key = key;
++ item->keylen = keylen;
++ item->subid = 0;
++ item->start = 0;
++ item->stop = ~((uint64_t)0);
++ item->type = glq_req_type_state;
++ item->state = lg_lock_state_Shared;
++ item->flags = lg_lock_flag_NoCallBacks;
++ item->error = gret.error = 0;
++
++ init_completion (&gret.sleep);
++
++ item->misc = &gret;
++ item->finish = gulm_firstlock_finish;
++
++ glq_queue (item);
++ wait_for_completion (&gret.sleep);
++
++ if (gret.error != 0)
++ log_err ("fsid=%s: Couldn't unlock mount lock!!!!!! %d\n",
++ fs->fs_name, gret.error);
++ return 0;
++}
++
++/**
++ * drop_mount_lock - drop our hold on the firstmount lock.
++ * @fs: <> the filesystem pointer.
++ *
++ * Returns: int
++ */
++int
++drop_mount_lock (gulm_fs_t * fs)
++{
++ int keylen;
++ struct gulm_flck_return_s gret;
++ uint8_t key[GIO_KEY_SIZE];
++ glckr_t *item;
++
++ keylen = pack_lock_key(key, GIO_KEY_SIZE, 'F', fs->fs_name, "irstMount", 9);
++ if( keylen <= 0 ) return keylen;
++
++ item = glq_get_new_req();
++ if (item == NULL) {
++ return -ENOMEM;
++ }
++
++ item->key = key;
++ item->keylen = keylen;
++ item->subid = 0;
++ item->start = 0;
++ item->stop = ~((uint64_t)0);
++ item->type = glq_req_type_state;
++ item->state = lg_lock_state_Unlock;
++ item->flags = 0;
++ item->error = gret.error = 0;
++
++ init_completion (&gret.sleep);
++
++ item->misc = &gret;
++ item->finish = gulm_firstlock_finish;
++
++ glq_queue (item);
++ wait_for_completion (&gret.sleep);
++
++ if (gret.error != 0)
++ log_err ("fsid=%s: Couldn't unlock mount lock!!!!!! %d\n",
++ fs->fs_name, gret.error);
++ return 0;
++}
++
++/* vim: set ai cin noet sw=8 ts=8 : */
+diff -urN linux-orig/fs/gfs_locking/lock_gulm/gulm_fs.c linux-patched/fs/gfs_locking/lock_gulm/gulm_fs.c
+--- linux-orig/fs/gfs_locking/lock_gulm/gulm_fs.c 1969-12-31 18:00:00.000000000 -0600
++++ linux-patched/fs/gfs_locking/lock_gulm/gulm_fs.c 2004-10-22 11:54:55.000000000 -0500
+@@ -0,0 +1,633 @@
++/******************************************************************************
++*******************************************************************************
++**
++** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
++** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
++**
++** This copyrighted material is made available to anyone wishing to use,
++** modify, copy, or redistribute it subject to the terms and conditions
++** of the GNU General Public License v.2.
++**
++*******************************************************************************
++******************************************************************************/
++
++#include "gulm.h"
++
++#include <linux/kernel.h>
++#include <linux/fs.h>
++#include <linux/slab.h>
++#include <linux/file.h>
++#define __KERNEL_SYSCALLS__
++#include <linux/unistd.h>
++#include <linux/utsname.h> /* for extern system_utsname */
++
++#include "handler.h"
++#include "gulm_lock_queue.h"
+#include "gulm_jid.h"
+
+/* things about myself */
+/* globals for this file.*/
+uint32_t filesystems_count = 0;
+LIST_HEAD (filesystems_list);
-+struct semaphore filesystem_lck; /* we use a sema instead of a spin here because
-+ * all of the interruptible things we do inside
-+ * of it.
-+ * If i stop doing nasty things within this it doesn't need
-+ * to be a sema.
++struct semaphore filesystem_lck; /* we use a sema instead of a spin
++ * here because all of the
++ * interruptible things we do
++ * inside of it. If i stop doing
++ * nasty things within this it
++ * doesn't need to be a sema.
+ */
+struct semaphore start_stop_lock;
+atomic_t start_stop_cnt;
+ "In fs (%s), jid %d was found for name (%s).\n",
+ rf->fs->fs_name, jid, rf->name);
+
-+ /* all that the replay journal call back into gfs does is malloc
-+ * some memory and add it to a list. So we really don't need to
-+ * queue that action. Since that is what gfs is doing.
++ /* all that the replay journal call back into gfs does is
++ * malloc some memory and add it to a list. So we really
++ * don't need to queue that action. Since that is what gfs
++ * is doing.
+ *
+ * This will need to change if gfs changes.
+ *
+ fs = list_entry (tmp, gulm_fs_t, fs_list);
+ qu_drop_req (&fs->cq, fs->cb, fs->fsdata, LM_CB_DROPLOCKS, 0,
+ 0);
-+ /* If this decides to block someday, we need to change this function.
++ /* If this decides to block someday, we need to change this
++ * function.
+ */
+ }
+ up (&filesystem_lck);
+/*****************************************************************************/
+
+/**
-+ * clear_locks -
-+ *
-+ * quick check to see if there was leaking
-+ * should I panic on these? or just complain?
-+ *
-+ * Returns: void
-+ */
-+void
-+clear_locks (void)
-+{
-+ int i;
-+ lock_table_t *lt = &gulm_cm.ltpx;
-+
-+ for (i = 0; i < lt->hashbuckets; i++) {
-+ struct list_head *lcktmp, *lckfoo;
-+ spin_lock (<->hshlk[i]);
-+ list_for_each_safe (lcktmp, lckfoo, <->lkhsh[i]) {
-+ gulm_lock_t *lck = NULL;
-+ lck = list_entry (lcktmp, gulm_lock_t, gl_list);
-+ /* need to relelase it. umm, should any even exist? */
-+ log_err ("AH! Rogue lock buffer! refcount:%d\n",
-+ atomic_read (&lck->count));
-+
-+ if (lck->lvb) {
-+ log_err ("AH! Rogue lock buffer with LVB!\n");
-+ kfree (lck->lvb);
-+ }
-+
-+ list_del (lcktmp);
-+ kfree (lck);
-+
-+ }
-+ spin_unlock (<->hshlk[i]);
-+ }
-+ kfree (lt->hshlk);
-+ lt->hshlk = NULL;
-+ kfree (lt->lkhsh);
-+ lt->lkhsh = NULL;
-+}
-+
-+/*****************************************************************************/
-+/**
+ * start_gulm_threads -
+ * @host_data:
+ *
+ * Returns: int
+ */
+int
-+start_gulm_threads (char *csnm, char *host_data)
++start_gulm_threads (char *csnm, char *hostdata)
+{
+ int error = 0;
+
+ strncpy (gulm_cm.clusterID, csnm, 255);
+ gulm_cm.clusterID[255] = '\0';
+
++ if (hostdata != NULL && strlen (hostdata) > 0) {
++ strncpy (gulm_cm.myName, hostdata, 64);
++ } else {
++ strncpy (gulm_cm.myName, system_utsname.nodename, 64);
++ }
++ gulm_cm.myName[63] = '\0';
++
++
+ error = lg_initialize (&gulm_cm.hookup, gulm_cm.clusterID,
+ "GFS Kernel Interface");
+ if (error != 0) {
+ }
+ gulm_cm.starts = TRUE;
+
-+ error = load_info (host_data);
-+ if (error != 0) {
-+ log_err ("load_info failed. %d\n", error);
-+ goto fail;
-+ }
-+
-+ jid_init ();
++ /* breaking away from ccs. just hardcoding defaults here.
++ * Noone really used these anyways and if ppl want them
++ * badly, we'll find another way to set them. (modprobe
++ * options for example. or maybe sysfs?)
++ * */
++ gulm_cm.handler_threads = 2;
++ gulm_cm.verbosity = lgm_Network | lgm_Stomith | lgm_Forking;
+
+ error = cm_login ();
+ if (error != 0) {
+ log_err ("cm_login failed. %d\n", error);
+ goto fail;
+ }
++ error = glq_startup ();
++ if (error != 0) {
++ log_err ("glq_startup failed. %d\n", error);
++ goto fail;
++ }
+
-+ /* lt_login() is called after the success packet for cm_login()
-+ * returns.
-+ */
+ }
+ fail:
+ up (&start_stop_lock);
+ atomic_dec (&start_stop_cnt);
+ if (atomic_read (&start_stop_cnt) == 0) {
+ /* last one, put it all away. */
-+ lt_logout ();
++ glq_shutdown ();
+ cm_logout ();
-+ clear_locks ();
+ lg_release (gulm_cm.hookup);
+ gulm_cm.hookup = NULL;
-+ gulm_cm.loaded = FALSE;
+ gulm_cm.GenerationID = 0;
+ }
+ up (&start_stop_lock);
+}
+
+/*****************************************************************************/
++/**
++ * send_drop_exp -
++ * @fs:
++ * @name:
++ *
++ *
++ * Returns: int
++ */
++int send_drop_exp (gulm_fs_t * fs, char *name)
++{
++ glckr_t *item;
++
++ item = glq_get_new_req();
++ if( item == NULL ) {
++ log_err("drop_exp: failed to get needed memory. skipping.\n");
++ return -ENOMEM;
++ }
++
++ item->keylen = 3 + strlen(fs->fs_name);
++ item->key = kmalloc(item->keylen, GFP_KERNEL);
++ if (item->key == NULL) {
++ glq_recycle_req(item);
++ log_err("drop_exp: failed to get needed memory. skipping.\n");
++ return -ENOMEM;
++ }
++ item->keylen = pack_drop_mask(item->key, item->keylen, fs->fs_name);
++
++	/* pretend lvb is name for drops. */
++ if (name != NULL) {
++ item->lvblen = strlen(name) +1;
++ item->lvb = kmalloc(item->lvblen, GFP_KERNEL);
++ if (item->lvb == NULL) {
++ glq_recycle_req(item); /* frees key for us */
++ log_err("drop_exp: failed to get needed memory. skipping.\n");
++ return -ENOMEM;
++ }
++ memcpy(item->lvb, name, item->lvblen);
++ } else {
++ item->lvb = NULL;
++ item->lvblen = 0;
++ }
++
++ item->subid = 0;
++ item->start = 0;
++ item->stop = ~((uint64_t)0);
++ item->type = glq_req_type_drop;
++ item->state = 0;
++ item->flags = 0;
++ item->error = 0;
++ item->finish = NULL;
++
++ glq_queue (item);
++
++ return 0;
++}
++/*****************************************************************************/
+
+/**
+ * gulm_mount
+ gulm->cb = cb;
+ gulm->fsdata = fsdata;
+ gulm->lvb_size = min_lvb_size;
-+ init_completion (&gulm->sleep);
-+ init_MUTEX (&gulm->get_lock);
+
+ if ((error = start_gulm_threads (work, host_data)) != 0) {
+ log_err ("Got a %d trying to start the threads.\n", error);
+ log_msg (lgm_JIDMap, "fsid=%s: We will be using jid %d\n",
+ gulm->fs_name, gulm->fsJID);
+
-+ if (add_to_proc (gulm) != 0) {
-+ /* ignored for now */
-+ }
-+
+ lockstruct->ls_jid = gulm->fsJID;
+ lockstruct->ls_first = first;
+ lockstruct->ls_lvb_size = gulm->lvb_size;
+ lockstruct->ls_lockspace = gulm;
+ lockstruct->ls_ops = &gulm_ops;
-+#ifdef USE_SYNC_LOCKING
-+ lockstruct->ls_flags = 0;
-+
-+ log_msg (lgm_Network2, "Done: %s, sync mode\n", table_name);
-+#else
+ lockstruct->ls_flags = LM_LSFLAG_ASYNC;
-+
+ log_msg (lgm_Network2, "Done: %s, async mode\n", table_name);
-+#endif
+
+ gulm_cm.starts = FALSE;
++ if(work != NULL ) kfree(work);
+ return 0;
+
+ fail_callback:
+ stop_callback_qu (&gulm->cq);
+
+ fail_free_gulm:
-+ kfree (gulm);
+ stop_gulm_threads ();
++ kfree (gulm);
+
+ fail:
+
+ gulm_cm.starts = FALSE;
+ log_msg (lgm_Always, "fsid=%s: Exiting gulm_mount with errors %d\n",
+ table_name, error);
++ /* VFS does weird things with the error results, so before we try
++ * to return a gulm error code, flip it to -1.
++ */
++ if (error > 999 || error < -999 ) error = -1;
+ return error;
+}
+
+{
+ gulm_fs_t *fs = (gulm_fs_t *) lockspace;
+ int err = 0;
-+ lock_table_t *lt = &gulm_cm.ltpx;
+
+ /* first send the drop all exp message.
+ * */
-+ err = send_drop_exp (fs, lt, NULL);
++ err = send_drop_exp (fs, NULL);
+ if (err < 0)
+ log_err
+ ("fsid=%s: Problems sending DropExp request to LTPX: %d\n",
+
+ stop_callback_qu (&gulm_fs->cq);
+
-+ remove_from_proc (gulm_fs);
-+
+ kfree (gulm_fs);
+
+ stop_gulm_threads ();
+ log_msg (lgm_JIDMap, "fsid=%s: Found %s for jid %d\n",
+ fs->fs_name, name, jid);
+
-+ err = send_drop_exp (fs, &gulm_cm.ltpx, name);
++ err = send_drop_exp (fs, name);
+
+ if (jid != fs->fsJID) {
+ /* rather dumb to do this to ourselves right after we mount... */
+
+}
+/* vim: set ai cin noet sw=8 ts=8 : */
-diff -urN linux-2.6.9-rc1-mm3/fs/gfs_locking/lock_gulm/gulm_jid.c linux/fs/gfs_locking/lock_gulm/gulm_jid.c
---- linux-2.6.9-rc1-mm3/fs/gfs_locking/lock_gulm/gulm_jid.c 1969-12-31 18:00:00.000000000 -0600
-+++ linux/fs/gfs_locking/lock_gulm/gulm_jid.c 2004-09-07 16:17:31.797499912 -0500
-@@ -0,0 +1,817 @@
+diff -urN linux-orig/fs/gfs_locking/lock_gulm/gulm_jid.c linux-patched/fs/gfs_locking/lock_gulm/gulm_jid.c
+--- linux-orig/fs/gfs_locking/lock_gulm/gulm_jid.c 1969-12-31 18:00:00.000000000 -0600
++++ linux-patched/fs/gfs_locking/lock_gulm/gulm_jid.c 2004-10-22 11:54:55.000000000 -0500
+@@ -0,0 +1,739 @@
+/******************************************************************************
+*******************************************************************************
+**
+#define __KERNEL_SYSCALLS__
+#include <linux/unistd.h>
+
-+#include "util.h"
++#include "gulm_lock_queue.h"
+
+extern gulm_cm_t gulm_cm;
+
+ */
+#define jid_header_lvb_size (8)
+
-+struct jid_lookup_item_s {
-+ struct list_head jp_list;
-+ uint8_t *key;
-+ uint16_t keylen;
-+ uint8_t *lvb;
-+ uint16_t lvblen;
-+ struct completion waitforit;
-+};
-+typedef struct jid_lookup_item_s jid_lookup_item_t;
-+
-+LIST_HEAD (jid_pending_locks);
-+spinlock_t jid_pending;
-+struct semaphore jid_listlock;
-+
-+/**
-+ * jid_init -
-+ */
-+void
-+jid_init (void)
-+{
-+ spin_lock_init (&jid_pending);
-+ init_MUTEX (&jid_listlock);
-+}
-+
+/**
+ * jid_get_header_name -
+ * @fs: <
+{
+ int len;
+
-+ len = pack_lock_key(key, *keylen, 'J', fsname, "Header\0\0\0", 9);
-+ if( len <=0 ) return len;
-+
-+ *keylen = len;
-+
-+ return 0;
-+}
-+
-+int
-+jid_get_listlock_name (uint8_t * fsname, uint8_t * key, uint16_t * keylen)
-+{
-+ int len;
-+
-+ len = pack_lock_key(key, *keylen, 'J', fsname, "Llistlock", 9);
++ len = pack_lock_key(key, *keylen, 'J', fsname, "Header", 6);
+ if( len <=0 ) return len;
+
+ *keylen = len;
+}
+
+/**
-+ * jid_hold_lvb -
++ * gulm_jid_finish -
++ * @item:
++ *
++ *
++ * Returns: void
++ */
++void gulm_jid_finish (struct glck_req *item)
++{
++ struct completion *sleep = (struct completion *)item->misc;
++ complete (sleep);
++}
++
++/**
++ * jid_lvb_action -
+ * @key:
+ * @keylen:
++ * @lvb:
++ * @lvblen:
++ * @action:
+ *
+ *
++ * Returns: void
+ */
-+void
-+jid_hold_lvb (uint8_t * key, uint16_t keylen)
++void jid_lvb_action (uint8_t * key, uint16_t keylen, uint8_t * lvb,
++ uint16_t lvblen, uint8_t action)
+{
-+ jid_lookup_item_t jp;
-+ GULM_ASSERT (keylen > 6,);
-+ jp.key = key;
-+ jp.keylen = keylen;
-+ jp.lvb = NULL;
-+ jp.lvblen = 0;
-+ INIT_LIST_HEAD (&jp.jp_list);
-+ init_completion (&jp.waitforit);
-+
-+ spin_lock (&jid_pending);
-+ list_add (&jp.jp_list, &jid_pending_locks);
-+ spin_unlock (&jid_pending);
-+
-+ lg_lock_action_req (gulm_cm.hookup, key, keylen, lg_lock_act_HoldLVB,
-+ NULL, 0);
++ struct completion sleep;
++ glckr_t *item;
+
-+ wait_for_completion (&jp.waitforit);
-+}
++ item = glq_get_new_req();
++ if (item == NULL) {
++ return;
++ }
+
-+void
-+jid_unhold_lvb (uint8_t * key, uint16_t keylen)
-+{
-+ jid_lookup_item_t jp;
-+ GULM_ASSERT (keylen > 6,);
-+ jp.key = key;
-+ jp.keylen = keylen;
-+ jp.lvb = NULL;
-+ jp.lvblen = 0;
-+ INIT_LIST_HEAD (&jp.jp_list);
-+ init_completion (&jp.waitforit);
++ item->key = key;
++ item->keylen = keylen;
++ item->subid = 0;
++ item->start = 0;
++ item->stop = ~((uint64_t)0);
++ item->type = glq_req_type_action;
++ item->state = action;
++ item->flags = 0;
++ item->error = 0;
++ item->lvb = lvb;
++ item->lvblen = lvblen;
+
-+ spin_lock (&jid_pending);
-+ list_add (&jp.jp_list, &jid_pending_locks);
-+ spin_unlock (&jid_pending);
++ init_completion (&sleep);
+
-+ lg_lock_action_req (gulm_cm.hookup, key, keylen, lg_lock_act_UnHoldLVB,
-+ NULL, 0);
++ item->misc = &sleep;
++ item->finish = gulm_jid_finish;
+
-+ wait_for_completion (&jp.waitforit);
++ glq_queue (item);
++ wait_for_completion (&sleep);
+}
-+
+void
+jid_sync_lvb (uint8_t * key, uint16_t keylen, uint8_t * lvb, uint16_t lvblen)
+{
-+ jid_lookup_item_t jp;
-+ GULM_ASSERT (keylen > 6,);
-+ jp.key = key;
-+ jp.keylen = keylen;
-+ jp.lvb = NULL;
-+ jp.lvblen = 0;
-+ INIT_LIST_HEAD (&jp.jp_list);
-+ init_completion (&jp.waitforit);
-+
-+ spin_lock (&jid_pending);
-+ list_add (&jp.jp_list, &jid_pending_locks);
-+ spin_unlock (&jid_pending);
-+
-+ lg_lock_action_req (gulm_cm.hookup, key, keylen, lg_lock_act_SyncLVB,
-+ lvb, lvblen);
-+
-+ wait_for_completion (&jp.waitforit);
++ jid_lvb_action (key, keylen, lvb, lvblen, lg_lock_act_SyncLVB);
+}
-+
-+/**
-+ * jid_action_reply -
-+ * @key:
-+ * @keylen:
-+ *
-+ * called from the lock handler callback.
-+ *
-+ * Returns: void
-+ */
+void
-+jid_action_reply (uint8_t * key, uint16_t keylen)
-+{
-+ struct list_head *tmp, *nxt;
-+ jid_lookup_item_t *jp, *fnd = NULL;
-+ spin_lock (&jid_pending);
-+ list_for_each_safe (tmp, nxt, &jid_pending_locks) {
-+ jp = list_entry (tmp, jid_lookup_item_t, jp_list);
-+ if (memcmp (key, jp->key, MIN (keylen, jp->keylen)) == 0) {
-+ fnd = jp;
-+ list_del (tmp);
-+ break;
-+ }
-+ }
-+ spin_unlock (&jid_pending);
-+
-+ if (fnd != NULL)
-+ complete (&fnd->waitforit);
++jid_unhold_lvb (uint8_t * key, uint16_t keylen)
++{
++ jid_lvb_action (key, keylen, NULL, 0, lg_lock_act_UnHoldLVB);
++}
++void
++jid_hold_lvb (uint8_t * key, uint16_t keylen)
++{
++ jid_lvb_action (key, keylen, NULL, 0, lg_lock_act_HoldLVB);
+}
+
++
+/**
+ * jid_get_lock_state_inr -
+ * @key:
+jid_get_lock_state_inr (uint8_t * key, uint16_t keylen, uint8_t state,
+ uint32_t flags, uint8_t * lvb, uint16_t lvblen)
+{
-+ jid_lookup_item_t jp;
++ struct completion sleep;
++ glckr_t *item;
+ GULM_ASSERT (keylen > 6,
+ printk("keylen: %d\n", keylen););
-+ jp.key = key;
-+ jp.keylen = keylen;
-+ jp.lvb = lvb;
-+ jp.lvblen = lvblen;
-+ INIT_LIST_HEAD (&jp.jp_list);
-+ init_completion (&jp.waitforit);
+
-+ spin_lock (&jid_pending);
-+ list_add (&jp.jp_list, &jid_pending_locks);
-+ spin_unlock (&jid_pending);
++ init_completion (&sleep);
++
++ item = glq_get_new_req();
++ if (item == NULL) {
++ return;
++ }
++
++ item->key = key;
++ item->keylen = keylen;
++ item->subid = 0;
++ item->start = 0;
++ item->stop = ~((uint64_t)0);
++ item->type = glq_req_type_state;
++ item->state = state;
++ item->flags = flags;
++ item->error = 0;
++ item->lvb = lvb;
++ item->lvblen = lvblen;
++
++ item->misc = &sleep;
++ item->finish = gulm_jid_finish;
+
-+ lg_lock_state_req (gulm_cm.hookup, key, keylen, state, flags, lvb, lvblen);
++ glq_queue (item);
+
-+ wait_for_completion (&jp.waitforit);
++ wait_for_completion (&sleep);
+}
+
+/**
+ jid_get_lock_state_inr (key, keylen, state, 0, NULL, 0);
+}
+
-+/**
-+ * jid_state_reply -
-+ * @key:
-+ * @keylen:
-+ * @lvb:
-+ * @lvblen:
-+ *
-+ *
-+ */
-+void
-+jid_state_reply (uint8_t * key, uint16_t keylen, uint8_t * lvb, uint16_t lvblen)
-+{
-+ struct list_head *tmp, *nxt;
-+ jid_lookup_item_t *jp, *fnd = NULL;
-+ spin_lock (&jid_pending);
-+ list_for_each_safe (tmp, nxt, &jid_pending_locks) {
-+ jp = list_entry (tmp, jid_lookup_item_t, jp_list);
-+ if (memcmp (key, jp->key, MIN (keylen, jp->keylen)) == 0) {
-+ fnd = jp;
-+ list_del (tmp);
-+ break;
-+ }
-+ }
-+ spin_unlock (&jid_pending);
-+
-+ if (fnd != NULL) {
-+ if (lvb != NULL && fnd->lvb != NULL)
-+ memcpy (fnd->lvb, lvb, MIN (fnd->lvblen, lvblen));
-+ complete (&fnd->waitforit);
-+ }
-+}
-+
+/****************************************************************************/
+
+/**
-+ * jid_hold_list_lock -
-+ * @fs:
-+ *
-+ * only make one call to this per node.
-+ *
-+ * Returns: void
-+ */
-+void
-+jid_hold_list_lock (gulm_fs_t * fs)
-+{
-+ uint8_t key[GIO_KEY_SIZE];
-+ uint16_t keylen = GIO_KEY_SIZE;
-+
-+ down (&jid_listlock);
-+
-+ keylen = sizeof (key);
-+ jid_get_listlock_name (fs->fs_name, key, &keylen);
-+ jid_get_lock_state (key, keylen, lg_lock_state_Exclusive);
-+
-+}
-+
-+/**
-+ * jid_release_list_lock -
-+ * @fs:
-+ *
-+ *
-+ * Returns: void
-+ */
-+void
-+jid_release_list_lock (gulm_fs_t * fs)
-+{
-+ uint8_t key[GIO_KEY_SIZE];
-+ uint16_t keylen = GIO_KEY_SIZE;
-+
-+ keylen = sizeof (key);
-+ jid_get_listlock_name (fs->fs_name, key, &keylen);
-+ jid_get_lock_state (key, keylen, lg_lock_state_Unlock);
-+
-+ up (&jid_listlock);
-+}
-+
-+/**
+ * jid_rehold_lvbs -
+ * @fs:
+ *
+ fs->JIDcount |= (uint32_t) (lvb[2]) << 16;
+ fs->JIDcount |= (uint32_t) (lvb[3]) << 24;
+
-+ for (i = oldjcnt; i < fs->JIDcount; i++) {
-+ keylen = sizeof (key);
-+ jid_get_lock_name (fs->fs_name, i, key, &keylen);
-+ jid_hold_lvb (key, keylen);
++ if( fs->JIDcount > oldjcnt ) {
++ for (i = oldjcnt; i < fs->JIDcount; i++) {
++ keylen = sizeof (key);
++ jid_get_lock_name (fs->fs_name, i, key, &keylen);
++ jid_hold_lvb (key, keylen);
++ }
+ }
+
+}
+
+ keylen = sizeof (key);
+ jid_get_header_name (fs->fs_name, key, &keylen);
++ down (&fs->headerlock);
+ jid_get_lock_state_lvb (key, keylen, lg_lock_state_Exclusive, lvb,
+ jid_header_lvb_size);
+ jidc = (uint32_t) (lvb[0]) << 0;
+ jidc |= (uint32_t) (lvb[1]) << 8;
+ jidc |= (uint32_t) (lvb[2]) << 16;
+ jidc |= (uint32_t) (lvb[3]) << 24;
-+ jidc += 300;
++ jidc += 1;
+ lvb[3] = (jidc >> 24) & 0xff;
+ lvb[2] = (jidc >> 16) & 0xff;
+ lvb[1] = (jidc >> 8) & 0xff;
+ jid_sync_lvb (key, keylen, lvb, jid_header_lvb_size);
+ jid_get_lock_state (key, keylen, lg_lock_state_Unlock);
+ /* do an unlock here, so that when rehold grabs it shared, there is no
-+ * lvb writing.
++ * lvb writing. yeah, bit icky. fix some other day.
+ */
+
+ jid_rehold_lvbs (fs);
++ up (&fs->headerlock);
+}
+
+/**
+ goto exit;
+ }
+
-+ jid_hold_list_lock (fs);
-+
+ jid_get_lock_name (fs->fs_name, jid, key, &keylen);
-+ jid_get_lock_state_lvb (key, keylen, lg_lock_state_Shared, lvb, 64);
++ jid_get_lock_state_inr (key, keylen, lg_lock_state_Exclusive,
++ lg_lock_flag_IgnoreExp, lvb, 64);
+
+ if (lvb[0] != 0) {
+ memcpy (name, &lvb[1], strlen (&lvb[1]) + 1);
+
+ jid_get_lock_state (key, keylen, lg_lock_state_Unlock);
+
-+ jid_release_list_lock (fs);
-+
+ exit:
+ return err;
+}
+ * @fs:
+ * @jid:
+ *
-+ * actually may only need to et first byte to zero
++ * actually may only need to set first byte to zero
+ *
+ * Returns: int
+ */
+ if (jid >= fs->JIDcount)
+ goto exit;
+
-+ jid_hold_list_lock (fs);
-+
+ jid_get_lock_name (fs->fs_name, jid, key, &keylen);
-+ jid_get_lock_state_lvb (key, keylen, lg_lock_state_Exclusive, lvb, 64);
++ jid_get_lock_state_inr (key, keylen, lg_lock_state_Exclusive,
++ lg_lock_flag_IgnoreExp, lvb, 64);
+ lvb[0] = 0;
+ jid_sync_lvb (key, keylen, lvb, strlen (&lvb[1]) + 2);
+ jid_get_lock_state (key, keylen, lg_lock_state_Unlock);
+
-+ jid_release_list_lock (fs);
-+
+ exit:
+ return 0;
+}
+ * @fs:
+ * @jid:
+ *
-+ * This is broken.
++ * grab EXL on names until we find one we want. (or have all.)
++ * grab that one.
++ * Unlock everything we got.
+ *
+ * Returns: int
+ */
+void
+get_journalID (gulm_fs_t * fs)
+{
-+ uint32_t i = 0, jifc;
+ uint8_t key[GIO_KEY_SIZE], lvb[64];
+ uint16_t keylen = GIO_KEY_SIZE;
-+ int first_clear = -1;
-+
-+ retry:
-+ jid_hold_list_lock (fs);
++ int i, first_clear = -1, lockedto;
+
++retry:
+ /* find an empty space, or ourselves again */
-+ for (i = 0; i < fs->JIDcount; i++) {
++ for (i = 0, lockedto = 0; i < fs->JIDcount; i++, lockedto++) {
+ keylen = sizeof (key);
+ jid_get_lock_name (fs->fs_name, i, key, &keylen);
-+ jid_get_lock_state_lvb (key, keylen, lg_lock_state_Exclusive,
-+ lvb, 64);
-+ jid_get_lock_state (key, keylen, lg_lock_state_Unlock);
++ jid_get_lock_state_inr (key, keylen, lg_lock_state_Exclusive,
++ lg_lock_flag_IgnoreExp, lvb, 64);
+ if (first_clear == -1 && lvb[0] == 0 ) {
+ first_clear = i;
+ } else if (strcmp (gulm_cm.myName, &lvb[1]) == 0) {
+ }
+ }
+ if (first_clear >= 0) {
-+ /* take the jid we have found */
-+ keylen = sizeof (key);
-+ jid_get_lock_name (fs->fs_name, first_clear, key, &keylen);
-+ jid_get_lock_state_lvb (key, keylen, lg_lock_state_Exclusive,
-+ lvb, 64);
++ /* we should be hold all jid mapping locks up to this one
++ * (and maybe beyond) EXL, so just lvb sync to the one we
++ * want.
++ */
+ lvb[0] = 2;
+ memcpy (&lvb[1], gulm_cm.myName, strlen (gulm_cm.myName) + 1);
++
++ keylen = sizeof (key);
++ jid_get_lock_name (fs->fs_name, first_clear, key, &keylen);
+ jid_sync_lvb (key, keylen, lvb, strlen (gulm_cm.myName) + 2);
-+ jid_get_lock_state (key, keylen, lg_lock_state_Unlock);
+
+ fs->fsJID = first_clear;
+ }
+
-+ /* unlock the header lock */
-+ jid_release_list_lock (fs);
++ /* unlock them so others can find */
++ for (; lockedto >= 0; lockedto--) {
++ keylen = sizeof (key);
++ jid_get_lock_name (fs->fs_name, lockedto, key, &keylen);
++ jid_get_lock_state (key, keylen, lg_lock_state_Unlock);
++ }
+
+ if (first_clear < 0) {
-+ /* nothing found, grow and try again. */
+ jid_grow_space (fs);
+ goto retry;
+ }
-+
+}
+
+/**
+ uint8_t key[GIO_KEY_SIZE], lvb[64];
+ uint16_t keylen = GIO_KEY_SIZE;
+
-+ /* grab list lock */
-+ jid_hold_list_lock (fs);
-+
+ for (i = 0; i < fs->JIDcount; i++) {
+ keylen = sizeof (key);
+ jid_get_lock_name (fs->fs_name, i, key, &keylen);
-+ jid_get_lock_state_lvb (key, keylen, lg_lock_state_Exclusive,
-+ lvb, 64);
++ jid_get_lock_state_inr (key, keylen, lg_lock_state_Exclusive,
++ lg_lock_flag_IgnoreExp, lvb, 64);
+ if (strcmp (name, &lvb[1]) == 0) {
+ *jid = i;
+ found = 0;
+ jid_get_lock_state (key, keylen, lg_lock_state_Unlock);
+
+ }
-+ /* unlock the list lock */
-+ jid_release_list_lock (fs);
+
+ return found;
+}
+ uint16_t keylen = GIO_KEY_SIZE;
+ unsigned int ujid;
+
-+ /* grab list lock */
-+ jid_hold_list_lock (fs);
-+
+ for (i = 0; i < fs->JIDcount; i++) {
+ keylen = sizeof (key);
+ jid_get_lock_name (fs->fs_name, i, key, &keylen);
-+ jid_get_lock_state_lvb (key, keylen, lg_lock_state_Shared, lvb,
-+ 64);
++ jid_get_lock_state_inr (key, keylen, lg_lock_state_Exclusive,
++ lg_lock_flag_IgnoreExp, lvb, 64);
+ jid_get_lock_state (key, keylen, lg_lock_state_Unlock);
+
+ if (lvb[0] == 1) {
+ fs->cb (fs->fsdata, LM_CB_NEED_RECOVERY, &ujid);
+ }
+ }
-+
-+ /* unlock the list lock */
-+ jid_release_list_lock (fs);
+}
+
++
+/**
+ * jid_fs_init -
+ * @fs:
+
+ fs->JIDcount = 0;
+
++ init_MUTEX (&fs->headerlock);
++
+ jid_get_header_name (fs->fs_name, key, &keylen);
+ jid_hold_lvb (key, keylen);
+ jid_rehold_lvbs (fs);
+ jid_get_lock_name (fs->fs_name, i, key, &keylen);
+ jid_unhold_lvb (key, keylen);
+ }
-+ keylen = sizeof (key);
++ keylen = GIO_KEY_SIZE;
+ jid_get_header_name (fs->fs_name, key, &keylen);
+ jid_unhold_lvb (key, keylen);
+ jid_get_lock_state (key, keylen, lg_lock_state_Unlock);
+
+ gulm_fs_t *fs = (gulm_fs_t *) d;
+ jid_get_header_name (fs->fs_name, key, &keylen);
++
++ down (&fs->headerlock);
+ jid_get_lock_state (key, keylen, lg_lock_state_Unlock);
+
+ jid_rehold_lvbs (fs);
++ up (&fs->headerlock);
+}
+
+/**
+jid_header_lock_drop (uint8_t * key, uint16_t keylen)
+{
+ gulm_fs_t *fs;
++ uint8_t *fsname;
++ uint8_t len;
++ uint8_t ktype, jtype;
++ ktype = key[0];
++ len = key[1];
++ fsname = &key[2];
++ jtype = key[4 + len];
++
+ /* make sure this is the header lock.... */
-+ if (key[1] == 'H' && (fs = get_fs_by_name (&key[10])) != NULL) {
++ if (ktype == 'J' && jtype == 'H' &&
++ (fs = get_fs_by_name (fsname)) != NULL) {
+ qu_function_call (&fs->cq, jid_unlock_callback, fs);
+ }
+}
+
++
+/****************************************************************************/
++/* I don't know why these are in this file. Laziness I would presume. */
++/* 6 bytes for stuff in key (lengths and type bytes)
++ * 32 for fs name
++ * 64 for node name.
++ */
++#define NodeLockNameLen (6 + 32 + 64)
++
+/**
-+ * jid_get_lsresv_name -
-+ * @fsname:
-+ * @key:
-+ * @keylen:
++ * gulm_nodelock_finish -
++ * @item:
+ *
+ *
-+ * Returns: int
++ * Returns: void
+ */
-+int
-+jid_get_lsresv_name (char *fsname, uint8_t * key, uint16_t * keylen)
++void gulm_nodelock_finish (struct glck_req *item)
+{
-+ int len;
-+
-+ len = strlen(gulm_cm.myName);
-+ len = pack_lock_key(key, *keylen, 'N', fsname, gulm_cm.myName,
-+ MIN(64,len));
-+ if( len <=0 ) return len;
-+
-+ *keylen = len;
-+
-+ return 0;
++ struct completion *sleep = (struct completion *)item->misc;
++ complete (sleep);
+}
+
+/**
+ * jid_lockstate_reserve -
+ * @fs:
+ *
++ * if we are expired, this will block until someone else has
++ * cleaned our last mess up.
++ *
++ * We may very well need to put in some kind of timeout,
++ * otherwise this may do a forever lockup much like the
++ * FirstMounter lock had.
+ *
+ * Returns: void
+ */
+void
+jid_lockstate_reserve (gulm_fs_t * fs, int first)
+{
-+ uint8_t key[5 + 32 + 64];
-+ uint16_t keylen = 5 + 32 + 64;
-+ /* 5 bytes for stuff in key (lengths and type bytes)
-+ * 32 for fs name
-+ * 64 for node name.
-+ */
++ int len;
++ struct completion sleep;
++ glckr_t *item;
++ uint8_t *key;
++
++ item = glq_get_new_req();
++ if (item == NULL) {
++ return;
++ }
++
++ key = kmalloc(NodeLockNameLen, GFP_KERNEL);
++ item->key = key;
++ if (item->key == NULL) {
++ glq_recycle_req(item);
++ return;
++ }
++ len = strlen(gulm_cm.myName);
++ item->keylen = pack_lock_key(item->key, NodeLockNameLen, 'N',
++ fs->fs_name, gulm_cm.myName, MIN(64,len));
++ item->subid = 0;
++ item->start = 0;
++ item->stop = ~((uint64_t)0);
++ item->type = glq_req_type_state;
++ item->state = lg_lock_state_Exclusive;
++ item->flags = (first?lg_lock_flag_IgnoreExp:0)|lg_lock_flag_NoCallBacks;
++ item->error = 0;
+
-+ jid_get_lsresv_name (fs->fs_name, key, &keylen);
++ init_completion (&sleep);
+
-+ /* if we are expired, this will block until someone else has
-+ * cleaned our last mess up.
-+ *
-+ * Will very well may need to put in some kind of timeout
-+ * otherwise this may do a forever lockup much like the
-+ * FirstMounter lock had.
-+ */
-+ jid_get_lock_state_inr (key, keylen, lg_lock_state_Exclusive,
-+ first?lg_lock_flag_IgnoreExp:0, NULL, 0);
++ item->misc = &sleep;
++ item->finish = gulm_nodelock_finish;
+
++ glq_queue (item);
++ wait_for_completion (&sleep);
++ kfree(key);
+}
+
+/**
+void
+jid_lockstate_release (gulm_fs_t * fs)
+{
-+ uint8_t key[5 + 32 + 64];
-+ uint16_t keylen = 5 + 32 + 64;
++ int len;
++ struct completion sleep;
++ glckr_t *item;
++ uint8_t *key;
+
-+ jid_get_lsresv_name (fs->fs_name, key, &keylen);
++ item = glq_get_new_req();
++ if (item == NULL) {
++ return;
++ }
+
-+ jid_get_lock_state (key, keylen, lg_lock_state_Unlock);
++ key = kmalloc(NodeLockNameLen, GFP_KERNEL);
++ item->key = key;
++ if (item->key == NULL) {
++ glq_recycle_req(item);
++ return;
++ }
++ len = strlen(gulm_cm.myName);
++ item->keylen = pack_lock_key(item->key, NodeLockNameLen, 'N',
++ fs->fs_name, gulm_cm.myName, MIN(64,len));
++ item->subid = 0;
++ item->start = 0;
++ item->stop = ~((uint64_t)0);
++ item->type = glq_req_type_state;
++ item->state = lg_lock_state_Unlock;
++ item->flags = 0;
++ item->error = 0;
++
++ init_completion (&sleep);
++
++ item->misc = &sleep;
++ item->finish = gulm_nodelock_finish;
++
++ glq_queue (item);
++ wait_for_completion (&sleep);
++ kfree(key);
++}
++
++
++/* vim: set ai cin noet sw=8 ts=8 : */
+diff -urN linux-orig/fs/gfs_locking/lock_gulm/gulm_jid.h linux-patched/fs/gfs_locking/lock_gulm/gulm_jid.h
+--- linux-orig/fs/gfs_locking/lock_gulm/gulm_jid.h 1969-12-31 18:00:00.000000000 -0600
++++ linux-patched/fs/gfs_locking/lock_gulm/gulm_jid.h 2004-10-22 11:54:55.000000000 -0500
+@@ -0,0 +1,33 @@
++/******************************************************************************
++*******************************************************************************
++**
++** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
++** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
++**
++** This copyrighted material is made available to anyone wishing to use,
++** modify, copy, or redistribute it subject to the terms and conditions
++** of the GNU General Public License v.2.
++**
++*******************************************************************************
++******************************************************************************/
++
++#ifndef __GULM_JID_H__
++#define __GULM_JID_H__
++#include "gulm.h"
++void jid_fs_init (gulm_fs_t * fs);
++void jid_fs_release (gulm_fs_t * fs);
++int get_journalID (gulm_fs_t * fs);
++int lookup_name_by_jid (gulm_fs_t * fs, uint32_t jid, uint8_t * name);
++void release_JID (gulm_fs_t * fs, uint32_t jid, int owner);
++void put_journalID (gulm_fs_t * fs);
++void check_for_stale_expires (gulm_fs_t * fs);
++
++int
++ find_jid_by_name_and_mark_replay (gulm_fs_t * fs, uint8_t * name, uint32_t * jid);
++
++void jid_lockstate_reserve (gulm_fs_t * fs, int first);
++void jid_lockstate_release (gulm_fs_t * fs);
++
++/* to be called from the lg_lock callbacks. */
++void jid_header_lock_drop (uint8_t * key, uint16_t keylen);
++#endif /*__GULM_JID_H__*/
+diff -urN linux-orig/fs/gfs_locking/lock_gulm/gulm_lock_queue.c linux-patched/fs/gfs_locking/lock_gulm/gulm_lock_queue.c
+--- linux-orig/fs/gfs_locking/lock_gulm/gulm_lock_queue.c 1969-12-31 18:00:00.000000000 -0600
++++ linux-patched/fs/gfs_locking/lock_gulm/gulm_lock_queue.c 2004-10-22 11:54:55.000000000 -0500
+@@ -0,0 +1,775 @@
++/******************************************************************************
++*******************************************************************************
++**
++** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
++** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
++**
++** This copyrighted material is made available to anyone wishing to use,
++** modify, copy, or redistribute it subject to the terms and conditions
++** of the GNU General Public License v.2.
++**
++*******************************************************************************
++******************************************************************************/
++
++#include "gulm.h"
++
++#include <linux/kernel.h>
++#include <linux/fs.h>
++#include <linux/slab.h>
++#include <linux/file.h>
++#include <linux/smp_lock.h>
++#include <linux/crc32.h>
++#define __KERNEL_SYSCALLS__
++#include <linux/unistd.h>
++
++#include "handler.h"
++
++#include "gulm_lock_queue.h"
++
++/* The Queues. */
++struct list_head glq_Free;
++spinlock_t glq_FreeLock;
++unsigned int glq_FreeCount;
++struct list_head glq_OutQueue;
++spinlock_t glq_OutLock;
++unsigned int glq_OutCount;
++/* Not sure that ReplyMap really needs to be this big. Things shouldn't be
++ * on it that long. maybe 1<<8? must test and see. later.
++ */
++#define ReplyMapSize (1<<13) /* map size is a power of 2 */
++#define ReplyMapBits (0x1FFF) /* & is faster than % */
++struct list_head *glq_ReplyMap;
++spinlock_t *glq_ReplyLock;
++
++/* The Threads. */
++struct task_struct *glq_recver_task = NULL;
++struct task_struct *glq_sender_task = NULL;
++struct completion glq_startedup;
++int glq_running;
++wait_queue_head_t glq_send_wchan;
++
++/* */
++extern gulm_cm_t gulm_cm;
++
++/* The code. */
++/**
++ * glq_init -
++ *
++ * Returns: int
++ */
++int glq_init(void)
++{
++ int i;
++
++ glq_running = FALSE;
++ glq_recver_task = NULL;
++ glq_sender_task = NULL;
++ init_waitqueue_head (&glq_send_wchan);
++ init_completion (&glq_startedup);
++
++ INIT_LIST_HEAD (&glq_Free);
++ spin_lock_init (&glq_FreeLock);
++ glq_FreeCount = 0;
++ INIT_LIST_HEAD (&glq_OutQueue);
++ spin_lock_init (&glq_OutLock);
++ glq_OutCount = 0;
++
++ glq_ReplyMap = kmalloc(sizeof(struct list_head) * ReplyMapSize, GFP_KERNEL);
++ if( glq_ReplyMap == NULL ) {
++ return -ENOMEM;
++ }
++ glq_ReplyLock = kmalloc(sizeof(spinlock_t) * ReplyMapSize, GFP_KERNEL);
++ if( glq_ReplyLock == NULL ) {
++ kfree(glq_ReplyMap);
++ return -ENOMEM;
++ }
++ for(i=0; i < ReplyMapSize; i++) {
++ INIT_LIST_HEAD (&glq_ReplyMap[i]);
++ spin_lock_init (&glq_ReplyLock[i]);
++ }
++ /* ?Add some empty reqs to the Free list right now? */
++ return 0;
++}
++
++/**
++ * glq_release -
++ *
++ * doesn't grab spins, because by the time this is called, there should be
++ * no other threads anywhere that could possibly be working on these lists.
++ *
++ * Returns: void
++ */
++void glq_release(void)
++{
++ struct list_head *tmp, *lltmp;
++ glckr_t *item;
++ int i;
++
++ list_for_each_safe (tmp, lltmp, &glq_OutQueue) {
++ item = list_entry (tmp, glckr_t, list);
++ list_del (tmp);
++ if (item->key != NULL) kfree (item->key);
++ if (item->lvb != NULL) kfree (item->lvb);
++ kfree (item);
++ }
++ glq_FreeCount = 0;
++ list_for_each_safe (tmp, lltmp, &glq_Free) {
++ item = list_entry (tmp, glckr_t, list);
++ list_del (tmp);
++ if (item->key != NULL) kfree (item->key);
++ if (item->lvb != NULL) kfree (item->lvb);
++ kfree (item);
++ }
++ glq_OutCount = 0;
++ for(i=0; i < ReplyMapSize; i++) {
++ list_for_each_safe (tmp, lltmp, &glq_ReplyMap[i]) {
++ item = list_entry (tmp, glckr_t, list);
++ list_del (tmp);
++ if (item->key != NULL) kfree (item->key);
++ if (item->lvb != NULL) kfree (item->lvb);
++ kfree (item);
++ }
++ }
++
++ kfree(glq_ReplyLock);
++ kfree(glq_ReplyMap);
++}
++
++/**
++ * glq_get_new_req -
++ *
++ * WARNING! For state and action requests, glq will not free the key or
++ * lvb pointers. For drop and cancel glq WILL free the pointer when it is
++ * finished.
++ *
++ * Returns: glckr_t
++ */
++glckr_t *glq_get_new_req(void)
++{
++ struct list_head *tmp;
++ glckr_t *item = NULL;
++
++ /* try to reclaim a recycled req first. */
++ spin_lock (&glq_FreeLock);
++ if (!list_empty (&glq_Free)) {
++ tmp = glq_Free.next;
++ list_del (tmp);
++ item = list_entry (tmp, glckr_t, list);
++ glq_FreeCount --;
++ }
++ spin_unlock (&glq_FreeLock);
++
++ /* nothing on Free list, make new. */
++ if (item == NULL) {
++ item = kmalloc(sizeof(glckr_t), GFP_KERNEL);
++ if (item == NULL)
++ return NULL;
++ memset(item, 0, sizeof(glckr_t));
++ }
++
++ /* initialize.
++ * reset list so its good.
++ */
++ INIT_LIST_HEAD (&item->list);
++
++ return item;
++}
++
++/**
++ * glq_recycle_req -
++ * @lckr_t:
++ *
++ * assumes that item is not on any lists.
++ *
++ * Returns: void
++ */
++void glq_recycle_req(glckr_t *item)
++{
++ /* clean it up */
++ INIT_LIST_HEAD (&item->list);
++
++ if (item->type == glq_req_type_drop ||
++ item->type == glq_req_type_cancel) {
++ if (item->key != NULL) {
++ kfree(item->key);
++ item->key = NULL;
++ }
++ if (item->lvb != NULL) {
++ kfree(item->lvb);
++ item->lvb = NULL;
++ }
++ } else {
++ item->key = NULL;
++ item->lvb = NULL;
++ }
++ item->misc = NULL;
++ item->finish = NULL;
++
++ /* everything else is ignoreable. */
++
++ /* onto the Free list. unless too many. */
++ spin_lock (&glq_FreeLock);
++ if (glq_FreeCount > 20) { /* XXX icky hidden constant */
++ kfree (item);
++ }else{
++ list_add (&item->list, &glq_Free);
++ glq_FreeCount ++;
++ }
++ spin_unlock (&glq_FreeLock);
++}
++
++/**
++ * glq_calc_hash_key_long -
++ * @key:
++ * @keylen:
++ * @subid:
++ * @start:
++ * @stop:
++ *
++ *
++ * Returns: int
++ */
++int glq_calc_hash_key_long(uint8_t *key, uint16_t keylen,
++ uint64_t subid, uint64_t start, uint64_t stop)
++{
++ int ret = GULM_CRC_INIT;
++ ret = crc32 (ret, &keylen, sizeof(uint16_t));
++ ret = crc32 (ret, key, keylen);
++ ret = crc32 (ret, &subid, sizeof(uint64_t));
++ ret = crc32 (ret, &start, sizeof(uint64_t));
++ ret = crc32 (ret, &stop, sizeof(uint64_t));
++ ret &= ReplyMapBits;
++ return ret;
++}
++
++/**
++ * glq_calc_hash_key -
++ * @item:
++ *
++ *
++ * Returns: int
++ */
++int glq_calc_hash_key(glckr_t *item)
++{
++ return glq_calc_hash_key_long (item->key, item->keylen, item->subid,
++ item->start, item->stop);
++}
++
++/**
++ * glq_queue -
++ * @item:
++ *
++ *
++ * Returns: void
++ */
++void glq_queue(glckr_t *item)
++{
++ spin_lock (&glq_OutLock);
++ list_add (&item->list, &glq_OutQueue);
++ glq_OutCount++;
++ spin_unlock (&glq_OutLock);
++ wake_up (&glq_send_wchan);
++}
++
++/**
++ * glq_cancel -
++ * @cancel: request info (from glq_get_new_req()) identifying the victim.
++ *
++ * You MUST call glq_get_new_req() and fill that with the info of the
++ * request you want to cancel.
++ * Fix: memcmp() returns 0 on a match, so the key test must be == 0.
++ * Returns: void
++ */
++void glq_cancel(glckr_t *cancel)
++{
++	int found = FALSE;
++	struct list_head *tmp, *lltmp;
++	glckr_t *item;
++
++	spin_lock (&glq_OutLock);
++	list_for_each_safe (tmp, lltmp, &glq_OutQueue) {
++		item = list_entry (tmp, glckr_t, list);
++		if (item->subid == cancel->subid &&
++		    item->start == cancel->start &&
++		    item->stop == cancel->stop &&
++		    item->keylen == cancel->keylen &&
++		    memcmp(item->key, cancel->key, cancel->keylen) == 0 ) {
++			/* found it; keep glq_OutCount in step with removal. */
++			list_del (tmp); glq_OutCount--;
++			found = TRUE;
++			item->error = lg_err_Canceled;
++			if (item->finish != NULL )
++				item->finish (item);
++			glq_recycle_req (item);
++			break;
++		}
++	}
++	spin_unlock(&glq_OutLock);
++
++	if (!found) {
++		cancel->type = glq_req_type_cancel;
++		glq_queue (cancel);
++	}
++}
++
++/**
++ * glq_send_queue_empty -
++ *
++ * Returns: int
++ */
++static int glq_send_queue_empty(void)
++{
++ int ret;
++ spin_lock (&glq_OutLock);
++ ret = list_empty (&glq_OutQueue);
++ spin_unlock (&glq_OutLock);
++ return ret;
++}
++
++/**
++ * glq_sender_thread -
++ * @data:
++ *
++ *
++ * Returns: int
++ */
++int glq_sender_thread(void *data)
++{
++ int err=0, bucket;
++ struct list_head *tmp;
++ glckr_t *item = NULL;
++ DECLARE_WAITQUEUE (__wait_chan, current);
++
++ daemonize ("gulm_glq_sender");
++ glq_sender_task = current;
++ complete (&glq_startedup);
++
++ while (glq_running) {
++ /* wait for item */
++ current->state = TASK_INTERRUPTIBLE;
++ add_wait_queue (&glq_send_wchan, &__wait_chan);
++ if( glq_send_queue_empty () )
++ schedule ();
++ remove_wait_queue (&glq_send_wchan, &__wait_chan);
++ current->state = TASK_RUNNING;
++ if (!glq_running) break;
++
++ /* pull item off queue */
++ spin_lock (&glq_OutLock);
++ if (list_empty (&glq_OutQueue) ) {
++ spin_unlock (&glq_OutLock);
++ continue;
++ }
++ tmp = glq_OutQueue.prev;
++ list_del (tmp);
++ glq_OutCount--;
++ spin_unlock (&glq_OutLock);
++ item = list_entry (tmp, glckr_t, list);
++
++ /* send to local ltpx or die */
++ if (item->type == glq_req_type_state ) {
++ INIT_LIST_HEAD (&item->list);
++ bucket = glq_calc_hash_key(item);
++ spin_lock (&glq_ReplyLock[bucket]);
++ list_add (&item->list, &glq_ReplyMap[bucket]);
++ spin_unlock (&glq_ReplyLock[bucket]);
++ err = lg_lock_state_req (gulm_cm.hookup, item->key,
++ item->keylen, item->subid, item->start,
++ item->stop, item->state, item->flags,
++ item->lvb, item->lvblen);
++ } else if (item->type == glq_req_type_action) {
++ INIT_LIST_HEAD (&item->list);
++ bucket = glq_calc_hash_key(item);
++ spin_lock (&glq_ReplyLock[bucket]);
++ list_add (&item->list, &glq_ReplyMap[bucket]);
++ spin_unlock (&glq_ReplyLock[bucket]);
++ err = lg_lock_action_req (gulm_cm.hookup, item->key,
++ item->keylen, item->subid, item->state,
++ item->lvb, item->lvblen);
++ } else if (item->type == glq_req_type_query ) {
++ INIT_LIST_HEAD (&item->list);
++ bucket = glq_calc_hash_key(item);
++ spin_lock (&glq_ReplyLock[bucket]);
++ list_add (&item->list, &glq_ReplyMap[bucket]);
++ spin_unlock (&glq_ReplyLock[bucket]);
++ err = lg_lock_query_req (gulm_cm.hookup, item->key,
++ item->keylen, item->subid, item->start,
++ item->stop, item->state);
++ } else if (item->type == glq_req_type_drop) {
++ err = lg_lock_drop_exp (gulm_cm.hookup, item->lvb,
++ item->key, item->keylen);
++ /* drop exp has no reply. */
++ glq_recycle_req (item);
++ } else if (item->type == glq_req_type_cancel) {
++ err = lg_lock_cancel_req (gulm_cm.hookup, item->key,
++ item->keylen, item->subid);
++ /* cancels have no reply. */
++ glq_recycle_req (item);
++ } else {
++ /* bad type. */
++ log_err ("Unknown send type %d, tossing request.\n",
++ item->type);
++ glq_recycle_req (item);
++ }
++ if (err != 0 ) {
++ log_err ("gulm_glq_sender error %d\n", err);
++ glq_running = FALSE;
++ glq_recycle_req (item);
++ break;
++ }
++ }
++ complete (&glq_startedup);
++ return 0;
++}
++
++/**
++ * glq_login_reply -
++ * @misc:
++ * @err:
++ * @which:
++ *
++ *
++ * Returns: int
++ */
++int glq_login_reply (void *misc, uint32_t error, uint8_t which)
++{
++ if (error != 0) {
++ glq_running = FALSE;
++ log_err ("glq: Got error %d from login request.\n", error);
++ }
++ return error;
++}
++
++/**
++ * glq_logout_reply -
++ * @misc:
++ *
++ *
++ * Returns: int
++ */
++int glq_logout_reply (void *misc)
++{
++ glq_running = FALSE; /* if it isn't already. */
++ return 0;
++}
++
++/**
++ * glq_lock_state -
++ * @misc:
++ * @key:
++ * @keylen:
++ * @state:
++ * @flags:
++ * @error:
++ * @LVB:
++ * @LVBlen:
++ *
++ *
++ * Returns: int
++ */
++int
++glq_lock_state (void *misc, uint8_t * key, uint16_t keylen,
++ uint64_t subid, uint64_t start, uint64_t stop,
++ uint8_t state, uint32_t flags, uint32_t error,
++ uint8_t * LVB, uint16_t LVBlen)
++{
++ int bucket, found = FALSE;
++ struct list_head *tmp;
++ glckr_t *item=NULL;
++
++ /* lookup and remove from ReplyMap */
++ bucket = glq_calc_hash_key_long(key, keylen, subid, start, stop);
++ spin_lock (&glq_ReplyLock[bucket]);
++ list_for_each(tmp, &glq_ReplyMap[bucket]) {
++ item = list_entry (tmp, glckr_t, list);
++ if (item->subid == subid &&
++ item->start == start &&
++ item->stop == stop &&
++ item->keylen == keylen &&
++ memcmp(item->key, key, keylen) == 0 ) {
++ /* found it. */
++ list_del (tmp);
++ found = TRUE;
++ break;
++ }
++ }
++ spin_unlock(&glq_ReplyLock[bucket]);
++
++ if( !found ) {
++ /* not found complaint */
++ return 0;
++ }
++
++ /* restuff results */
++ item->state = state;
++ item->flags = flags;
++ item->error = error;
++ if (item->lvb != NULL && LVB != NULL) {
++ item->lvblen = MIN(item->lvblen, LVBlen);
++ memcpy(item->lvb, LVB, item->lvblen);
++ }
++
++ /* call finish */
++ if (item->finish != NULL) item->finish (item);
++
++ /* put on Free */
++ glq_recycle_req(item);
++ return 0;
++}
++
++/**
++ * glq_lock_action -
++ * @misc:
++ * @key:
++ * @keylen:
++ * @action:
++ * @error:
++ *
++ *
++ * Returns: int
++ */
++int
++glq_lock_action (void *misc, uint8_t * key, uint16_t keylen,
++ uint64_t subid, uint8_t action, uint32_t error)
++{
++ int bucket, found = FALSE;
++ struct list_head *tmp;
++ glckr_t *item = NULL;
++
++ /* lookup and remove from ReplyMap */
++ bucket = glq_calc_hash_key_long(key, keylen, subid, 0, ~((uint64_t)0));
++ spin_lock (&glq_ReplyLock[bucket]);
++ list_for_each(tmp, &glq_ReplyMap[bucket]) {
++ item = list_entry (tmp, glckr_t, list);
++ if (item->subid == subid &&
++ item->start == 0 &&
++ item->stop == ~((uint64_t)0) &&
++ item->keylen == keylen &&
++ memcmp(item->key, key, keylen) == 0 ) {
++ /* found it. */
++ list_del (tmp);
++ found = TRUE;
++ break;
++ }
++ }
++ spin_unlock(&glq_ReplyLock[bucket]);
++
++ if( !found ) {
++ /* not found complaint */
++ return 0;
++ }
++
++ /* restuff results */
++ item->error = error;
++
++ /* call finish */
++ if (item->finish != NULL) item->finish (item);
++
++ /* put on Free */
++ glq_recycle_req(item);
++ return 0;
++}
++
++/**
++ * glq_lock_query -
++ * this is an ugly interface.....
++ * there is something that needs to be done here to clean things up. I'm
++ * not sure what that is right now, and I need to have something working.
++ * So we're going with this for now.
++ *
++ */
++int
++glq_lock_query (void *misc, uint8_t * key, uint16_t keylen,
++ uint64_t subid, uint64_t start, uint64_t stop,
++ uint8_t state, uint32_t error, uint8_t * cnode,
++ uint64_t csubid, uint64_t cstart, uint64_t cstop,
++ uint8_t cstate)
++{
++ int bucket, found = FALSE;
++ struct list_head *tmp;
++ glckr_t *item = NULL;
++
++ /* lookup and remove from ReplyMap */
++ bucket = glq_calc_hash_key_long(key, keylen, subid, start, stop);
++ spin_lock (&glq_ReplyLock[bucket]);
++ list_for_each(tmp, &glq_ReplyMap[bucket]) {
++ item = list_entry (tmp, glckr_t, list);
++ if (item->subid == subid &&
++ item->start == start &&
++ item->stop == stop &&
++ item->keylen == keylen &&
++ memcmp(item->key, key, keylen) == 0 ) {
++ /* found it. */
++ list_del (tmp);
++ found = TRUE;
++ break;
++ }
++ }
++ spin_unlock(&glq_ReplyLock[bucket]);
++
++ if( !found ) {
++ /* not found; no matching request on ReplyMap, so silently ignore */
++ return 0;
++ }
++
++ /* restuff results */
++ item->error = error;
++ item->subid = csubid;
++ item->start = cstart;
++ item->stop = cstop;
++ item->state = cstate;
++
++ /* call finish */
++ if (item->finish != NULL) item->finish (item);
++
++ /* put on Free */
++ glq_recycle_req(item);
++ return 0;
++}
++
++/**
++ * glq_drop_lock_req -
++ * @misc:
++ * @key:
++ * @keylen:
++ * @state:
++ *
++ *
++ * Returns: int
++ */
++int
++glq_drop_lock_req (void *misc, uint8_t * key, uint16_t keylen,
++ uint64_t subid, uint8_t state)
++{
++ do_drop_lock_req (key, keylen, state);
++ jid_header_lock_drop (key, keylen);
++ return 0;
++}
++
++/**
++ * glq_drop_all -
++ * @misc:
++ *
++ *
++ * Returns: int
++ */
++int glq_drop_all (void *misc)
++{
++ passup_droplocks ();
++ return 0;
++}
++
++/**
++ * glq_error -
++ * @misc:
++ * @error:
++ *
++ *
++ * Returns: int
++ */
++int glq_error (void *misc, uint32_t error)
++{
++ log_err ("glq: weird last gasp error %d\n", error);
++ return error;
++}
++
++static lg_lockspace_callbacks_t glq_lock_ops = {
++ login_reply:glq_login_reply,
++ logout_reply:glq_logout_reply,
++ lock_state:glq_lock_state,
++ lock_action:glq_lock_action,
++ lock_query:glq_lock_query,
++ drop_lock_req:glq_drop_lock_req,
++ drop_all:glq_drop_all,
++ error:glq_error
++};
++/**
++ * glq_recving_thread -
++ * @data:
++ *
++ *
++ * Returns: int
++ */
++int glq_recving_thread(void *data)
++{
++ int err;
++ daemonize ("gulm_glq_recver");
++ glq_recver_task = current;
++ complete (&glq_startedup);
++
++ while (glq_running) {
++ err = lg_lock_handle_messages (gulm_cm.hookup, &glq_lock_ops, NULL);
++ if (err != 0) {
++ log_err ("gulm_glq_recver error %d\n", err);
++ glq_running = FALSE;
++ wake_up (&glq_send_wchan);
++ break;
++ }
++ }
++ complete (&glq_startedup);
++ return 0;
++}
++
++/**
++ * glq_shutdown -
++ *
++ * Returns: void
++ */
++void glq_shutdown(void)
++{
++ if (glq_running) glq_running = FALSE;
++ if (glq_sender_task != NULL) {
++ wake_up (&glq_send_wchan);
++ wait_for_completion (&glq_startedup);
++ glq_sender_task = NULL;
++ }
++ if (glq_recver_task != NULL) {
++ lg_lock_logout (gulm_cm.hookup);
++ wait_for_completion (&glq_startedup);
++ glq_recver_task = NULL;
++ }
++}
++
++/**
++ * glq_startup -
++ *
++ * Returns: int
++ */
++int glq_startup(void)
++{
++ int err;
++
++ if (glq_running) return 0;
++
++ err = lg_lock_login (gulm_cm.hookup, "GFS ");
++ if (err != 0) {
++ log_err ("Failed to send lock login. %d\n", err);
++ return -err;
++ }
++
++ glq_running = TRUE;
++ if( glq_recver_task == NULL ) {
++ err = kernel_thread (glq_recving_thread, NULL, 0);
++ if( err < 0 ) {
++ log_err ("Failed to start glq_recving_thread %d\n",
++ err);
++ glq_shutdown();
++ return err;
++ }
++ wait_for_completion (&glq_startedup);
++ }
+
++ if (glq_sender_task == NULL) {
++ err = kernel_thread (glq_sender_thread, NULL, 0);
++ if( err < 0 ) {
++ log_err ("Failed to start glq_sender_thread %d\n",
++ err);
++ glq_shutdown();
++ return err;
++ }
++ wait_for_completion (&glq_startedup);
++ }
++ return 0;
+}
+
-+
+/* vim: set ai cin noet sw=8 ts=8 : */
-diff -urN linux-2.6.9-rc1-mm3/fs/gfs_locking/lock_gulm/gulm_jid.h linux/fs/gfs_locking/lock_gulm/gulm_jid.h
---- linux-2.6.9-rc1-mm3/fs/gfs_locking/lock_gulm/gulm_jid.h 1969-12-31 18:00:00.000000000 -0600
-+++ linux/fs/gfs_locking/lock_gulm/gulm_jid.h 2004-09-07 16:17:31.797499912 -0500
-@@ -0,0 +1,41 @@
+diff -urN linux-orig/fs/gfs_locking/lock_gulm/gulm_lock_queue.h linux-patched/fs/gfs_locking/lock_gulm/gulm_lock_queue.h
+--- linux-orig/fs/gfs_locking/lock_gulm/gulm_lock_queue.h 1969-12-31 18:00:00.000000000 -0600
++++ linux-patched/fs/gfs_locking/lock_gulm/gulm_lock_queue.h 2004-10-22 11:54:55.000000000 -0500
+@@ -0,0 +1,79 @@
+/******************************************************************************
+*******************************************************************************
+**
+*******************************************************************************
+******************************************************************************/
+
-+#ifndef __GULM_JID_H__
-+#define __GULM_JID_H__
-+#include "gulm.h"
-+void jid_init (void);
-+void jid_fs_init (gulm_fs_t * fs);
-+void jid_fs_release (gulm_fs_t * fs);
-+int get_journalID (gulm_fs_t * fs);
-+int lookup_jid_by_name (gulm_fs_t * fs, uint8_t * name, uint32_t * injid);
-+int lookup_name_by_jid (gulm_fs_t * fs, uint32_t jid, uint8_t * name);
-+void release_JID (gulm_fs_t * fs, uint32_t jid, int owner);
-+void put_journalID (gulm_fs_t * fs);
-+void check_for_stale_expires (gulm_fs_t * fs);
+
-+int
-+ find_jid_by_name_and_mark_replay (gulm_fs_t * fs, uint8_t * name, uint32_t * jid);
++/*
++ * So what if we change this to a request chain like I've got in the
++ * servers. There are three lists. Free, Send, Reply. Where the Reply
++ * list is actually more of a hash.
++ *
++ * Activity goes:
++ * - Grab struct from Free, malloc if needed.
++ * - Stuff, stick on Send.
++ * - Send, sends, then sticks on Reply.
++ * - When handle_messages gets a reply, it looks up on Reply and handles
++ * from there.
++ *
++ * If Reply is a hash, it must be keyed on all important parts!
++ * (keyname, subid, start, stop)
++ */
++#ifndef __gulm_lockqueue_h__
++#define __gulm_lockqueue_h__
++#define glq_req_type_state (1)
++#define glq_req_type_action (2)
++#define glq_req_type_drop (3)
++#define glq_req_type_cancel (4)
++#define glq_req_type_query (5)
++typedef struct glck_req {
++ struct list_head list;
++
++ /* these five for the key for hash-map look ups.
++ * Any part of any of these can change and thus be a unique request.
++ * (this struct is only put into a hash map to match replies.)
++ */
++ uint8_t *key;
++ uint16_t keylen;
++ uint64_t subid;
++ uint64_t start;
++ uint64_t stop;
++
++ /* other info about this request. */
++ uint8_t type;
++ uint8_t state; /* also action */ /* changes on reply (anyflag) */
++ uint32_t flags; /* changes on reply */
++ uint8_t *lvb; /* changes on reply */
++ uint16_t lvblen;
++ uint32_t error; /* changes on reply */
+
-+void jid_start_journal_reply (gulm_fs_t * fs, uint32_t jid);
-+void jid_finish_journal_reply (gulm_fs_t * fs, uint32_t jid);
++ /* when we get a reply, do this
++ * this glck_req will not be on any list when finish is called. Upon
++ * the return of finish, it will be placed onto the Free list.
++ */
++ void *misc;
++ void (*finish)(struct glck_req *glck);
+
-+void jid_lockstate_reserve (gulm_fs_t * fs, int first);
-+void jid_lockstate_release (gulm_fs_t * fs);
++} glckr_t;
+
-+/* to be called from the lg_lock callbacks. */
-+void jid_state_reply (uint8_t * key, uint16_t keylen, uint8_t * lvb,
-+ uint16_t lvblen);
-+void jid_action_reply (uint8_t * key, uint16_t keylen);
-+void jid_header_lock_drop (uint8_t * key, uint16_t keylen);
-+#endif /*__GULM_JID_H__*/
-diff -urN linux-2.6.9-rc1-mm3/fs/gfs_locking/lock_gulm/gulm_log_msg_bits.h linux/fs/gfs_locking/lock_gulm/gulm_log_msg_bits.h
---- linux-2.6.9-rc1-mm3/fs/gfs_locking/lock_gulm/gulm_log_msg_bits.h 1969-12-31 18:00:00.000000000 -0600
-+++ linux/fs/gfs_locking/lock_gulm/gulm_log_msg_bits.h 2004-09-07 16:17:31.797499912 -0500
++/* prototypes */
++int glq_init(void);
++int glq_startup(void);
++void glq_shutdown(void);
++void glq_release(void);
++glckr_t *glq_get_new_req(void);
++void glq_recycle_req(glckr_t *);
++void glq_queue(glckr_t *);
++void glq_cancel(glckr_t *);
++
++
++#endif /*__gulm_lockqueue_h__*/
++/* vim: set ai cin noet sw=8 ts=8 : */
+diff -urN linux-orig/fs/gfs_locking/lock_gulm/gulm_log_msg_bits.h linux-patched/fs/gfs_locking/lock_gulm/gulm_log_msg_bits.h
+--- linux-orig/fs/gfs_locking/lock_gulm/gulm_log_msg_bits.h 1969-12-31 18:00:00.000000000 -0600
++++ linux-patched/fs/gfs_locking/lock_gulm/gulm_log_msg_bits.h 2004-10-22 11:54:55.000000000 -0500
@@ -0,0 +1,40 @@
+/******************************************************************************
+*******************************************************************************
+#define lgm_BitFieldSize (32)
+
+#endif /*__gulm_log_msg_bits_h__*/
-diff -urN linux-2.6.9-rc1-mm3/fs/gfs_locking/lock_gulm/gulm_lt.c linux/fs/gfs_locking/lock_gulm/gulm_lt.c
---- linux-2.6.9-rc1-mm3/fs/gfs_locking/lock_gulm/gulm_lt.c 1969-12-31 18:00:00.000000000 -0600
-+++ linux/fs/gfs_locking/lock_gulm/gulm_lt.c 2004-09-07 16:18:11.108886960 -0500
-@@ -0,0 +1,2021 @@
+diff -urN linux-orig/fs/gfs_locking/lock_gulm/gulm_lt.c linux-patched/fs/gfs_locking/lock_gulm/gulm_lt.c
+--- linux-orig/fs/gfs_locking/lock_gulm/gulm_lt.c 1969-12-31 18:00:00.000000000 -0600
++++ linux-patched/fs/gfs_locking/lock_gulm/gulm_lt.c 2004-10-22 11:54:55.000000000 -0500
+@@ -0,0 +1,990 @@
+/******************************************************************************
+*******************************************************************************
+**
+#include <linux/fs.h>
+#include <linux/slab.h>
+#include <linux/file.h>
++#include <linux/crc32.h>
+#define __KERNEL_SYSCALLS__
+#include <linux/unistd.h>
+
-+#include "util.h"
+#include "handler.h"
++#include "gulm_lock_queue.h"
+#include "utils_tostr.h"
+#include "gulm_jid.h"
+
+ return workspace;
+}
+
++#if 0
+static void __inline__
+db_lck_entered (gulm_lock_t * lck)
+{
+ lck_key_to_hex (lck->key, lck->keylen, bb);
+ printk ("Finished lock 0x%s result:%#x\n", bb, lck->result);
+}
++#endif
+
+static void __inline__
+dump_gulm_lock_t (gulm_lock_t * lck)
+
+ lck_key_to_hex (lck->key, lck->keylen, bb);
+ log_msg (lgm_Always, " key = 0x%s\n", bb);
-+ log_msg (lgm_Always, " req_type = %#x\n", lck->req_type);
-+ log_msg (lgm_Always, " last_suc_state = %#x\n", lck->last_suc_state);
-+ log_msg (lgm_Always, " actuallypending = %d\n", lck->actuallypending);
-+ log_msg (lgm_Always, " in_to_be_sent = %d\n", lck->in_to_be_sent);
+ log_msg (lgm_Always, " cur_state = %d\n", lck->cur_state);
-+ log_msg (lgm_Always, " req_state = %d\n", lck->req_state);
-+ log_msg (lgm_Always, " flags = %#x\n", lck->flags);
-+ log_msg (lgm_Always, " action = %d\n", lck->action);
-+ log_msg (lgm_Always, " result = %d\n", lck->result);
+}
+
+/* DEBUG_BY_LOCK is gone. I may later add something back if needed.
+ * @keylen:
+ *
+ * key is: <type><fsname len><fsname>\0<pk len><pk>\0
-+ * <type> is: G J F N
++ * <type> is: G J F N P
+ * <fsname len> is 0-256
+ *
+ * Returns: int
+ */
-+int pack_lock_key(uint8_t *key, uint16_t keylen, uint8_t type,
-+ uint8_t *fsname, uint8_t *pk, uint8_t pklen)
-+{
-+ int fsnlen;
-+ fsnlen = strlen(fsname);
-+
-+ if( keylen <= (fsnlen + pklen + 5) ) return -1;
-+
-+ memset (key, 0, keylen);
-+
-+ key[0] = type;
-+
-+ key[1] = fsnlen;
-+ memcpy(&key[2], fsname, fsnlen);
-+ key[2 + fsnlen] = 0;
-+
-+ key[3 + fsnlen] = pklen;
-+
-+ memcpy(&key[4 + fsnlen], pk, pklen);
-+
-+ key[4 + fsnlen + pklen] = 0;
-+
-+ return fsnlen + pklen + 5;
-+}
-+
-+/**
-+ * unpack_lock_key -
-+ * @key: <
-+ * @keylen: <
-+ * @type: >
-+ * @fsname: >
-+ * @fsnlen: >
-+ * @pk: >
-+ * @pklen: >
-+ *
-+ * if you're gonna fiddle with bytes returned here, copy first!
-+ *
-+ * this is broken. do I even really need this?
-+ *
-+ * Returns: int
-+ */
-+int unpack_lock_key(uint8_t *key, uint16_t keylen, uint8_t *type,
-+ uint8_t **fsname, uint8_t *fsnlen,
-+ uint8_t **pk, uint8_t *pklen)
-+{
-+ int fsnl, pkl;
-+ if( type != NULL )
-+ *type = key[0];
-+
-+ fsnl = key[1];
-+ if( fsnlen != NULL && *fsname != NULL ) {
-+ *fsnlen = key[1];
-+ *fsname = &key[2];
-+ }
-+
-+ /* 0 = key[2 + fsnl] */
-+
-+ pkl = key[3 + fsnl];
-+ if( pklen != NULL && *pk != NULL ) {
-+ *pklen = key[3 + fsnl];
-+ *pk = &key[4 + fsnl];
-+ }
-+
-+ /* 0 = key[4 + fsnl + *pklen] */
-+
-+ return fsnl + pkl + 5;
-+}
-+
-+/**
-+ * pack_drop_mask -
-+ * @mask:
-+ * @fsname:
-+ *
-+ *
-+ * Returns: int
-+ */
-+int pack_drop_mask(uint8_t *mask, uint16_t mlen, uint8_t *fsname)
-+{
-+ int fsnlen;
-+ fsnlen = strlen(fsname);
-+
-+ memset (mask, 0, GIO_KEY_SIZE);
-+
-+ mask[0] = 0xff;
-+ mask[1] = fsnlen;
-+ memcpy(&mask[2], fsname, fsnlen);
-+ mask[2 + fsnlen] = 0;
-+ /* rest should be 0xff */
-+
-+ return 3 + fsnlen;
-+}
-+
-+/**
-+ * find_and_mark_lock -
-+ * @key:
-+ * @keylen:
-+ * @lockp:
-+ *
-+ * looks for a lock struct of key. If found, marks it.
-+ *
-+ * Returns: TRUE or FALSE
-+ */
-+int
-+find_and_mark_lock (uint8_t * key, uint8_t keylen, gulm_lock_t ** lockp)
-+{
-+ int found = FALSE;
-+ uint32_t bkt;
-+ gulm_lock_t *lck = NULL;
-+ struct list_head *tmp;
-+
-+ /* now find the lock */
-+ bkt = hash_lock_key (key, keylen);
-+ bkt %= gulm_cm.ltpx.hashbuckets;
-+
-+ spin_lock (&gulm_cm.ltpx.hshlk[bkt]);
-+ list_for_each (tmp, &gulm_cm.ltpx.lkhsh[bkt]) {
-+ lck = list_entry (tmp, gulm_lock_t, gl_list);
-+ if (memcmp (lck->key, key, keylen) == 0) {
-+ found = TRUE;
-+ atomic_inc (&lck->count);
-+ break;
-+ }
-+ }
-+ spin_unlock (&gulm_cm.ltpx.hshlk[bkt]);
-+
-+ if (found)
-+ *lockp = lck;
-+
-+ return found;
-+}
-+
-+/**
-+ * mark_lock -
-+ * @lck:
-+ *
-+ * like above, but since we have the lock, don't search for it.
-+ *
-+ * Returns: int
-+ */
-+void __inline__
-+mark_lock (gulm_lock_t * lck)
-+{
-+ atomic_inc (&lck->count);
-+}
-+
-+/**
-+ * unmark_and_release_lock -
-+ * @lck:
-+ *
-+ * decrement the counter on a lock, freeing it if it reaches 0.
-+ * (also removes it from the hash table)
-+ *
-+ * TRUE if lock was freed.
-+ *
-+ * Returns: TRUE or FALSE
-+ */
-+int
-+unmark_and_release_lock (gulm_lock_t * lck)
-+{
-+ uint32_t bkt;
-+ int deld = FALSE;
-+
-+ bkt = hash_lock_key (lck->key, lck->keylen);
-+ bkt %= gulm_cm.ltpx.hashbuckets;
-+ spin_lock (&gulm_cm.ltpx.hshlk[bkt]);
-+ if (atomic_dec_and_test (&lck->count)) {
-+ list_del (&lck->gl_list);
-+ deld = TRUE;
-+ }
-+ spin_unlock (&gulm_cm.ltpx.hshlk[bkt]);
-+ if (deld) {
-+ gulm_cm.ltpx.locks_total--;
-+ gulm_cm.ltpx.locks_unl--;
-+ if (lck->lvb != NULL) {
-+ kfree (lck->lvb);
-+ }
-+ kfree (lck);
-+ }
-+
-+ return deld;
-+}
-+
-+/****************************************************************************/
-+
-+/**
-+ * gulm_key_to_lm_lockname -
-+ * @key:
-+ * @lockname:
-+ *
-+ */
-+void
-+gulm_key_to_lm_lockname (uint8_t * key, struct lm_lockname *lockname)
-+{
-+ int pos;
-+
-+ pos = key[1] + 4;
-+ /* pos now points to the first byte of the GFS lockname that was
-+ * embedded in the gulm lock key
-+ */
-+
-+ (*lockname).ln_type = key[pos];
-+ (*lockname).ln_number = (u64) (key[pos+1]) << 56;
-+ (*lockname).ln_number |= (u64) (key[pos+2]) << 48;
-+ (*lockname).ln_number |= (u64) (key[pos+3]) << 40;
-+ (*lockname).ln_number |= (u64) (key[pos+4]) << 32;
-+ (*lockname).ln_number |= (u64) (key[pos+5]) << 24;
-+ (*lockname).ln_number |= (u64) (key[pos+6]) << 16;
-+ (*lockname).ln_number |= (u64) (key[pos+7]) << 8;
-+ (*lockname).ln_number |= (u64) (key[pos+8]) << 0;
-+}
-+
-+void
-+do_drop_lock_req (gulm_fs_t * fs, uint8_t state, uint8_t key[GIO_KEY_SIZE])
-+{
-+ unsigned int type;
-+ struct lm_lockname lockname;
-+ /* i might want to shove most of this function into the new
-+ * lockcallback handing queue.
-+ * later.
-+ */
-+
-+ /* don't do callbacks on the gulm mount lock.
-+ * */
-+ if (key[0] != 'G') {
-+ return;
-+ }
-+
-+ switch (state) {
-+ case lg_lock_state_Unlock:
-+ type = LM_CB_DROPLOCKS;
-+ break;
-+ case lg_lock_state_Exclusive:
-+ type = LM_CB_NEED_E;
-+ break;
-+ case lg_lock_state_Shared:
-+ type = LM_CB_NEED_S;
-+ break;
-+ case lg_lock_state_Deferred:
-+ type = LM_CB_NEED_D;
-+ break;
-+ default:
-+ type = LM_CB_DROPLOCKS;
-+ break;
-+ }
-+ gulm_key_to_lm_lockname (key, &lockname);
-+
-+ qu_drop_req (&fs->cq, fs->cb, fs->fsdata, type,
-+ lockname.ln_type, lockname.ln_number);
-+}
-+
-+/**
-+ * send_async_reply -
-+ * @lck:
-+ *
-+ *
-+ * Returns: void
-+ */
-+void
-+send_async_reply (gulm_lock_t * lck)
-+{
-+ gulm_fs_t *fs = lck->fs;
-+ struct lm_lockname lockname;
-+
-+ if (lck->key[0] == 'F') {
-+ /* whee! it is the first mounter lock. two things:
-+ * A: gfs could care less about this.
-+ * B: we need to up the sleeper in the fs. (hack)
-+ */
-+ complete (&fs->sleep);
-+ return;
-+ }
-+
-+ if( lck->key[0] != 'G' ) return;
-+
-+ gulm_key_to_lm_lockname (lck->key, &lockname);
-+
-+ qu_async_rpl (&fs->cq, fs->cb, fs->fsdata, &lockname, lck->result);
-+}
-+
-+/**
-+ * send_drop_exp_inter -
-+ * @lt:
-+ * @name:
-+ *
-+ *
-+ * Returns: int
-+ */
-+int
-+send_drop_exp_inter (gulm_fs_t * fs, lock_table_t * lt, char *name)
-+{
-+ int err, len;
-+ uint8_t mask[GIO_KEY_SIZE];
-+
-+ len = pack_drop_mask(mask, GIO_KEY_SIZE, fs->fs_name);
-+
-+ err = lg_lock_drop_exp (gulm_cm.hookup, name, mask, len);
-+
-+ return err;
-+}
-+
-+/**
-+ * send_lock_action -
-+ * @lck:
-+ *
-+ *
-+ * Returns: int
-+ */
-+int
-+send_lock_action (gulm_lock_t * lck, uint8_t action)
-+{
-+ int err;
-+
-+ GULM_ASSERT (lck->req_type == glck_action, dump_gulm_lock_t (lck););
-+
-+ err = lg_lock_action_req (gulm_cm.hookup, lck->key, lck->keylen, action,
-+ lck->lvb, lck->fs->lvb_size);
-+ if (err != 0)
-+ log_err ("Issues sending action request. %d\n", err);
-+
-+ return err;
-+}
-+
-+/**
-+ * send_lock_req -
-+ * @lck:
-+ *
-+ *
-+ * Returns: int
-+ */
-+int
-+send_lock_req (gulm_lock_t * lck)
-+{
-+ gulm_fs_t *fs = lck->fs;
-+ int err;
-+ uint32_t flags = 0;
-+ uint8_t state;
-+
-+ GULM_ASSERT (lck->req_type == glck_state, dump_gulm_lock_t (lck););
-+
-+ switch (lck->req_state) {
-+ case LM_ST_EXCLUSIVE:
-+ state = lg_lock_state_Exclusive;
-+ break;
-+ case LM_ST_DEFERRED:
-+ state = lg_lock_state_Deferred;
-+ break;
-+ case LM_ST_SHARED:
-+ state = lg_lock_state_Shared;
-+ break;
-+ case LM_ST_UNLOCKED:
-+ state = lg_lock_state_Unlock;
-+ break;
-+ default:
-+ GULM_ASSERT (0, log_err ("fsid=%s: Anit no lock state %d.\n",
-+ fs->fs_name, lck->req_state););
-+ break;
-+ }
-+ if (lck->flags & LM_FLAG_TRY) {
-+ flags |= lg_lock_flag_Try;
-+ }
-+ if (lck->flags & LM_FLAG_TRY_1CB) {
-+ flags |= lg_lock_flag_Try | lg_lock_flag_DoCB;
-+ }
-+ if (lck->flags & LM_FLAG_NOEXP) {
-+ flags |= lg_lock_flag_IgnoreExp;
-+ }
-+ if (lck->flags & LM_FLAG_ANY) {
-+ flags |= lg_lock_flag_Any;
-+ }
-+ if (lck->flags & LM_FLAG_PRIORITY) {
-+ flags |= lg_lock_flag_Piority;
-+ }
-+ if (lck->lvb != NULL) {
-+ print_lk_lvb (lck->key, lck->lvb, lck->req_state, "Sending");
-+ }
-+
-+ err = lg_lock_state_req (gulm_cm.hookup, lck->key, lck->keylen,
-+ state, flags, lck->lvb, lck->fs->lvb_size);
-+ if (err != 0)
-+ log_err ("Issues sending state request. %d\n", err);
-+
-+ return err;
-+}
-+
-+/**
-+ * toggle_lock_counters -
-+ *
-+ * called after a succesful request to change lock state. Decrements
-+ * counts for what the lock was, and increments for what it is now.
-+ */
-+void
-+toggle_lock_counters (lock_table_t * lt, int old, int new)
-+{
-+ /* what we had it in */
-+ switch (old) {
-+ case LM_ST_EXCLUSIVE:
-+ lt->locks_exl--;
-+ break;
-+ case LM_ST_DEFERRED:
-+ lt->locks_dfr--;
-+ break;
-+ case LM_ST_SHARED:
-+ lt->locks_shd--;
-+ break;
-+ case LM_ST_UNLOCKED:
-+ lt->locks_unl--;
-+ break;
-+ }
-+ /* what we have it in */
-+ switch (new) {
-+ case LM_ST_EXCLUSIVE:
-+ lt->locks_exl++;
-+ break;
-+ case LM_ST_DEFERRED:
-+ lt->locks_dfr++;
-+ break;
-+ case LM_ST_SHARED:
-+ lt->locks_shd++;
-+ break;
-+ case LM_ST_UNLOCKED:
-+ lt->locks_unl++;
-+ break;
-+ }
-+}
-+
-+/**
-+ * calc_lock_result -
-+ * @lck:
-+ * @state:
-+ * @error:
-+ * @flags:
-+ *
-+ * This calculates the correct result to return for gfs lock requests.
-+ *
-+ * Returns: int
-+ */
-+int
-+calc_lock_result (gulm_lock_t * lck,
-+ uint8_t state, uint32_t error, uint32_t flags)
-+{
-+ gulm_fs_t *fs = lck->fs;
-+ lock_table_t *lt = &gulm_cm.ltpx;
-+ int result = -69;
-+
-+ /* adjust result based on success status. */
-+ switch (error) {
-+ case lg_err_Ok:
-+ /* set result to current lock state. */
-+ if (!(lck->flags & LM_FLAG_ANY)) {
-+ /* simple case, we got what we asked for. */
-+ result = lck->req_state;
-+ } else {
-+ /* complex case, we got something else, but we said that was ok */
-+ switch (state) {
-+ case lg_lock_state_Shared:
-+ result = LM_ST_SHARED;
-+ break;
-+ case lg_lock_state_Deferred:
-+ result = LM_ST_DEFERRED;
-+ break;
-+
-+ case lg_lock_state_Exclusive:
-+ case lg_lock_state_Unlock:
-+ GULM_ASSERT (0,
-+ dump_gulm_lock_t (lck);
-+ log_err
-+ ("fsid=%s: lock state %d is invalid on "
-+ "ANY flag return\n", fs->fs_name,
-+ state);
-+ );
-+ break;
-+
-+ default:
-+ GULM_ASSERT (0,
-+ dump_gulm_lock_t (lck);
-+ log_err_lck (lck,
-+ "fsid=%s: Anit no lock state %d.\n",
-+ fs->fs_name, state);
-+ );
-+ break;
-+ }
-+ }
-+
-+ /* toggle counters.
-+ * due to ANY flag, new state may not be req_state.
-+ * */
-+ toggle_lock_counters (lt, lck->cur_state, result);
-+
-+ /* if no internal unlocks, it is cachable. */
-+ if (result != LM_ST_UNLOCKED && (flags & lg_lock_flag_Cachable))
-+ result |= LM_OUT_CACHEABLE;
-+
-+ /* record and move on
-+ * */
-+ lck->last_suc_state = result & LM_OUT_ST_MASK;
-+ break;
-+ case lg_err_Canceled:
-+ result = LM_OUT_CANCELED | lck->cur_state;
-+ break;
-+ case lg_err_TryFailed:
-+ result = lck->cur_state; /* if we didn't get it. */
-+ break;
-+ default:
-+ result = -error;
-+ break;
-+ }
-+
-+ return result;
-+}
-+
-+/**
-+ * my_strdup -
-+ * @s:
-+ *
-+ *
-+ * Returns: char
-+ */
-+char *
-+my_strdup (char *s)
-+{
-+ char *tmp;
-+ int len;
-+ len = strlen (s) + 1;
-+ tmp = kmalloc (len, GFP_KERNEL);
-+ if (tmp == NULL)
-+ return NULL;
-+ memcpy (tmp, s, len);
-+ return tmp;
-+}
-+
-+/* Instead of directly calling the send function below, the functions will
-+ * create of of these.
-+ * Which exist only because I cannot stick the lock_t onto two lists
-+ * at once.
-+ *
-+ * this could use some clean up.
-+ */
-+typedef struct send_req_s {
-+ struct list_head sr_list;
-+ enum { sr_lock, sr_act, sr_cancel, sr_drop } type;
-+ gulm_lock_t *who;
-+ gulm_fs_t *fs;
-+ lock_table_t *lt;
-+ char *name;
-+} send_req_t;
-+
-+/**
-+ * alloc_send_req -
-+ * @oid:
-+ *
-+ *
-+ * Returns: send_req_t
-+ */
-+send_req_t *
-+alloc_send_req (void)
-+{
-+ send_req_t *tmp;
-+ tmp = kmalloc (sizeof (send_req_t), GFP_KERNEL);
-+ GULM_ASSERT (tmp != NULL,); /* so evil.... */
-+ return tmp;
-+}
-+
-+/**
-+ * send_drop_exp -
-+ * @fs:
-+ * @lt:
-+ * @name:
-+ *
-+ *
-+ * Returns: int
-+ */
-+int
-+send_drop_exp (gulm_fs_t * fs, lock_table_t * lt, char *name)
-+{
-+ send_req_t *sr;
-+
-+ sr = alloc_send_req ();
-+ INIT_LIST_HEAD (&sr->sr_list);
-+ sr->type = sr_drop;
-+ sr->who = NULL;
-+ sr->fs = fs;
-+ sr->lt = lt;
-+ if (name != NULL) {
-+ sr->name = my_strdup (name);
-+ } else {
-+ sr->name = NULL;
-+ }
-+
-+ spin_lock (<->queue_sender);
-+ list_add (&sr->sr_list, <->to_be_sent);
-+ spin_unlock (<->queue_sender);
-+
-+ wake_up (<->send_wchan);
-+ return 0;
-+}
-+
-+/**
-+ * add_lock_to_send_req_queue -
-+ * @lt:
-+ * @lck:
-+ *
-+ *
-+ * Returns: void
-+ */
-+void
-+add_lock_to_send_req_queue (lock_table_t * lt, gulm_lock_t * lck, int type)
-+{
-+ send_req_t *sr;
-+
-+ sr = alloc_send_req ();
-+ INIT_LIST_HEAD (&sr->sr_list);
-+ sr->type = type;
-+ sr->who = lck;
-+ sr->fs = NULL;
-+ sr->lt = NULL;
-+ sr->name = NULL;
-+ if (type != sr_cancel)
-+ lck->in_to_be_sent = TRUE;
-+
-+ mark_lock (lck);
-+
-+ spin_lock (<->queue_sender);
-+ list_add (&sr->sr_list, <->to_be_sent);
-+ spin_unlock (<->queue_sender);
-+
-+ wake_up (<->send_wchan);
-+}
-+
-+/**
-+ * queue_empty -
-+ * @lt:
-+ *
-+ *
-+ * Returns: int
-+ */
-+static __inline__ int
-+queue_empty (lock_table_t * lt)
-+{
-+ int ret;
-+ spin_lock (<->queue_sender);
-+ ret = list_empty (<->to_be_sent);
-+ spin_unlock (<->queue_sender);
-+ return ret;
-+}
-+
-+/**
-+ * lt_io_sender_thread -
-+ * @data:
-+ *
-+ * Right now, only gfs lock requests should go through this thread.
-+ * Must look, May not even need this.
-+ * well, it is nice to get the socket io off of what ever process the user
-+ * is running that is going through gfs into here. ?is it?
-+ *
-+ *
-+ * Returns: int
-+ */
-+int
-+lt_io_sender_thread (void *data)
++int pack_lock_key(uint8_t *key, uint16_t keylen, uint8_t type,
++ uint8_t *fsname, uint8_t *pk, uint8_t pklen)
+{
-+ lock_table_t *lt = (lock_table_t *) data;
-+ struct list_head *tmp;
-+ send_req_t *sr = NULL;
-+ int err = 0;
++ int fsnlen;
++ fsnlen = strlen(fsname);
+
-+ daemonize ("gulm_LT_sender");
-+ lt->sender_task = current;
-+ complete (<->startup);
++ if( keylen <= (fsnlen + pklen + 5) ) return -1;
+
-+ while (lt->running) {
-+ do {
-+ DECLARE_WAITQUEUE (__wait_chan, current);
-+ current->state = TASK_INTERRUPTIBLE;
-+ add_wait_queue (<->send_wchan, &__wait_chan);
-+ if (queue_empty (lt))
-+ schedule ();
-+ remove_wait_queue (<->send_wchan, &__wait_chan);
-+ current->state = TASK_RUNNING;
-+ } while (0);
-+ if (!lt->running)
-+ break;
++ memset (key, 0, keylen);
+
-+ /* check to make sure socket is ok. */
-+ down (<->sender);
++ key[0] = type;
+
-+ /* pop next item to be sent
-+ * (it will get pushed back if there was problems.)
-+ */
-+ spin_lock (<->queue_sender);
-+ if (list_empty (<->to_be_sent)) {
-+ spin_unlock (<->queue_sender);
-+ up (<->sender);
-+ continue;
-+ }
-+ tmp = (<->to_be_sent)->prev;
-+ list_del (tmp);
-+ spin_unlock (<->queue_sender);
-+ sr = list_entry (tmp, send_req_t, sr_list);
-+
-+ /* send. */
-+ if (sr->type == sr_lock) {
-+ err = send_lock_req (sr->who);
-+ if (err == 0) {
-+ sr->who->in_to_be_sent = FALSE;
-+ unmark_and_release_lock (sr->who);
-+ }
-+ } else if (sr->type == sr_act) {
-+ err = send_lock_action (sr->who, sr->who->action);
-+ if (err == 0) {
-+ sr->who->in_to_be_sent = FALSE;
-+ unmark_and_release_lock (sr->who);
-+ }
-+ } else if (sr->type == sr_cancel) {
-+ err =
-+ lg_lock_cancel_req (gulm_cm.hookup, sr->who->key,
-+ sr->who->keylen);
-+ if (err == 0)
-+ unmark_and_release_lock (sr->who);
-+ } else if (sr->type == sr_drop) {
-+ /* XXX sr->lt isn't really needed.
-+ * just lt should be fine.
-+ * look into it someday.
-+ */
-+ err = send_drop_exp_inter (sr->fs, sr->lt, sr->name);
-+ } else {
-+ log_err ("Unknown send_req type! %d\n", sr->type);
-+ }
-+ up (<->sender);
++ key[1] = fsnlen;
++ memcpy(&key[2], fsname, fsnlen);
++ key[2 + fsnlen] = 0;
+
-+ /* if no errors, remove from queue. */
-+ if (err == 0) {
-+ if (sr->type == sr_drop && sr->name != NULL)
-+ kfree (sr->name);
-+ kfree (sr);
-+ sr = NULL;
-+ } else {
-+ /* if errors, re-queue.
-+ * the send_* funcs already reported the error, so we won't
-+ * repeat that.
-+ * */
-+ spin_lock (<->queue_sender);
-+ /* reset the pointers. otherwise things get weird. */
-+ INIT_LIST_HEAD (&sr->sr_list);
-+ list_add_tail (&sr->sr_list, <->to_be_sent);
-+ spin_unlock (<->queue_sender);
++ key[3 + fsnlen] = pklen;
+
-+ current->state = TASK_INTERRUPTIBLE;
-+ schedule_timeout (3 * HZ);
++ memcpy(&key[4 + fsnlen], pk, pklen);
+
-+ /* gotta break shit up.
-+ * else this loops hard and fast.
-+ */
-+ }
-+ } /* while( lt->running ) */
++ key[4 + fsnlen + pklen] = 0;
+
-+ complete (<->startup);
-+ return 0;
++ return fsnlen + pklen + 5;
+}
+
+/**
-+ * cancel_pending_sender -
-+ * @lck:
-+ *
-+ * want to cancel a lock request that we haven't sent to the server yet.
++ * unpack_lock_key -
++ * @key: <
++ * @keylen: <
++ * @type: >
++ * @fsname: >
++ * @fsnlen: >
++ * @pk: >
++ * @pklen: >
+ *
-+ * this must skip over unlock requests. (never cancel unlocks)
++ * if you're gonna fiddle with bytes returned here, copy first!
++ *
++ * this is broken. do I even really need this?
+ *
+ * Returns: int
+ */
-+int
-+cancel_pending_sender (gulm_lock_t * lck)
++int unpack_lock_key(uint8_t *key, uint16_t keylen, uint8_t *type,
++ uint8_t **fsname, uint8_t *fsnlen,
++ uint8_t **pk, uint8_t *pklen)
+{
-+ lock_table_t *lt = &gulm_cm.ltpx;
-+ struct list_head *tmp, *nxt;
-+ send_req_t *sr;
-+ int found = FALSE;
++ int fsnl, pkl;
++ if( type != NULL )
++ *type = key[0];
+
-+ spin_lock (<->queue_sender);
++ fsnl = key[1];
++ if( fsnlen != NULL && *fsname != NULL ) {
++ *fsnlen = key[1];
++ *fsname = &key[2];
++ }
+
-+ list_for_each_safe (tmp, nxt, <->to_be_sent) {
-+ sr = list_entry (tmp, send_req_t, sr_list);
-+ if (sr->who == lck) { /* good enough? */
-+ if (lck->req_type == sr_cancel)
-+ continue;
-+ if (lck->req_state == LM_ST_UNLOCKED)
-+ continue; /*donot cancel unlocks */
-+ list_del (tmp);
-+ kfree (sr);
-+ found = TRUE;
-+ lck->in_to_be_sent = FALSE;
++ /* 0 = key[2 + fsnl] */
+
-+ /* Now we need to tell the waiting lock req that it got canceled.
-+ * basically, we need to fake a lg_err_Canceled return....
-+ */
-+ lck->result = LM_OUT_CANCELED | lck->cur_state;
-+ lck->actuallypending = FALSE;
-+ lck->req_type = glck_nothing;
-+ atomic_dec (<->locks_pending);
-+#ifndef USE_SYNC_LOCKING
-+ send_async_reply (lck);
-+#else
-+ complete (&lck->actsleep);
-+#endif
-+ unmark_and_release_lock (lck);
-+ break;
-+ }
++ pkl = key[3 + fsnl];
++ if( pklen != NULL && *pk != NULL ) {
++ *pklen = key[3 + fsnl];
++ *pk = &key[4 + fsnl];
+ }
+
-+ spin_unlock (<->queue_sender);
-+ return found;
-+}
++ /* 0 = key[4 + fsnl + *pklen] */
+
-+/**
-+ * gulm_lt_login_reply -
-+ * @misc:
-+ * @error:
-+ * @which:
-+ *
-+ *
-+ * Returns: int
-+ */
-+int
-+gulm_lt_login_reply (void *misc, uint32_t error, uint8_t which)
-+{
-+ if (error != 0) {
-+ gulm_cm.ltpx.running = FALSE;
-+ log_err ("LTPX: Got a %d from the login request.\n", error);
-+ } else {
-+ log_msg (lgm_Network2, "Logged into local LTPX.\n");
-+ }
-+ return error;
++ return fsnl + pkl + 5;
+}
+
+/**
-+ * gulm_lt_logout_reply -
-+ * @misc:
++ * pack_drop_mask -
++ * @mask:
++ * @fsname:
+ *
+ *
+ * Returns: int
+ */
-+int
-+gulm_lt_logout_reply (void *misc)
++int pack_drop_mask(uint8_t *mask, uint16_t mlen, uint8_t *fsname)
+{
-+ gulm_cm.ltpx.running = FALSE;
-+ log_msg (lgm_Network2, "Logged out of local LTPX.\n");
-+ return 0;
++ int fsnlen;
++ fsnlen = strlen(fsname);
++
++ memset (mask, 0, mlen);
++
++ mask[0] = 0xff;
++ mask[1] = fsnlen;
++ memcpy(&mask[2], fsname, fsnlen);
++ mask[2 + fsnlen] = 0;
++ /* rest should be 0xff */
++
++ return 3 + fsnlen;
+}
+
+/**
-+ * gulm_lt_lock_state -
-+ * @misc:
-+ * @key:
-+ * @keylen:
-+ * @state:
-+ * @flags:
-+ * @error:
-+ * @LVB:
-+ * @LVBlen:
-+ *
++ * gulm_lt_init -
+ *
+ * Returns: int
+ */
-+int
-+gulm_lt_lock_state (void *misc, uint8_t * key, uint16_t keylen,
-+ uint8_t state, uint32_t flags, uint32_t error,
-+ uint8_t * LVB, uint16_t LVBlen)
++int gulm_lt_init (void)
+{
-+ gulm_lock_t *lck;
-+
-+ if (key[0] == 'J' || key[0] == 'N' ) {
-+ jid_state_reply (key, keylen, LVB, LVBlen);
-+ return 0;
-+ }
-+
-+ if (!find_and_mark_lock (key, keylen, &lck)) {
-+ log_err_lk (key, keylen, "Got a lock state reply for a lock "
-+ "that we don't know of. state:%#x flags:%#x error:%#x\n",
-+ state, flags, error);
-+ return 0;
++ int i;
++ gulm_cm.gfs_lockmap = kmalloc(sizeof(struct list_head) * gulm_gfs_lmSize, GFP_KERNEL);
++ if (gulm_cm.gfs_lockmap == NULL)
++ return -ENOMEM;
++ gulm_cm.gfs_locklock = kmalloc(sizeof(spinlock_t) * gulm_gfs_lmSize, GFP_KERNEL);
++ if (gulm_cm.gfs_locklock == NULL) {
++ kfree(gulm_cm.gfs_lockmap);
++ return -ENOMEM;
+ }
-+
-+ lck->result = calc_lock_result (lck, state, error, flags);
-+
-+ if ((lck->result & LM_OUT_ST_MASK) != LM_ST_UNLOCKED &&
-+ lck->lvb != NULL) {
-+ memcpy (lck->lvb, LVB, MIN (lck->fs->lvb_size, LVBlen));
++ for(i=0; i < gulm_gfs_lmSize; i++) {
++ spin_lock_init (&gulm_cm.gfs_locklock[i]);
++ INIT_LIST_HEAD (&gulm_cm.gfs_lockmap[i]);
+ }
-+
-+ lck->actuallypending = FALSE;
-+ lck->req_type = glck_nothing;
-+ atomic_dec (&gulm_cm.ltpx.locks_pending);
-+#ifndef USE_SYNC_LOCKING
-+ send_async_reply (lck);
-+#else
-+ complete (&lck->actsleep);
-+#endif
-+
-+ if (error != 0 && error != lg_err_TryFailed && error != lg_err_Canceled)
-+ log_msg_lck (lck, "Error: %d:%s (req:%#x rpl:%#x lss:%#x)\n",
-+ error, gio_Err_to_str (error),
-+ lck->req_state, state, lck->last_suc_state);
-+
-+ unmark_and_release_lock (lck);
+ return 0;
+}
+
+/**
-+ * gulm_lt_lock_action -
-+ * @misc:
-+ * @key:
-+ * @keylen:
-+ * @action:
-+ * @error:
-+ *
-+ *
-+ * Returns: int
++ * gulm_lt_release -
+ */
-+int
-+gulm_lt_lock_action (void *misc, uint8_t * key, uint16_t keylen,
-+ uint8_t action, uint32_t error)
++void gulm_lt_release(void)
+{
++ struct list_head *tmp, *lltmp;
+ gulm_lock_t *lck;
++ int i;
+
-+ if (key[0] == 'J') {
-+ jid_action_reply (key, keylen);
-+ return 0;
-+ }
++ for(i=0; i < gulm_gfs_lmSize; i++) {
++ list_for_each_safe (tmp, lltmp, &gulm_cm.gfs_lockmap[i]) {
++ lck = list_entry (tmp, gulm_lock_t, gl_list);
++ list_del (tmp);
+
-+ if (!find_and_mark_lock (key, keylen, &lck)) {
-+ log_err_lk (key, keylen, "Got a lock action reply for a lock "
-+ "that we don't know of. action:%#x error:%#x\n",
-+ action, error);
-+ return 0;
-+ }
++ if (lck->lvb != NULL) kfree (lck->lvb);
+
-+ if (action == lg_lock_act_HoldLVB ||
-+ action == lg_lock_act_UnHoldLVB || action == lg_lock_act_SyncLVB) {
-+ /* */
-+ lck->result = error;
-+ if (error != lg_err_Ok) {
-+ log_err ("on action reply act:%d err:%d\n", action,
-+ error);
++ kfree(lck);
+ }
-+ lck->req_type = glck_nothing;
-+ lck->actuallypending = FALSE;
-+ complete (&lck->actsleep);
-+ } else {
-+ log_err_lck (lck, "Got strange Action %#x\n", action);
+ }
-+ unmark_and_release_lock (lck);
-+ return 0;
++
++ kfree (gulm_cm.gfs_lockmap);
++ kfree (gulm_cm.gfs_locklock);
+}
+
+/**
-+ * gulm_lt_drop_lock_req -
-+ * @misc:
++ * find_and_mark_lock -
+ * @key:
+ * @keylen:
-+ * @state:
++ * @lockp:
+ *
++ * Looks up the lock struct for the given key; if found, marks it
+ *
-+ * Returns: int
++ * Returns: TRUE or FALSE
+ */
+int
-+gulm_lt_drop_lock_req (void *misc, uint8_t * key, uint16_t keylen,
-+ uint8_t state)
++find_and_mark_lock (uint8_t * key, uint8_t keylen, gulm_lock_t ** lockp)
+{
-+ gulm_lock_t *lck;
++ int found = FALSE;
++ uint32_t bkt;
++ gulm_lock_t *lck = NULL;
++ struct list_head *tmp;
+
-+ if (key[0] == 'J') {
-+ jid_header_lock_drop (key, keylen);
-+ return 0;
-+ }
++ /* now find the lock */
++ bkt = crc32 (GULM_CRC_INIT, key, keylen);
++ bkt &= gulm_gfs_lmBits;
+
-+ if (!find_and_mark_lock (key, keylen, &lck)) {
-+ log_err_lk (key, keylen, "Got a drop lcok request for a lock "
-+ "that we don't know of. state:%#x\n", state);
-+ return 0;
++ spin_lock (&gulm_cm.gfs_locklock[bkt]);
++ list_for_each (tmp, &gulm_cm.gfs_lockmap[bkt]) {
++ lck = list_entry (tmp, gulm_lock_t, gl_list);
++ if (memcmp (lck->key, key, keylen) == 0) {
++ found = TRUE;
++ atomic_inc (&lck->count);
++ break;
++ }
+ }
++ spin_unlock (&gulm_cm.gfs_locklock[bkt]);
+
-+ do_drop_lock_req (lck->fs, state, key);
-+
-+ unmark_and_release_lock (lck);
-+ return 0;
-+}
++ if (found)
++ *lockp = lck;
+
-+/**
-+ * gulm_lt_drop_all -
-+ * @misc:
-+ *
-+ *
-+ * Returns: int
-+ */
-+int
-+gulm_lt_drop_all (void *misc)
-+{
-+ passup_droplocks ();
-+ return 0;
++ return found;
+}
+
+/**
-+ * gulm_lt_error -
-+ * @misc:
-+ * @err:
++ * mark_lock -
++ * @lck:
+ *
++ * like above, but since we have the lock, don't search for it.
+ *
+ * Returns: int
+ */
-+int
-+gulm_lt_error (void *misc, uint32_t err)
++void __inline__
++mark_lock (gulm_lock_t * lck)
+{
-+ log_err ("LTPX: RANDOM ERROR %d\n", err);
-+ return err;
++ atomic_inc (&lck->count);
+}
+
-+static lg_lockspace_callbacks_t lock_cb = {
-+ login_reply:gulm_lt_login_reply,
-+ logout_reply:gulm_lt_logout_reply,
-+ lock_state:gulm_lt_lock_state,
-+ lock_action:gulm_lt_lock_action,
-+ drop_lock_req:gulm_lt_drop_lock_req,
-+ drop_all:gulm_lt_drop_all,
-+ error:gulm_lt_error
-+};
-+
+/**
-+ * lt_io_recving_thread -
-+ * @data:
++ * unmark_and_release_lock -
++ * @lck:
+ *
++ * decrement the counter on a lock, freeing it if it reaches 0.
++ * (also removes it from the hash table)
+ *
-+ * Returns: int
++ * TRUE if lock was freed.
++ *
++ * Returns: TRUE or FALSE
+ */
+int
-+lt_io_recving_thread (void *data)
++unmark_and_release_lock (gulm_lock_t * lck)
+{
-+ lock_table_t *lt = &gulm_cm.ltpx;
-+ int err;
++ uint32_t bkt;
++ int deld = FALSE;
+
-+ daemonize ("gulm_LT_recver");
-+ lt->recver_task = current;
-+ complete (<->startup);
++ bkt = crc32 (GULM_CRC_INIT, lck->key, lck->keylen);
++ bkt &= gulm_gfs_lmBits;
+
-+ while (lt->running) {
-+ err = lg_lock_handle_messages (gulm_cm.hookup, &lock_cb, NULL);
-+ if (err != 0) {
-+ log_err ("gulm_LT_recver err %d\n", err);
-+ lt->running = FALSE; /* should stop the sender thread. */
-+ wake_up (<->send_wchan);
-+ break;
++ spin_lock (&gulm_cm.gfs_locklock[bkt]);
++ if (atomic_dec_and_test (&lck->count)) {
++ list_del (&lck->gl_list);
++ deld = TRUE;
++ }
++ spin_unlock (&gulm_cm.gfs_locklock[bkt]);
++ if (deld) {
++ if (lck->lvb != NULL) {
++ kfree (lck->lvb);
+ }
-+ } /* while( lt->running ) */
++ kfree (lck->key);
++ kfree (lck);
++ }
+
-+ complete (<->startup);
-+ return 0;
++ return deld;
+}
+
++/****************************************************************************/
++
+/**
-+ * lt_logout - log out of all of the lock tables
++ * gulm_key_to_lm_lockname -
++ * @key:
++ * @lockname:
++ *
+ */
+void
-+lt_logout (void)
++gulm_key_to_lm_lockname (uint8_t * key, struct lm_lockname *lockname)
+{
-+ lock_table_t *lt = &gulm_cm.ltpx;
-+ int err;
-+
-+ if (lt->running) {
-+ lt->running = FALSE;
-+
-+ /* stop sender thread */
-+ wake_up (<->send_wchan);
-+ wait_for_completion (<->startup);
-+
-+ /* stop recver thread */
-+ down (<->sender);
-+ err = lg_lock_logout (gulm_cm.hookup);
-+ up (<->sender);
++ int pos;
+
-+ /* wait for thread to finish */
-+ wait_for_completion (<->startup);
-+ }
++ pos = key[1] + 4;
++ /* pos now points to the first byte of the GFS lockname that was
++ * embedded in the gulm lock key, skipping over the fs name.
++ */
+
++ (*lockname).ln_type = key[pos];
++ (*lockname).ln_number = (u64) (key[pos+1]) << 56;
++ (*lockname).ln_number |= (u64) (key[pos+2]) << 48;
++ (*lockname).ln_number |= (u64) (key[pos+3]) << 40;
++ (*lockname).ln_number |= (u64) (key[pos+4]) << 32;
++ (*lockname).ln_number |= (u64) (key[pos+5]) << 24;
++ (*lockname).ln_number |= (u64) (key[pos+6]) << 16;
++ (*lockname).ln_number |= (u64) (key[pos+7]) << 8;
++ (*lockname).ln_number |= (u64) (key[pos+8]) << 0;
+}
+
+/**
-+ * lt_login - login to lock tables.
++ * do_drop_lock_req -
++ * @key:
++ * @keylen:
++ * @state:
+ *
-+ * Returns: int
++ *
++ * Returns: void
+ */
-+int
-+lt_login (void)
++void
++do_drop_lock_req (uint8_t *key, uint16_t keylen, uint8_t state)
+{
-+ int err;
-+ lock_table_t *lt = &gulm_cm.ltpx;
-+
-+ if (lt->running)
-+ log_err
-+ ("Trying to log into LTPX when it appears to be logged in!\n");
++ gulm_lock_t *lck;
++ unsigned int type;
++ struct lm_lockname lockname;
+
-+ err = lg_lock_login (gulm_cm.hookup, "GFS ");
-+ if (err != 0) {
-+ log_err ("Failed to send login request. %d\n", err);
-+ goto fail;
++ if (!find_and_mark_lock (key, keylen, &lck)) {
++ return;
+ }
+
-+ /* start recver thread. */
-+ lt->running = TRUE;
-+ err = kernel_thread (lt_io_recving_thread, lt, 0);
-+ if (err < 0) {
-+ log_err ("Failed to start gulm_lt_IOd. (%d)\n", err);
-+ goto fail;
++ switch (state) {
++ case lg_lock_state_Unlock:
++ type = LM_CB_DROPLOCKS;
++ break;
++ case lg_lock_state_Exclusive:
++ type = LM_CB_NEED_E;
++ break;
++ case lg_lock_state_Shared:
++ type = LM_CB_NEED_S;
++ break;
++ case lg_lock_state_Deferred:
++ type = LM_CB_NEED_D;
++ break;
++ default:
++ type = LM_CB_DROPLOCKS;
++ break;
+ }
-+ wait_for_completion (<->startup);
++ gulm_key_to_lm_lockname (key, &lockname);
+
-+ /* start sender thread */
-+ err = kernel_thread (lt_io_sender_thread, lt, 0);
-+ if (err < 0) {
-+ log_err ("Failed to start gulm_LT_sender. (%d)\n", err);
-+ goto fail;
-+ }
-+ wait_for_completion (<->startup);
++ qu_drop_req (&lck->fs->cq, lck->fs->cb, lck->fs->fsdata, type,
++ lockname.ln_type, lockname.ln_number);
+
-+ return 0;
-+ fail:
-+ lt_logout ();
-+ log_msg (lgm_Always, "Exiting lt_login. err:%d\n", err);
-+ return err;
++ unmark_and_release_lock (lck);
+}
+
+/****************************************************************************/
+
+/**
-+ * internal_gulm_get_lock -
-+ * @fs:
-+ * @key:
-+ * @keylen:
-+ * @lockp:
++ * calc_lock_result -
++ * @lck:
++ * @state:
++ * @error:
++ * @flags:
+ *
++ * This calculates the correct result to return for gfs lock requests.
+ *
-+ * Returns: 0 on success, -EXXX on failure
++ * Returns: int
+ */
+int
-+internal_gulm_get_lock (gulm_fs_t * fs, uint8_t * key, uint8_t keylen,
-+ gulm_lock_t ** lockp)
++calc_lock_result (gulm_lock_t * lck,
++ uint8_t state, uint32_t error, uint32_t flags)
+{
-+ int found = FALSE;
-+ uint32_t bkt;
-+ gulm_lock_t *lck = NULL;
-+
-+ found = find_and_mark_lock (key, keylen, &lck);
++ gulm_fs_t *fs = lck->fs;
++ int result = -69;
+
-+ /* malloc space */
-+ if (found) {
-+ GULM_ASSERT (lck->magic_one == 0xAAAAAAAA,);
-+ } else {
-+ lck = kmalloc (sizeof (gulm_lock_t), GFP_KERNEL);
-+ if (lck == NULL) {
-+ log_err
-+ ("fsid=%s: Out of memory for lock struct in get_lock!\n",
-+ fs->fs_name);
-+ return -ENOMEM;
++ /* adjust result based on success status. */
++ switch (error) {
++ case lg_err_Ok:
++ /* set result to current lock state. */
++ switch (state) {
++ case lg_lock_state_Shared:
++ result = LM_ST_SHARED;
++ break;
++ case lg_lock_state_Deferred:
++ result = LM_ST_DEFERRED;
++ break;
++ case lg_lock_state_Exclusive:
++ result = LM_ST_EXCLUSIVE;
++ break;
++ case lg_lock_state_Unlock:
++ result = LM_ST_UNLOCKED;
++ break;
++ default:
++ GULM_ASSERT (0,
++ dump_gulm_lock_t (lck);
++ log_err_lck
++ (lck, "fsid=%s: Anit no lock state %d.\n",
++ fs->fs_name, state);
++ );
++ break;
+ }
-+ memset (lck, 0, sizeof (gulm_lock_t));
-+ INIT_LIST_HEAD (&lck->gl_list);
-+ atomic_set (&lck->count, 1);
-+ lck->magic_one = 0xAAAAAAAA;
-+ lck->fs = fs;
-+ memcpy (lck->key, key, keylen);
-+ lck->keylen = keylen;
-+ lck->lvb = NULL;
-+ init_completion (&lck->actsleep);
-+ lck->actuallypending = FALSE;
-+ lck->in_to_be_sent = FALSE;
-+ lck->result = 0;
-+ lck->action = -1;
-+ lck->req_type = glck_nothing;
-+ lck->last_suc_state = LM_ST_UNLOCKED;
-+
-+ gulm_cm.ltpx.locks_total++;
-+ gulm_cm.ltpx.locks_unl++;
+
-+ bkt = hash_lock_key (key, keylen);
-+ bkt %= gulm_cm.ltpx.hashbuckets;
++ /* if no internal unlocks, it is cachable. */
++ if (result != LM_ST_UNLOCKED && (flags & lg_lock_flag_Cachable))
++ result |= LM_OUT_CACHEABLE;
+
-+ spin_lock (&gulm_cm.ltpx.hshlk[bkt]);
-+ list_add (&lck->gl_list, &gulm_cm.ltpx.lkhsh[bkt]);
-+ spin_unlock (&gulm_cm.ltpx.hshlk[bkt]);
++ break;
++ case lg_err_Canceled:
++ result = LM_OUT_CANCELED | lck->cur_state;
++ break;
++ case lg_err_TryFailed:
++ result = lck->cur_state; /* if we didn't get it. */
++ break;
++ default:
++ result = -error;
++ break;
+ }
+
-+ *lockp = lck;
-+
-+ return 0;
++ return result;
+}
+
++/****************************************************************************/
++
+/**
+ * gulm_get_lock -
+ * @lockspace:
+gulm_get_lock (lm_lockspace_t * lockspace, struct lm_lockname *name,
+ lm_lock_t ** lockp)
+{
-+ int err, len;
++ int err=0, len, bkt;
+ gulm_fs_t *fs = (gulm_fs_t *) lockspace;
-+ uint8_t key[GIO_KEY_SIZE]; uint8_t temp[9];
-+
-+ down (&fs->get_lock);
-+
++ uint8_t key[GIO_KEY_SIZE];
++ uint8_t temp[9];
++ gulm_lock_t *lck=NULL;
+
+ temp[0] = name->ln_type & 0xff;
+ temp[1] = (name->ln_number >> 56) & 0xff;
+ len = pack_lock_key(key, GIO_KEY_SIZE, 'G', fs->fs_name, temp, 9);
+ if( len <=0 ) {err = len; goto exit;}
+
-+ err = internal_gulm_get_lock (fs, key, len, (gulm_lock_t **) lockp);
++ if (!find_and_mark_lock (key, len, &lck)) {
++ /* not found, must create. */
++ lck = kmalloc(sizeof(gulm_lock_t), GFP_KERNEL);
++ if (lck == NULL) {
++ err = -ENOMEM;
++ goto exit;
++ }
++ INIT_LIST_HEAD (&lck->gl_list);
++ atomic_set (&lck->count, 1);
++ lck->key = kmalloc (len, GFP_KERNEL);
++ if (lck->key == NULL) {
++ kfree(lck);
++ err = -ENOMEM;
++ goto exit;
++ }
++ memcpy (lck->key, key, len);
++ lck->keylen = len;
++ lck->fs = fs;
++ lck->lvb = NULL;
++ lck->cur_state = LM_ST_UNLOCKED;
++
++ bkt = crc32 (GULM_CRC_INIT, key, len);
++ bkt &= gulm_gfs_lmBits;
++
++ spin_lock (&gulm_cm.gfs_locklock[bkt]);
++ list_add (&lck->gl_list, &gulm_cm.gfs_lockmap[bkt]);
++ spin_unlock (&gulm_cm.gfs_locklock[bkt]);
++
++ }
++ *lockp = lck;
+
-+ up (&fs->get_lock);
+exit:
+ return err;
+}
+void
+gulm_put_lock (lm_lock_t * lock)
+{
-+ gulm_lock_t *lck = (gulm_lock_t *) lock;
-+ lock_table_t *lt = &gulm_cm.ltpx;
-+ gulm_fs_t *fs = lck->fs;
-+
-+ down (&fs->get_lock);
-+
-+ GULM_ASSERT (lt != NULL,);
-+
-+ if (lck->last_suc_state != LM_ST_UNLOCKED) {
-+ log_err_lck (lck,
-+ "fsid=%s: gulm_put_lock called on a lock that is not unlocked!"
-+ " Current state:%#x\n", lck->fs->fs_name,
-+ lck->last_suc_state);
-+ /* I'm still not sure about this one. We should never see it, so I
-+ * don't think it is that big of a deal, but i duno.
-+ *
-+ * Maybe should just make it an assertion.
-+ *
-+ * with the mark/unmark code, is it even a concern?
-+ */
-+ }
-+
-+ unmark_and_release_lock (lck);
-+ /* lck = NULL; */
-+
-+ up (&fs->get_lock);
-+
-+}
-+
-+static int
-+valid_trasition (unsigned int cur, unsigned int req)
-+{
-+ int lock_state_changes[16] = { /* unl exl def shr */
-+ FALSE, TRUE, TRUE, TRUE, /* unl */
-+ TRUE, FALSE, TRUE, TRUE, /* exl */
-+ TRUE, TRUE, FALSE, TRUE, /* def */
-+ TRUE, TRUE, TRUE, FALSE /* shr */
-+ };
-+ GULM_ASSERT (cur < 4
-+ && req < 4, log_err ("cur:%d req:%d\n", cur, req););
-+
-+ return (lock_state_changes[4 * cur + req]);
++ unmark_and_release_lock ((gulm_lock_t *) lock);
+}
+
+/**
-+ * verify_gulm_lock_t -
-+ * @lck:
++ * gulm_lock_finish -
++ * @glck:
+ *
-+ * wonder if I should add some other checks.
+ *
-+ * Returns: int
++ * Returns: void
+ */
-+int
-+verify_gulm_lock_t (gulm_lock_t * lck)
++void gulm_lock_finish (struct glck_req *item)
+{
-+ if (lck == NULL) {
-+ log_err ("Lock pointer was NULL!\n");
-+ return -1;
-+ }
-+ if (lck->fs == NULL) {
-+ log_err ("This lock has no filesystem!!!\n");
-+ return -1;
-+ }
-+ return 0;
++ int result;
++ gulm_lock_t *lck = (gulm_lock_t *)item->misc;
++ gulm_fs_t *fs = lck->fs;
++ struct lm_lockname lockname;
++
++ result = calc_lock_result (lck, item->state, item->error, item->flags);
++
++ gulm_key_to_lm_lockname (lck->key, &lockname);
++
++ qu_async_rpl (&fs->cq, fs->cb, fs->fsdata, &lockname, result);
++
++ /* marked in gulm_lock() */
++ unmark_and_release_lock (lck);
+}
+
+/**
+gulm_lock (lm_lock_t * lock, unsigned int cur_state,
+ unsigned int req_state, unsigned int flags)
+{
-+ gulm_lock_t *lck = NULL;
-+ gulm_fs_t *fs;
-+ lock_table_t *lt;
++ glckr_t *item;
++ gulm_lock_t *lck = (gulm_lock_t *) lock;
++ gulm_fs_t *fs = lck->fs;
+
-+ /* verify vars. */
-+ lck = (gulm_lock_t *) lock;
-+ if (verify_gulm_lock_t (lck) != 0) {
-+ return -EINVAL;
++ item = glq_get_new_req();
++ if (item == NULL) {
++ return -ENOMEM;
+ }
-+ lt = &gulm_cm.ltpx;
-+ fs = lck->fs;
+
-+ GULM_ASSERT (valid_trasition (cur_state, req_state),
-+ log_err_lck (lck, "want %d with %s thinks:%d\n", req_state,
-+ (LM_FLAG_TRY & flags) ? "try" : (LM_FLAG_NOEXP
-+ & flags) ?
-+ "noexp" : "no flags", cur_state);
-+ );
++ mark_lock (lck); /* matching unmark is in gulm_lock_finish */
++
++ item->key = lck->key;
++ item->keylen = lck->keylen;
++ item->subid = 0;
++ item->start = 0;
++ item->stop = ~((uint64_t)0);
++ item->type = glq_req_type_state;
++
++ switch (req_state) {
++ case LM_ST_EXCLUSIVE:
++ item->state = lg_lock_state_Exclusive;
++ break;
++ case LM_ST_DEFERRED:
++ item->state = lg_lock_state_Deferred;
++ break;
++ case LM_ST_SHARED:
++ item->state = lg_lock_state_Shared;
++ break;
++ case LM_ST_UNLOCKED:
++ item->state = lg_lock_state_Unlock;
++ break;
++ default:
++ GULM_ASSERT (0, log_err ("fsid=%s: Anit no lock state %d.\n",
++ fs->fs_name, req_state););
++ break;
++ }
++ item->flags = 0;
++ if (flags & LM_FLAG_TRY) {
++ item->flags |= lg_lock_flag_Try;
++ }
++ if (flags & LM_FLAG_TRY_1CB) {
++ item->flags |= lg_lock_flag_Try | lg_lock_flag_DoCB;
++ }
++ if (flags & LM_FLAG_NOEXP) {
++ item->flags |= lg_lock_flag_IgnoreExp;
++ }
++ if (flags & LM_FLAG_ANY) {
++ item->flags |= lg_lock_flag_Any;
++ }
++ if (flags & LM_FLAG_PRIORITY) {
++ item->flags |= lg_lock_flag_Piority;
++ }
++ if (lck->lvb != NULL) {
++ item->lvb = lck->lvb;
++ item->lvblen = fs->lvb_size;
++ }else{
++ item->lvb = NULL;
++ item->lvblen = 0;
++ }
++ item->error = 0;
+
-+ GULM_ASSERT (lck->actuallypending == FALSE, dump_gulm_lock_t (lck););
++ item->misc = lck;
++ item->finish = gulm_lock_finish;
+
-+ /* save the details of this request. */
-+ lck->req_type = glck_state;
-+ lck->result = 0;
+ lck->cur_state = cur_state;
-+ lck->req_state = req_state;
-+ lck->flags = flags;
-+
-+ /* moving these here fixes a race on the s390 that ben found.
-+ * basically, the request was sent to the server, the server receives
-+ * it, the server processes, the server sends a reply, the client
-+ * receives the reply, and the client tries to processe the reply before
-+ * this thread could mark it as actuallypending.
-+ * */
-+ lck->actuallypending = TRUE;
-+ atomic_inc (<->locks_pending);
-+ add_lock_to_send_req_queue (lt, lck, sr_lock);
+
-+ lt->lops++;
-+#ifdef USE_SYNC_LOCKING
-+ wait_for_completion (&lck->actsleep);
-+#endif
++ glq_queue (item);
+
-+#ifdef USE_SYNC_LOCKING
-+ return lck->result;
-+#else
+ return LM_OUT_ASYNC;
-+#endif
+}
+
+/**
+void
+gulm_cancel (lm_lock_t * lock)
+{
-+ gulm_lock_t *lck;
-+ gulm_fs_t *fs;
-+ lock_table_t *lt;
++ glckr_t *item;
++ gulm_lock_t *lck = (gulm_lock_t *) lock;
+
-+ /* verify vars. */
-+ lck = (gulm_lock_t *) lock;
-+ if (verify_gulm_lock_t (lck) != 0) {
-+ return;
-+ }
-+ lt = &gulm_cm.ltpx;
-+ fs = lck->fs;
++ mark_lock (lck);
+
-+ if (lck->actuallypending) {
-+ if (lck->in_to_be_sent) {
-+ /* this should pull the req out of the send queue and have it
-+ * return with a cancel code without going to the server.
-+ */
-+ cancel_pending_sender (lck);
-+ } else {
-+ add_lock_to_send_req_queue (lt, lck, sr_cancel);
-+ }
-+ } else {
-+ log_msg_lck (lck, "Cancel called with no pending request.\n");
++ item = glq_get_new_req();
++ if( item == NULL ) goto exit;
++
++ /* have to make a copy for cancel req. */
++ item->key = kmalloc(lck->keylen, GFP_KERNEL);
++ if (item->key == NULL) {
++ glq_recycle_req(item);
++ goto exit;
+ }
++ memcpy(item->key, lck->key, lck->keylen);
++ item->keylen = lck->keylen;
++ item->subid = 0;
++ item->start = 0;
++ item->stop = ~((uint64_t)0);
++ item->type = glq_req_type_cancel;
++ item->finish = NULL;
++
++ glq_cancel(item);
++
++exit:
++ unmark_and_release_lock (lck);
++}
++
++/****************************************************************************/
++struct gulm_lvb_temp_s {
++ int error;
++ struct completion sleep;
++};
+
++/**
++ * gulm_lvb_finish -
++ * @glck:
++ *
++ *
++ * Returns: void
++ */
++void gulm_lvb_finish(struct glck_req *glck)
++{
++ struct gulm_lvb_temp_s *g = (struct gulm_lvb_temp_s *)glck->misc;
++ g->error = glck->error;
++ complete (&g->sleep);
+}
+
+/**
+int
+gulm_hold_lvb (lm_lock_t * lock, char **lvbp)
+{
-+ gulm_lock_t *lck;
-+ gulm_fs_t *fs;
-+ lock_table_t *lt;
+ int err = -1;
++ struct gulm_lvb_temp_s ghlt;
++ glckr_t *item;
++ gulm_lock_t *lck = (gulm_lock_t *) lock;
++ gulm_fs_t *fs = lck->fs;
+
-+ /* verify vars. */
-+ lck = (gulm_lock_t *) lock;
-+ if (verify_gulm_lock_t (lck) != 0) {
-+ return -EINVAL;
-+ }
-+ lt = &gulm_cm.ltpx;
-+ fs = lck->fs;
++ mark_lock (lck);
+
-+ /* what where these for? */
-+ GULM_ASSERT (lck->magic_one == 0xAAAAAAAA,
-+ log_msg_lck (lck, "Bad gulm_lock magic.\n"););
-+ GULM_ASSERT (lt->magic_one == 0xAAAAAAAA,
-+ log_msg_lck (lck, "Bad lock_table magic.\n"););
++ item = glq_get_new_req();
++ if( item == NULL ) {
++ err = -ENOMEM;
++ goto fail;
++ }
+
-+ lvb_log_msg_lk (lck->key, "Entering gulm_hold_lvb\n");
++ item->key = lck->key;
++ item->keylen = lck->keylen;
++ item->subid = 0;
++ item->start = 0;
++ item->stop = ~((uint64_t)0);
++ item->type = glq_req_type_action;
++ item->state = lg_lock_act_HoldLVB;
++ item->flags = 0;
++ item->error = ghlt.error = 0;
+
-+ GULM_ASSERT (lck->lvb == NULL,
-+ log_msg_lck (lck,
-+ "fsid=%s: Lvb data wasn't null! must be held "
-+ "already.\n", fs->fs_name);
-+ );
++ init_completion (&ghlt.sleep);
+
-+ GULM_ASSERT (lck->actuallypending == FALSE, dump_gulm_lock_t (lck););
++ item->misc = &ghlt;
++ item->finish = gulm_lvb_finish;
+
+ lck->lvb = kmalloc (fs->lvb_size, GFP_KERNEL);
+ if (lck->lvb == NULL) {
+ }
+ memset (lck->lvb, 0, fs->lvb_size);
+
-+ lck->req_type = glck_action;
-+ lck->action = lg_lock_act_HoldLVB;
-+ lck->result = 0;
-+ lck->actuallypending = TRUE;
-+ add_lock_to_send_req_queue (lt, lck, sr_act);
++ item->lvb = lck->lvb;
++ item->lvblen = fs->lvb_size;
+
-+ wait_for_completion (&lck->actsleep);
++ glq_queue (item);
++ wait_for_completion (&ghlt.sleep);
++ /* after here, item is no longer valid
++ * (memory was probably freed.)
++	 * which is why we use ghlt.error and not item->error.
++ */
+
-+ if (lck->result != lg_err_Ok) {
++ if (ghlt.error != lg_err_Ok) {
+ log_err ("fsid=%s: Got error %d on hold lvb request.\n",
-+ fs->fs_name, lck->result);
++ fs->fs_name, ghlt.error);
+ kfree (lck->lvb);
+ lck->lvb = NULL;
+ goto fail;
+ }
+
-+ lt->locks_lvbs++;
-+
+ *lvbp = lck->lvb;
+
++ unmark_and_release_lock (lck);
++
+ lvb_log_msg_lk (lck->key, "fsid=%s: Exiting gulm_hold_lvb\n",
+ fs->fs_name);
+ return 0;
+ fail:
++ unmark_and_release_lock (lck);
+ if (err != 0)
+ log_msg (lgm_Always,
+ "fsid=%s: Exiting gulm_hold_lvb with errors (%d)\n",
+void
+gulm_unhold_lvb (lm_lock_t * lock, char *lvb)
+{
-+ gulm_lock_t *lck = NULL;
-+ gulm_fs_t *fs;
-+ lock_table_t *lt;
-+
-+ /* verify vars. */
-+ lck = (gulm_lock_t *) lock;
-+ if (verify_gulm_lock_t (lck) != 0) {
-+ return;
-+ }
-+ lt = &gulm_cm.ltpx;
-+ fs = lck->fs;
++ struct gulm_lvb_temp_s ghlt;
++ glckr_t *item;
++ gulm_lock_t *lck = (gulm_lock_t *) lock;
++ gulm_fs_t *fs = lck->fs;
+
-+ GULM_ASSERT (lck->actuallypending == FALSE, dump_gulm_lock_t (lck););
++ mark_lock (lck);
+
-+ if (lck->lvb != lvb) {
-+ log_err ("fsid=%s: AH! LVB pointer missmatch! %p != %p\n",
-+ fs->fs_name, lck->lvb, lvb);
++ item = glq_get_new_req();
++ if( item == NULL ) {
++ log_err("unhold_lvb: failed to get needed memory. skipping.\n");
+ goto exit;
+ }
+
-+ lvb_log_msg_lk (lck->key, "Entering gulm_unhold_lvb\n");
++ item->key = lck->key;
++ item->keylen = lck->keylen;
++ item->subid = 0;
++ item->start = 0;
++ item->stop = ~((uint64_t)0);
++ item->type = glq_req_type_action;
++ item->state = lg_lock_act_UnHoldLVB;
++ item->flags = 0;
++ item->error = ghlt.error = 0;
++
++ init_completion (&ghlt.sleep);
+
-+ lck->req_type = glck_action;
-+ lck->action = lg_lock_act_UnHoldLVB;
-+ lck->result = 0;
-+ lck->actuallypending = TRUE;
-+ add_lock_to_send_req_queue (lt, lck, sr_act);
++ item->misc = &ghlt;
++ item->finish = gulm_lvb_finish;
+
-+ wait_for_completion (&lck->actsleep);
++ item->lvb = lck->lvb;
++ item->lvblen = fs->lvb_size;
+
-+ /* XXX ummm, is it sane to not free the memory if the command fails?
-+ * gfs will still think that the lvb was dropped sucessfully....
-+ * (it assumes it is always sucessful)
-+ * Maybe I should retry the drop request then?
++ glq_queue (item);
++ wait_for_completion (&ghlt.sleep);
++ /* after here, item is no longer valid
++ * (memory was probably freed.)
++	 * which is why we use ghlt.error and not item->error.
+ */
-+ if (lck->result != lg_err_Ok) {
++
++ if (ghlt.error != lg_err_Ok) {
+ log_err ("fsid=%s: Got error %d on unhold LVB request.\n",
-+ lck->fs->fs_name, lck->result);
-+ } else {
-+ if (lck->lvb != NULL)
-+ kfree (lck->lvb);
-+ lck->lvb = NULL;
-+ lt->locks_lvbs--;
++ lck->fs->fs_name, ghlt.error);
+ }
++ /* free it always. GFS thinks it is gone no matter what the server
++	 * thinks. (and as much as I hate to say it this way, far better to
++ * leak in userspace than in kernel space.)
++ */
++ if (lck->lvb != NULL)
++ kfree (lck->lvb);
++ lck->lvb = NULL;
+ exit:
++ unmark_and_release_lock (lck);
+ lvb_log_msg ("Exiting gulm_unhold_lvb\n");
+}
+
+ * @lock:
+ * @lvb:
+ *
-+ * umm, is this even used anymore? yes.
-+ *
-+ * Returns: void
+ */
+void
+gulm_sync_lvb (lm_lock_t * lock, char *lvb)
+{
-+ gulm_lock_t *lck = NULL;
-+ gulm_fs_t *fs;
-+ lock_table_t *lt;
-+
-+ /* verify vars. */
-+ lck = (gulm_lock_t *) lock;
-+ if (verify_gulm_lock_t (lck) != 0) {
-+ return;
-+ }
-+ lt = &gulm_cm.ltpx;
-+ fs = lck->fs;
++ struct gulm_lvb_temp_s ghlt;
++ glckr_t *item;
++ gulm_lock_t *lck = (gulm_lock_t *) lock;
++ gulm_fs_t *fs = lck->fs;
+
-+ GULM_ASSERT (lck->actuallypending == FALSE, dump_gulm_lock_t (lck););
++ mark_lock (lck);
+
-+ /* this check is also in the server, so it isn't really needed here. */
-+ if (lck->last_suc_state != LM_ST_EXCLUSIVE) {
-+ log_err ("sync_lvb: You must hold the lock Exclusive first.\n");
-+ goto exit; /*cannot do anything */
-+ }
-+ if (lck->lvb == NULL) {
-+ log_err ("sync_lvb: You forgot to call hold lvb first.\n");
-+ goto exit;
-+ }
-+ if (lck->lvb != lvb) {
-+ log_err ("fsid=%s: AH! LVB pointer missmatch! %p != %p\n",
-+ fs->fs_name, lck->lvb, lvb);
++ item = glq_get_new_req();
++ if( item == NULL ) {
++ log_err("sync_lvb: failed to get needed memory. skipping.\n");
+ goto exit;
+ }
+
-+ lvb_log_msg_lk (lck->key, "Entering gulm_sync_lvb\n");
++ item->key = lck->key;
++ item->keylen = lck->keylen;
++ item->subid = 0;
++ item->start = 0;
++ item->stop = ~((uint64_t)0);
++ item->type = glq_req_type_action;
++ item->state = lg_lock_act_SyncLVB;
++ item->flags = 0;
++ item->error = ghlt.error = 0;
++
++ init_completion (&ghlt.sleep);
+
-+ lck->req_type = glck_action;
-+ lck->action = lg_lock_act_SyncLVB;
-+ lck->result = 0;
-+ lck->actuallypending = TRUE;
-+ add_lock_to_send_req_queue (lt, lck, sr_act);
++ item->misc = &ghlt;
++ item->finish = gulm_lvb_finish;
+
-+ wait_for_completion (&lck->actsleep);
++ item->lvb = lck->lvb;
++ item->lvblen = fs->lvb_size;
+
-+ /* XXX? retry if I get an error? */
-+ if (lck->result != lg_err_Ok) {
-+ log_err_lck (lck,
-+ "fsid=%s: Got error %d:%s on Sync LVB request.\n",
-+ fs->fs_name, lck->result,
-+ gio_Err_to_str (lck->result));
++ glq_queue (item);
++ wait_for_completion (&ghlt.sleep);
++ /* after here, item is no longer valid
++ * (memory was probably freed.)
++	 * which is why we use ghlt.error and not item->error.
++ */
++
++ if (ghlt.error != lg_err_Ok) {
++ log_err ("fsid=%s: Got error %d on sync LVB request.\n",
++ lck->fs->fs_name, ghlt.error);
+ }
++
+ exit:
++ unmark_and_release_lock (lck);
+ lvb_log_msg ("Exiting gulm_sync_lvb\n");
-+}
+
-+/*****************************************************************************/
-+static int
-+gulm_plock_get (lm_lockspace_t * lockspace,
-+ struct lm_lockname *name,
-+ struct file *file, struct file_lock *fl)
-+{
-+ return -ENOSYS;
+}
+
-+static int
-+gulm_plock (lm_lockspace_t * lockspace,
-+ struct lm_lockname *name,
-+ struct file *file, int cmd, struct file_lock *fl)
-+{
-+ return -ENOSYS;
-+}
++/* vim: set ai cin noet sw=8 ts=8 : */
+diff -urN linux-orig/fs/gfs_locking/lock_gulm/gulm_main.c linux-patched/fs/gfs_locking/lock_gulm/gulm_main.c
+--- linux-orig/fs/gfs_locking/lock_gulm/gulm_main.c 1969-12-31 18:00:00.000000000 -0600
++++ linux-patched/fs/gfs_locking/lock_gulm/gulm_main.c 2004-10-22 11:54:55.000000000 -0500
+@@ -0,0 +1,130 @@
++/******************************************************************************
++*******************************************************************************
++**
++** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
++** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
++**
++** This copyrighted material is made available to anyone wishing to use,
++** modify, copy, or redistribute it subject to the terms and conditions
++** of the GNU General Public License v.2.
++**
++*******************************************************************************
++******************************************************************************/
+
-+static int
-+gulm_punlock (lm_lockspace_t * lockspace,
-+ struct lm_lockname *name,
-+ struct file *file, struct file_lock *fl)
++#define EXTERN
++#include "gulm.h"
++
++#include <linux/init.h>
++
++#include "gulm_lock_queue.h"
++
++MODULE_DESCRIPTION ("Grand Unified Locking Module " GULM_RELEASE_NAME);
++MODULE_AUTHOR ("Red Hat, Inc.");
++MODULE_LICENSE ("GPL");
++
++gulm_cm_t gulm_cm;
++
++struct lm_lockops gulm_ops = {
++ lm_proto_name:PROTO_NAME,
++ lm_mount:gulm_mount,
++ lm_others_may_mount:gulm_others_may_mount,
++ lm_unmount:gulm_unmount,
++ lm_get_lock:gulm_get_lock,
++ lm_put_lock:gulm_put_lock,
++ lm_lock:gulm_lock,
++ lm_unlock:gulm_unlock,
++ lm_cancel:gulm_cancel,
++ lm_hold_lvb:gulm_hold_lvb,
++ lm_unhold_lvb:gulm_unhold_lvb,
++ lm_sync_lvb:gulm_sync_lvb,
++ lm_plock_get:gulm_plock_get,
++ lm_plock:gulm_plock,
++ lm_punlock:gulm_punlock,
++ lm_recovery_done:gulm_recovery_done,
++ lm_owner:THIS_MODULE,
++};
++
++/**
++ * init_gulm - Initialize the gulm module
++ *
++ * Returns: 0 on success, -EXXX on failure
++ */
++int __init
++init_gulm (void)
+{
-+ return -ENOSYS;
++ int error;
++
++ memset (&gulm_cm, 0, sizeof (gulm_cm_t));
++ gulm_cm.hookup = NULL;
++
++ /* register with the lm layers. */
++ error = lm_register_proto (&gulm_ops);
++ if (error)
++ goto fail;
++
++ error = glq_init();
++ if (error != 0 )
++ goto lm_fail;
++
++ error = gulm_lt_init();
++ if (error != 0)
++ goto glq_fail;
++
++ init_gulm_fs ();
++
++ printk ("Gulm %s (built %s %s) installed\n",
++ GULM_RELEASE_NAME, __DATE__, __TIME__);
++
++ return 0;
++
++glq_fail:
++ glq_release();
++
++ lm_fail:
++ lm_unregister_proto (&gulm_ops);
++
++ fail:
++ return error;
+}
+
-+/****************************************************************************/
-+/****************************************************************************/
-+/****************************************************************************/
-+/* should move the firstmounter lock stuff into its own file perhaps? */
+/**
-+ * get_special_lock -
-+ * @fs: <> filesystem we're getting special lock for
++ * exit_gulm - cleanup the gulm module
+ *
-+ * Returns: gulm_lock_t
+ */
-+STATIC gulm_lock_t *
-+get_special_lock (gulm_fs_t * fs)
++
++void __exit
++exit_gulm (void)
+{
-+ int err, len;
-+ gulm_lock_t *lck = NULL;
-+ uint8_t key[GIO_KEY_SIZE];
++ gulm_lt_release();
++ glq_release();
++ lm_unregister_proto (&gulm_ops);
++}
++
++module_init (init_gulm);
++module_exit (exit_gulm);
++
++/* the libgulm.h interface. */
++EXPORT_SYMBOL (lg_initialize);
++EXPORT_SYMBOL (lg_release);
++
++EXPORT_SYMBOL (lg_core_handle_messages);
++EXPORT_SYMBOL (lg_core_selector);
++EXPORT_SYMBOL (lg_core_login);
++EXPORT_SYMBOL (lg_core_logout);
++EXPORT_SYMBOL (lg_core_nodeinfo);
++EXPORT_SYMBOL (lg_core_nodelist);
++EXPORT_SYMBOL (lg_core_servicelist);
++EXPORT_SYMBOL (lg_core_corestate);
++EXPORT_SYMBOL (lg_core_shutdown);
++EXPORT_SYMBOL (lg_core_forceexpire);
++EXPORT_SYMBOL (lg_core_forcepending);
++
++EXPORT_SYMBOL (lg_lock_handle_messages);
++EXPORT_SYMBOL (lg_lock_selector);
++EXPORT_SYMBOL (lg_lock_login);
++EXPORT_SYMBOL (lg_lock_logout);
++EXPORT_SYMBOL (lg_lock_state_req);
++EXPORT_SYMBOL (lg_lock_cancel_req);
++EXPORT_SYMBOL (lg_lock_action_req);
++EXPORT_SYMBOL (lg_lock_drop_exp);
++/* vim: set ai cin noet sw=8 ts=8 : */
+diff -urN linux-orig/fs/gfs_locking/lock_gulm/gulm_plock.c linux-patched/fs/gfs_locking/lock_gulm/gulm_plock.c
+--- linux-orig/fs/gfs_locking/lock_gulm/gulm_plock.c 1969-12-31 18:00:00.000000000 -0600
++++ linux-patched/fs/gfs_locking/lock_gulm/gulm_plock.c 2004-10-22 11:54:55.000000000 -0500
+@@ -0,0 +1,259 @@
++/******************************************************************************
++*******************************************************************************
++**
++** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
++** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
++**
++** This copyrighted material is made available to anyone wishing to use,
++** modify, copy, or redistribute it subject to the terms and conditions
++** of the GNU General Public License v.2.
++**
++*******************************************************************************
++******************************************************************************/
++
++#include "gulm.h"
++
++#include <linux/kernel.h>
++#include <linux/fs.h>
++#include <linux/slab.h>
++#include <linux/file.h>
++#define __KERNEL_SYSCALLS__
++#include <linux/unistd.h>
+
-+ len = pack_lock_key(key, GIO_KEY_SIZE, 'F', fs->fs_name, "irstMount", 9);
-+ if( len <= 0 ) return NULL;
++#include "gulm_lock_queue.h"
+
-+ err = internal_gulm_get_lock (fs, key, len, &lck);
++/*****************************************************************************/
++struct gulm_pret_s {
++ int error;
++ struct completion sleep;
++};
+
-+ /* return pointer */
-+ return lck;
++/**
++ * gulm_plock_packname -
++ * @fsname:
++ * @num:
++ * @key:
++ * @keylen:
++ *
++ *
++ * Returns: int
++ */
++int gulm_plock_packname(uint8_t * fsname, uint64_t num, uint8_t *key, uint16_t keylen)
++{
++ uint8_t temp[8];
++ temp[0] = (num >> 56) & 0xff;
++ temp[1] = (num >> 48) & 0xff;
++ temp[2] = (num >> 40) & 0xff;
++ temp[3] = (num >> 32) & 0xff;
++ temp[4] = (num >> 24) & 0xff;
++ temp[5] = (num >> 16) & 0xff;
++ temp[6] = (num >> 8) & 0xff;
++ temp[7] = (num >> 0) & 0xff;
++ return pack_lock_key(key, keylen, 'P', fsname, temp, 8);
+}
+
+/**
-+ * do_lock_time_out -
-+ * @d:
-+ *
-+ * after timeout, set cancel request on the handler queue. (since we cannot
-+ * call it from within the timer code.
++ * gulm_plock_finish -
++ * @glck:
++ *
+ *
++ * Returns: void
+ */
-+static void
-+do_lock_time_out (unsigned long d)
++void gulm_plock_finish(struct glck_req *glck)
+{
-+ gulm_lock_t *lck = (gulm_lock_t *) d;
-+ qu_function_call (&lck->fs->cq, gulm_cancel, lck);
++ struct gulm_pret_s *g = (struct gulm_pret_s *)glck->misc;
++ g->error = glck->error;
++ complete (&g->sleep);
+}
+
++struct gulm_pqur_s {
++ uint64_t start;
++ uint64_t stop;
++ uint64_t subid;
++ int error;
++ uint8_t state;
++ struct completion sleep;
++};
+/**
-+ * get_mount_lock -
-+ * @fs:
-+ * @first:
++ * gulm_plock_query_finish -
++ * @glck:
+ *
-+ * Get the Firstmount lock.
-+ * We try to grab it Exl. IF we get that, then we are the first client
-+ * mounting this fs. Otherwise we grab it shared to show that there are
-+ * clients using this fs.
+ *
-+ * Returns: int
++ * Returns: void
++ */
++void gulm_plock_query_finish(struct glck_req *glck)
++{
++ struct gulm_pqur_s *g = (struct gulm_pqur_s *)glck->misc;
++ g->error = glck->error;
++ g->start = glck->start;
++ g->stop = glck->stop;
++ g->subid = glck->subid;
++ g->state = glck->state;
++ complete (&g->sleep);
++}
++/**
++ * gulm_plock_get -
+ */
+int
-+get_mount_lock (gulm_fs_t * fs, int *first)
++gulm_plock_get (lm_lockspace_t * lockspace, struct lm_lockname *name,
++ struct file *file, struct file_lock *fl)
+{
-+ int err;
-+ struct timer_list locktimeout;
-+ gulm_lock_t *lck = NULL;
-+ /*
-+ * first we need to get the lock into the hash.
-+ * then we can try to get it Exl with try and noexp.
-+ * if the try fails, grab it shared.
-+ */
++ int err = 0;
++ struct gulm_pqur_s pqur;
++ uint8_t key[GIO_KEY_SIZE];
++ gulm_fs_t *fs = (gulm_fs_t *) lockspace;
++ glckr_t *item;
+
-+ lck = get_special_lock (fs); /* there is only a mount lock. */
-+ if (lck == NULL) {
++ item = glq_get_new_req();
++ if( item == NULL ) {
+ err = -ENOMEM;
+ goto fail;
+ }
+
-+ fs->mountlock = lck;
-+ try_it_again:
-+ *first = FALSE; /* assume we're not first */
-+
-+ err = gulm_lock (lck, LM_ST_UNLOCKED, LM_ST_EXCLUSIVE,
-+ LM_FLAG_TRY | LM_FLAG_NOEXP);
-+#ifndef USE_SYNC_LOCKING
-+ wait_for_completion (&fs->sleep);
-+#endif
++ item->keylen = gulm_plock_packname(fs->fs_name, name->ln_number,
++ key, GIO_KEY_SIZE);
++ item->key = key;
++ item->subid = (unsigned long) fl->fl_owner;
++ item->start = fl->fl_start;
++ item->stop = fl->fl_end;
++ item->type = glq_req_type_query;
++ if (fl->fl_type == F_WRLCK) {
++ item->state = lg_lock_state_Exclusive;
++ } else {
++ item->state = lg_lock_state_Shared;
++ }
++ item->flags = lg_lock_flag_NoCallBacks;
++ item->error = pqur.error = 0;
+
-+ if ((lck->result & LM_OUT_ST_MASK) == LM_ST_EXCLUSIVE) {
-+ /* we got the lock, we're the first mounter. */
-+ *first = TRUE;
-+ log_msg (lgm_locking, "fsid=%s: Got mount lock Exclusive.\n",
-+ fs->fs_name);
-+ return 0;
-+ } else if ((lck->result & LM_OUT_ST_MASK) == LM_ST_UNLOCKED) {
-+ log_msg (lgm_locking,
-+ "fsid=%s: Didn't get mount lock Exl, someone else "
-+ "was first, trying for shared.\n", fs->fs_name);
++ init_completion (&pqur.sleep);
+
-+ /* the try failed, pick it up shared. */
-+ /* There was a case (bug #220) where we could hang here.
-+ *
-+ * To handle this, we put up a timer for a couple of
-+ * minutes. That if it trips, it cancels our shared
-+ * request. Which we then see, so we go back and try the
-+ * EXL again. If the Firstmounter is fine and is just
-+ * taking a damn long time to do its work, this just ends
-+ * back here, no worse for the wear.
-+ *
-+ * Another way to do this, is to wait for a killed message
-+ * for the master. When we get that, && we're pending
-+ * shared here, send the gulm_canel for the mounter lock.
-+ * (too bad we are not in the fs list yet at this point.
-+ * (well, maybe that *isn't* a bad thing))
-+ */
-+ init_timer (&locktimeout);
-+ locktimeout.function = do_lock_time_out;
-+ locktimeout.data = (unsigned long) lck;
-+ mod_timer (&locktimeout, jiffies + (120 * HZ));
-+ err = gulm_lock (lck, LM_ST_UNLOCKED, LM_ST_SHARED, 0);
-+#ifndef USE_SYNC_LOCKING
-+ wait_for_completion (&fs->sleep);
-+#endif
-+ del_timer (&locktimeout);
++ item->misc = &pqur;
++ item->finish = gulm_plock_query_finish;
+
-+ if ((lck->result & LM_OUT_ST_MASK) == LM_ST_SHARED) {
-+ /* kewl we got it. */
-+ log_msg (lgm_locking,
-+ "fsid=%s: Got mount lock shared.\n",
-+ fs->fs_name);
-+ return 0;
-+ }
++ glq_queue (item);
++ wait_for_completion (&pqur.sleep);
+
-+ log_msg (lgm_locking,
-+ "fsid=%s: Shared req timed out, trying Exl again.\n",
-+ fs->fs_name);
-+ goto try_it_again;
++ if (pqur.error == lg_err_TryFailed) {
++ err = -EAGAIN;
++ fl->fl_start = pqur.start;
++ fl->fl_end = pqur.stop;
++ fl->fl_pid = pqur.subid;
++ if( pqur.state == lg_lock_state_Exclusive )
++ fl->fl_type = F_WRLCK;
++ else
++ fl->fl_type = F_RDLCK;
++ } else if (pqur.error == 0) {
++ fl->fl_type = F_UNLCK;
++ } else {
++ err = -pqur.error;
+ }
-+ fail:
-+ log_err ("Exit get_mount_lock err=%d\n", err);
++
++fail:
+ return err;
+}
+
+/**
-+ * downgrade_mount_lock -
-+ * @fs:
-+ *
-+ * drop the Firstmount lock down to shared. This lets other mount.
-+ *
-+ * Returns: int
++ * gulm_plock -
++ *
+ */
+int
-+downgrade_mount_lock (gulm_fs_t * fs)
++gulm_plock (lm_lockspace_t *lockspace, struct lm_lockname *name,
++ struct file *file, int cmd, struct file_lock *fl)
+{
-+ int err;
-+ gulm_lock_t *lck = (gulm_lock_t *) fs->mountlock;
-+ /* we were first, so we have it exl.
-+ * shift it to shared so others may mount.
-+ */
-+ err = gulm_lock (lck, LM_ST_EXCLUSIVE, LM_ST_SHARED, LM_FLAG_NOEXP);
-+#ifndef USE_SYNC_LOCKING
-+ wait_for_completion (&fs->sleep);
-+#endif
++ int err = 0;
++ struct gulm_pret_s pret;
++ uint8_t key[GIO_KEY_SIZE];
++ gulm_fs_t *fs = (gulm_fs_t *) lockspace;
++ glckr_t *item;
+
-+ if ((lck->result & LM_OUT_ST_MASK) != LM_ST_SHARED) {
-+ log_err
-+ ("fsid=%s: Couldn't downgrade mount lock to shared!!!!!\n",
-+ fs->fs_name);
++ item = glq_get_new_req();
++ if( item == NULL ) {
++ err = -ENOMEM;
++ goto fail;
+ }
-+ return 0;
++
++ item->keylen = gulm_plock_packname(fs->fs_name, name->ln_number,
++ key, GIO_KEY_SIZE);
++ item->key = key;
++ item->subid = (unsigned long) fl->fl_owner;
++ item->start = fl->fl_start;
++ item->stop = fl->fl_end;
++ item->type = glq_req_type_state;
++ if (fl->fl_type == F_WRLCK) {
++ item->state = lg_lock_state_Exclusive;
++ } else {
++ item->state = lg_lock_state_Shared;
++ }
++ item->flags = lg_lock_flag_NoCallBacks;
++ if (IS_SETLKW(cmd))
++ item->flags |= lg_lock_flag_Try;
++ item->error = pret.error = 0;
++
++ init_completion (&pret.sleep);
++
++ item->misc = &pret;
++ item->finish = gulm_plock_finish;
++
++ glq_queue (item);
++ /* TODO should be interruptible by signals */
++ wait_for_completion (&pret.sleep);
++
++ if (pret.error == lg_err_TryFailed) {
++ err = -EAGAIN;
++ } else {
++ err = -pret.error;
++ }
++
++ if ( err != 0) err = posix_lock_file_wait(file, fl);
++
++fail:
++ return err;
+}
+
+/**
-+ * drop_mount_lock - drop our hold on the firstmount lock.
-+ * @fs: <> the filesystem pointer.
-+ *
-+ * Returns: int
++ * gulm_unplock -
+ */
+int
-+drop_mount_lock (gulm_fs_t * fs)
++gulm_punlock (lm_lockspace_t * lockspace, struct lm_lockname *name,
++ struct file *file, struct file_lock *fl)
+{
-+ int err;
-+ gulm_lock_t *lck = (gulm_lock_t *) fs->mountlock;
++ int err = 0;
++ struct gulm_pret_s pret;
++ uint8_t key[GIO_KEY_SIZE];
++ gulm_fs_t *fs = (gulm_fs_t *) lockspace;
++ glckr_t *item;
+
-+ if (fs->mountlock == NULL) {
-+ log_err ("fsid=%s: There's no Mount lock!!!!!\n", fs->fs_name);
-+ return -1;
++ item = glq_get_new_req();
++ if( item == NULL ) {
++ err = -ENOMEM;
++ goto fail;
+ }
-+ err = gulm_unlock (lck, LM_ST_SHARED);
-+#ifndef USE_SYNC_LOCKING
-+ wait_for_completion (&fs->sleep);
-+#endif
+
-+ if (lck->result != LM_ST_UNLOCKED)
-+ log_err ("fsid=%s: Couldn't unlock mount lock!!!!!!\n",
-+ fs->fs_name);
-+ gulm_put_lock (fs->mountlock);
-+ fs->mountlock = NULL;
-+ return 0;
++ item->keylen = gulm_plock_packname(fs->fs_name, name->ln_number,
++ key, GIO_KEY_SIZE);
++ item->key = key;
++ item->subid = (unsigned long) fl->fl_owner;
++ item->start = fl->fl_start;
++ item->stop = fl->fl_end;
++ item->type = glq_req_type_state;
++ item->state = lg_lock_state_Unlock;
++ item->flags = 0;
++ item->error = pret.error = 0;
++
++ init_completion (&pret.sleep);
++
++ item->misc = &pret;
++ item->finish = gulm_plock_finish;
++
++ glq_queue (item);
++ /* TODO should be interruptible by signals */
++ wait_for_completion (&pret.sleep);
++
++ err = -pret.error;
++ if ( err != 0) err = posix_lock_file_wait(file, fl);
++
++fail:
++ return err;
+}
+
-+/*****************************************************************************/
-+struct lm_lockops gulm_ops = {
-+ lm_proto_name:PROTO_NAME,
-+ lm_mount:gulm_mount,
-+ lm_others_may_mount:gulm_others_may_mount,
-+ lm_unmount:gulm_unmount,
-+ lm_get_lock:gulm_get_lock,
-+ lm_put_lock:gulm_put_lock,
-+ lm_lock:gulm_lock,
-+ lm_unlock:gulm_unlock,
-+ lm_cancel:gulm_cancel,
-+ lm_hold_lvb:gulm_hold_lvb,
-+ lm_unhold_lvb:gulm_unhold_lvb,
-+ lm_sync_lvb:gulm_sync_lvb,
-+ lm_plock_get:gulm_plock_get,
-+ lm_plock:gulm_plock,
-+ lm_punlock:gulm_punlock,
-+ lm_recovery_done:gulm_recovery_done,
-+ lm_owner:THIS_MODULE,
-+};
+/* vim: set ai cin noet sw=8 ts=8 : */
-diff -urN linux-2.6.9-rc1-mm3/fs/gfs_locking/lock_gulm/gulm_prints.h linux/fs/gfs_locking/lock_gulm/gulm_prints.h
---- linux-2.6.9-rc1-mm3/fs/gfs_locking/lock_gulm/gulm_prints.h 1969-12-31 18:00:00.000000000 -0600
-+++ linux/fs/gfs_locking/lock_gulm/gulm_prints.h 2004-09-07 16:17:31.800499255 -0500
+diff -urN linux-orig/fs/gfs_locking/lock_gulm/gulm_prints.h linux-patched/fs/gfs_locking/lock_gulm/gulm_prints.h
+--- linux-orig/fs/gfs_locking/lock_gulm/gulm_prints.h 1969-12-31 18:00:00.000000000 -0600
++++ linux-patched/fs/gfs_locking/lock_gulm/gulm_prints.h 2004-10-22 11:54:55.000000000 -0500
@@ -0,0 +1,45 @@
+/******************************************************************************
+*******************************************************************************
+}
+
+#define log_nop(fmt, args...)
-+#define TICK printk("TICK==>" PROTO_NAME ": [%s:%d] pid:%ld\n",__FILE__,__LINE__,osi_pid())
++#define TICK printk("TICK==>" PROTO_NAME ": [%s:%d] pid:%d\n" , __FILE__ , __LINE__ , current->pid )
+
+#endif /*__gulm_prints_h__*/
-diff -urN linux-2.6.9-rc1-mm3/fs/gfs_locking/lock_gulm/gulm_procinfo.c linux/fs/gfs_locking/lock_gulm/gulm_procinfo.c
---- linux-2.6.9-rc1-mm3/fs/gfs_locking/lock_gulm/gulm_procinfo.c 1969-12-31 18:00:00.000000000 -0600
-+++ linux/fs/gfs_locking/lock_gulm/gulm_procinfo.c 2004-09-07 16:17:31.801499036 -0500
-@@ -0,0 +1,165 @@
-+/******************************************************************************
-+*******************************************************************************
-+**
-+** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
-+** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
-+**
-+** This copyrighted material is made available to anyone wishing to use,
-+** modify, copy, or redistribute it subject to the terms and conditions
-+** of the GNU General Public License v.2.
-+**
-+*******************************************************************************
-+******************************************************************************/
-+
-+#include "gulm.h"
-+#include <linux/kernel.h>
-+#include <linux/proc_fs.h>
-+#include "util.h"
-+
-+extern gulm_cm_t gulm_cm;
-+
-+struct proc_dir_entry *gulm_proc_dir;
-+struct proc_dir_entry *gulm_fs_proc_dir;
-+
-+/* the read operating function. */
-+int
-+gulm_fs_proc_read (char *buf, char **start, off_t off, int count, int *eof,
-+ void *data)
-+{
-+ gulm_fs_t *fs = (gulm_fs_t *) data;
-+ count = 0; /* ignore how much it wants */
-+
-+ count += sprintf (buf + count, "Filesystem: %s\nJID: %d\n"
-+ "handler_queue_cur: %d\n"
-+ "handler_queue_max: %d\n",
-+ fs->fs_name, fs->fsJID,
-+ fs->cq.task_count, fs->cq.task_max);
-+
-+ *eof = TRUE;
-+ if (off >= count)
-+ return 0;
-+ *start = buf + off;
-+ return (count - off);
-+}
-+
-+/* read the stuff for all */
-+int
-+gulm_core_proc_read (char *buf, char **start, off_t off, int count,
-+ int *eof, void *data)
-+{
-+ count = 0; /* ignore how much it wants */
-+
-+ count = sprintf (buf,
-+ "cluster id: %s\n"
-+ "my name: %s\n", gulm_cm.clusterID, gulm_cm.myName);
-+
-+ *eof = TRUE;
-+ if (off >= count)
-+ return 0;
-+ *start = buf + off;
-+ return (count - off);
-+}
-+
-+int
-+gulm_lt_proc_read (char *buf, char **start, off_t off, int count,
-+ int *eof, void *data)
-+{
-+ lock_table_t *lt = (lock_table_t *) data;
-+ count = 0; /* ignore how much it wants */
-+
-+ count += sprintf (buf + count, "\n"
-+ "lock counts:\n"
-+ " total: %d\n"
-+ " unl: %d\n"
-+ " exl: %d\n"
-+ " shd: %d\n"
-+ " dfr: %d\n"
-+ "pending: %d\n"
-+ " lvbs: %d\n"
-+ " lops: %d\n\n",
-+ lt->locks_total,
-+ lt->locks_unl,
-+ lt->locks_exl,
-+ lt->locks_shd,
-+ lt->locks_dfr,
-+ atomic_read (<->locks_pending),
-+ lt->locks_lvbs, lt->lops);
-+
-+ *eof = TRUE;
-+ if (off >= count)
-+ return 0;
-+ *start = buf + off;
-+ return (count - off);
-+}
-+
-+/* add entry to our proc folder
-+ * call this on mount.
-+ * */
-+int
-+add_to_proc (gulm_fs_t * fs)
-+{
-+ if (!(create_proc_read_entry (fs->fs_name, S_IFREG | S_IRUGO,
-+ gulm_fs_proc_dir, gulm_fs_proc_read,
-+ (void *) fs))) {
-+ log_err ("couldn't register proc entry for %s\n", fs->fs_name);
-+ return -EINVAL;
-+ }
-+ return 0;
-+}
-+
-+/* get rid of it
-+ * this on umount.
-+ * */
-+void
-+remove_from_proc (gulm_fs_t * fs)
-+{
-+ remove_proc_entry (fs->fs_name, gulm_fs_proc_dir);
-+}
-+
-+ /* create our own root dir.
-+ * initmodule
-+ * */
-+int
-+init_proc_dir (void)
-+{
-+ if ((gulm_proc_dir = proc_mkdir ("gulm", &proc_root)) == NULL) {
-+ log_err ("cannot create the gulm directory in /proc\n");
-+ return -EINVAL;
-+ }
-+ if (!(create_proc_read_entry ("core", S_IFREG | S_IRUGO, gulm_proc_dir,
-+ gulm_core_proc_read, NULL))) {
-+ log_err ("couldn't register proc entry for core\n");
-+ remove_proc_entry ("gulm", &proc_root);
-+ return -EINVAL;
-+ }
-+ if ((gulm_fs_proc_dir =
-+ proc_mkdir ("filesystems", gulm_proc_dir)) == NULL) {
-+ log_err
-+ ("cannot create the filesystems directory in /proc/gulm\n");
-+ remove_proc_entry ("core", gulm_proc_dir);
-+ remove_proc_entry ("gulm", &proc_root);
-+ return -EINVAL;
-+ }
-+ if (!(create_proc_read_entry ("lockspace", S_IFREG | S_IRUGO,
-+ gulm_proc_dir, gulm_lt_proc_read,
-+ (void *) &gulm_cm.ltpx))) {
-+ remove_proc_entry ("filesystems", gulm_proc_dir);
-+ remove_proc_entry ("core", gulm_proc_dir);
-+ remove_proc_entry ("gulm", &proc_root);
-+ return -EINVAL;
-+ }
-+
-+ return 0;
-+}
-+
-+/* destroy it
-+ * close module
-+ * */
-+void
-+remove_proc_dir (void)
-+{
-+ remove_proc_entry ("lockspace", gulm_proc_dir);
-+ remove_proc_entry ("filesystems", gulm_proc_dir);
-+ remove_proc_entry ("core", gulm_proc_dir);
-+ remove_proc_entry ("gulm", &proc_root);
-+}
-diff -urN linux-2.6.9-rc1-mm3/fs/gfs_locking/lock_gulm/gulm_procinfo.h linux/fs/gfs_locking/lock_gulm/gulm_procinfo.h
---- linux-2.6.9-rc1-mm3/fs/gfs_locking/lock_gulm/gulm_procinfo.h 1969-12-31 18:00:00.000000000 -0600
-+++ linux/fs/gfs_locking/lock_gulm/gulm_procinfo.h 2004-09-07 16:17:31.801499036 -0500
-@@ -0,0 +1,22 @@
-+/******************************************************************************
-+*******************************************************************************
-+**
-+** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
-+** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
-+**
-+** This copyrighted material is made available to anyone wishing to use,
-+** modify, copy, or redistribute it subject to the terms and conditions
-+** of the GNU General Public License v.2.
-+**
-+*******************************************************************************
-+******************************************************************************/
-+
-+#ifndef __procinfo_h__
-+#define __procinfo_h__
-+int add_to_proc (gulm_fs_t * fs);
-+void remove_from_proc (gulm_fs_t * fs);
-+void remove_locktables_from_proc (void);
-+void add_locktables_to_proc (void);
-+int init_proc_dir (void);
-+void remove_proc_dir (void);
-+#endif /*__procinfo_h__*/
-diff -urN linux-2.6.9-rc1-mm3/fs/gfs_locking/lock_gulm/handler.c linux/fs/gfs_locking/lock_gulm/handler.c
---- linux-2.6.9-rc1-mm3/fs/gfs_locking/lock_gulm/handler.c 1969-12-31 18:00:00.000000000 -0600
-+++ linux/fs/gfs_locking/lock_gulm/handler.c 2004-09-07 16:17:31.802498817 -0500
+diff -urN linux-orig/fs/gfs_locking/lock_gulm/handler.c linux-patched/fs/gfs_locking/lock_gulm/handler.c
+--- linux-orig/fs/gfs_locking/lock_gulm/handler.c 1969-12-31 18:00:00.000000000 -0600
++++ linux-patched/fs/gfs_locking/lock_gulm/handler.c 2004-10-22 11:54:55.000000000 -0500
@@ -0,0 +1,343 @@
+/******************************************************************************
+*******************************************************************************
+ if (cnt <= 0)
+ cnt = 2;
+ for (; cnt > 0; cnt--) {
-+ err = kernel_thread (handler, cq, 0); /* XXX linux part */
++ err = kernel_thread (handler, cq, 0);
+ if (err < 0) {
+ stop_callback_qu (cq);
+ /* calling stop here might not behave correctly in all error
+ }
+ return 0;
+}
-diff -urN linux-2.6.9-rc1-mm3/fs/gfs_locking/lock_gulm/handler.h linux/fs/gfs_locking/lock_gulm/handler.h
---- linux-2.6.9-rc1-mm3/fs/gfs_locking/lock_gulm/handler.h 1969-12-31 18:00:00.000000000 -0600
-+++ linux/fs/gfs_locking/lock_gulm/handler.h 2004-09-07 16:17:31.802498817 -0500
+diff -urN linux-orig/fs/gfs_locking/lock_gulm/handler.h linux-patched/fs/gfs_locking/lock_gulm/handler.h
+--- linux-orig/fs/gfs_locking/lock_gulm/handler.h 1969-12-31 18:00:00.000000000 -0600
++++ linux-patched/fs/gfs_locking/lock_gulm/handler.h 2004-10-22 11:54:55.000000000 -0500
@@ -0,0 +1,42 @@
+/******************************************************************************
+*******************************************************************************
+void display_handler_queue (callback_qu_t * cq);
+
+#endif /*__handler_c__*/
-diff -urN linux-2.6.9-rc1-mm3/fs/gfs_locking/lock_gulm/lg_core.c linux/fs/gfs_locking/lock_gulm/lg_core.c
---- linux-2.6.9-rc1-mm3/fs/gfs_locking/lock_gulm/lg_core.c 1969-12-31 18:00:00.000000000 -0600
-+++ linux/fs/gfs_locking/lock_gulm/lg_core.c 2004-09-07 16:17:31.803498598 -0500
-@@ -0,0 +1,724 @@
+diff -urN linux-orig/fs/gfs_locking/lock_gulm/lg_core.c linux-patched/fs/gfs_locking/lock_gulm/lg_core.c
+--- linux-orig/fs/gfs_locking/lock_gulm/lg_core.c 1969-12-31 18:00:00.000000000 -0600
++++ linux-patched/fs/gfs_locking/lock_gulm/lg_core.c 2004-10-22 11:54:55.000000000 -0500
+@@ -0,0 +1,671 @@
+/******************************************************************************
+*******************************************************************************
+**
+ do {
+ if ((err = xdr_dec_uint8 (dec, &x_state)) != 0)
+ break;
++ if ((err = xdr_dec_uint8 (dec, &x_mode)) != 0)
++ break;
+ if (x_state == gio_Mbr_ama_Slave) {
+ if ((err = xdr_dec_ipv6 (dec, &x_ip)) != 0)
+ break;
+ err = 0;
+ goto exit;
+ }
-+ err = ccbp->statechange (misc, x_state, &x_ip, lg->cfba);
++ err = ccbp->statechange (misc, x_state, x_mode, &x_ip, lg->cfba);
+ goto exit;
+ } else if (gulm_core_mbr_updt == x_code) {
+ do {
+ err = ccbp->service_list (misc, lglcb_stop, NULL);
+ goto exit;
+ } else if (gulm_info_stats_rpl == x_code) {
-+ if (ccbp->status != NULL) {
-+ if ((err =
-+ ccbp->status (misc, lglcb_start, NULL, NULL)) != 0)
-+ goto exit;
-+ }
+ do {
+ if ((err = xdr_dec_list_start (dec)) != 0)
+ break;
+ xdr_dec_string_ag (dec, &lg->cfbb,
+ &lg->cfbb_len)) != 0)
+ break;
-+ if (ccbp->status != NULL) {
-+ if ((err =
-+ ccbp->status (misc, lglcb_item,
-+ lg->cfba,
-+ lg->cfbb)) != 0) {
-+ goto exit;
-+ }
-+ }
+ }
+ } while (0);
-+ if (err != 0) {
-+ goto exit;
-+ }
-+ if (ccbp->status == NULL) {
-+ err = 0;
-+ goto exit;
-+ }
-+ err = ccbp->status (misc, lglcb_stop, NULL, NULL);
+ goto exit;
+ } else if (gulm_err_reply == x_code) {
+ if ((err = xdr_dec_uint32 (dec, &x_code)) != 0)
+ return err;
+}
+
-+/**
-+ * lg_core_status -
-+ * @lgp:
-+ *
-+ *
-+ * Returns: int
-+ */
-+int
-+lg_core_status (gulm_interface_p lgp)
-+{
-+ gulm_interface_t *lg = (gulm_interface_t *) lgp;
-+ xdr_enc_t *enc;
-+ int err;
-+
-+ /* make sure it is a gulm_interface_p. */
-+ if (lg == NULL)
-+ return -EINVAL;
-+ if (lg->first_magic != LGMAGIC || lg->last_magic != LGMAGIC)
-+ return -EINVAL;
-+
-+ if (lg->core_fd < 0 || lg->core_enc == NULL || lg->core_dec == NULL)
-+ return -EINVAL;
-+
-+ enc = lg->core_enc;
-+
-+ down (&lg->core_sender);
-+ do {
-+ if ((err = xdr_enc_uint32 (enc, gulm_info_stats_req)) != 0)
-+ break;
-+ if ((err = xdr_enc_flush (enc)) != 0)
-+ break;
-+ } while (0);
-+ up (&lg->core_sender);
-+ return err;
-+}
-diff -urN linux-2.6.9-rc1-mm3/fs/gfs_locking/lock_gulm/lg_lock.c linux/fs/gfs_locking/lock_gulm/lg_lock.c
---- linux-2.6.9-rc1-mm3/fs/gfs_locking/lock_gulm/lg_lock.c 1969-12-31 18:00:00.000000000 -0600
-+++ linux/fs/gfs_locking/lock_gulm/lg_lock.c 2004-09-07 16:17:31.804498378 -0500
-@@ -0,0 +1,667 @@
++/* vim: set ai cin noet sw=8 ts=8 : */
+diff -urN linux-orig/fs/gfs_locking/lock_gulm/lg_lock.c linux-patched/fs/gfs_locking/lock_gulm/lg_lock.c
+--- linux-orig/fs/gfs_locking/lock_gulm/lg_lock.c 1969-12-31 18:00:00.000000000 -0600
++++ linux-patched/fs/gfs_locking/lock_gulm/lg_lock.c 2004-10-22 11:54:55.000000000 -0500
+@@ -0,0 +1,745 @@
+/******************************************************************************
+*******************************************************************************
+**
+ gulm_interface_t *lg = (gulm_interface_t *) lgp;
+ xdr_dec_t *dec;
+ int err = 0;
++ uint64_t x_subid, x_start, x_stop;
+ uint32_t x_code, x_error, x_flags;
+ uint16_t x_keylen, x_lvblen = 0;
+ uint8_t x_state;
+ xdr_dec_raw_ag (dec, (void **) &lg->lfba,
+ &lg->lfba_len, &x_keylen)) != 0)
+ break;
++ if ((err = xdr_dec_uint64(dec, &x_subid)) != 0 )
++ break;
++ if ((err = xdr_dec_uint64(dec, &x_start)) != 0 )
++ break;
++ if ((err = xdr_dec_uint64(dec, &x_stop)) != 0 )
++ break;
+ if ((err = xdr_dec_uint8 (dec, &x_state)) != 0)
+ break;
+ if ((err = xdr_dec_uint32 (dec, &x_flags)) != 0)
+ goto exit;
+ }
+ err = cbp->lock_state (misc, &lg->lfba[4], x_keylen - 4,
++ x_subid, x_start, x_stop,
+ x_state, x_flags, x_error,
+ lg->lfbb, x_lvblen);
+ goto exit;
+ xdr_dec_raw_ag (dec, (void **) &lg->lfba,
+ &lg->lfba_len, &x_keylen)) != 0)
+ break;
++ if ((err = xdr_dec_uint64(dec, &x_subid)) != 0 )
++ break;
+ if ((err = xdr_dec_uint8 (dec, &x_state)) != 0)
+ break;
+ if ((err = xdr_dec_uint32 (dec, &x_error)) != 0)
+ goto exit;
+ }
+ err =
-+ cbp->lock_action (misc, &lg->lfba[4], x_keylen - 4, x_state,
-+ x_error);
++ cbp->lock_action (misc, &lg->lfba[4], x_keylen - 4,
++ x_subid, x_state, x_error);
++ goto exit;
++ } else if (gulm_lock_query_rpl == x_code) {
++ uint64_t x_c_subid=0, x_c_start=0, x_c_stop=0;
++ uint8_t x_c_state=0;
++ do {
++ if ((err =
++ xdr_dec_raw_ag (dec, (void **) &lg->lfba,
++ &lg->lfba_len, &x_keylen)) != 0)
++ break;
++ if ((err = xdr_dec_uint64(dec, &x_subid)) != 0 )
++ break;
++ if ((err = xdr_dec_uint64(dec, &x_start)) != 0 )
++ break;
++ if ((err = xdr_dec_uint64(dec, &x_stop)) != 0 )
++ break;
++ if ((err = xdr_dec_uint8 (dec, &x_state)) != 0)
++ break;
++ if ((err = xdr_dec_uint32 (dec, &x_error)) != 0)
++ break;
++ /* i realize that I'm pretty much ignoring the fact that
++ * this is can be a list of items. As of current, there
++ * is never more than one item on this list.
++ * I think I made it a list so it could be in the future,
++ * even though I cannot think of why.
++ */
++ if ((err = xdr_dec_list_start(dec)) != 0)
++ break;
++ while (xdr_dec_list_stop(dec) != 0) {
++ if((err = xdr_dec_string_ag(dec, &lg->lfbb, &lg->lfbb_len)) != 0) break;
++ if((err = xdr_dec_uint64(dec, &x_c_subid)) != 0 ) break;
++ if((err = xdr_dec_uint64(dec, &x_c_start)) != 0 ) break;
++ if((err = xdr_dec_uint64(dec, &x_c_stop)) != 0 ) break;
++ if((err = xdr_dec_uint8(dec, &x_c_state)) != 0) break;
++ }
++ } while (0);
++ if (err != 0) {
++ goto exit;
++ }
++ if (x_keylen <= 4) {
++ err = -EPROTO; /* or something */
++ goto exit;
++ }
++ if (cbp->lock_query == NULL) {
++ err = 0;
++ goto exit;
++ }
++ err = cbp->lock_query (misc, &lg->lfba[4], x_keylen - 4,
++ x_subid, x_start, x_stop, x_state,
++ x_error, lg->lfbb, x_c_subid,
++ x_c_start, x_c_stop, x_c_state);
+ goto exit;
+ } else if (gulm_lock_cb_state == x_code) {
+ do {
+ xdr_dec_raw_ag (dec, (void **) &lg->lfba,
+ &lg->lfba_len, &x_keylen)) != 0)
+ break;
++ if ((err = xdr_dec_uint64(dec, &x_subid)) != 0 )
++ break;
+ if ((err = xdr_dec_uint8 (dec, &x_state)) != 0)
+ break;
+ } while (0);
+ }
+ err =
+ cbp->drop_lock_req (misc, &lg->lfba[4], x_keylen - 4,
-+ x_state);
++ x_subid, x_state);
+ goto exit;
+ } else if (gulm_lock_cb_dropall == x_code) {
+ if (cbp->drop_all == NULL) {
+ err = cbp->drop_all (misc);
+ goto exit;
+ } else if (gulm_info_stats_rpl == x_code) {
-+ if (cbp->status != NULL) {
-+ if ((err =
-+ cbp->status (misc, lglcb_start, NULL, NULL)) != 0)
-+ goto exit;
-+ }
+ do {
+ if ((err = xdr_dec_list_start (dec)) != 0)
+ break;
+ xdr_dec_string_ag (dec, &lg->lfbb,
+ &lg->lfbb_len)) != 0)
+ break;
-+ if (cbp->status != NULL) {
-+ if ((err =
-+ cbp->status (misc, lglcb_item,
-+ lg->lfba,
-+ lg->lfbb)) != 0) {
-+ break;
-+ }
-+ }
+ }
+ } while (0);
-+ if (err != 0) {
-+ goto exit;
-+ }
-+ if (cbp->status == NULL) {
-+ err = 0;
-+ goto exit;
-+ }
-+ err = cbp->status (misc, lglcb_stop, NULL, NULL);
+ goto exit;
+ } else if (gulm_err_reply == x_code) {
+ do {
+ */
+int
+lg_lock_state_req (gulm_interface_p lgp, uint8_t * key, uint16_t keylen,
++ uint64_t subid, uint64_t start, uint64_t stop,
+ uint8_t state, uint32_t flags, uint8_t * LVB,
+ uint16_t LVBlen)
+{
+ break;
+ if ((err = xdr_enc_raw_iov (enc, 2, iov)) != 0)
+ break;
++ if ((err = xdr_enc_uint64 (enc, subid)) != 0)
++ break;
++ if ((err = xdr_enc_uint64 (enc, start)) != 0)
++ break;
++ if ((err = xdr_enc_uint64 (enc, stop)) != 0)
++ break;
+ if ((err = xdr_enc_uint8 (enc, state)) != 0)
+ break;
+ if ((err = xdr_enc_uint32 (enc, iflgs)) != 0)
+ * Returns: int
+ */
+int
-+lg_lock_cancel_req (gulm_interface_p lgp, uint8_t * key, uint16_t keylen)
++lg_lock_cancel_req (gulm_interface_p lgp, uint8_t * key, uint16_t keylen,
++ uint64_t subid)
+{
+ gulm_interface_t *lg = (gulm_interface_t *) lgp;
+ struct iovec iov[2];
+ break;
+ if ((err = xdr_enc_raw_iov (enc, 2, iov)) != 0)
+ break;
++ if ((err = xdr_enc_uint64 (enc, subid)) != 0)
++ break;
+ if ((err = xdr_enc_uint8 (enc, gio_lck_st_Cancel)) != 0)
+ break;
+ if ((err = xdr_enc_flush (enc)) != 0)
+ */
+int
+lg_lock_action_req (gulm_interface_p lgp, uint8_t * key, uint16_t keylen,
-+ uint8_t action, uint8_t * LVB, uint16_t LVBlen)
++ uint64_t subid, uint8_t action, uint8_t * LVB,
++ uint16_t LVBlen)
+{
+ gulm_interface_t *lg = (gulm_interface_t *) lgp;
+ struct iovec iov[2];
+ break;
+ if ((err = xdr_enc_raw_iov (enc, 2, iov)) != 0)
+ break;
++ if ((err = xdr_enc_uint64 (enc, subid)) != 0)
++ break;
+ if ((err = xdr_enc_uint8 (enc, action)) != 0)
+ break;
+ if (action == gio_lck_st_SyncLVB)
+}
+
+/**
++ * lg_lock_query_req -
++ * @lgp:
++ * @key:
++ * @keylen:
++ * @subid:
++ * @start:
++ * @stop:
++ * @state:
++ *
++ *
++ * Returns: int
++ */
++int lg_lock_query_req(gulm_interface_p lgp, uint8_t *key, uint16_t keylen,
++ uint64_t subid, uint64_t start, uint64_t stop, uint8_t state)
++{
++ gulm_interface_t *lg = (gulm_interface_t *)lgp;
++ struct iovec iov[2];
++ xdr_enc_t *enc;
++ int err;
++
++ /* make sure it is a gulm_interface_p. */
++ if( lg == NULL ) return -EINVAL;
++ if( lg->first_magic != LGMAGIC || lg->last_magic != LGMAGIC ) return -EINVAL;
++
++ if( lg->lock_fd < 0 || lg->lock_enc == NULL || lg->lock_dec == NULL)
++ return -EINVAL;
++
++ if( state != lg_lock_state_Unlock &&
++ state != lg_lock_state_Exclusive &&
++ state != lg_lock_state_Deferred &&
++ state != lg_lock_state_Shared )
++ return -EINVAL;
++
++ if( stop < start ) return -EINVAL;
++
++ enc = lg->lock_enc;
++
++ iov[0].iov_base = lg->lockspace;
++ iov[0].iov_len = 4;
++ iov[1].iov_base = key;
++ iov[1].iov_len = keylen;
++
++ down (&lg->lock_sender);
++ do{
++ if((err = xdr_enc_uint32(enc, gulm_lock_query_req)) != 0 ) break;
++ if((err = xdr_enc_raw_iov(enc, 2, iov)) != 0 ) break;
++ if((err = xdr_enc_uint64(enc, subid)) != 0) break;
++ if((err = xdr_enc_uint64(enc, start)) != 0) break;
++ if((err = xdr_enc_uint64(enc, stop)) != 0) break;
++ if((err = xdr_enc_uint8(enc, state)) != 0 ) break;
++ if((err = xdr_enc_flush(enc)) != 0 ) break;
++ }while(0);
++ up (&lg->lock_sender);
++ return err;
++}
++
++/**
+ * lg_lock_drop_exp -
+ * @ulm_interface_p:
+ * @holder:
+ return err;
+}
+
-+/**
-+ * lg_lock_status -
-+ * @lgp:
-+ *
-+ *
-+ * Returns: int
-+ */
-+int
-+lg_lock_status (gulm_interface_p lgp)
-+{
-+ gulm_interface_t *lg = (gulm_interface_t *) lgp;
-+ xdr_enc_t *enc;
-+ int err;
-+
-+ /* make sure it is a gulm_interface_p. */
-+ if (lg == NULL)
-+ return -EINVAL;
-+ if (lg->first_magic != LGMAGIC || lg->last_magic != LGMAGIC)
-+ return -EINVAL;
-+
-+ if (lg->lock_fd < 0 || lg->lock_enc == NULL || lg->lock_dec == NULL)
-+ return -EINVAL;
-+
-+ enc = lg->lock_enc;
++/* vim: set ai cin noet sw=8 ts=8 : */
+
-+ down (&lg->lock_sender);
-+ do {
-+ if ((err = xdr_enc_uint32 (enc, gulm_info_stats_req)) != 0)
-+ break;
-+ if ((err = xdr_enc_flush (enc)) != 0)
-+ break;
-+ } while (0);
-+ up (&lg->lock_sender);
-+ return err;
-+}
-diff -urN linux-2.6.9-rc1-mm3/fs/gfs_locking/lock_gulm/lg_main.c linux/fs/gfs_locking/lock_gulm/lg_main.c
---- linux-2.6.9-rc1-mm3/fs/gfs_locking/lock_gulm/lg_main.c 1969-12-31 18:00:00.000000000 -0600
-+++ linux/fs/gfs_locking/lock_gulm/lg_main.c 2004-09-07 16:17:31.804498378 -0500
+diff -urN linux-orig/fs/gfs_locking/lock_gulm/lg_main.c linux-patched/fs/gfs_locking/lock_gulm/lg_main.c
+--- linux-orig/fs/gfs_locking/lock_gulm/lg_main.c 1969-12-31 18:00:00.000000000 -0600
++++ linux-patched/fs/gfs_locking/lock_gulm/lg_main.c 2004-10-22 11:54:55.000000000 -0500
@@ -0,0 +1,209 @@
+/******************************************************************************
+*******************************************************************************
+
+ return 0;
+}
-diff -urN linux-2.6.9-rc1-mm3/fs/gfs_locking/lock_gulm/lg_priv.h linux/fs/gfs_locking/lock_gulm/lg_priv.h
---- linux-2.6.9-rc1-mm3/fs/gfs_locking/lock_gulm/lg_priv.h 1969-12-31 18:00:00.000000000 -0600
-+++ linux/fs/gfs_locking/lock_gulm/lg_priv.h 2004-09-07 16:17:31.805498159 -0500
+diff -urN linux-orig/fs/gfs_locking/lock_gulm/lg_priv.h linux-patched/fs/gfs_locking/lock_gulm/lg_priv.h
+--- linux-orig/fs/gfs_locking/lock_gulm/lg_priv.h 1969-12-31 18:00:00.000000000 -0600
++++ linux-patched/fs/gfs_locking/lock_gulm/lg_priv.h 2004-10-22 11:54:55.000000000 -0500
@@ -0,0 +1,86 @@
+/******************************************************************************
+*******************************************************************************
+#endif
+
+#endif /*__lg_priv_h__*/
-diff -urN linux-2.6.9-rc1-mm3/fs/gfs_locking/lock_gulm/libgulm.h linux/fs/gfs_locking/lock_gulm/libgulm.h
---- linux-2.6.9-rc1-mm3/fs/gfs_locking/lock_gulm/libgulm.h 1969-12-31 18:00:00.000000000 -0600
-+++ linux/fs/gfs_locking/lock_gulm/libgulm.h 2004-09-07 16:17:31.805498159 -0500
-@@ -0,0 +1,191 @@
+diff -urN linux-orig/fs/gfs_locking/lock_gulm/libgulm.h linux-patched/fs/gfs_locking/lock_gulm/libgulm.h
+--- linux-orig/fs/gfs_locking/lock_gulm/libgulm.h 1969-12-31 18:00:00.000000000 -0600
++++ linux-patched/fs/gfs_locking/lock_gulm/libgulm.h 2004-10-22 11:54:55.000000000 -0500
+@@ -0,0 +1,197 @@
+/******************************************************************************
+*******************************************************************************
+**
+ int (*logout_reply) (void *misc);
+ int (*nodelist) (void *misc, lglcb_t type, char *name,
+ struct in6_addr * ip, uint8_t state);
-+ int (*statechange) (void *misc, uint8_t corestate,
++ int (*statechange) (void *misc, uint8_t corestate, uint8_t quorate,
+ struct in6_addr * masterip, char *mastername);
+ int (*nodechange) (void *misc, char *nodename,
+ struct in6_addr * nodeip, uint8_t nodestate);
+ int (*service_list) (void *misc, lglcb_t type, char *service);
-+ int (*status) (void *misc, lglcb_t type, char *key, char *value);
+ int (*error) (void *misc, uint32_t err);
+} lg_core_callbacks_t;
+
+int lg_core_forceexpire (gulm_interface_p, char *node_name);
+int lg_core_forcepending (gulm_interface_p);
+
-+int lg_core_status (gulm_interface_p);
-+
+/* Node states
+ * First three are actual states, as well as changes. Last is only a node
+ * change message.
+ int (*login_reply) (void *misc, uint32_t error, uint8_t which);
+ int (*logout_reply) (void *misc);
+ int (*lock_state) (void *misc, uint8_t * key, uint16_t keylen,
++ uint64_t subid, uint64_t start, uint64_t stop,
+ uint8_t state, uint32_t flags, uint32_t error,
+ uint8_t * LVB, uint16_t LVBlen);
+ int (*lock_action) (void *misc, uint8_t * key, uint16_t keylen,
-+ uint8_t action, uint32_t error);
-+ int (*cancel_reply) (void *misc, uint8_t * key, uint16_t keylen,
-+ uint32_t error);
++ uint64_t subid, uint8_t action, uint32_t error);
+ int (*drop_lock_req) (void *misc, uint8_t * key, uint16_t keylen,
-+ uint8_t state);
++ uint64_t subid, uint8_t state);
++ int (*lock_query) (void *misc, uint8_t * key, uint16_t keylen,
++ uint64_t subid, uint64_t start, uint64_t stop,
++ uint8_t state, uint32_t error, uint8_t * cnode,
++ uint64_t csubid, uint64_t cstart, uint64_t cstop,
++ uint8_t cstate);
+ int (*drop_all) (void *misc);
-+ int (*status) (void *misc, lglcb_t type, char *key, char *value);
+ int (*error) (void *misc, uint32_t err);
+} lg_lockspace_callbacks_t;
+
+int lg_lock_login (gulm_interface_p, uint8_t lockspace[4]);
+int lg_lock_logout (gulm_interface_p);
+int lg_lock_state_req (gulm_interface_p, uint8_t * key, uint16_t keylen,
++ uint64_t subid, uint64_t start, uint64_t stop,
+ uint8_t state, uint32_t flags, uint8_t * LVB,
+ uint16_t LVBlen);
-+int lg_lock_cancel_req (gulm_interface_p, uint8_t * key, uint16_t keylen);
-+int lg_lock_action_req (gulm_interface_p, uint8_t * key,
-+ uint16_t keylen, uint8_t action,
++int lg_lock_cancel_req (gulm_interface_p, uint8_t * key, uint16_t keylen,
++ uint64_t subid);
++int lg_lock_action_req (gulm_interface_p, uint8_t * key, uint16_t keylen,
++ uint64_t subid, uint8_t action,
+ uint8_t * LVB, uint16_t LVBlen);
++int lg_lock_query_req(gulm_interface_p lgp, uint8_t *key, uint16_t keylen,
++ uint64_t subid, uint64_t start, uint64_t stop, uint8_t state);
+int lg_lock_drop_exp (gulm_interface_p, uint8_t * holder,
+ uint8_t * keymask, uint16_t kmlen);
-+int lg_lock_status (gulm_interface_p);
+
+/* state requests */
+#define lg_lock_state_Unlock (0x00)
+#define lg_lock_flag_IgnoreExp (0x00000008)
+#define lg_lock_flag_Cachable (0x00000020)
+#define lg_lock_flag_Piority (0x00000040)
++#define lg_lock_flag_NoCallBacks (0x00000100)
+
+/* These are the possible values that can be in the error fields. */
+#define lg_err_Ok (0)
+#define lg_err_NoSuchName (1018)
+
+#endif /*__libgulm_h__*/
-diff -urN linux-2.6.9-rc1-mm3/fs/gfs_locking/lock_gulm/linux_gulm_main.c linux/fs/gfs_locking/lock_gulm/linux_gulm_main.c
---- linux-2.6.9-rc1-mm3/fs/gfs_locking/lock_gulm/linux_gulm_main.c 1969-12-31 18:00:00.000000000 -0600
-+++ linux/fs/gfs_locking/lock_gulm/linux_gulm_main.c 2004-09-07 16:17:31.806497940 -0500
-@@ -0,0 +1,109 @@
++
++/* vim: set ai cin noet sw=8 ts=8 : */
+diff -urN linux-orig/fs/gfs_locking/lock_gulm/lm_interface.h linux-patched/fs/gfs_locking/lock_gulm/lm_interface.h
+--- linux-orig/fs/gfs_locking/lock_gulm/lm_interface.h 1969-12-31 18:00:00.000000000 -0600
++++ linux-patched/fs/gfs_locking/lock_gulm/lm_interface.h 2004-10-22 11:54:55.000000000 -0500
+@@ -0,0 +1,191 @@
+/******************************************************************************
+*******************************************************************************
+**
+*******************************************************************************
+******************************************************************************/
+
-+#define EXPORT_SYMTAB
-+#define WANT_DEBUG_NAMES
-+#define WANT_GMALLOC_NAMES
-+#define EXTERN
-+#include "gulm.h"
-+
-+#include <linux/init.h>
-+
-+#include "util.h"
-+#include "gulm_procinfo.h"
-+
-+MODULE_DESCRIPTION ("Grand Unified Locking Module " GULM_RELEASE_NAME);
-+MODULE_AUTHOR ("Red Hat, Inc.");
-+MODULE_LICENSE ("GPL");
-+
-+extern gulm_cm_t gulm_cm;
-+
-+/**
-+ * init_gulm - Initialize the gulm module
-+ *
-+ * Returns: 0 on success, -EXXX on failure
-+ */
-+int __init
-+init_gulm (void)
-+{
-+ int error;
-+
-+ memset (&gulm_cm, 0, sizeof (gulm_cm_t));
-+ gulm_cm.loaded = FALSE;
-+ gulm_cm.hookup = NULL;
-+
-+ /* register with the lm layers. */
-+ error = lm_register_proto (&gulm_ops);
-+ if (error)
-+ goto fail;
-+
-+ error = init_proc_dir ();
-+ if (error != 0) {
-+ goto fail_lm;
-+ }
-+
-+ init_gulm_fs ();
-+
-+ printk ("Gulm %s (built %s %s) installed\n",
-+ GULM_RELEASE_NAME, __DATE__, __TIME__);
-+
-+ return 0;
-+
-+ fail_lm:
-+ lm_unregister_proto (&gulm_ops);
++/*
+
-+ fail:
-+ return error;
-+}
++ Sooner or later, I need to put all the documentation back into this file.
++ In the mean time, here are some notes.
+
-+/**
-+ * exit_gulm - cleanup the gulm module
-+ *
-+ */
++ - The lock module is now responsible for STOMITHing the an expired
++ client before calling the callback with type LM_CB_NEED_RECOVERY.
+
-+void __exit
-+exit_gulm (void)
-+{
-+ remove_proc_dir ();
-+ lm_unregister_proto (&gulm_ops);
-+}
++ - If mount() operation returns first == TRUE, GFS will check all the
++ journals. GFS itself can't/shouldn't stomith the machines, so the lock module
++ needs to make sure that there are no zombie machines on any of the
++ journals. (i.e. this should probably be on the first mount of the lock
++ space where all mounts by other machines are blocked.) GFS will call
++ others_may_mount() when the filesystem is in a consistent state.
+
-+module_init (init_gulm);
-+module_exit (exit_gulm);
++ - GFS can issue multiple simultaneous get_lock()s for the same lockname.
++ The lock module needs to deal with it, either by 1) building a hash table
++ to lookup the structures and keeping a reference count so there is only
++ on lm_lock_t for a given lockname. or 2) just dealing with multiple
++ lm_lock_t structures for a given lockname.
+
-+/* the libgulm.h interface. */
-+EXPORT_SYMBOL (lg_initialize);
-+EXPORT_SYMBOL (lg_release);
++*/
+
-+EXPORT_SYMBOL (lg_core_handle_messages);
-+EXPORT_SYMBOL (lg_core_selector);
-+EXPORT_SYMBOL (lg_core_login);
-+EXPORT_SYMBOL (lg_core_logout);
-+EXPORT_SYMBOL (lg_core_nodeinfo);
-+EXPORT_SYMBOL (lg_core_nodelist);
-+EXPORT_SYMBOL (lg_core_servicelist);
-+EXPORT_SYMBOL (lg_core_corestate);
-+EXPORT_SYMBOL (lg_core_shutdown);
-+EXPORT_SYMBOL (lg_core_forceexpire);
-+EXPORT_SYMBOL (lg_core_forcepending);
-+EXPORT_SYMBOL (lg_core_status);
++#ifndef __LM_INTERFACE_DOT_H__
++#define __LM_INTERFACE_DOT_H__
+
-+EXPORT_SYMBOL (lg_lock_handle_messages);
-+EXPORT_SYMBOL (lg_lock_selector);
-+EXPORT_SYMBOL (lg_lock_login);
-+EXPORT_SYMBOL (lg_lock_logout);
-+EXPORT_SYMBOL (lg_lock_state_req);
-+EXPORT_SYMBOL (lg_lock_cancel_req);
-+EXPORT_SYMBOL (lg_lock_action_req);
-+EXPORT_SYMBOL (lg_lock_drop_exp);
-+EXPORT_SYMBOL (lg_lock_status);
-diff -urN linux-2.6.9-rc1-mm3/fs/gfs_locking/lock_gulm/load_info.c linux/fs/gfs_locking/lock_gulm/load_info.c
---- linux-2.6.9-rc1-mm3/fs/gfs_locking/lock_gulm/load_info.c 1969-12-31 18:00:00.000000000 -0600
-+++ linux/fs/gfs_locking/lock_gulm/load_info.c 2004-09-07 16:17:31.806497940 -0500
-@@ -0,0 +1,95 @@
-+/******************************************************************************
-+*******************************************************************************
-+**
-+** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
-+** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
-+**
-+** This copyrighted material is made available to anyone wishing to use,
-+** modify, copy, or redistribute it subject to the terms and conditions
-+** of the GNU General Public License v.2.
-+**
-+*******************************************************************************
-+******************************************************************************/
++typedef void lm_lockspace_t;
++typedef void lm_lock_t;
++typedef void lm_fsdata_t;
++typedef void (*lm_callback_t) (lm_fsdata_t *fsdata, unsigned int type,
++ void *data);
+
-+#include "gulm.h"
++/* Flags for the struct lm_lockstruct->ls_flags field */
+
-+#include <linux/kernel.h>
-+#include <linux/sched.h>
-+#define __KERNEL_SYSCALLS__
-+#include <linux/unistd.h>
++#define LM_LSFLAG_LOCAL (0x00000001)
++#define LM_LSFLAG_ASYNC (0x00000002)
+
-+#include <linux/utsname.h> /* for extern system_utsname */
++/* Lock types */
+
-+#include "util.h"
++#define LM_TYPE_RESERVED (0x00)
++#define LM_TYPE_NONDISK (0x01)
++#define LM_TYPE_INODE (0x02)
++#define LM_TYPE_RGRP (0x03)
++#define LM_TYPE_META (0x04)
++#define LM_TYPE_IOPEN (0x05)
++#define LM_TYPE_FLOCK (0x06)
++#define LM_TYPE_PLOCK (0x07)
++#define LM_TYPE_QUOTA (0x08)
+
-+gulm_cm_t gulm_cm;
++/* States passed to lock() */
+
-+/**
-+ * init_ltpx -
-+ */
-+int
-+init_ltpx (void)
-+{
-+ int j;
-+ lock_table_t *lt = &gulm_cm.ltpx;
-+
-+ INIT_LIST_HEAD (<->to_be_sent);
-+ spin_lock_init (<->queue_sender);
-+ init_waitqueue_head (<->send_wchan);
-+ lt->magic_one = 0xAAAAAAAA;
-+ init_MUTEX (<->sender);
-+ init_completion (<->startup);
-+ atomic_set (<->locks_pending, 0);
-+ lt->hashbuckets = 8191;
-+ lt->hshlk = kmalloc (sizeof (spinlock_t) * lt->hashbuckets, GFP_KERNEL);
-+ if (lt->hshlk == NULL)
-+ return -ENOMEM;
-+ lt->lkhsh =
-+ kmalloc (sizeof (struct list_head) * lt->hashbuckets, GFP_KERNEL);
-+ if (lt->lkhsh == NULL) {
-+ kfree (lt->hshlk);
-+ return -ENOMEM;
-+ }
-+ for (j = 0; j < lt->hashbuckets; j++) {
-+ spin_lock_init (<->hshlk[j]);
-+ INIT_LIST_HEAD (<->lkhsh[j]);
-+ }
-+ return 0;
-+}
++#define LM_ST_UNLOCKED (0)
++#define LM_ST_EXCLUSIVE (1)
++#define LM_ST_DEFERRED (2)
++#define LM_ST_SHARED (3)
+
-+/**
-+ * load_info -
-+ * @hostdata: < optionally override the name of this node.
-+ *
-+ * Returns: int
-+ */
-+int
-+load_info (char *hostdata)
-+{
-+ int err = 0;
++/* Flags passed to lock() */
+
-+ if (gulm_cm.loaded)
-+ goto exit;
++#define LM_FLAG_TRY (0x00000001)
++#define LM_FLAG_TRY_1CB (0x00000002)
++#define LM_FLAG_NOEXP (0x00000004)
++#define LM_FLAG_ANY (0x00000008)
++#define LM_FLAG_PRIORITY (0x00000010)
+
-+ gulm_cm.verbosity = 0;
-+ if (hostdata != NULL && strlen (hostdata) > 0) {
-+ strncpy (gulm_cm.myName, hostdata, 64);
-+ } else {
-+ strncpy (gulm_cm.myName, system_utsname.nodename, 64);
-+ }
-+ gulm_cm.myName[63] = '\0';
++/* Flags returned by lock() */
+
-+ /* breaking away from ccs. just hardcoding defaults here.
-+ * Noone really used these anyways and if ppl want them badly, we'll
-+ * find another way to set them. (modprobe options for example.)
-+ * */
-+ gulm_cm.handler_threads = 2;
-+ gulm_cm.verbosity = lgm_Network | lgm_Stomith | lgm_Forking;
++#define LM_OUT_ST_MASK (0x00000003)
++#define LM_OUT_CACHEABLE (0x00000004)
++#define LM_OUT_CANCELED (0x00000008)
++#define LM_OUT_NEED_E (0x00000010)
++#define LM_OUT_NEED_D (0x00000020)
++#define LM_OUT_NEED_S (0x00000040)
++#define LM_OUT_ASYNC (0x00000080)
++#define LM_OUT_LVB_INVALID (0x00000100)
+
-+ init_ltpx ();
++/* Callback types */
+
-+ gulm_cm.loaded = TRUE;
-+ exit:
-+ return err;
-+}
-+/* vim: set ai cin noet sw=8 ts=8 : */
-diff -urN linux-2.6.9-rc1-mm3/fs/gfs_locking/lock_gulm/load_info.h linux/fs/gfs_locking/lock_gulm/load_info.h
---- linux-2.6.9-rc1-mm3/fs/gfs_locking/lock_gulm/load_info.h 1969-12-31 18:00:00.000000000 -0600
-+++ linux/fs/gfs_locking/lock_gulm/load_info.h 2004-09-07 16:17:31.806497940 -0500
-@@ -0,0 +1,17 @@
-+/******************************************************************************
-+*******************************************************************************
-+**
-+** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
-+** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
-+**
-+** This copyrighted material is made available to anyone wishing to use,
-+** modify, copy, or redistribute it subject to the terms and conditions
-+** of the GNU General Public License v.2.
-+**
-+*******************************************************************************
-+******************************************************************************/
++#define LM_CB_NEED_E (257)
++#define LM_CB_NEED_D (258)
++#define LM_CB_NEED_S (259)
++#define LM_CB_NEED_RECOVERY (260)
++#define LM_CB_DROPLOCKS (261)
++#define LM_CB_ASYNC (262)
+
-+#ifndef __load_info_h__
-+#define __load_info_h__
-+int load_info (char *);
-+#endif /*__load_info_h__*/
-diff -urN linux-2.6.9-rc1-mm3/fs/gfs_locking/lock_gulm/util.c linux/fs/gfs_locking/lock_gulm/util.c
---- linux-2.6.9-rc1-mm3/fs/gfs_locking/lock_gulm/util.c 1969-12-31 18:00:00.000000000 -0600
-+++ linux/fs/gfs_locking/lock_gulm/util.c 2004-09-07 16:17:31.807497721 -0500
-@@ -0,0 +1,109 @@
-+/******************************************************************************
-+*******************************************************************************
-+**
-+** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
-+** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
-+**
-+** This copyrighted material is made available to anyone wishing to use,
-+** modify, copy, or redistribute it subject to the terms and conditions
-+** of the GNU General Public License v.2.
-+**
-+*******************************************************************************
-+******************************************************************************/
++/* Reset_exp messages */
+
-+#include <linux/kernel.h>
-+#include <linux/types.h>
-+#include <linux/string.h>
-+#include <linux/crc32.h>
++#define LM_RD_GAVEUP (308)
++#define LM_RD_SUCCESS (309)
+
-+/**
-+ * atoi
-+ *
-+ * @c:
-+ *
-+ */
++struct lm_lockname {
++ uint64_t ln_number;
++ unsigned int ln_type;
++};
+
-+int
-+atoi (char *c)
-+{
-+ int x = 0;
++#define lm_name_equal(name1, name2) \
++(((name1)->ln_number == (name2)->ln_number) && \
++ ((name1)->ln_type == (name2)->ln_type)) \
+
-+ while ('0' <= *c && *c <= '9') {
-+ x = x * 10 + (*c - '0');
-+ c++;
-+ }
++struct lm_async_cb {
++ struct lm_lockname lc_name;
++ int lc_ret;
++};
+
-+ return (x);
-+}
++struct lm_lockstruct;
+
-+/**
-+ * inet_aton
-+ *
-+ * @ascii:
-+ * @ip:
-+ *
-+ */
++struct lm_lockops {
++ char lm_proto_name[256];
+
-+int
-+inet_aton (char *ascii, uint32_t * ip)
-+{
-+ uint32_t value;
-+ int x;
++ /* Mount/Unmount */
+
-+ *ip = 0;
++ int (*lm_mount) (char *table_name, char *host_data,
++ lm_callback_t cb, lm_fsdata_t *fsdata,
++ unsigned int min_lvb_size,
++ struct lm_lockstruct *lockstruct);
++ void (*lm_others_may_mount) (lm_lockspace_t *lockspace);
++ void (*lm_unmount) (lm_lockspace_t *lockspace);
+
-+ for (x = 0; x < 4; x++) {
-+ value = atoi (ascii);
-+ if (value > 255)
-+ return (-1);
++ /* Lock oriented operations */
+
-+ *ip = (*ip << 8) | value;
++ int (*lm_get_lock) (lm_lockspace_t *lockspace,
++ struct lm_lockname *name, lm_lock_t **lockp);
++ void (*lm_put_lock) (lm_lock_t *lock);
+
-+ if (x != 3) {
-+ for (; *ascii != '.' && *ascii != '\0'; ascii++) {
-+ if (*ascii < '0' || *ascii > '9') {
-+ /* not a number. stop */
-+ return -1;
-+ }
-+ }
-+ if (*ascii == '\0')
-+ return (-1);
++ unsigned int (*lm_lock) (lm_lock_t *lock, unsigned int cur_state,
++ unsigned int req_state, unsigned int flags);
++ unsigned int (*lm_unlock) (lm_lock_t *lock, unsigned int cur_state);
+
-+ ascii++;
-+ }
-+ }
++ void (*lm_cancel) (lm_lock_t *lock);
+
-+ return (0);
-+}
++ int (*lm_hold_lvb) (lm_lock_t *lock, char **lvbp);
++ void (*lm_unhold_lvb) (lm_lock_t *lock, char *lvb);
++ void (*lm_sync_lvb) (lm_lock_t *lock, char *lvb);
+
-+/**
-+ * inet_ntoa
-+ *
-+ * @ascii:
-+ * @ip:
-+ *
-+ */
-+void
-+inet_ntoa (uint32_t ip, char *buf)
-+{
-+ int i;
-+ char *p;
++ /* Posix Lock oriented operations */
+
-+ p = buf;
++ int (*lm_plock_get) (lm_lockspace_t *lockspace,
++ struct lm_lockname *name,
++ struct file *file, struct file_lock *fl);
+
-+ for (i = 3; i >= 0; i--) {
-+ p += sprintf (p, "%d", (ip >> (8 * i)) & 0xFF);
-+ if (i > 0)
-+ *(p++) = '.';
-+ }
++ int (*lm_plock) (lm_lockspace_t *lockspace,
++ struct lm_lockname *name,
++ struct file *file, int cmd, struct file_lock *fl);
+
-+}
++ int (*lm_punlock) (lm_lockspace_t *lockspace,
++ struct lm_lockname *name,
++ struct file *file, struct file_lock *fl);
+
-+/* public functions */
-+#define hash_init_val 0x6d696b65
++ /* Client oriented operations */
+
-+uint32_t __inline__
-+hash_lock_key (uint8_t * in, uint8_t len)
-+{
-+ return crc32 (hash_init_val, in, len);
-+}
-diff -urN linux-2.6.9-rc1-mm3/fs/gfs_locking/lock_gulm/util.h linux/fs/gfs_locking/lock_gulm/util.h
---- linux-2.6.9-rc1-mm3/fs/gfs_locking/lock_gulm/util.h 1969-12-31 18:00:00.000000000 -0600
-+++ linux/fs/gfs_locking/lock_gulm/util.h 2004-09-07 16:17:31.807497721 -0500
-@@ -0,0 +1,29 @@
-+/******************************************************************************
-+*******************************************************************************
-+**
-+** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
-+** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
-+**
-+** This copyrighted material is made available to anyone wishing to use,
-+** modify, copy, or redistribute it subject to the terms and conditions
-+** of the GNU General Public License v.2.
-+**
-+*******************************************************************************
-+******************************************************************************/
++ void (*lm_recovery_done) (lm_lockspace_t *lockspace, unsigned int jid,
++ unsigned int message);
+
-+#ifndef __UTIL_DOT_H__
-+#define __UTIL_DOT_H__
++ struct module *lm_owner;
++};
+
-+int atoi (char *c);
-+int inet_aton (char *ascii, uint32_t * ip);
-+void inet_ntoa (uint32_t ip, char *buf);
-+void dump_buffer (void *buf, int len);
++struct lm_lockstruct {
++ unsigned int ls_jid;
++ unsigned int ls_first;
++ unsigned int ls_lvb_size;
++ lm_lockspace_t *ls_lockspace;
++ struct lm_lockops *ls_ops;
++ int ls_flags;
++};
+
-+uint32_t __inline__ hash_lock_key (uint8_t * in, uint8_t len);
-+uint8_t __inline__ fourtoone (uint32_t);
++/* Bottom interface */
+
-+__inline__ int testbit (uint16_t bit, uint8_t * set);
-+__inline__ void setbit (uint16_t bit, uint8_t * set);
-+__inline__ void clearbit (uint16_t bit, uint8_t * set);
++int lm_register_proto(struct lm_lockops *proto);
++void lm_unregister_proto(struct lm_lockops *proto);
+
-+#endif /* __UTIL_DOT_H__ */
-diff -urN linux-2.6.9-rc1-mm3/fs/gfs_locking/lock_gulm/utils_crc.c linux/fs/gfs_locking/lock_gulm/utils_crc.c
---- linux-2.6.9-rc1-mm3/fs/gfs_locking/lock_gulm/utils_crc.c 1969-12-31 18:00:00.000000000 -0600
-+++ linux/fs/gfs_locking/lock_gulm/utils_crc.c 2004-09-07 16:17:31.808497502 -0500
-@@ -0,0 +1,92 @@
-+/******************************************************************************
-+*******************************************************************************
-+**
-+** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
-+** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
-+**
-+** This copyrighted material is made available to anyone wishing to use,
-+** modify, copy, or redistribute it subject to the terms and conditions
-+** of the GNU General Public License v.2.
-+**
-+*******************************************************************************
-+******************************************************************************/
++/* Top interface */
+
-+#include <linux/types.h>
++int lm_mount(char *proto_name,
++ char *table_name, char *host_data,
++ lm_callback_t cb, lm_fsdata_t *fsdata,
++ unsigned int min_lvb_size, struct lm_lockstruct *lockstruct);
++void lm_unmount(struct lm_lockstruct *lockstruct);
+
-+static const uint32_t crc_32_tab[] = {
-+ 0x00000000, 0x77073096, 0xee0e612c, 0x990951ba, 0x076dc419, 0x706af48f,
-+ 0xe963a535, 0x9e6495a3, 0x0edb8832, 0x79dcb8a4, 0xe0d5e91e, 0x97d2d988,
-+ 0x09b64c2b, 0x7eb17cbd, 0xe7b82d07, 0x90bf1d91, 0x1db71064, 0x6ab020f2,
-+ 0xf3b97148, 0x84be41de, 0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7,
-+ 0x136c9856, 0x646ba8c0, 0xfd62f97a, 0x8a65c9ec, 0x14015c4f, 0x63066cd9,
-+ 0xfa0f3d63, 0x8d080df5, 0x3b6e20c8, 0x4c69105e, 0xd56041e4, 0xa2677172,
-+ 0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b, 0x35b5a8fa, 0x42b2986c,
-+ 0xdbbbc9d6, 0xacbcf940, 0x32d86ce3, 0x45df5c75, 0xdcd60dcf, 0xabd13d59,
-+ 0x26d930ac, 0x51de003a, 0xc8d75180, 0xbfd06116, 0x21b4f4b5, 0x56b3c423,
-+ 0xcfba9599, 0xb8bda50f, 0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924,
-+ 0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d, 0x76dc4190, 0x01db7106,
-+ 0x98d220bc, 0xefd5102a, 0x71b18589, 0x06b6b51f, 0x9fbfe4a5, 0xe8b8d433,
-+ 0x7807c9a2, 0x0f00f934, 0x9609a88e, 0xe10e9818, 0x7f6a0dbb, 0x086d3d2d,
-+ 0x91646c97, 0xe6635c01, 0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e,
-+ 0x6c0695ed, 0x1b01a57b, 0x8208f4c1, 0xf50fc457, 0x65b0d9c6, 0x12b7e950,
-+ 0x8bbeb8ea, 0xfcb9887c, 0x62dd1ddf, 0x15da2d49, 0x8cd37cf3, 0xfbd44c65,
-+ 0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2, 0x4adfa541, 0x3dd895d7,
-+ 0xa4d1c46d, 0xd3d6f4fb, 0x4369e96a, 0x346ed9fc, 0xad678846, 0xda60b8d0,
-+ 0x44042d73, 0x33031de5, 0xaa0a4c5f, 0xdd0d7cc9, 0x5005713c, 0x270241aa,
-+ 0xbe0b1010, 0xc90c2086, 0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f,
-+ 0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4, 0x59b33d17, 0x2eb40d81,
-+ 0xb7bd5c3b, 0xc0ba6cad, 0xedb88320, 0x9abfb3b6, 0x03b6e20c, 0x74b1d29a,
-+ 0xead54739, 0x9dd277af, 0x04db2615, 0x73dc1683, 0xe3630b12, 0x94643b84,
-+ 0x0d6d6a3e, 0x7a6a5aa8, 0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1,
-+ 0xf00f9344, 0x8708a3d2, 0x1e01f268, 0x6906c2fe, 0xf762575d, 0x806567cb,
-+ 0x196c3671, 0x6e6b06e7, 0xfed41b76, 0x89d32be0, 0x10da7a5a, 0x67dd4acc,
-+ 0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5, 0xd6d6a3e8, 0xa1d1937e,
-+ 0x38d8c2c4, 0x4fdff252, 0xd1bb67f1, 0xa6bc5767, 0x3fb506dd, 0x48b2364b,
-+ 0xd80d2bda, 0xaf0a1b4c, 0x36034af6, 0x41047a60, 0xdf60efc3, 0xa867df55,
-+ 0x316e8eef, 0x4669be79, 0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236,
-+ 0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f, 0xc5ba3bbe, 0xb2bd0b28,
-+ 0x2bb45a92, 0x5cb36a04, 0xc2d7ffa7, 0xb5d0cf31, 0x2cd99e8b, 0x5bdeae1d,
-+ 0x9b64c2b0, 0xec63f226, 0x756aa39c, 0x026d930a, 0x9c0906a9, 0xeb0e363f,
-+ 0x72076785, 0x05005713, 0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38,
-+ 0x92d28e9b, 0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21, 0x86d3d2d4, 0xf1d4e242,
-+ 0x68ddb3f8, 0x1fda836e, 0x81be16cd, 0xf6b9265b, 0x6fb077e1, 0x18b74777,
-+ 0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c, 0x8f659eff, 0xf862ae69,
-+ 0x616bffd3, 0x166ccf45, 0xa00ae278, 0xd70dd2ee, 0x4e048354, 0x3903b3c2,
-+ 0xa7672661, 0xd06016f7, 0x4969474d, 0x3e6e77db, 0xaed16a4a, 0xd9d65adc,
-+ 0x40df0b66, 0x37d83bf0, 0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9,
-+ 0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6, 0xbad03605, 0xcdd70693,
-+ 0x54de5729, 0x23d967bf, 0xb3667a2e, 0xc4614ab8, 0x5d681b02, 0x2a6f2b94,
-+ 0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, 0x2d02ef8d
++#endif /* __LM_INTERFACE_DOT_H__ */
+diff -urN linux-orig/fs/gfs_locking/lock_gulm/lock_gulm.mod.c linux-patched/fs/gfs_locking/lock_gulm/lock_gulm.mod.c
+--- linux-orig/fs/gfs_locking/lock_gulm/lock_gulm.mod.c 1969-12-31 18:00:00.000000000 -0600
++++ linux-patched/fs/gfs_locking/lock_gulm/lock_gulm.mod.c 2004-10-22 11:54:55.000000000 -0500
+@@ -0,0 +1,63 @@
++#include <linux/module.h>
++#include <linux/vermagic.h>
++#include <linux/compiler.h>
++
++MODULE_INFO(vermagic, VERMAGIC_STRING);
++
++#undef unix
++struct module __this_module
++__attribute__((section(".gnu.linkonce.this_module"))) = {
++ .name = __stringify(KBUILD_MODNAME),
++ .init = init_module,
++#ifdef CONFIG_MODULE_UNLOAD
++ .exit = cleanup_module,
++#endif
+};
+
-+/**
-+ * crc32 - hash an array of data
-+ * @data: the data to be hashed
-+ * @len: the length of data to be hashed
-+ *
-+ * completely copied from GFS/src/fs.c
-+ *
-+ * Take some data and convert it to a 32-bit hash.
-+ *
-+ * The hash function is a 32-bit CRC of the data. The algorithm uses
-+ * the crc_32_tab table above.
-+ *
-+ * This may not be the fastest hash function, but it does a fair bit better
-+ * at providing uniform results than the others I've looked at. That's
-+ * really important for efficient directories.
-+ *
-+ * Returns: the hash
-+ */
-+
-+uint32_t
-+crc32 (const char *data, int len, uint32_t init)
-+{
-+ uint32_t hash = init;
-+
-+ for (; len--; data++)
-+ hash = crc_32_tab[(hash ^ *data) & 0xFF] ^ (hash >> 8);
-+
-+ hash = ~hash;
++static const struct modversion_info ____versions[]
++__attribute_used__
++__attribute__((section("__versions"))) = {
++ { 0x1c7a8967, "struct_module" },
++ { 0x7da8156e, "__kmalloc" },
++ { 0x15413d5, "del_timer" },
++ { 0xf0db53e, "malloc_sizes" },
++ { 0x3f82f065, "remove_wait_queue" },
++ { 0xb6d99ad7, "sock_release" },
++ { 0xb9e5ab27, "_spin_lock" },
++ { 0xc0dc58cc, "sock_recvmsg" },
++ { 0x9327521d, "_spin_lock_irqsave" },
++ { 0x1d26aa98, "sprintf" },
++ { 0xda02d67, "jiffies" },
++ { 0x7de09941, "default_wake_function" },
++ { 0x4abe8112, "wait_for_completion" },
++ { 0x1b7d4074, "printk" },
++ { 0xbdae36c, "sock_sendmsg" },
++ { 0x1075bf0, "panic" },
++ { 0x97abb28c, "in6addr_loopback" },
++ { 0xa34f1ef5, "crc32_le" },
++ { 0x85a32c76, "_spin_unlock_irqrestore" },
++ { 0x44524640, "mod_timer" },
++ { 0xec15a2f2, "_spin_unlock" },
++ { 0x98e3a36a, "posix_lock_file_wait" },
++ { 0xfcb350e0, "kmem_cache_alloc" },
++ { 0xb12cdfe7, "system_utsname" },
++ { 0x17d59d01, "schedule_timeout" },
++ { 0x4292364c, "schedule" },
++ { 0xfb6af58d, "recalc_sigpending" },
++ { 0x830eb7ae, "__wake_up" },
++ { 0x423f9e6f, "add_wait_queue" },
++ { 0x37a0cba, "kfree" },
++ { 0x2e60bace, "memcpy" },
++ { 0x9b5d8309, "sock_create" },
++ { 0x7e9ebb05, "kernel_thread" },
++ { 0xd22b546, "__up_wakeup" },
++ { 0x136eba7c, "complete" },
++ { 0xf2520b76, "__down_failed" },
++ { 0xdc43a9c8, "daemonize" },
++};
+
-+ return hash;
-+}
-diff -urN linux-2.6.9-rc1-mm3/fs/gfs_locking/lock_gulm/utils_crc.h linux/fs/gfs_locking/lock_gulm/utils_crc.h
---- linux-2.6.9-rc1-mm3/fs/gfs_locking/lock_gulm/utils_crc.h 1969-12-31 18:00:00.000000000 -0600
-+++ linux/fs/gfs_locking/lock_gulm/utils_crc.h 2004-09-07 16:17:31.808497502 -0500
-@@ -0,0 +1,17 @@
-+/******************************************************************************
-+*******************************************************************************
-+**
-+** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
-+** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
-+**
-+** This copyrighted material is made available to anyone wishing to use,
-+** modify, copy, or redistribute it subject to the terms and conditions
-+** of the GNU General Public License v.2.
-+**
-+*******************************************************************************
-+******************************************************************************/
++static const char __module_depends[]
++__attribute_used__
++__attribute__((section(".modinfo"))) =
++"depends=ipv6,crc32";
+
-+#ifndef __utils_crc_h__
-+#define __utils_crc_h__
-+uint32_t crc32 (const char *data, int len, uint32_t init);
-+#endif /*__utils_crc_h__*/
-diff -urN linux-2.6.9-rc1-mm3/fs/gfs_locking/lock_gulm/utils_tostr.c linux/fs/gfs_locking/lock_gulm/utils_tostr.c
---- linux-2.6.9-rc1-mm3/fs/gfs_locking/lock_gulm/utils_tostr.c 1969-12-31 18:00:00.000000000 -0600
-+++ linux/fs/gfs_locking/lock_gulm/utils_tostr.c 2004-09-07 16:17:31.808497502 -0500
-@@ -0,0 +1,207 @@
+diff -urN linux-orig/fs/gfs_locking/lock_gulm/utils_tostr.c linux-patched/fs/gfs_locking/lock_gulm/utils_tostr.c
+--- linux-orig/fs/gfs_locking/lock_gulm/utils_tostr.c 1969-12-31 18:00:00.000000000 -0600
++++ linux-patched/fs/gfs_locking/lock_gulm/utils_tostr.c 2004-10-22 11:54:55.000000000 -0500
+@@ -0,0 +1,79 @@
+/******************************************************************************
+*******************************************************************************
+**
+ return t;
+}
+
-+char *
-+gio_mbrupdate_to_str (int x)
-+{
-+ char *t = "Unknown Membership Update";
-+ switch (x) {
-+ case gio_Mbr_Logged_in:
-+ t = "Logged in";
-+ break;
-+ case gio_Mbr_Logged_out:
-+ t = "Logged out";
-+ break;
-+ case gio_Mbr_Expired:
-+ t = "Expired";
-+ break;
-+ case gio_Mbr_Killed:
-+ t = "Fenced";
-+ break;
-+ case gio_Mbr_OM_lgin:
-+ t = "Was Logged in";
-+ break;
-+ }
-+ return t;
-+}
-+
-+char *
-+gio_I_am_to_str (int x)
-+{
-+ switch (x) {
-+ case gio_Mbr_ama_Slave:
-+ return "Slave";
-+ break;
-+ case gio_Mbr_ama_Pending:
-+ return "Pending";
-+ break;
-+ case gio_Mbr_ama_Arbitrating:
-+ return "Arbitrating";
-+ break;
-+ case gio_Mbr_ama_Master:
-+ return "Master";
-+ break;
-+ case gio_Mbr_ama_Resource:
-+ return "Service";
-+ break;
-+ case gio_Mbr_ama_Client:
-+ return "Client";
-+ break;
-+ default:
-+ return "Unknown I_am state";
-+ break;
-+ }
-+}
-+
-+char *
-+gio_license_states (int x)
-+{
-+ switch (x) {
-+ case 0:
-+ return "valid";
-+ break;
-+ case 1:
-+ return "expired";
-+ break;
-+ case 2:
-+ return "invalid";
-+ break;
-+ default:
-+ return "unknown";
-+ break;
-+ }
-+}
-+
-+char *
-+gio_opcodes (int x)
-+{
-+ switch (x) {
-+#define CP(x) case (x): return #x ; break
-+ CP (gulm_err_reply);
-+
-+ CP (gulm_core_login_req);
-+ CP (gulm_core_login_rpl);
-+ CP (gulm_core_logout_req);
-+ CP (gulm_core_logout_rpl);
-+ CP (gulm_core_reslgn_req);
-+ CP (gulm_core_beat_req);
-+ CP (gulm_core_beat_rpl);
-+ CP (gulm_core_mbr_req);
-+ CP (gulm_core_mbr_updt);
-+ CP (gulm_core_mbr_lstreq);
-+ CP (gulm_core_mbr_lstrpl);
-+ CP (gulm_core_mbr_force);
-+ CP (gulm_core_res_req);
-+ CP (gulm_core_res_list);
-+ CP (gulm_core_state_req);
-+ CP (gulm_core_state_chgs);
-+ CP (gulm_core_shutdown);
-+ CP (gulm_core_forcepend);
-+
-+ CP (gulm_info_stats_req);
-+ CP (gulm_info_stats_rpl);
-+ CP (gulm_info_set_verbosity);
-+ CP (gulm_socket_close);
-+ CP (gulm_info_slave_list_req);
-+ CP (gulm_info_slave_list_rpl);
-+
-+ CP (gulm_lock_login_req);
-+ CP (gulm_lock_login_rpl);
-+ CP (gulm_lock_logout_req);
-+ CP (gulm_lock_logout_rpl);
-+ CP (gulm_lock_state_req);
-+ CP (gulm_lock_state_rpl);
-+ CP (gulm_lock_state_updt);
-+ CP (gulm_lock_action_req);
-+ CP (gulm_lock_action_rpl);
-+ CP (gulm_lock_action_updt);
-+ CP (gulm_lock_update_rpl);
-+ CP (gulm_lock_cb_state);
-+ CP (gulm_lock_cb_dropall);
-+ CP (gulm_lock_drop_exp);
-+ CP (gulm_lock_dump_req);
-+ CP (gulm_lock_dump_rpl);
-+ CP (gulm_lock_rerunqueues);
-+
-+#undef CP
-+ default:
-+ return "Unknown Op Code";
-+ break;
-+ }
-+}
-diff -urN linux-2.6.9-rc1-mm3/fs/gfs_locking/lock_gulm/utils_tostr.h linux/fs/gfs_locking/lock_gulm/utils_tostr.h
---- linux-2.6.9-rc1-mm3/fs/gfs_locking/lock_gulm/utils_tostr.h 1969-12-31 18:00:00.000000000 -0600
-+++ linux/fs/gfs_locking/lock_gulm/utils_tostr.h 2004-09-07 16:17:31.808497502 -0500
-@@ -0,0 +1,22 @@
+diff -urN linux-orig/fs/gfs_locking/lock_gulm/utils_tostr.h linux-patched/fs/gfs_locking/lock_gulm/utils_tostr.h
+--- linux-orig/fs/gfs_locking/lock_gulm/utils_tostr.h 1969-12-31 18:00:00.000000000 -0600
++++ linux-patched/fs/gfs_locking/lock_gulm/utils_tostr.h 2004-10-22 11:54:55.000000000 -0500
+@@ -0,0 +1,17 @@
+/******************************************************************************
+*******************************************************************************
+**
+#ifndef __utils_tostr_h__
+#define __utils_tostr_h__
+char *gio_Err_to_str (int x);
-+char *gio_mbrupdate_to_str (int x);
-+char *gio_mbrama_to_str (int x);
-+char *gio_I_am_to_str (int x);
-+char *gio_license_states (int x);
-+char *gio_opcodes (int x);
+#endif /*__utils_tostr_h__*/
-diff -urN linux-2.6.9-rc1-mm3/fs/gfs_locking/lock_gulm/xdr.h linux/fs/gfs_locking/lock_gulm/xdr.h
---- linux-2.6.9-rc1-mm3/fs/gfs_locking/lock_gulm/xdr.h 1969-12-31 18:00:00.000000000 -0600
-+++ linux/fs/gfs_locking/lock_gulm/xdr.h 2004-09-07 16:17:31.809497283 -0500
+diff -urN linux-orig/fs/gfs_locking/lock_gulm/xdr.h linux-patched/fs/gfs_locking/lock_gulm/xdr.h
+--- linux-orig/fs/gfs_locking/lock_gulm/xdr.h 1969-12-31 18:00:00.000000000 -0600
++++ linux-patched/fs/gfs_locking/lock_gulm/xdr.h 2004-10-22 11:54:55.000000000 -0500
@@ -0,0 +1,98 @@
+/******************************************************************************
+*******************************************************************************
+int xdr_dec_list_stop (xdr_dec_t * xdr);
+
+#endif /*__gulm_xdr_h__*/
-diff -urN linux-2.6.9-rc1-mm3/fs/gfs_locking/lock_gulm/xdr_base.c linux/fs/gfs_locking/lock_gulm/xdr_base.c
---- linux-2.6.9-rc1-mm3/fs/gfs_locking/lock_gulm/xdr_base.c 1969-12-31 18:00:00.000000000 -0600
-+++ linux/fs/gfs_locking/lock_gulm/xdr_base.c 2004-09-07 16:17:31.810497064 -0500
+diff -urN linux-orig/fs/gfs_locking/lock_gulm/xdr_base.c linux-patched/fs/gfs_locking/lock_gulm/xdr_base.c
+--- linux-orig/fs/gfs_locking/lock_gulm/xdr_base.c 1969-12-31 18:00:00.000000000 -0600
++++ linux-patched/fs/gfs_locking/lock_gulm/xdr_base.c 2004-10-22 11:54:55.000000000 -0500
@@ -0,0 +1,904 @@
+/******************************************************************************
+*******************************************************************************
+ *(xdr->stream) = XDR_NULL;
+ return 0;
+}
-diff -urN linux-2.6.9-rc1-mm3/fs/gfs_locking/lock_gulm/xdr_io.c linux/fs/gfs_locking/lock_gulm/xdr_io.c
---- linux-2.6.9-rc1-mm3/fs/gfs_locking/lock_gulm/xdr_io.c 1969-12-31 18:00:00.000000000 -0600
-+++ linux/fs/gfs_locking/lock_gulm/xdr_io.c 2004-09-07 16:17:31.810497064 -0500
+diff -urN linux-orig/fs/gfs_locking/lock_gulm/xdr_io.c linux-patched/fs/gfs_locking/lock_gulm/xdr_io.c
+--- linux-orig/fs/gfs_locking/lock_gulm/xdr_io.c 1969-12-31 18:00:00.000000000 -0600
++++ linux-patched/fs/gfs_locking/lock_gulm/xdr_io.c 2004-10-22 11:54:55.000000000 -0500
@@ -0,0 +1,169 @@
+/******************************************************************************
+*******************************************************************************
+}
+
+#endif /*__KERNEL__*/
-diff -urN linux-2.6.9-rc1-mm3/fs/gfs_locking/lock_gulm/xdr_socket.c linux/fs/gfs_locking/lock_gulm/xdr_socket.c
---- linux-2.6.9-rc1-mm3/fs/gfs_locking/lock_gulm/xdr_socket.c 1969-12-31 18:00:00.000000000 -0600
-+++ linux/fs/gfs_locking/lock_gulm/xdr_socket.c 2004-09-07 16:17:31.811496845 -0500
+diff -urN linux-orig/fs/gfs_locking/lock_gulm/xdr_socket.c linux-patched/fs/gfs_locking/lock_gulm/xdr_socket.c
+--- linux-orig/fs/gfs_locking/lock_gulm/xdr_socket.c 1969-12-31 18:00:00.000000000 -0600
++++ linux-patched/fs/gfs_locking/lock_gulm/xdr_socket.c 2004-10-22 11:54:55.000000000 -0500
@@ -0,0 +1,82 @@
+/******************************************************************************
+*******************************************************************************
+}
+
+#endif /*__KERNEL__*/
-diff -urN linux-2.6.9-rc1-mm3/fs/gfs_locking/lock_harness/main.c linux/fs/gfs_locking/lock_harness/main.c
---- linux-2.6.9-rc1-mm3/fs/gfs_locking/lock_harness/main.c 1969-12-31 18:00:00.000000000 -0600
-+++ linux/fs/gfs_locking/lock_harness/main.c 2004-09-07 16:14:25.844078942 -0500
+diff -urN linux-orig/fs/gfs_locking/lock_harness/main.c linux-patched/fs/gfs_locking/lock_harness/main.c
+--- linux-orig/fs/gfs_locking/lock_harness/main.c 1969-12-31 18:00:00.000000000 -0600
++++ linux-patched/fs/gfs_locking/lock_harness/main.c 2004-10-21 18:18:17.815082384 -0500
@@ -0,0 +1,227 @@
+/******************************************************************************
+*******************************************************************************
+EXPORT_SYMBOL_GPL(lm_unregister_proto);
+EXPORT_SYMBOL_GPL(lm_mount);
+EXPORT_SYMBOL_GPL(lm_unmount);
-diff -urN linux-2.6.9-rc1-mm3/include/linux/lm_interface.h linux/include/linux/lm_interface.h
---- linux-2.6.9-rc1-mm3/include/linux/lm_interface.h 1969-12-31 18:00:00.000000000 -0600
-+++ linux/include/linux/lm_interface.h 2004-09-07 16:14:25.845078724 -0500
+diff -urN linux-orig/include/linux/lm_interface.h linux-patched/include/linux/lm_interface.h
+--- linux-orig/include/linux/lm_interface.h 1969-12-31 18:00:00.000000000 -0600
++++ linux-patched/include/linux/lm_interface.h 2004-10-21 18:18:17.797086560 -0500
@@ -0,0 +1,191 @@
+/******************************************************************************
+*******************************************************************************
+void lm_unmount(struct lm_lockstruct *lockstruct);
+
+#endif /* __LM_INTERFACE_DOT_H__ */
-diff -urN linux-2.6.9-rc1-mm3/fs/gfs_locking/lock_nolock/main.c linux/fs/gfs_locking/lock_nolock/main.c
---- linux-2.6.9-rc1-mm3/fs/gfs_locking/lock_nolock/main.c 1969-12-31 18:00:00.000000000 -0600
-+++ linux/fs/gfs_locking/lock_nolock/main.c 2004-09-07 16:16:55.823371100 -0500
+diff -urN linux-orig/fs/gfs_locking/lock_nolock/main.c linux-patched/fs/gfs_locking/lock_nolock/main.c
+--- linux-orig/fs/gfs_locking/lock_nolock/main.c 1969-12-31 18:00:00.000000000 -0600
++++ linux-patched/fs/gfs_locking/lock_nolock/main.c 2004-10-21 18:18:24.100623662 -0500
@@ -0,0 +1,346 @@
+/******************************************************************************
+*******************************************************************************