4bf12011 1# Add DLM to the build system
2diff -urN -p linux-2.6.7/cluster/Kconfig linux/cluster/Kconfig
3--- linux-2.6.7/cluster/Kconfig 2004-06-17 15:00:36.000000000 +0800
4+++ linux/cluster/Kconfig 2004-06-17 15:00:57.000000000 +0800
5@@ -10,4 +10,22 @@ config CLUSTER
6 needed by all the other components. It provides membership services
7 for those other subsystems.
8
9+config CLUSTER_DLM
10+ tristate "Distributed Lock Manager"
11+ depends on CLUSTER
12+ ---help---
13+ A fully distributed lock manager, providing cluster-wide locking services
14+ and protected lock namespaces for kernel and userland applications.
15+
16+config CLUSTER_DLM_PROCLOCKS
17+	boolean "/proc/cluster/dlm_locks support for DLM"
18+ depends on CLUSTER_DLM
19+ depends on PROC_FS
20+ ---help---
21+	  If this option is enabled, a file will appear at /proc/cluster/dlm_locks.
22+	  Write the name of a lockspace known to the DLM into this "file", then
23+	  read out a list of all the resources and locks in that lockspace that
24+	  are known to the local node. Note that because the DLM is distributed,
25+	  this may not be the full lock picture.
26+
27 endmenu
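
As a usage sketch of the CLUSTER_DLM_PROCLOCKS interface described in the help
text above (illustrative only: the lockspace name "myls" is an assumption, and
the output format is whatever proc.c, added later in this patch, emits):

	#include <fcntl.h>
	#include <stdio.h>
	#include <unistd.h>

	int main(void)
	{
		char buf[4096];
		ssize_t n;
		int fd = open("/proc/cluster/dlm_locks", O_RDWR);

		if (fd < 0)
			return 1;
		/* select a lockspace known to the DLM, then read back the
		   resources and locks known to the local node */
		write(fd, "myls", 4);
		while ((n = read(fd, buf, sizeof(buf))) > 0)
			fwrite(buf, 1, n, stdout);
		close(fd);
		return 0;
	}
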
28diff -urN -p linux-2.6.7/cluster/Makefile linux/cluster/Makefile
29--- linux-2.6.7/cluster/Makefile 2004-06-17 15:00:36.000000000 +0800
30+++ linux/cluster/Makefile 2004-06-17 15:00:57.000000000 +0800
31@@ -1,3 +1,4 @@
32 obj-y := nocluster.o
33
34 obj-$(CONFIG_CLUSTER) += cman/
35+obj-$(CONFIG_CLUSTER_DLM) += dlm/
36diff -urN -p linux-2.6.7/cluster/dlm/Makefile linux/cluster/dlm/Makefile
37--- linux-2.6.7/cluster/dlm/Makefile 1970-01-01 07:30:00.000000000 +0730
38+++ linux/cluster/dlm/Makefile 2004-06-17 15:00:57.000000000 +0800
39@@ -0,0 +1,23 @@
40+dlm-objs := ast.o \
41+ config.o \
42+ device.o \
43+ dir.o \
44+ lkb.o \
45+ locking.o \
46+ lockqueue.o \
47+ lockspace.o \
48+ lowcomms.o \
49+ main.o \
50+ memory.o \
51+ midcomms.o \
52+ nodes.o \
53+ proc.o \
54+ queries.o \
55+ rebuild.o \
56+ reccomms.o \
57+ recover.o \
58+ recoverd.o \
59+ rsb.o \
60+	util.o
61+
62+obj-$(CONFIG_CLUSTER_DLM) += dlm.o
63diff -urN linux-orig/cluster/dlm/ast.c linux-patched/cluster/dlm/ast.c
64--- linux-orig/cluster/dlm/ast.c 1970-01-01 07:30:00.000000000 +0730
10d56c87 65+++ linux-patched/cluster/dlm/ast.c 2004-07-13 18:57:22.000000000 +0800
66@@ -0,0 +1,557 @@
4bf12011 67+/******************************************************************************
68+*******************************************************************************
69+**
70+** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
71+** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
72+**
73+** This copyrighted material is made available to anyone wishing to use,
74+** modify, copy, or redistribute it subject to the terms and conditions
75+** of the GNU General Public License v.2.
76+**
77+*******************************************************************************
78+******************************************************************************/
79+
80+/*
81+ * This delivers ASTs and checks for dead remote requests and deadlocks.
82+ */
83+
84+#include <linux/timer.h>
85+
86+#include "dlm_internal.h"
87+#include "rsb.h"
88+#include "lockqueue.h"
89+#include "dir.h"
90+#include "locking.h"
91+#include "lkb.h"
92+#include "lowcomms.h"
93+#include "midcomms.h"
94+#include "ast.h"
95+#include "nodes.h"
96+#include "config.h"
10d56c87 97+#include "util.h"
4bf12011 98+
99+/* Wake up flags for astd */
100+#define GDLMD_WAKE_ASTS 1
101+#define GDLMD_WAKE_TIMER 2
102+
103+static struct list_head _deadlockqueue;
104+static struct semaphore _deadlockqueue_lock;
105+static struct list_head _lockqueue;
106+static struct semaphore _lockqueue_lock;
107+static struct timer_list _lockqueue_timer;
108+static struct list_head _ast_queue;
109+static struct semaphore _ast_queue_lock;
110+static wait_queue_head_t _astd_waitchan;
111+static atomic_t _astd_running;
112+static long _astd_pid;
113+static unsigned long _astd_wakeflags;
114+static struct completion _astd_done;
115+
10d56c87 116+void add_to_lockqueue(struct dlm_lkb *lkb)
4bf12011 117+{
118+ /* Time stamp the entry so we know if it's been waiting too long */
119+ lkb->lkb_lockqueue_time = jiffies;
120+
121+ down(&_lockqueue_lock);
122+ list_add(&lkb->lkb_lockqueue, &_lockqueue);
123+ up(&_lockqueue_lock);
124+}
125+
10d56c87 126+void remove_from_lockqueue(struct dlm_lkb *lkb)
4bf12011 127+{
128+ down(&_lockqueue_lock);
129+ list_del(&lkb->lkb_lockqueue);
130+ up(&_lockqueue_lock);
131+}
132+
10d56c87 133+void add_to_deadlockqueue(struct dlm_lkb *lkb)
4bf12011 134+{
135+ if (test_bit(LSFL_NOTIMERS, &lkb->lkb_resource->res_ls->ls_flags))
136+ return;
137+ lkb->lkb_duetime = jiffies;
138+ down(&_deadlockqueue_lock);
139+ list_add(&lkb->lkb_deadlockq, &_deadlockqueue);
140+ up(&_deadlockqueue_lock);
141+}
142+
10d56c87 143+void remove_from_deadlockqueue(struct dlm_lkb *lkb)
4bf12011 144+{
145+ if (test_bit(LSFL_NOTIMERS, &lkb->lkb_resource->res_ls->ls_flags))
146+ return;
147+
148+ down(&_deadlockqueue_lock);
149+ list_del(&lkb->lkb_deadlockq);
150+ up(&_deadlockqueue_lock);
151+
152+ /* Invalidate the due time */
153+ memset(&lkb->lkb_duetime, 0, sizeof(lkb->lkb_duetime));
154+}
155+
4bf12011 156+/*
5cdbd17b 157+ * deliver an AST to a user
4bf12011 158+ */
159+
10d56c87 160+static void deliver_ast(struct dlm_lkb *lkb, uint16_t ast_type)
4bf12011 161+{
162+ void (*cast) (long param) = lkb->lkb_astaddr;
163+ void (*bast) (long param, int mode) = lkb->lkb_bastaddr;
164+
5cdbd17b 165+	if (ast_type == AST_BAST) {
166+ if (!bast)
167+ return;
168+ if (lkb->lkb_status != GDLM_LKSTS_GRANTED)
169+ return;
4bf12011 170+ bast(lkb->lkb_astparam, (int) lkb->lkb_bastmode);
5cdbd17b 171+	} else {
172+ if (!cast)
173+ return;
174+ cast(lkb->lkb_astparam);
4bf12011 175+ }
4bf12011 176+}
177+
178+/*
179+ * Queue an AST for delivery. This only deals with kernel
180+ * ASTs; the usermode API piggybacks on top of this.
181+ *
182+ * This can be called in either the user or DLM context.
10d56c87 183+ * ASTs are queued EVEN IF we are already running in dlm_astd
4bf12011 184+ * context as we don't know what other locks are held (eg we could
185+ * be being called from a lock operation that was called from
186+ * another AST!
187+ * If the AST is to be queued remotely then a message is sent to
188+ * the target system via midcomms.
189+ */
190+
10d56c87 191+void queue_ast(struct dlm_lkb *lkb, uint16_t flags, uint8_t rqmode)
4bf12011 192+{
10d56c87 193+ struct dlm_request req;
4bf12011 194+
195+ if (lkb->lkb_flags & GDLM_LKFLG_MSTCPY) {
196+ /*
197+ * Send a message to have an ast queued remotely. Note: we do
198+ * not send remote completion asts, they are handled as part of
199+ * remote lock granting.
200+ */
5cdbd17b 201+ if (flags & AST_BAST) {
4bf12011 202+ req.rr_header.rh_cmd = GDLM_REMCMD_SENDBAST;
203+ req.rr_header.rh_length = sizeof(req);
204+ req.rr_header.rh_flags = 0;
205+ req.rr_header.rh_lkid = lkb->lkb_id;
206+ req.rr_header.rh_lockspace =
207+ lkb->lkb_resource->res_ls->ls_global_id;
208+ req.rr_status = lkb->lkb_retstatus;
209+ req.rr_remlkid = lkb->lkb_remid;
210+ req.rr_rqmode = rqmode;
211+
212+ midcomms_send_message(lkb->lkb_nodeid, &req.rr_header,
5cdbd17b 213+ lkb->lkb_resource->res_ls->ls_allocation);
4bf12011 214+ } else if (lkb->lkb_retstatus == -EDEADLOCK) {
215+ /*
216+ * We only queue remote Completion ASTs here for error
217+ * completions that happen out of band.
218+ * DEADLOCK is one such.
219+ */
4bf12011 220+ req.rr_header.rh_cmd = GDLM_REMCMD_SENDCAST;
221+ req.rr_header.rh_length = sizeof(req);
222+ req.rr_header.rh_flags = 0;
223+ req.rr_header.rh_lkid = lkb->lkb_id;
224+ req.rr_header.rh_lockspace =
225+ lkb->lkb_resource->res_ls->ls_global_id;
226+ req.rr_status = lkb->lkb_retstatus;
227+ req.rr_remlkid = lkb->lkb_remid;
228+ req.rr_rqmode = rqmode;
229+
230+ midcomms_send_message(lkb->lkb_nodeid, &req.rr_header,
5cdbd17b 231+ lkb->lkb_resource->res_ls->ls_allocation);
4bf12011 232+ }
233+ } else {
234+ /*
5cdbd17b 235+ * Prepare info that will be returned in ast/bast.
4bf12011 236+ */
237+
5cdbd17b 238+ if (flags & AST_BAST) {
4bf12011 239+ lkb->lkb_bastmode = rqmode;
240+ } else {
241+ lkb->lkb_lksb->sb_status = lkb->lkb_retstatus;
242+
243+ if (lkb->lkb_flags & GDLM_LKFLG_DEMOTED)
244+ lkb->lkb_lksb->sb_flags = DLM_SBF_DEMOTED;
245+ else
246+ lkb->lkb_lksb->sb_flags = 0;
247+ }
248+
4bf12011 249+ down(&_ast_queue_lock);
5cdbd17b 250+	if (lkb->lkb_astflags & AST_DEL)
251+ log_print("queue_ast on deleted lkb %x ast %x pid %u",
252+ lkb->lkb_id, lkb->lkb_astflags, current->pid);
253+ if (!(lkb->lkb_astflags & (AST_COMP | AST_BAST)))
4bf12011 254+ list_add_tail(&lkb->lkb_astqueue, &_ast_queue);
5cdbd17b 255+ lkb->lkb_astflags |= flags;
4bf12011 256+ up(&_ast_queue_lock);
257+
258+	/* It is the responsibility of the caller to call wake_astd()
259+	 * once it has finished any other locking operations that
260+	 * request ASTs to be delivered afterwards */
261+ }
262+}
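
A caller-side sketch of the contract in the comment above: several ASTs can be
queued back to back and flushed with a single wake_astd() (the lkb pointers and
rqmode here are hypothetical stand-ins):

	/* in DLM context, inside some locking operation (sketch only) */
	queue_ast(lkb, AST_COMP, 0);		/* completion for this lkb */
	queue_ast(other_lkb, AST_BAST, rqmode);	/* blocking notification */
	/* ... finish updating lock state while dlm_astd stays asleep ... */
	wake_astd();	/* one wakeup delivers everything queued above */
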
263+
264+/*
5cdbd17b 265+ * Process any LKBs on the AST queue.
4bf12011 266+ */
267+
268+static void process_asts(void)
269+{
10d56c87 270+ struct dlm_lkb *lkb;
5cdbd17b 271+ uint16_t flags;
4bf12011 272+
5cdbd17b 273+	for (;;) {
274+ down(&_ast_queue_lock);
275+ if (list_empty(&_ast_queue)) {
276+ up(&_ast_queue_lock);
277+ break;
278+ }
279+
10d56c87 280+ lkb = list_entry(_ast_queue.next, struct dlm_lkb, lkb_astqueue);
5cdbd17b 281+		list_del(&lkb->lkb_astqueue);
282+ flags = lkb->lkb_astflags;
283+ lkb->lkb_astflags = 0;
284+ up(&_ast_queue_lock);
4bf12011 285+
5cdbd17b 286+		if (flags & AST_COMP)
287+ deliver_ast(lkb, AST_COMP);
4bf12011 288+
10d56c87 289+		if (flags & AST_BAST)
290+ deliver_ast(lkb, AST_BAST);
4bf12011 291+
5cdbd17b 292+ if (flags & AST_DEL) {
10d56c87 293+			struct dlm_rsb *rsb = lkb->lkb_resource;
294+ struct dlm_ls *ls = rsb->res_ls;
4bf12011 295+
10d56c87 296+ DLM_ASSERT(lkb->lkb_astflags == 0,
5cdbd17b 297+ printk("%x %x\n", lkb->lkb_id, lkb->lkb_astflags););
4bf12011 298+
5cdbd17b 299+			down_read(&ls->ls_in_recovery);
300+ release_lkb(ls, lkb);
301+ release_rsb(rsb);
302+ up_read(&ls->ls_in_recovery);
303+ }
304+
305+ schedule();
4bf12011 306+ }
4bf12011 307+}
308+
10d56c87 309+void lockqueue_lkb_mark(struct dlm_ls *ls)
4bf12011 310+{
10d56c87 311+ struct dlm_lkb *lkb, *safe;
4bf12011 312+ int count = 0;
313+
314+ log_all(ls, "mark waiting requests");
315+
316+ down(&_lockqueue_lock);
317+
318+ list_for_each_entry_safe(lkb, safe, &_lockqueue, lkb_lockqueue) {
319+
320+ if (lkb->lkb_resource->res_ls != ls)
321+ continue;
322+
323+ /*
324+ * These lkb's are new and the master is being looked up. Mark
325+ * the lkb request to be resent. Even if the destination node
326+ * for the request is still living and has our request, it will
327+ * purge all resdir requests in purge_requestqueue. If there's
328+ * a reply to the LOOKUP request in our requestqueue (the reply
329+ * arrived after ls_stop), it is invalid and will be discarded
330+ * in purge_requestqueue, too.
331+ */
332+
333+ if (lkb->lkb_lockqueue_state == GDLM_LQSTATE_WAIT_RSB) {
10d56c87 334+			DLM_ASSERT(lkb->lkb_nodeid == -1,
335+ print_lkb(lkb);
336+ print_rsb(lkb->lkb_resource););
4bf12011 337+
338+ lkb->lkb_flags |= GDLM_LKFLG_LQRESEND;
339+ count++;
340+ continue;
341+ }
342+
343+ /*
344+ * These lkb's have an outstanding request to a bygone node.
345+ * The request will be redirected to the new master node in
346+ * resend_cluster_requests(). Don't mark the request for
347+ * resending if there's a reply for it saved in the
348+ * requestqueue.
349+ */
350+
351+ if (in_nodes_gone(ls, lkb->lkb_nodeid) &&
352+ !reply_in_requestqueue(ls, lkb->lkb_id)) {
353+
354+ lkb->lkb_flags |= GDLM_LKFLG_LQRESEND;
355+
356+ /*
357+ * Don't rebuild this lkb on a new rsb in
358+ * rebuild_rsbs_send().
359+ */
360+
10d56c87 361+			if (lkb->lkb_lockqueue_state == GDLM_LQSTATE_WAIT_CONDGRANT) {
362+ DLM_ASSERT(lkb->lkb_status == GDLM_LKSTS_WAITING,
363+ print_lkb(lkb);
364+ print_rsb(lkb->lkb_resource););
4bf12011 365+ lkb->lkb_flags |= GDLM_LKFLG_NOREBUILD;
366+ }
367+
368+ /*
369+ * This flag indicates to the new master that his lkb
370+ * is in the midst of a convert request and should be
371+ * placed on the granted queue rather than the convert
372+ * queue. We will resend this convert request to the
373+ * new master.
374+ */
375+
10d56c87 376+			else if (lkb->lkb_lockqueue_state == GDLM_LQSTATE_WAIT_CONVERT) {
377+ DLM_ASSERT(lkb->lkb_status == GDLM_LKSTS_CONVERT,
378+ print_lkb(lkb);
379+ print_rsb(lkb->lkb_resource););
4bf12011 380+ lkb->lkb_flags |= GDLM_LKFLG_LQCONVERT;
381+ }
382+
383+ count++;
384+ }
385+ }
386+ up(&_lockqueue_lock);
387+
388+ log_all(ls, "marked %d requests", count);
389+}
390+
10d56c87 391+int resend_cluster_requests(struct dlm_ls *ls)
4bf12011 392+{
10d56c87 393+ struct dlm_lkb *lkb, *safe;
4bf12011 394+ int error = 0, state, count = 0;
395+
396+ log_all(ls, "resend marked requests");
397+
398+ down(&_lockqueue_lock);
399+
400+ list_for_each_entry_safe(lkb, safe, &_lockqueue, lkb_lockqueue) {
401+
402+ if (!test_bit(LSFL_LS_RUN, &ls->ls_flags)) {
403+ log_debug(ls, "resend_cluster_requests: aborted");
404+ error = -EINTR;
405+ break;
406+ }
407+
408+ if (lkb->lkb_resource->res_ls != ls)
409+ continue;
410+
411+ log_debug(ls, "resend_cluster_requests id=%x nodeid=%d "
412+ "lqstate=%u flags=%x", lkb->lkb_id, lkb->lkb_nodeid,
413+ lkb->lkb_lockqueue_state, lkb->lkb_flags);
414+
415+ /*
416+		 * Resend/process the lockqueue lkb's (in-progress requests)
417+ * that were flagged at the start of recovery in
418+ * lockqueue_lkb_mark().
419+ */
420+
421+ if (lkb->lkb_flags & GDLM_LKFLG_LQRESEND) {
422+ lkb->lkb_flags &= ~GDLM_LKFLG_LQRESEND;
423+ lkb->lkb_flags &= ~GDLM_LKFLG_NOREBUILD;
424+ lkb->lkb_flags &= ~GDLM_LKFLG_LQCONVERT;
425+
426+ if (lkb->lkb_nodeid == -1) {
427+ /*
428+ * Send lookup to new resdir node.
429+ */
430+ lkb->lkb_lockqueue_time = jiffies;
431+ send_cluster_request(lkb,
432+ lkb->lkb_lockqueue_state);
433+ }
434+
435+ else if (lkb->lkb_nodeid != 0) {
436+ /*
437+ * There's a new RSB master (that's not us.)
438+ */
439+ lkb->lkb_lockqueue_time = jiffies;
440+ send_cluster_request(lkb,
441+ lkb->lkb_lockqueue_state);
442+ }
443+
444+ else {
445+ /*
446+ * We are the new RSB master for this lkb
447+ * request.
448+ */
449+ state = lkb->lkb_lockqueue_state;
450+ lkb->lkb_lockqueue_state = 0;
451+ /* list_del equals remove_from_lockqueue() */
452+ list_del(&lkb->lkb_lockqueue);
10d56c87 453+ process_remastered_lkb(ls, lkb, state);
4bf12011 454+ }
455+
456+ count++;
457+ }
458+ }
459+ up(&_lockqueue_lock);
460+
461+ log_all(ls, "resent %d requests", count);
462+ return error;
463+}
464+
465+/*
466+ * Process any LKBs on the lock queue. This just
467+ * looks at the entries to see if they have been on
468+ * the queue too long, and fails the requests if so.
469+ */
470+
471+static void process_lockqueue(void)
472+{
10d56c87 473+	struct dlm_lkb *lkb, *safe;
474+ struct dlm_ls *ls;
4bf12011 475+ int count = 0;
476+
477+ down(&_lockqueue_lock);
478+
479+ list_for_each_entry_safe(lkb, safe, &_lockqueue, lkb_lockqueue) {
480+ ls = lkb->lkb_resource->res_ls;
481+
482+ if (test_bit(LSFL_NOTIMERS, &ls->ls_flags))
483+ continue;
484+
485+ /* Don't time out locks that are in transition */
486+ if (!test_bit(LSFL_LS_RUN, &ls->ls_flags))
487+ continue;
488+
489+ if (check_timeout(lkb->lkb_lockqueue_time,
490+ dlm_config.lock_timeout)) {
491+ count++;
492+ list_del(&lkb->lkb_lockqueue);
493+ up(&_lockqueue_lock);
494+ cancel_lockop(lkb, -ETIMEDOUT);
495+ down(&_lockqueue_lock);
496+ }
497+ }
498+ up(&_lockqueue_lock);
499+
500+ if (count)
501+ wake_astd();
502+
503+ if (atomic_read(&_astd_running))
504+ mod_timer(&_lockqueue_timer,
505+ jiffies + ((dlm_config.lock_timeout >> 1) * HZ));
506+}
507+
508+/* Look for deadlocks */
509+static void process_deadlockqueue(void)
510+{
10d56c87 511+ struct dlm_lkb *lkb, *safe;
4bf12011 512+
513+ down(&_deadlockqueue_lock);
514+
515+ list_for_each_entry_safe(lkb, safe, &_deadlockqueue, lkb_deadlockq) {
10d56c87 516+ struct dlm_lkb *kill_lkb;
4bf12011 517+
518+ /* Only look at "due" locks */
519+ if (!check_timeout(lkb->lkb_duetime, dlm_config.deadlocktime))
520+ break;
521+
522+ /* Don't look at locks that are in transition */
523+ if (!test_bit(LSFL_LS_RUN,
524+ &lkb->lkb_resource->res_ls->ls_flags))
525+ continue;
526+
527+ up(&_deadlockqueue_lock);
528+
529+ /* Lock has hit due time, check for conversion deadlock */
530+ kill_lkb = conversion_deadlock_check(lkb);
531+ if (kill_lkb)
532+ cancel_conversion(kill_lkb, -EDEADLOCK);
533+
534+ down(&_deadlockqueue_lock);
535+ }
536+ up(&_deadlockqueue_lock);
537+}
538+
539+static __inline__ int no_asts(void)
540+{
541+ int ret;
542+
543+ down(&_ast_queue_lock);
544+ ret = list_empty(&_ast_queue);
545+ up(&_ast_queue_lock);
546+ return ret;
547+}
548+
549+static void lockqueue_timer_fn(unsigned long arg)
550+{
551+ set_bit(GDLMD_WAKE_TIMER, &_astd_wakeflags);
552+ wake_up(&_astd_waitchan);
553+}
554+
555+/*
556+ * DLM daemon which delivers asts.
557+ */
558+
559+static int dlm_astd(void *data)
560+{
561+ daemonize("dlm_astd");
562+
563+ INIT_LIST_HEAD(&_lockqueue);
564+ init_MUTEX(&_lockqueue_lock);
565+ INIT_LIST_HEAD(&_deadlockqueue);
566+ init_MUTEX(&_deadlockqueue_lock);
567+ INIT_LIST_HEAD(&_ast_queue);
568+ init_MUTEX(&_ast_queue_lock);
569+ init_waitqueue_head(&_astd_waitchan);
570+ complete(&_astd_done);
571+
572+ /*
573+ * Set a timer to check the lockqueue for dead locks (and deadlocks).
574+ */
575+
576+ init_timer(&_lockqueue_timer);
577+ _lockqueue_timer.function = lockqueue_timer_fn;
578+ _lockqueue_timer.data = 0;
579+ mod_timer(&_lockqueue_timer,
580+ jiffies + ((dlm_config.lock_timeout >> 1) * HZ));
581+
582+ while (atomic_read(&_astd_running)) {
583+ wchan_cond_sleep_intr(_astd_waitchan, no_asts());
584+
585+ if (test_and_clear_bit(GDLMD_WAKE_ASTS, &_astd_wakeflags))
586+ process_asts();
587+
588+ if (test_and_clear_bit(GDLMD_WAKE_TIMER, &_astd_wakeflags)) {
589+ process_lockqueue();
590+ if (dlm_config.deadlocktime)
591+ process_deadlockqueue();
592+ }
593+ }
594+
595+ if (timer_pending(&_lockqueue_timer))
596+ del_timer(&_lockqueue_timer);
597+
598+ complete(&_astd_done);
599+
600+ return 0;
601+}
602+
603+void wake_astd(void)
604+{
605+ set_bit(GDLMD_WAKE_ASTS, &_astd_wakeflags);
606+ wake_up(&_astd_waitchan);
607+}
608+
609+int astd_start()
610+{
611+ init_completion(&_astd_done);
612+ atomic_set(&_astd_running, 1);
613+ _astd_pid = kernel_thread(dlm_astd, NULL, 0);
614+ wait_for_completion(&_astd_done);
615+ return 0;
616+}
617+
618+void astd_stop()
619+{
620+ atomic_set(&_astd_running, 0);
621+ wake_astd();
622+ wait_for_completion(&_astd_done);
623+}
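
A lifecycle sketch (hypothetical caller). Note that dlm_astd() completes
_astd_done twice: once after its queues are initialised, which releases
astd_start(), and once on exit, which releases astd_stop():

	astd_start();	/* returns only after dlm_astd() has initialised */
	/* ... lockspaces come and go; dlm_astd delivers their ASTs ... */
	astd_stop();	/* clears _astd_running, wakes the thread, waits */
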
624diff -urN linux-orig/cluster/dlm/ast.h linux-patched/cluster/dlm/ast.h
625--- linux-orig/cluster/dlm/ast.h 1970-01-01 07:30:00.000000000 +0730
10d56c87 626+++ linux-patched/cluster/dlm/ast.h 2004-07-13 18:57:22.000000000 +0800
5cdbd17b 627@@ -0,0 +1,28 @@
4bf12011 628+/******************************************************************************
629+*******************************************************************************
630+**
631+** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
632+** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
633+**
634+** This copyrighted material is made available to anyone wishing to use,
635+** modify, copy, or redistribute it subject to the terms and conditions
636+** of the GNU General Public License v.2.
637+**
638+*******************************************************************************
639+******************************************************************************/
640+
641+#ifndef __AST_DOT_H__
642+#define __AST_DOT_H__
643+
10d56c87 644+void lockqueue_lkb_mark(struct dlm_ls *ls);
645+int resend_cluster_requests(struct dlm_ls *ls);
646+void add_to_lockqueue(struct dlm_lkb *lkb);
647+void remove_from_lockqueue(struct dlm_lkb *lkb);
648+void add_to_deadlockqueue(struct dlm_lkb *lkb);
649+void remove_from_deadlockqueue(struct dlm_lkb *lkb);
650+void queue_ast(struct dlm_lkb *lkb, uint16_t astflags, uint8_t rqmode);
4bf12011 651+void wake_astd(void);
652+int astd_start(void);
653+void astd_stop(void);
654+
655+#endif /* __AST_DOT_H__ */
656diff -urN linux-orig/cluster/dlm/config.c linux-patched/cluster/dlm/config.c
657--- linux-orig/cluster/dlm/config.c 1970-01-01 07:30:00.000000000 +0730
10d56c87 658+++ linux-patched/cluster/dlm/config.c 2004-07-13 18:57:22.000000000 +0800
659@@ -0,0 +1,131 @@
4bf12011 660+/******************************************************************************
661+*******************************************************************************
662+**
663+** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
664+** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
665+**
666+** This copyrighted material is made available to anyone wishing to use,
667+** modify, copy, or redistribute it subject to the terms and conditions
668+** of the GNU General Public License v.2.
669+**
670+*******************************************************************************
671+******************************************************************************/
672+
673+#include <linux/module.h>
674+#include <linux/proc_fs.h>
675+
676+#include "dlm_internal.h"
677+#include "lowcomms.h"
678+#include "config.h"
679+
680+/* Config file defaults */
681+#define DEFAULT_TCP_PORT 21064
682+#define DEFAULT_LOCK_TIMEOUT 30
683+#define DEFAULT_BUFFER_SIZE 4096
10d56c87 684+#define DEFAULT_RSBTBL_SIZE 256
685+#define DEFAULT_LKBTBL_SIZE 1024
686+#define DEFAULT_DIRTBL_SIZE 512
4bf12011 687+#define DEFAULT_MAX_CONNECTIONS 128
688+#define DEFAULT_DEADLOCKTIME 10
689+
690+struct config_info dlm_config = {
691+ .tcp_port = DEFAULT_TCP_PORT,
692+ .lock_timeout = DEFAULT_LOCK_TIMEOUT,
693+ .buffer_size = DEFAULT_BUFFER_SIZE,
10d56c87 694+	.rsbtbl_size = DEFAULT_RSBTBL_SIZE,
695+ .lkbtbl_size = DEFAULT_LKBTBL_SIZE,
696+ .dirtbl_size = DEFAULT_DIRTBL_SIZE,
4bf12011 697+ .max_connections = DEFAULT_MAX_CONNECTIONS,
698+ .deadlocktime = DEFAULT_DEADLOCKTIME,
699+};
700+
701+
702+static struct config_proc_info {
703+ char *name;
704+ int *value;
705+} config_proc[] = {
706+ {
707+ .name = "tcp_port",
708+ .value = &dlm_config.tcp_port,
709+ },
710+ {
711+ .name = "lock_timeout",
712+ .value = &dlm_config.lock_timeout,
713+ },
714+ {
715+ .name = "buffer_size",
716+ .value = &dlm_config.buffer_size,
717+ },
718+ {
10d56c87 719+		.name = "rsbtbl_size",
720+ .value = &dlm_config.rsbtbl_size,
4bf12011 721+ },
722+ {
10d56c87 723+		.name = "lkbtbl_size",
724+ .value = &dlm_config.lkbtbl_size,
725+ },
726+ {
727+ .name = "dirtbl_size",
728+ .value = &dlm_config.dirtbl_size,
4bf12011 729+ },
730+ {
731+ .name = "max_connections",
732+ .value = &dlm_config.max_connections,
733+ },
734+ {
735+ .name = "deadlocktime",
736+ .value = &dlm_config.deadlocktime,
10d56c87 737+ }
4bf12011 738+};
739+static struct proc_dir_entry *dlm_dir;
740+
741+static int dlm_config_read_proc(char *page, char **start, off_t off, int count,
742+ int *eof, void *data)
743+{
744+ struct config_proc_info *cinfo = data;
745+ return snprintf(page, count, "%d\n", *cinfo->value);
746+}
747+
748+static int dlm_config_write_proc(struct file *file, const char *buffer,
749+ unsigned long count, void *data)
750+{
751+ struct config_proc_info *cinfo = data;
752+ int value;
753+ char *end;
754+
755+ value = simple_strtoul(buffer, &end, 10);
756+ if (*end)
757+ *cinfo->value = value;
758+ return count;
759+}
760+
761+int dlm_config_init(void)
762+{
763+ int i;
764+ struct proc_dir_entry *pde;
765+
766+ dlm_dir = proc_mkdir("cluster/config/dlm", 0);
767+ if (!dlm_dir)
768+ return -1;
769+
770+ dlm_dir->owner = THIS_MODULE;
771+
772+ for (i=0; i<sizeof(config_proc)/sizeof(struct config_proc_info); i++) {
773+ pde = create_proc_entry(config_proc[i].name, 0660, dlm_dir);
774+ if (pde) {
775+ pde->data = &config_proc[i];
776+ pde->write_proc = dlm_config_write_proc;
777+ pde->read_proc = dlm_config_read_proc;
778+ }
779+ }
780+ return 0;
781+}
782+
783+void dlm_config_exit(void)
784+{
785+ int i;
786+
787+ for (i=0; i<sizeof(config_proc)/sizeof(struct config_proc_info); i++)
788+ remove_proc_entry(config_proc[i].name, dlm_dir);
789+ remove_proc_entry("cluster/config/dlm", NULL);
790+}
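
As a usage sketch, each config_info field registered above surfaces as a file
under /proc/cluster/config/dlm/. A minimal user-space fragment, with an assumed
new value of 60 seconds:

	#include <fcntl.h>
	#include <unistd.h>

	static void set_dlm_lock_timeout(void)
	{
		int fd = open("/proc/cluster/config/dlm/lock_timeout", O_WRONLY);

		if (fd < 0)
			return;
		write(fd, "60\n", 3);	/* parsed by dlm_config_write_proc() */
		close(fd);
	}
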
791diff -urN linux-orig/cluster/dlm/config.h linux-patched/cluster/dlm/config.h
792--- linux-orig/cluster/dlm/config.h 1970-01-01 07:30:00.000000000 +0730
10d56c87 793+++ linux-patched/cluster/dlm/config.h 2004-07-13 18:57:22.000000000 +0800
794@@ -0,0 +1,32 @@
4bf12011 795+/******************************************************************************
796+*******************************************************************************
797+**
798+** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
799+** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
800+**
801+** This copyrighted material is made available to anyone wishing to use,
802+** modify, copy, or redistribute it subject to the terms and conditions
803+** of the GNU General Public License v.2.
804+**
805+*******************************************************************************
806+******************************************************************************/
807+
808+#ifndef __CONFIG_DOT_H__
809+#define __CONFIG_DOT_H__
810+
811+struct config_info {
812+ int tcp_port;
813+ int lock_timeout;
814+ int buffer_size;
10d56c87 815+	int rsbtbl_size;
816+ int lkbtbl_size;
817+ int dirtbl_size;
4bf12011 818+ int max_connections;
819+ int deadlocktime;
820+};
821+
822+extern struct config_info dlm_config;
823+extern int dlm_config_init(void);
824+extern void dlm_config_exit(void);
825+
826+#endif /* __CONFIG_DOT_H__ */
827diff -urN linux-orig/cluster/dlm/device.c linux-patched/cluster/dlm/device.c
828--- linux-orig/cluster/dlm/device.c 1970-01-01 07:30:00.000000000 +0730
10d56c87 829+++ linux-patched/cluster/dlm/device.c 2004-07-13 18:57:22.000000000 +0800
4bf12011 830@@ -0,0 +1,1020 @@
831+/******************************************************************************
832+*******************************************************************************
833+**
834+** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
835+** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
836+**
837+** This copyrighted material is made available to anyone wishing to use,
838+** modify, copy, or redistribute it subject to the terms and conditions
839+** of the GNU General Public License v.2.
840+**
841+*******************************************************************************
842+******************************************************************************/
843+
844+/*
845+ * device.c
846+ *
847+ * This is the userland interface to the DLM.
848+ *
849+ * The locking is done via a misc char device (find the
850+ * registered minor number in /proc/misc).
851+ *
852+ * User code should not use this interface directly but
853+ * call the library routines in libdlm.a instead.
854+ *
855+ */
856+
857+#include <linux/miscdevice.h>
858+#include <linux/init.h>
859+#include <linux/wait.h>
860+#include <linux/module.h>
861+#include <linux/file.h>
862+#include <linux/fs.h>
863+#include <linux/poll.h>
864+#include <linux/signal.h>
865+#include <linux/spinlock.h>
866+#include <asm/ioctls.h>
867+
868+#include "dlm_internal.h"
869+#include "device.h"
870+
10d56c87 871+extern struct dlm_lkb *dlm_get_lkb(struct dlm_ls *, int);
4bf12011 872+static struct file_operations _dlm_fops;
873+static const char *name_prefix="dlm";
874+static struct list_head user_ls_list;
875+
876+/* Flags in li_flags */
877+#define LI_FLAG_COMPLETE 1
878+#define LI_FLAG_FIRSTLOCK 2
879+
880+struct lock_info {
881+ uint8_t li_cmd;
882+ struct dlm_lksb li_lksb;
883+ wait_queue_head_t li_waitq;
884+ unsigned long li_flags;
885+ void __user *li_astparam;
886+ void __user *li_astaddr;
887+ void __user *li_bastaddr;
888+ struct file_info *li_file;
889+ struct dlm_lksb __user *li_user_lksb;
890+ struct semaphore li_firstlock;
891+ struct dlm_queryinfo *li_queryinfo;
892+ struct dlm_queryinfo __user *li_user_queryinfo;
893+};
894+
895+/* A queued AST no less */
896+struct ast_info {
897+ struct dlm_lock_result result;
898+ struct dlm_queryinfo *queryinfo;
899+ struct dlm_queryinfo __user *user_queryinfo;
900+ struct list_head list;
901+};
902+
903+/* One of these per userland lockspace */
904+struct user_ls {
905+ void *ls_lockspace;
906+ atomic_t ls_refcnt;
907+ long ls_flags; /* bit 1 means LS has been deleted */
908+
909+ /* Passed into misc_register() */
910+ struct miscdevice ls_miscinfo;
911+ struct list_head ls_list;
912+};
913+
914+/* misc_device info for the control device */
915+static struct miscdevice ctl_device;
916+
917+/*
918+ * Stuff we hang off the file struct.
919+ * The first two are to cope with unlocking all the
920+ * locks held by a process when it dies.
921+ */
922+struct file_info {
923+ struct list_head fi_lkb_list; /* List of active lkbs */
924+ spinlock_t fi_lkb_lock;
925+ struct list_head fi_ast_list; /* Queue of ASTs to be delivered */
926+ spinlock_t fi_ast_lock;
927+ wait_queue_head_t fi_wait;
928+ struct user_ls *fi_ls;
929+ atomic_t fi_refcnt; /* Number of users */
930+ unsigned long fi_flags; /* Bit 1 means the device is open */
931+};
932+
933+
934+/* get and put ops for file_info.
935+ Actually I don't really like "get" and "put", but everyone
936+ else seems to use them and I can't think of anything
937+ nicer at the moment */
938+static void get_file_info(struct file_info *f)
939+{
940+ atomic_inc(&f->fi_refcnt);
941+}
942+
943+static void put_file_info(struct file_info *f)
944+{
945+ if (atomic_dec_and_test(&f->fi_refcnt))
946+ kfree(f);
947+}
948+
949+/* Find a lockspace struct given the device minor number */
950+static struct user_ls *find_lockspace(int minor)
951+{
952+ struct user_ls *lsinfo;
953+
954+ list_for_each_entry(lsinfo, &user_ls_list, ls_list) {
955+
956+ if (lsinfo->ls_miscinfo.minor == minor)
957+ return lsinfo;
958+ }
959+ return NULL;
960+}
961+
962+static void add_lockspace_to_list(struct user_ls *lsinfo)
963+{
964+ list_add(&lsinfo->ls_list, &user_ls_list);
965+}
966+
967+/* Register a lockspace with the DLM and create a misc
968+ device for userland to access it */
969+static int register_lockspace(char *name, struct user_ls **ls)
970+{
971+ struct user_ls *newls;
972+ int status;
973+ int namelen;
974+
975+ namelen = strlen(name)+strlen(name_prefix)+2;
976+
977+ newls = kmalloc(sizeof(struct user_ls), GFP_KERNEL);
978+ if (!newls)
979+ return -ENOMEM;
980+ memset(newls, 0, sizeof(struct user_ls));
981+
982+ newls->ls_miscinfo.name = kmalloc(namelen, GFP_KERNEL);
983+ if (!newls->ls_miscinfo.name) {
984+ kfree(newls);
985+ return -ENOMEM;
986+ }
987+ snprintf((char*)newls->ls_miscinfo.name, namelen, "%s_%s", name_prefix, name);
988+
989+ status = dlm_new_lockspace((char *)newls->ls_miscinfo.name+strlen(name_prefix)+1,
990+ strlen(newls->ls_miscinfo.name) - strlen(name_prefix) - 1,
991+ &newls->ls_lockspace, 0);
992+
993+ if (status != 0) {
994+ kfree(newls->ls_miscinfo.name);
995+ kfree(newls);
996+ return status;
997+ }
998+
999+ newls->ls_miscinfo.fops = &_dlm_fops;
1000+ newls->ls_miscinfo.minor = MISC_DYNAMIC_MINOR;
1001+
1002+ status = misc_register(&newls->ls_miscinfo);
1003+ if (status) {
1004+ log_print("failed to register misc device for %s", name);
1005+ dlm_release_lockspace(newls->ls_lockspace, 0);
1006+ kfree(newls->ls_miscinfo.name);
1007+ kfree(newls);
1008+ return status;
1009+ }
1010+
1011+
1012+ add_lockspace_to_list(newls);
1013+ *ls = newls;
1014+ return 0;
1015+}
1016+
1017+static int unregister_lockspace(struct user_ls *lsinfo, int force)
1018+{
1019+ int status;
1020+
1021+ status = dlm_release_lockspace(lsinfo->ls_lockspace, force);
1022+ if (status)
1023+ return status;
1024+
1025+ status = misc_deregister(&lsinfo->ls_miscinfo);
1026+ if (status)
1027+ return status;
1028+
1029+ list_del(&lsinfo->ls_list);
1030+ kfree(lsinfo->ls_miscinfo.name);
1031+ kfree(lsinfo);
1032+
1033+ return 0;
1034+}
1035+
1036+/* Add it to userland's AST queue */
1037+static void add_to_astqueue(struct lock_info *li, void *astaddr)
1038+{
1039+ struct ast_info *ast = kmalloc(sizeof(struct ast_info), GFP_KERNEL);
1040+ if (!ast)
1041+ return;
1042+
1043+ ast->result.astparam = li->li_astparam;
1044+ ast->result.astaddr = astaddr;
1045+ ast->result.user_lksb = li->li_user_lksb;
1046+ ast->result.cmd = li->li_cmd;
1047+ memcpy(&ast->result.lksb, &li->li_lksb, sizeof(struct dlm_lksb));
1048+
1049+ /* These two will both be NULL for anything other than queries */
1050+ ast->queryinfo = li->li_queryinfo;
1051+ ast->user_queryinfo = li->li_user_queryinfo;
1052+
1053+ spin_lock(&li->li_file->fi_ast_lock);
1054+ list_add_tail(&ast->list, &li->li_file->fi_ast_list);
1055+ spin_unlock(&li->li_file->fi_ast_lock);
1056+ wake_up_interruptible(&li->li_file->fi_wait);
1057+}
1058+
1059+static void bast_routine(void *param, int mode)
1060+{
1061+ struct lock_info *li = param;
1062+
1063+ if (param) {
1064+ add_to_astqueue(li, li->li_bastaddr);
1065+ }
1066+}
1067+
1068+/*
1069+ * This is the kernel's AST routine.
1070+ * All lock, unlock & query operations complete here.
1071+ * The only synchronous ops are those done during device close.
1072+ */
1073+static void ast_routine(void *param)
1074+{
1075+ struct lock_info *li = param;
1076+
1077+ /* Param may be NULL if a persistent lock is unlocked by someone else */
1078+ if (!param)
1079+ return;
1080+
1081+ /* If it's an async request then post data to the user's AST queue. */
1082+ if (li->li_astaddr) {
1083+
1084+ /* Only queue AST if the device is still open */
1085+ if (test_bit(1, &li->li_file->fi_flags))
1086+ add_to_astqueue(li, li->li_astaddr);
1087+
1088+ /* If it's a new lock operation that failed, then
1089+ * remove it from the owner queue and free the
1090+ * lock_info. The DLM will not free the LKB until this
1091+ * AST has completed.
1092+ */
1093+ if (test_and_clear_bit(LI_FLAG_FIRSTLOCK, &li->li_flags) &&
1094+ li->li_lksb.sb_status != 0) {
10d56c87 1095+ struct dlm_lkb *lkb;
4bf12011 1096+
1097+ /* Wait till dlm_lock() has finished */
1098+ down(&li->li_firstlock);
1099+ lkb = dlm_get_lkb(li->li_file->fi_ls->ls_lockspace, li->li_lksb.sb_lkid);
1100+ if (lkb) {
1101+ spin_lock(&li->li_file->fi_lkb_lock);
1102+ list_del(&lkb->lkb_ownerqueue);
1103+ spin_unlock(&li->li_file->fi_lkb_lock);
1104+ }
1105+ up(&li->li_firstlock);
1106+ put_file_info(li->li_file);
1107+ kfree(li);
1108+ return;
1109+ }
1110+ /* Free unlocks & queries */
1111+ if (li->li_lksb.sb_status == -DLM_EUNLOCK ||
1112+ li->li_cmd == DLM_USER_QUERY) {
1113+ put_file_info(li->li_file);
1114+ kfree(li);
1115+ }
1116+ }
1117+ else {
1118+		/* Synchronous request, just wake up the caller */
1119+ set_bit(LI_FLAG_COMPLETE, &li->li_flags);
1120+ wake_up_interruptible(&li->li_waitq);
1121+ }
1122+}
1123+
1124+/*
1125+ * Wait for the lock op to complete and return the status.
1126+ */
1127+static int wait_for_ast(struct lock_info *li)
1128+{
1129+ /* Wait for the AST routine to complete */
1130+ set_task_state(current, TASK_INTERRUPTIBLE);
1131+ while (!test_bit(LI_FLAG_COMPLETE, &li->li_flags))
1132+ schedule();
1133+
1134+ set_task_state(current, TASK_RUNNING);
1135+
1136+ return li->li_lksb.sb_status;
1137+}
1138+
1139+
1140+/* Open on control device */
1141+static int dlm_ctl_open(struct inode *inode, struct file *file)
1142+{
1143+ return 0;
1144+}
1145+
1146+/* Close on control device */
1147+static int dlm_ctl_close(struct inode *inode, struct file *file)
1148+{
1149+ return 0;
1150+}
1151+
1152+/* Open on lockspace device */
1153+static int dlm_open(struct inode *inode, struct file *file)
1154+{
1155+ struct file_info *f;
1156+ struct user_ls *lsinfo;
1157+
1158+ lsinfo = find_lockspace(iminor(inode));
1159+ if (!lsinfo)
1160+ return -ENOENT;
1161+
1162+ f = kmalloc(sizeof(struct file_info), GFP_KERNEL);
1163+ if (!f)
1164+ return -ENOMEM;
1165+
1166+ atomic_inc(&lsinfo->ls_refcnt);
1167+ INIT_LIST_HEAD(&f->fi_lkb_list);
1168+ INIT_LIST_HEAD(&f->fi_ast_list);
1169+ spin_lock_init(&f->fi_ast_lock);
1170+ spin_lock_init(&f->fi_lkb_lock);
1171+ init_waitqueue_head(&f->fi_wait);
1172+ f->fi_ls = lsinfo;
1173+ atomic_set(&f->fi_refcnt, 1);
1174+ set_bit(1, &f->fi_flags);
1175+
1176+ file->private_data = f;
1177+
1178+ return 0;
1179+}
1180+
1181+/* Check the user's version matches ours */
1182+static int check_version(struct dlm_lock_params *params)
1183+{
1184+ if (params->version[0] != DLM_DEVICE_VERSION_MAJOR ||
1185+ (params->version[0] == DLM_DEVICE_VERSION_MAJOR &&
1186+ params->version[1] > DLM_DEVICE_VERSION_MINOR)) {
1187+
1188+ log_print("version mismatch user (%d.%d.%d) kernel (%d.%d.%d)",
1189+ params->version[0],
1190+ params->version[1],
1191+ params->version[2],
1192+ DLM_DEVICE_VERSION_MAJOR,
1193+ DLM_DEVICE_VERSION_MINOR,
1194+ DLM_DEVICE_VERSION_PATCH);
1195+ return -EINVAL;
1196+ }
1197+ return 0;
1198+}
1199+
1200+/* Close on lockspace device */
1201+static int dlm_close(struct inode *inode, struct file *file)
1202+{
1203+ struct file_info *f = file->private_data;
1204+ struct lock_info li;
1205+ sigset_t tmpsig;
1206+ sigset_t allsigs;
10d56c87 1207+ struct dlm_lkb *lkb, *safe;
4bf12011 1208+ struct user_ls *lsinfo;
1209+ DECLARE_WAITQUEUE(wq, current);
1210+
1211+ lsinfo = find_lockspace(iminor(inode));
1212+ if (!lsinfo)
1213+ return -ENOENT;
1214+
1215+ /* Mark this closed so that ASTs will not be delivered any more */
1216+ clear_bit(1, &f->fi_flags);
1217+
1218+ /* Block signals while we are doing this */
1219+ sigfillset(&allsigs);
1220+ sigprocmask(SIG_BLOCK, &allsigs, &tmpsig);
1221+
1222+ /* We use our own lock_info struct here, so that any
1223+ * outstanding "real" ASTs will be delivered with the
1224+ * corresponding "real" params, thus freeing the lock_info
1225+	 * that belongs to the lock. This catches the corner case where
1226+ * a lock is BUSY when we try to unlock it here
1227+ */
1228+ memset(&li, 0, sizeof(li));
1229+ clear_bit(LI_FLAG_COMPLETE, &li.li_flags);
1230+ init_waitqueue_head(&li.li_waitq);
1231+ add_wait_queue(&li.li_waitq, &wq);
1232+
1233+ /*
1234+	 * Free any outstanding locks; they are on the
1235+	 * list in LIFO order, so there should be no problems
1236+	 * with unlocking parents before children.
1237+ * Although we don't remove the lkbs from the list here
1238+ * (what would be the point?), foreach_safe is needed
1239+ * because the lkbs are freed during dlm_unlock operations
1240+ */
1241+ list_for_each_entry_safe(lkb, safe, &f->fi_lkb_list, lkb_ownerqueue) {
1242+ int status;
1243+ int lock_status;
1244+ int flags = 0;
1245+ struct lock_info *old_li;
1246+
1247+ /* Make a copy of this pointer. If all goes well we will
1248+		 * free it later. If not, it will be left to the AST routine
1249+ * to tidy up
1250+ */
1251+ old_li = (struct lock_info *)lkb->lkb_astparam;
1252+
1253+ /* Don't unlock persistent locks */
1254+ if (lkb->lkb_flags & GDLM_LKFLG_PERSISTENT) {
1255+ list_del(&lkb->lkb_ownerqueue);
1256+
1257+ /* But tidy our references in it */
1258+ kfree(old_li);
1259+ lkb->lkb_astparam = (long)NULL;
1260+ put_file_info(f);
1261+ continue;
1262+ }
1263+
1264+ clear_bit(LI_FLAG_COMPLETE, &li.li_flags);
1265+
1266+ /* If it's not granted then cancel the request.
1267+ * If the lock was WAITING then it will be dropped,
1268+ * if it was converting then it will be reverted to GRANTED,
1269+ * then we will unlock it.
1270+ */
1271+ lock_status = lkb->lkb_status;
1272+
1273+ if (lock_status != GDLM_LKSTS_GRANTED)
1274+ flags = DLM_LKF_CANCEL;
1275+
1276+ status = dlm_unlock(f->fi_ls->ls_lockspace, lkb->lkb_id, flags, &li.li_lksb, &li);
1277+
1278+ /* Must wait for it to complete as the next lock could be its
1279+ * parent */
1280+ if (status == 0)
1281+ wait_for_ast(&li);
1282+
1283+ /* If it was waiting for a conversion, it will
1284+ now be granted so we can unlock it properly */
1285+ if (lock_status == GDLM_LKSTS_CONVERT) {
1286+
1287+ clear_bit(LI_FLAG_COMPLETE, &li.li_flags);
1288+ status = dlm_unlock(f->fi_ls->ls_lockspace, lkb->lkb_id, 0, &li.li_lksb, &li);
1289+
1290+ if (status == 0)
1291+ wait_for_ast(&li);
1292+ }
1293+		/* Unlock succeeded, free the lock_info struct. */
1294+ if (status == 0) {
1295+ kfree(old_li);
1296+ put_file_info(f);
1297+ }
1298+ }
1299+
1300+ remove_wait_queue(&li.li_waitq, &wq);
1301+
1302+ /* If this is the last reference, and the lockspace has been deleted
1303+	   then free the struct */
1304+ if (atomic_dec_and_test(&lsinfo->ls_refcnt) && !lsinfo->ls_lockspace) {
1305+ kfree(lsinfo);
1306+ }
1307+
1308+ /* Restore signals */
1309+ sigprocmask(SIG_SETMASK, &tmpsig, NULL);
1310+ recalc_sigpending();
1311+
1312+ return 0;
1313+}
1314+
1315+/*
1316+ * ioctls to create/remove lockspaces, and check how many
1317+ * outstanding ASTs there are against a particular LS.
1318+ */
1319+static int dlm_ioctl(struct inode *inode, struct file *file,
1320+ uint command, ulong u)
1321+{
1322+ struct file_info *fi = file->private_data;
1323+ int status = -EINVAL;
1324+ int count;
1325+ struct list_head *tmp_list;
1326+
1327+ switch (command) {
1328+
1329+ /* Are there any ASTs for us to read?
1330+ * Warning, this returns the number of messages (ASTs)
1331+ * in the queue, NOT the number of bytes to read
1332+ */
1333+ case FIONREAD:
1334+ count = 0;
1335+ spin_lock(&fi->fi_ast_lock);
1336+ list_for_each(tmp_list, &fi->fi_ast_list)
1337+ count++;
1338+ spin_unlock(&fi->fi_ast_lock);
1339+ status = put_user(count, (int *)u);
1340+ break;
1341+
1342+ default:
1343+ return -ENOTTY;
1344+ }
1345+
1346+ return status;
1347+}
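
A caller-side sketch of the FIONREAD semantics flagged in the comment above;
the count is messages, not bytes (fd is an assumed open lockspace device):

	int pending = 0;

	if (ioctl(fd, FIONREAD, &pending) == 0)
		printf("%d ASTs waiting (message count, not bytes)\n", pending);
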
1348+
1349+/*
1350+ * ioctls to create/remove lockspaces.
1351+ */
1352+static int dlm_ctl_ioctl(struct inode *inode, struct file *file,
1353+ uint command, ulong u)
1354+{
1355+ int status = -EINVAL;
1356+ char ls_name[MAX_LS_NAME_LEN];
1357+ struct user_ls *lsinfo;
1358+ int force = 0;
1359+
1360+ switch (command) {
1361+ case DLM_CREATE_LOCKSPACE:
1362+ if (!capable(CAP_SYS_ADMIN))
1363+ return -EPERM;
1364+
1365+ if (strncpy_from_user(ls_name, (char*)u, MAX_LS_NAME_LEN) < 0)
1366+ return -EFAULT;
1367+ status = register_lockspace(ls_name, &lsinfo);
1368+
1369+ /* If it succeeded then return the minor number */
1370+ if (status == 0)
1371+ status = lsinfo->ls_miscinfo.minor;
1372+ break;
1373+
1374+ case DLM_FORCE_RELEASE_LOCKSPACE:
1375+ force = 2;
1376+
1377+ case DLM_RELEASE_LOCKSPACE:
1378+ if (!capable(CAP_SYS_ADMIN))
1379+ return -EPERM;
1380+
1381+ lsinfo = find_lockspace(u);
1382+ if (!lsinfo)
1383+ return -EINVAL;
1384+ status = unregister_lockspace(lsinfo, force);
1385+ break;
1386+
1387+ default:
1388+ return -ENOTTY;
1389+ }
1390+
1391+ return status;
1392+}
1393+
1394+/* Deal with the messy stuff of copying a web of structs
1395+ from kernel space to userspace */
1396+static int copy_query_result(struct ast_info *ast)
1397+{
1398+ int status = -EFAULT;
1399+ struct dlm_queryinfo qi;
1400+
1401+ /* Get the pointers to userspace structs */
1402+ if (copy_from_user(&qi, ast->user_queryinfo,
1403+ sizeof(struct dlm_queryinfo)))
1404+ goto copy_out;
1405+
1406+ /* TODO: does this deref a user pointer? */
1407+ if (put_user(ast->queryinfo->gqi_lockcount,
1408+ &ast->user_queryinfo->gqi_lockcount))
1409+ goto copy_out;
1410+
1411+ if (qi.gqi_resinfo) {
1412+ if (copy_to_user(qi.gqi_resinfo, ast->queryinfo->gqi_resinfo,
1413+ sizeof(struct dlm_resinfo)))
1414+ goto copy_out;
1415+ }
1416+
1417+ if (qi.gqi_lockinfo) {
1418+ if (copy_to_user(qi.gqi_lockinfo, ast->queryinfo->gqi_lockinfo,
1419+ sizeof(struct dlm_lockinfo) * ast->queryinfo->gqi_lockcount))
1420+ goto copy_out;
1421+ }
1422+
1423+ status = 0;
1424+
1425+ if (ast->queryinfo->gqi_lockinfo)
1426+ kfree(ast->queryinfo->gqi_lockinfo);
1427+
1428+ if (ast->queryinfo->gqi_resinfo)
1429+ kfree(ast->queryinfo->gqi_resinfo);
1430+
1431+ kfree(ast->queryinfo);
1432+
1433+ copy_out:
1434+ return status;
1435+}
1436+
1437+/* Read call; might block if no ASTs are waiting.
1438+ * It will only ever return one message at a time, regardless
1439+ * of how many are pending.
1440+ */
1441+static ssize_t dlm_read(struct file *file, char __user *buffer, size_t count, loff_t *ppos)
1442+{
1443+ struct file_info *fi = file->private_data;
1444+ struct ast_info *ast;
1445+ int ret;
1446+ DECLARE_WAITQUEUE(wait, current);
1447+
1448+ if (count < sizeof(struct dlm_lock_result))
1449+ return -EINVAL;
1450+
1451+ spin_lock(&fi->fi_ast_lock);
1452+ if (list_empty(&fi->fi_ast_list)) {
1453+
1454+ /* No waiting ASTs.
1455+		 * Return EOF if the lockspace has been deleted.
1456+ */
1457+		if (test_bit(1, &fi->fi_ls->ls_flags)) {
1458+			spin_unlock(&fi->fi_ast_lock); return 0; }
1459+
1460+ if (file->f_flags & O_NONBLOCK) {
1461+ spin_unlock(&fi->fi_ast_lock);
1462+ return -EAGAIN;
1463+ }
1464+
1465+ add_wait_queue(&fi->fi_wait, &wait);
1466+
1467+ repeat:
1468+ set_current_state(TASK_INTERRUPTIBLE);
1469+ if (list_empty(&fi->fi_ast_list) &&
1470+ !signal_pending(current)) {
1471+
1472+ spin_unlock(&fi->fi_ast_lock);
1473+ schedule();
1474+ spin_lock(&fi->fi_ast_lock);
1475+ goto repeat;
1476+ }
1477+
1478+ current->state = TASK_RUNNING;
1479+ remove_wait_queue(&fi->fi_wait, &wait);
1480+
1481+ if (signal_pending(current)) {
1482+ spin_unlock(&fi->fi_ast_lock);
1483+ return -ERESTARTSYS;
1484+ }
1485+ }
1486+
1487+ ast = list_entry(fi->fi_ast_list.next, struct ast_info, list);
1488+ list_del(&ast->list);
1489+ spin_unlock(&fi->fi_ast_lock);
1490+
1491+ ret = sizeof(struct dlm_lock_result);
1492+ if (copy_to_user(buffer, &ast->result, sizeof(struct dlm_lock_result)))
1493+ ret = -EFAULT;
1494+
1495+ /* If it was a query then copy the result block back here */
1496+ if (ast->queryinfo) {
1497+ int status = copy_query_result(ast);
1498+ if (status)
1499+ ret = status;
1500+ }
1501+
1502+ kfree(ast);
1503+ return ret;
1504+}
1505+
1506+static unsigned int dlm_poll(struct file *file, poll_table *wait)
1507+{
1508+ struct file_info *fi = file->private_data;
1509+
1510+ poll_wait(file, &fi->fi_wait, wait);
1511+
1512+ spin_lock(&fi->fi_ast_lock);
1513+ if (!list_empty(&fi->fi_ast_list)) {
1514+ spin_unlock(&fi->fi_ast_lock);
1515+ return POLLIN | POLLRDNORM;
1516+ }
1517+
1518+ spin_unlock(&fi->fi_ast_lock);
1519+ return 0;
1520+}
1521+
1522+static int do_user_query(struct file_info *fi, struct dlm_lock_params *kparams)
1523+{
1524+ struct lock_info *li;
1525+ int status;
1526+
1527+ li = kmalloc(sizeof(struct lock_info), GFP_KERNEL);
1528+ if (!li)
1529+ return -ENOMEM;
1530+
1531+ get_file_info(fi);
1532+ li->li_user_lksb = kparams->lksb;
1533+ li->li_astparam = kparams->astparam;
1534+ li->li_bastaddr = kparams->bastaddr;
1535+ li->li_astaddr = kparams->astaddr;
1536+ li->li_file = fi;
1537+ li->li_flags = 0;
1538+ li->li_cmd = kparams->cmd;
1539+ clear_bit(LI_FLAG_FIRSTLOCK, &li->li_flags);
1540+
1541+ if (copy_from_user(&li->li_lksb, kparams->lksb,
1542+ sizeof(struct dlm_lksb))) {
1543+ kfree(li);
1544+ return -EFAULT;
1545+ }
1546+ li->li_user_queryinfo = (struct dlm_queryinfo *)li->li_lksb.sb_lvbptr;
1547+
1548+ /* Allocate query structs */
1549+ status = -ENOMEM;
1550+ li->li_queryinfo = kmalloc(sizeof(struct dlm_queryinfo), GFP_KERNEL);
1551+ if (!li->li_queryinfo)
1552+ goto out1;
1553+
1554+ /* Mainly to get gqi_lock buffer size */
1555+ if (copy_from_user(li->li_queryinfo, li->li_lksb.sb_lvbptr,
1556+ sizeof(struct dlm_queryinfo))) {
1557+ status = -EFAULT;
1558+ goto out1;
1559+ }
1560+
1561+ /* Overwrite userspace pointers we just copied with kernel space ones */
1562+ if (li->li_queryinfo->gqi_resinfo) {
1563+ li->li_queryinfo->gqi_resinfo = kmalloc(sizeof(struct dlm_resinfo), GFP_KERNEL);
1564+ if (!li->li_queryinfo->gqi_resinfo)
1565+ goto out1;
1566+ }
1567+ if (li->li_queryinfo->gqi_lockinfo) {
1568+ li->li_queryinfo->gqi_lockinfo =
1569+ kmalloc(sizeof(struct dlm_lockinfo) * li->li_queryinfo->gqi_locksize,
1570+ GFP_KERNEL);
1571+ if (!li->li_queryinfo->gqi_lockinfo)
1572+ goto out2;
1573+ }
1574+
1575+ li->li_lksb.sb_lvbptr = (char *)li->li_queryinfo;
1576+
1577+ return dlm_query(fi->fi_ls->ls_lockspace, &li->li_lksb,
1578+ kparams->flags, /* query */
1579+ li->li_queryinfo,
1580+ ast_routine, li);
1581+
1582+ out2:
1583+ kfree(li->li_queryinfo);
1584+
1585+ out1:
1586+ kfree(li);
1587+ return status;
1588+}
1589+
1590+static int do_user_lock(struct file_info *fi, struct dlm_lock_params *kparams,
1591+ const char *buffer)
1592+{
1593+ struct lock_info *li;
1594+ int status;
1595+ char name[DLM_RESNAME_MAXLEN];
1596+
1597+ /*
1598+ * Validate things that we need to have correct.
1599+ */
1600+ if (kparams->namelen > DLM_RESNAME_MAXLEN)
1601+ return -EINVAL;
1602+
1603+ if (!kparams->astaddr)
1604+ return -EINVAL;
1605+
1606+ if (!kparams->lksb)
1607+ return -EINVAL;
1608+
1609+ /* Get the lock name */
1610+ if (copy_from_user(name, buffer + offsetof(struct dlm_lock_params, name),
1611+ kparams->namelen)) {
1612+ return -EFAULT;
1613+ }
1614+
1615+ /* For conversions, the lock will already have a lock_info
1616+	   block squirrelled away in astparam */
1617+ if (kparams->flags & DLM_LKF_CONVERT) {
10d56c87 1618+ struct dlm_lkb *lkb = dlm_get_lkb(fi->fi_ls->ls_lockspace, kparams->lkid);
4bf12011 1619+ if (!lkb) {
1620+ return -EINVAL;
1621+ }
1622+ li = (struct lock_info *)lkb->lkb_astparam;
1623+
1624+ /* Only override these if they are provided */
1625+ if (li->li_user_lksb)
1626+ li->li_user_lksb = kparams->lksb;
1627+ if (li->li_astparam)
1628+ li->li_astparam = kparams->astparam;
1629+ if (li->li_bastaddr)
1630+ li->li_bastaddr = kparams->bastaddr;
1631+		if (li->li_astaddr)
1632+ li->li_astaddr = kparams->astaddr;
1633+ li->li_flags = 0;
1634+ }
1635+ else {
1636+ li = kmalloc(sizeof(struct lock_info), GFP_KERNEL);
1637+ if (!li)
1638+ return -ENOMEM;
1639+
1640+ li->li_user_lksb = kparams->lksb;
1641+ li->li_astparam = kparams->astparam;
1642+ li->li_bastaddr = kparams->bastaddr;
1643+ li->li_astaddr = kparams->astaddr;
1644+ li->li_file = fi;
1645+ li->li_flags = 0;
1646+ li->li_cmd = kparams->cmd;
1647+ li->li_queryinfo = NULL;
1648+
1649+ /* semaphore to allow us to complete our work before
1650+ the AST routine runs. In fact we only need (and use) this
1651+ when the initial lock fails */
1652+ init_MUTEX_LOCKED(&li->li_firstlock);
1653+ set_bit(LI_FLAG_FIRSTLOCK, &li->li_flags);
1654+
1655+ get_file_info(fi);
1656+ }
1657+
1658+ /* Copy the user's LKSB into kernel space,
1659+ needed for conversions & value block operations */
1660+ if (kparams->lksb && copy_from_user(&li->li_lksb, kparams->lksb,
1661+ sizeof(struct dlm_lksb)))
1662+ return -EFAULT;
1663+
1664+ /* Lock it ... */
1665+ status = dlm_lock(fi->fi_ls->ls_lockspace, kparams->mode, &li->li_lksb,
1666+ kparams->flags, name, kparams->namelen,
1667+ kparams->parent,
1668+ ast_routine,
1669+ li,
1670+ li->li_bastaddr ? bast_routine : NULL,
1671+ kparams->range.ra_end ? &kparams->range : NULL);
1672+
1673+ /* If it succeeded (this far) with a new lock then keep track of
1674+ it on the file's lkb list */
1675+ if (!status && !(kparams->flags & DLM_LKF_CONVERT)) {
10d56c87 1676+ struct dlm_lkb *lkb;
4bf12011 1677+ lkb = dlm_get_lkb(fi->fi_ls->ls_lockspace, li->li_lksb.sb_lkid);
1678+
1679+ if (lkb) {
1680+ spin_lock(&fi->fi_lkb_lock);
1681+ list_add(&lkb->lkb_ownerqueue,
1682+ &fi->fi_lkb_list);
1683+ spin_unlock(&fi->fi_lkb_lock);
1684+ }
1685+ else {
1686+ log_print("failed to get lkb for new lock");
1687+ }
1688+ up(&li->li_firstlock);
1689+ }
1690+
1691+ return status;
1692+}
1693+
1694+static int do_user_unlock(struct file_info *fi, struct dlm_lock_params *kparams)
1695+{
1696+ struct lock_info *li;
10d56c87 1697+ struct dlm_lkb *lkb;
4bf12011 1698+ int status;
1699+
1700+ lkb = dlm_get_lkb(fi->fi_ls->ls_lockspace, kparams->lkid);
1701+ if (!lkb) {
1702+ return -EINVAL;
1703+ }
1704+
1705+ li = (struct lock_info *)lkb->lkb_astparam;
1706+
1707+ li->li_user_lksb = kparams->lksb;
1708+ li->li_astparam = kparams->astparam;
1709+ li->li_cmd = kparams->cmd;
1710+
1711+	/* Have to do it here because the lkb may not exist after
1712+ * dlm_unlock() */
1713+ spin_lock(&fi->fi_lkb_lock);
1714+ list_del(&lkb->lkb_ownerqueue);
1715+ spin_unlock(&fi->fi_lkb_lock);
1716+
1717+ /* Use existing lksb & astparams */
1718+ status = dlm_unlock(fi->fi_ls->ls_lockspace,
1719+ kparams->lkid,
1720+ kparams->flags, NULL, NULL);
1721+
1722+ return status;
1723+}
1724+
1725+/* Write call: submit a locking request */
1726+static ssize_t dlm_write(struct file *file, const char __user *buffer,
1727+ size_t count, loff_t *ppos)
1728+{
1729+ struct file_info *fi = file->private_data;
1730+ struct dlm_lock_params kparams;
1731+ sigset_t tmpsig;
1732+ sigset_t allsigs;
1733+ int status;
1734+
1735+ if (count < sizeof(kparams))
1736+ return -EINVAL;
1737+
1738+	/* Has the lockspace been deleted? */
1739+ if (test_bit(1, &fi->fi_ls->ls_flags))
1740+ return -ENOENT;
1741+
1742+ /* Get the command info */
1743+ if (copy_from_user(&kparams, buffer, sizeof(kparams)))
1744+ return -EFAULT;
1745+
1746+ if (check_version(&kparams))
1747+ return -EINVAL;
1748+
1749+ /* Block signals while we are doing this */
1750+ sigfillset(&allsigs);
1751+ sigprocmask(SIG_BLOCK, &allsigs, &tmpsig);
1752+
1753+ switch (kparams.cmd)
1754+ {
1755+ case DLM_USER_LOCK:
1756+ status = do_user_lock(fi, &kparams, buffer);
1757+ break;
1758+
1759+ case DLM_USER_UNLOCK:
1760+ status = do_user_unlock(fi, &kparams);
1761+ break;
1762+
1763+ case DLM_USER_QUERY:
1764+ status = do_user_query(fi, &kparams);
1765+ break;
1766+
1767+ default:
1768+ status = -EINVAL;
1769+ break;
1770+ }
1771+ /* Restore signals */
1772+ sigprocmask(SIG_SETMASK, &tmpsig, NULL);
1773+ recalc_sigpending();
1774+
1775+ if (status == 0)
1776+ return count;
1777+ else
1778+ return status;
1779+}
1780+
1781+void dlm_device_free_devices()
1782+{
1783+ struct user_ls *tmp;
1784+ struct user_ls *lsinfo;
1785+
1786+ list_for_each_entry_safe(lsinfo, tmp, &user_ls_list, ls_list) {
1787+ misc_deregister(&lsinfo->ls_miscinfo);
1788+
1789+ /* Tidy up, but don't delete the lsinfo struct until
1790+ all the users have closed their devices */
1791+ list_del(&lsinfo->ls_list);
1792+ kfree(lsinfo->ls_miscinfo.name);
1793+ set_bit(1, &lsinfo->ls_flags); /* LS has been deleted */
1794+ }
1795+}
1796+
1797+static struct file_operations _dlm_fops = {
1798+ .open = dlm_open,
1799+ .release = dlm_close,
1800+ .ioctl = dlm_ioctl,
1801+ .read = dlm_read,
1802+ .write = dlm_write,
1803+ .poll = dlm_poll,
1804+ .owner = THIS_MODULE,
1805+};
1806+
1807+static struct file_operations _dlm_ctl_fops = {
1808+ .open = dlm_ctl_open,
1809+ .release = dlm_ctl_close,
1810+ .ioctl = dlm_ctl_ioctl,
1811+ .owner = THIS_MODULE,
1812+};
1813+
1814+/*
1815+ * Create control device
1816+ */
1817+int dlm_device_init(void)
1818+{
1819+ int r;
1820+
1821+ INIT_LIST_HEAD(&user_ls_list);
1822+
1823+ ctl_device.name = "dlm-control";
1824+ ctl_device.fops = &_dlm_ctl_fops;
1825+ ctl_device.minor = MISC_DYNAMIC_MINOR;
1826+
1827+ r = misc_register(&ctl_device);
1828+ if (r) {
1829+ log_print("misc_register failed for DLM control device");
1830+ return r;
1831+ }
1832+
1833+ return 0;
1834+}
1835+
1836+void dlm_device_exit(void)
1837+{
1838+ misc_deregister(&ctl_device);
1839+}
1840+
1841+/*
1842+ * Overrides for Emacs so that we follow Linus's tabbing style.
1843+ * Emacs will notice this stuff at the end of the file and automatically
1844+ * adjust the settings for this buffer only. This must remain at the end
1845+ * of the file.
1846+ * ---------------------------------------------------------------------------
1847+ * Local variables:
1848+ * c-file-style: "linux"
1849+ * End:
1850+ */
1851diff -urN linux-orig/cluster/dlm/device.h linux-patched/cluster/dlm/device.h
1852--- linux-orig/cluster/dlm/device.h 1970-01-01 07:30:00.000000000 +0730
10d56c87 1853+++ linux-patched/cluster/dlm/device.h 2004-07-13 18:57:22.000000000 +0800
4bf12011 1854@@ -0,0 +1,19 @@
1855+/******************************************************************************
1856+*******************************************************************************
1857+**
1858+** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
1859+** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
1860+**
1861+** This copyrighted material is made available to anyone wishing to use,
1862+** modify, copy, or redistribute it subject to the terms and conditions
1863+** of the GNU General Public License v.2.
1864+**
1865+*******************************************************************************
1866+******************************************************************************/
1867+
1868+#ifndef __DEVICE_DOT_H__
1869+#define __DEVICE_DOT_H__
1870+
1871+extern void dlm_device_free_devices(void);
1872+
1873+#endif /* __DEVICE_DOT_H__ */
1874diff -urN linux-orig/cluster/dlm/dir.c linux-patched/cluster/dlm/dir.c
1875--- linux-orig/cluster/dlm/dir.c 1970-01-01 07:30:00.000000000 +0730
1876+++ linux-patched/cluster/dlm/dir.c 2004-07-13 18:57:22.000000000 +0800
1877@@ -0,0 +1,427 @@
4bf12011 1878+/******************************************************************************
1879+*******************************************************************************
1880+**
1881+** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
1882+** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
1883+**
1884+** This copyrighted material is made available to anyone wishing to use,
1885+** modify, copy, or redistribute it subject to the terms and conditions
1886+** of the GNU General Public License v.2.
1887+**
1888+*******************************************************************************
1889+******************************************************************************/
1890+
1891+#include "dlm_internal.h"
1892+#include "nodes.h"
1893+#include "lockspace.h"
1894+#include "lowcomms.h"
1895+#include "reccomms.h"
1896+#include "rsb.h"
1897+#include "config.h"
1898+#include "memory.h"
1899+#include "recover.h"
1900+#include "util.h"
1901+
1902+struct resmov {
1903+ uint32_t rm_nodeid;
1904+ uint16_t rm_length;
1905+ uint16_t rm_pad;
1906+};
1907+
1908+
4bf12011 1909+/*
1910+ * We use the upper 16 bits of the hash value to select the directory node.
1911+ * Low bits are used for distribution of rsb's among hash buckets on each node.
1912+ *
1913+ * From the hash value, we are interested in arriving at a final value between
1914+ * zero and the number of nodes minus one (num_nodes - 1).
1915+ *
1916+ * To accomplish this scaling, we take the nearest power of two larger than
1917+ * num_nodes and subtract one to create a bit mask. The mask is applied to the
1918+ * hash, reducing the range to nearer the final range.
1919+ *
1920+ * To give the exact range wanted (0 to num_nodes-1), we apply a modulus of
1921+ * num_nodes to the previously masked hash value.
1922+ *
1923+ * This value in the desired range is used as an offset into the sorted list of
1924+ * nodeid's to give the particular nodeid of the directory node.
1925+ */
1926+
10d56c87 1927+uint32_t name_to_directory_nodeid(struct dlm_ls *ls, char *name, int length)
4bf12011 1928+{
1929+ struct list_head *tmp;
10d56c87 1930+ struct dlm_csb *csb = NULL;
4bf12011 1931+ uint32_t hash, node, n = 0, nodeid;
1932+
1933+ if (ls->ls_num_nodes == 1) {
1934+ nodeid = our_nodeid();
1935+ goto out;
1936+ }
1937+
10d56c87 1938+ hash = dlm_hash(name, length);
4bf12011 1939+ node = (hash >> 16) & ls->ls_nodes_mask;
1940+ node %= ls->ls_num_nodes;
1941+
1942+ list_for_each(tmp, &ls->ls_nodes) {
1943+ if (n++ != node)
1944+ continue;
10d56c87 1945+ csb = list_entry(tmp, struct dlm_csb, list);
4bf12011 1946+ break;
1947+ }
1948+
10d56c87 1949+ DLM_ASSERT(csb, printk("num_nodes=%u n=%u node=%u mask=%x\n",
4bf12011 1950+ ls->ls_num_nodes, n, node, ls->ls_nodes_mask););
10d56c87 1951+ nodeid = csb->node->nodeid;
4bf12011 1952+
1953+ out:
1954+ return nodeid;
1955+}
1956+
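A minimal standalone sketch of the scaling described above, assuming nothing
from the kernel: nodeids[] stands in for the sorted ls_nodes list and mask
for ls_nodes_mask (nearest power of two >= num_nodes, minus one).

    #include <stdint.h>
    #include <stdio.h>

    /* Scale a 32-bit hash to an offset 0..num_nodes-1, as above. */
    static uint32_t pick_dir_nodeid(uint32_t hash, const uint32_t *nodeids,
                                    uint32_t num_nodes, uint32_t mask)
    {
            uint32_t node = (hash >> 16) & mask;    /* upper 16 bits, masked */
            node %= num_nodes;                      /* exact final range */
            return nodeids[node];                   /* sorted nodeid list */
    }

    int main(void)
    {
            uint32_t nodeids[5] = { 2, 3, 5, 8, 13 };  /* sorted nodeids */
            /* five nodes: nearest power of two is 8, so the mask is 7 */
            printf("dir nodeid: %u\n",
                   pick_dir_nodeid(0xabcd1234, nodeids, 5, 7));
            return 0;
    }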
10d56c87 1957+uint32_t get_directory_nodeid(struct dlm_rsb *rsb)
4bf12011 1958+{
1959+ return name_to_directory_nodeid(rsb->res_ls, rsb->res_name,
1960+ rsb->res_length);
1961+}
1962+
10d56c87 1963+static inline uint32_t dir_hash(struct dlm_ls *ls, char *name, int len)
4bf12011 1964+{
1965+ uint32_t val;
1966+
1967+ val = dlm_hash(name, len);
1968+ val &= (ls->ls_dirtbl_size - 1);
4bf12011 1969+
1970+ return val;
1971+}
1972+
10d56c87 1973+static void add_resdata_to_hash(struct dlm_ls *ls, struct dlm_direntry *de)
4bf12011 1974+{
10d56c87 1975+ uint32_t bucket;
4bf12011 1976+
1977+ bucket = dir_hash(ls, de->name, de->length);
1978+ list_add_tail(&de->list, &ls->ls_dirtbl[bucket].list);
4bf12011 1979+}
1980+
1981+static struct dlm_direntry *search_bucket(struct dlm_ls *ls, char *name,
1982+ int namelen, uint32_t bucket)
4bf12011 1983+{
10d56c87 1984+ struct dlm_direntry *de;
4bf12011 1985+
1986+ list_for_each_entry(de, &ls->ls_dirtbl[bucket].list, list) {
1987+ if (de->length == namelen && !memcmp(name, de->name, namelen))
4bf12011 1988+ goto out;
1989+ }
10d56c87 1990+ de = NULL;
4bf12011 1991+ out:
10d56c87 1992+ return de;
4bf12011 1993+}
1994+
10d56c87 1995+void remove_resdata(struct dlm_ls *ls, uint32_t nodeid, char *name, int namelen)
4bf12011 1996+{
10d56c87 1997+ struct dlm_direntry *de;
4bf12011 1998+ uint32_t bucket;
1999+
10d56c87 2000+ bucket = dir_hash(ls, name, namelen);
4bf12011 2001+
10d56c87 2002+ write_lock(&ls->ls_dirtbl[bucket].lock);
4bf12011 2003+
10d56c87 2004+ de = search_bucket(ls, name, namelen, bucket);
4bf12011 2005+
2006+ if (!de) {
2007+ log_debug(ls, "remove from %u none", nodeid);
4bf12011 2008+ goto out;
2009+ }
2010+
2011+ if (de->master_nodeid != nodeid) {
2012+ log_debug(ls, "remove from %u ID %u",
2013+ nodeid, de->master_nodeid);
4bf12011 2014+ goto out;
2015+ }
2016+
2017+ list_del(&de->list);
2018+ free_resdata(de);
2019+ out:
2020+ write_unlock(&ls->ls_dirtbl[bucket].lock);
4bf12011 2021+}
2022+
10d56c87 2023+void dlm_dir_clear(struct dlm_ls *ls)
4bf12011 2024+{
2025+ struct list_head *head;
10d56c87 2026+ struct dlm_direntry *de;
4bf12011 2027+ int i;
2028+
2029+ for (i = 0; i < ls->ls_dirtbl_size; i++) {
2030+ head = &ls->ls_dirtbl[i].list;
4bf12011 2031+ while (!list_empty(head)) {
2032+ de = list_entry(head->next, struct dlm_direntry, list);
2033+ list_del(&de->list);
2034+ free_resdata(de);
4bf12011 2035+ }
2036+ }
2037+}
2038+
10d56c87 2039+static void resmov_in(struct resmov *rm, char *buf)
4bf12011 2040+{
10d56c87 2041+ struct resmov tmp;
4bf12011 2042+
10d56c87 2043+ memcpy(&tmp, buf, sizeof(struct resmov));
4bf12011 2044+
2045+ rm->rm_nodeid = be32_to_cpu(tmp.rm_nodeid);
2046+ rm->rm_length = be16_to_cpu(tmp.rm_length);
2047+}
2048+
10d56c87 2049+int dlm_dir_rebuild_local(struct dlm_ls *ls)
4bf12011 2050+{
2051+ struct dlm_csb *csb;
2052+ struct dlm_direntry *de;
2053+ struct dlm_rcom *rc;
2054+ struct resmov mov, last_mov;
4bf12011 2055+ char *b, *last_name;
2056+ int error = -ENOMEM, count = 0;
2057+
2058+ log_all(ls, "rebuild resource directory");
2059+
10d56c87 2060+ dlm_dir_clear(ls);
4bf12011 2061+
2062+ rc = allocate_rcom_buffer(ls);
2063+ if (!rc)
2064+ goto out;
2065+
2066+ last_name = (char *) kmalloc(DLM_RESNAME_MAXLEN, GFP_KERNEL);
2067+ if (!last_name)
2068+ goto free_rc;
2069+
10d56c87 2070+ list_for_each_entry(csb, &ls->ls_nodes, list) {
4bf12011 2071+ last_mov.rm_length = 0;
2072+ for (;;) {
10d56c87 2073+ error = dlm_recovery_stopped(ls);
4bf12011 2074+ if (error)
2075+ goto free_last;
2076+
2077+ memcpy(rc->rc_buf, last_name, last_mov.rm_length);
2078+ rc->rc_datalen = last_mov.rm_length;
2079+
10d56c87 2080+ error = rcom_send_message(ls, csb->node->nodeid,
4bf12011 2081+ RECCOMM_RECOVERNAMES, rc, 1);
2082+ if (error)
2083+ goto free_last;
2084+
2085+ schedule();
2086+
2087+ /*
2088+ * pick each res out of buffer
2089+ */
2090+
2091+ b = rc->rc_buf;
2092+
2093+ for (;;) {
2094+ resmov_in(&mov, b);
2095+ b += sizeof(struct resmov);
4bf12011 2096+
2097+ /* Length of 0 with a non-zero nodeid marks the
2098+ * end of the list */
2099+ if (!mov.rm_length && mov.rm_nodeid)
2100+ goto done;
2101+
2102+ /* This is just the end of the block */
2103+ if (!mov.rm_length)
2104+ break;
2105+
2106+ error = -ENOMEM;
2107+ de = allocate_resdata(ls, mov.rm_length);
2108+ if (!de)
4bf12011 2109+ goto free_last;
2110+
2111+ de->master_nodeid = mov.rm_nodeid;
2112+ de->length = mov.rm_length;
4bf12011 2113+
10d56c87 2114+ memcpy(de->name, b, mov.rm_length);
4bf12011 2115+ b += mov.rm_length;
2116+
10d56c87 2117+ add_resdata_to_hash(ls, de);
4bf12011 2118+ count++;
2119+
2120+ last_mov = mov;
2121+ memset(last_name, 0, DLM_RESNAME_MAXLEN);
10d56c87 2122+ memcpy(last_name, de->name, de->length);
4bf12011 2123+ }
2124+ }
2125+ done:
2126+ ;
2127+ }
2128+
2129+ set_bit(LSFL_RESDIR_VALID, &ls->ls_flags);
2130+ error = 0;
2131+
2132+ log_all(ls, "rebuilt %d resources", count);
2133+
2134+ free_last:
2135+ kfree(last_name);
2136+
2137+ free_rc:
2138+ free_rcom_buffer(rc);
2139+
2140+ out:
2141+ return error;
2142+}
2143+
2144+/*
10d56c87 2145+ * The reply end of dlm_dir_rebuild_local/RECOVERNAMES. Collect and send as
4bf12011 2146+ * many resource names as can fit in the buffer.
2147+ */
2148+
2149+int dlm_dir_rebuild_send(struct dlm_ls *ls, char *inbuf, int inlen,
2150+ char *outbuf, int outlen, uint32_t nodeid)
4bf12011 2151+{
2152+ struct list_head *list;
10d56c87 2153+ struct dlm_rsb *start_rsb = NULL, *rsb;
4bf12011 2154+ int offset = 0, start_namelen, error;
2155+ char *start_name;
10d56c87 2156+ struct resmov tmp;
4bf12011 2157+ uint32_t dir_nodeid;
2158+
2159+ /*
2160+ * Find the rsb where we left off (or start again)
2161+ */
2162+
2163+ start_namelen = inlen;
2164+ start_name = inbuf;
2165+
2166+ if (start_namelen > 1) {
2167+ error = find_or_create_rsb(ls, NULL, start_name,
2168+ start_namelen, 0, &start_rsb);
10d56c87 2169+ DLM_ASSERT(!error && start_rsb, printk("error %d\n", error););
4bf12011 2170+ release_rsb(start_rsb);
2171+ }
2172+
2173+ /*
2174+ * Send rsb names for rsb's we're master of and whose directory node
2175+ * matches the requesting node.
2176+ */
2177+
2178+ down_read(&ls->ls_rec_rsblist);
2179+ if (start_rsb)
2180+ list = start_rsb->res_rootlist.next;
2181+ else
2182+ list = ls->ls_rootres.next;
2183+
2184+ for (offset = 0; list != &ls->ls_rootres; list = list->next) {
10d56c87 2185+ rsb = list_entry(list, struct dlm_rsb, res_rootlist);
4bf12011 2186+ if (rsb->res_nodeid)
2187+ continue;
2188+
2189+ dir_nodeid = get_directory_nodeid(rsb);
2190+ if (dir_nodeid != nodeid)
2191+ continue;
2192+
10d56c87 2193+ if (offset + sizeof(struct resmov)*2 + rsb->res_length > outlen) {
4bf12011 2194+ /* Write end-of-block record */
2195+ memset(&tmp, 0, sizeof(struct resmov));
2196+ memcpy(outbuf + offset, &tmp, sizeof(struct resmov));
2197+ offset += sizeof(struct resmov);
4bf12011 2198+ goto out;
2199+ }
2200+
10d56c87 2201+ memset(&tmp, 0, sizeof(struct resmov));
4bf12011 2202+ tmp.rm_nodeid = cpu_to_be32(our_nodeid());
2203+ tmp.rm_length = cpu_to_be16(rsb->res_length);
2204+
2205+ memcpy(outbuf + offset, &tmp, sizeof(struct resmov));
2206+ offset += sizeof(struct resmov);
4bf12011 2207+
2208+ memcpy(outbuf + offset, rsb->res_name, rsb->res_length);
2209+ offset += rsb->res_length;
2210+ }
2211+
2212+ /*
2213+ * If we've reached the end of the list (and there's room) write a
2214+ * terminating record.
2215+ */
2216+
2217+ if ((list == &ls->ls_rootres) &&
10d56c87 2218+ (offset + sizeof(struct resmov) <= outlen)) {
4bf12011 2219+
10d56c87 2220+ memset(&tmp, 0, sizeof(struct resmov));
4bf12011 2221+ /* This only needs to be non-zero */
2222+ tmp.rm_nodeid = cpu_to_be32(1);
2223+ /* and this must be zero */
2224+ tmp.rm_length = 0;
2225+ memcpy(outbuf + offset, &tmp, sizeof(struct resmov));
2226+ offset += sizeof(struct resmov);
4bf12011 2227+ }
2228+
2229+ out:
2230+ up_read(&ls->ls_rec_rsblist);
2231+ return offset;
2232+}
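The sentinel records in this exchange are easy to miss. A receiver-side
sketch of one buffer walk, mirroring the inner loop of
dlm_dir_rebuild_local() above (struct resmov and the sentinel rules are
taken from the code above; everything else is illustrative):

    /* Returns 1 at the end-of-list record, 0 at an end-of-block record. */
    static int walk_names_buffer(char *buf)
    {
            struct resmov mov;

            for (;;) {
                    memcpy(&mov, buf, sizeof(struct resmov));
                    mov.rm_nodeid = be32_to_cpu(mov.rm_nodeid);
                    mov.rm_length = be16_to_cpu(mov.rm_length);
                    buf += sizeof(struct resmov);

                    if (!mov.rm_length && mov.rm_nodeid)
                            return 1;  /* zero length, nonzero nodeid: done */
                    if (!mov.rm_length)
                            return 0;  /* all zeros: request the next block */

                    /* rm_length bytes of resource name follow the header */
                    buf += mov.rm_length;
            }
    }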
2233+
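+/* Look up the directory entry for a resource name, creating one mastered
+ * by 'nodeid' if none exists. The bucket lock is dropped around the
+ * allocation, so the bucket is searched a second time before inserting in
+ * case another thread added the entry in the meantime. */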
2234+static int get_resdata(struct dlm_ls *ls, uint32_t nodeid, char *name,
2235+ int namelen, uint32_t *r_nodeid, int recovery)
4bf12011 2236+{
10d56c87 2237+ struct dlm_direntry *de, *tmp;
4bf12011 2238+ uint32_t bucket;
2239+
10d56c87 2240+ bucket = dir_hash(ls, name, namelen);
4bf12011 2241+
2242+ write_lock(&ls->ls_dirtbl[bucket].lock);
2243+ de = search_bucket(ls, name, namelen, bucket);
2244+ if (de) {
2245+ *r_nodeid = de->master_nodeid;
2246+ write_unlock(&ls->ls_dirtbl[bucket].lock);
4bf12011 2247+ goto out;
10d56c87 2248+ }
4bf12011 2249+
10d56c87 2250+ write_unlock(&ls->ls_dirtbl[bucket].lock);
4bf12011 2251+
2252+ de = allocate_resdata(ls, namelen);
2253+ if (!de)
2254+ return -ENOMEM;
4bf12011 2255+
2256+ de->master_nodeid = nodeid;
2257+ de->length = namelen;
2258+ memcpy(de->name, name, namelen);
4bf12011 2259+
2260+ write_lock(&ls->ls_dirtbl[bucket].lock);
2261+ tmp = search_bucket(ls, name, namelen, bucket);
4bf12011 2262+ if (tmp) {
2263+ free_resdata(de);
2264+ de = tmp;
2265+ } else {
2266+ list_add_tail(&de->list, &ls->ls_dirtbl[bucket].list);
4bf12011 2267+ }
2268+ *r_nodeid = de->master_nodeid;
2269+ write_unlock(&ls->ls_dirtbl[bucket].lock);
4bf12011 2270+
2271+ out:
2272+ return 0;
2273+}
4bf12011 2274+
2275+int dlm_dir_lookup(struct dlm_ls *ls, uint32_t nodeid, char *name, int namelen,
2276+ uint32_t *r_nodeid)
2277+{
2278+ return get_resdata(ls, nodeid, name, namelen, r_nodeid, 0);
2279+}
4bf12011 2280+
2281+int dlm_dir_lookup_recovery(struct dlm_ls *ls, uint32_t nodeid, char *name,
2282+ int namelen, uint32_t *r_nodeid)
2283+{
2284+ return get_resdata(ls, nodeid, name, namelen, r_nodeid, 1);
4bf12011 2285+}
2286+
2287+/*
2288+ * The node with lowest id queries all nodes to determine when all are done.
2289+ * All other nodes query the low nodeid for this.
2290+ */
2291+
10d56c87 2292+int dlm_dir_rebuild_wait(struct dlm_ls *ls)
4bf12011 2293+{
2294+ int error;
2295+
2296+ if (ls->ls_low_nodeid == our_nodeid()) {
10d56c87 2297+ error = dlm_wait_status_all(ls, RESDIR_VALID);
4bf12011 2298+ if (!error)
2299+ set_bit(LSFL_ALL_RESDIR_VALID, &ls->ls_flags);
2300+ } else
10d56c87 2301+ error = dlm_wait_status_low(ls, RESDIR_ALL_VALID);
4bf12011 2302+
2303+ return error;
2304+}
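For example, in a three-node lockspace {2, 5, 9}, node 2 polls every member
until each reports RESDIR_VALID; nodes 5 and 9 each poll node 2 for
RESDIR_ALL_VALID to learn that the whole directory has been rebuilt.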
2305diff -urN linux-orig/cluster/dlm/dir.h linux-patched/cluster/dlm/dir.h
2306--- linux-orig/cluster/dlm/dir.h 1970-01-01 07:30:00.000000000 +0730
2307+++ linux-patched/cluster/dlm/dir.h 2004-07-13 18:57:22.000000000 +0800
2308@@ -0,0 +1,31 @@
4bf12011 2309+/******************************************************************************
2310+*******************************************************************************
2311+**
2312+** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
2313+** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
2314+**
2315+** This copyrighted material is made available to anyone wishing to use,
2316+** modify, copy, or redistribute it subject to the terms and conditions
2317+** of the GNU General Public License v.2.
2318+**
2319+*******************************************************************************
2320+******************************************************************************/
2321+
2322+#ifndef __DIR_DOT_H__
2323+#define __DIR_DOT_H__
2324+
2325+int dlm_dir_lookup(struct dlm_ls *ls, uint32_t nodeid, char *name, int namelen,
2326+ uint32_t *r_nodeid);
2327+int dlm_dir_lookup_recovery(struct dlm_ls *ls, uint32_t nodeid, char *name,
2328+ int namelen, uint32_t *r_nodeid);
2329+uint32_t name_to_directory_nodeid(struct dlm_ls *ls, char *name, int length);
2330+uint32_t get_directory_nodeid(struct dlm_rsb *rsb);
2331+void remove_resdata(struct dlm_ls *ls, uint32_t nodeid, char *name, int namelen);
2332+int dlm_dir_rebuild_local(struct dlm_ls *ls);
2333+int dlm_dir_rebuild_send(struct dlm_ls *ls, char *inbuf, int inlen,
2334+ char *outbuf, int outlen, uint32_t nodeid);
2335+int dlm_dir_rebuild_wait(struct dlm_ls * ls);
2336+void dlm_dir_clear(struct dlm_ls *ls);
2337+void dlm_dir_dump(struct dlm_ls *ls);
4bf12011 2338+
2339+#endif /* __DIR_DOT_H__ */
2340diff -urN linux-orig/cluster/dlm/dlm_internal.h linux-patched/cluster/dlm/dlm_internal.h
2341--- linux-orig/cluster/dlm/dlm_internal.h 1970-01-01 07:30:00.000000000 +0730
2342+++ linux-patched/cluster/dlm/dlm_internal.h 2004-07-13 18:57:22.000000000 +0800
2343@@ -0,0 +1,594 @@
4bf12011 2344+/******************************************************************************
2345+*******************************************************************************
2346+**
2347+** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
2348+** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
2349+**
2350+** This copyrighted material is made available to anyone wishing to use,
2351+** modify, copy, or redistribute it subject to the terms and conditions
2352+** of the GNU General Public License v.2.
2353+**
2354+*******************************************************************************
2355+******************************************************************************/
2356+
2357+#ifndef __DLM_INTERNAL_DOT_H__
2358+#define __DLM_INTERNAL_DOT_H__
2359+
2360+/*
2361+ * This is the main header file to be included in each DLM source file.
2362+ */
2363+
2364+#define DLM_RELEASE_NAME "<CVS>"
2365+
2366+#include <linux/slab.h>
2367+#include <linux/sched.h>
2368+#include <asm/semaphore.h>
2369+#include <linux/types.h>
2370+#include <linux/spinlock.h>
2371+#include <linux/vmalloc.h>
2372+#include <asm/uaccess.h>
2373+#include <linux/list.h>
2374+#include <linux/errno.h>
2375+#include <linux/random.h>
2376+
2377+#include <cluster/dlm.h>
2378+#include <cluster/dlm_device.h>
2379+#include <cluster/service.h>
2380+
2381+#ifndef TRUE
2382+#define TRUE (1)
2383+#endif
2384+
2385+#ifndef FALSE
2386+#define FALSE (0)
2387+#endif
2388+
2389+#if (BITS_PER_LONG == 64)
2390+#define PRIu64 "lu"
2391+#define PRId64 "ld"
2392+#define PRIo64 "lo"
2393+#define PRIx64 "lx"
2394+#define PRIX64 "lX"
2395+#define SCNu64 "lu"
2396+#define SCNd64 "ld"
2397+#define SCNo64 "lo"
2398+#define SCNx64 "lx"
2399+#define SCNX64 "lX"
2400+#else
2401+#define PRIu64 "Lu"
2402+#define PRId64 "Ld"
2403+#define PRIo64 "Lo"
2404+#define PRIx64 "Lx"
2405+#define PRIX64 "LX"
2406+#define SCNu64 "Lu"
2407+#define SCNd64 "Ld"
2408+#define SCNo64 "Lo"
2409+#define SCNx64 "Lx"
2410+#define SCNX64 "LX"
2411+#endif
2412+
2413+#define wchan_cond_sleep_intr(chan, sleep_cond) \
2414+do \
2415+{ \
2416+ DECLARE_WAITQUEUE(__wait_chan, current); \
2417+ current->state = TASK_INTERRUPTIBLE; \
2418+ add_wait_queue(&chan, &__wait_chan); \
2419+ if ((sleep_cond)) \
2420+ schedule(); \
2421+ remove_wait_queue(&chan, &__wait_chan); \
2422+ current->state = TASK_RUNNING; \
2423+} \
2424+while (0)
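A minimal usage sketch, assuming a wait queue head and a lockspace to test;
the second argument is the condition under which the caller should stay
asleep:

    static wait_queue_head_t recov_wait;

    static void wait_until_ready(struct dlm_ls *ls)
    {
            /* sleep (interruptibly) while the READY bit is still clear */
            wchan_cond_sleep_intr(recov_wait,
                                  !test_bit(LSFL_RECCOMM_READY,
                                            &ls->ls_flags));
    }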
2425+
2426+static inline int check_timeout(unsigned long stamp, unsigned int seconds)
2427+{
2428+ return time_after(jiffies, stamp + seconds * HZ);
2429+}
2430+
2431+
2432+#define log_print(fmt, args...) printk("dlm: "fmt"\n", ##args)
2433+
2434+#define log_all(ls, fmt, args...) \
2435+ do { \
2436+ printk("dlm: %s: " fmt "\n", (ls)->ls_name, ##args); \
2437+ dlm_debug_log(ls, fmt, ##args); \
2438+ } while (0)
2439+
2440+#define log_error log_all
2441+
2442+
2443+#define DLM_DEBUG
2444+#if defined(DLM_DEBUG)
2445+#define log_debug(ls, fmt, args...) dlm_debug_log(ls, fmt, ##args)
2446+#else
2447+#define log_debug(ls, fmt, args...)
2448+#endif
2449+
2450+#if defined(DLM_DEBUG) && defined(DLM_DEBUG_ALL)
2451+#undef log_debug
2452+#define log_debug log_all
2453+#endif
2454+
2455+
10d56c87 2456+#define DLM_ASSERT(x, do) \
4bf12011 2457+{ \
2458+ if (!(x)) \
2459+ { \
10d56c87 2460+ dlm_locks_dump(); \
4bf12011 2461+ dlm_debug_dump(); \
2462+ printk("\nDLM: Assertion failed on line %d of file %s\n" \
2463+ "DLM: assertion: \"%s\"\n" \
2464+ "DLM: time = %lu\n", \
2465+ __LINE__, __FILE__, #x, jiffies); \
2466+ {do} \
2467+ printk("\n"); \
2468+ BUG(); \
2469+ panic("DLM: Record message above and reboot.\n"); \
2470+ } \
2471+}
2472+
2473+
2474+struct dlm_ls;
2475+struct dlm_lkb;
2476+struct dlm_rsb;
2477+struct dlm_csb;
2478+struct dlm_node;
2479+struct dlm_lkbtable;
2480+struct dlm_rsbtable;
2481+struct dlm_dirtable;
2482+struct dlm_direntry;
2483+struct dlm_recover;
2484+struct dlm_header;
2485+struct dlm_request;
2486+struct dlm_reply;
2487+struct dlm_rcom;
2488+struct dlm_query_request;
2489+struct dlm_query_reply;
4bf12011 2490+
4bf12011 2491+
2492+struct dlm_direntry {
2493+ struct list_head list;
2494+ uint32_t master_nodeid;
2495+ uint16_t length;
2496+ char name[1];
4bf12011 2497+};
2498+
2499+struct dlm_dirtable {
2500+ struct list_head list;
2501+ rwlock_t lock;
2502+};
4bf12011 2503+
2504+struct dlm_rsbtable {
2505+ struct list_head list;
2506+ rwlock_t lock;
2507+};
2508+
2509+struct dlm_lkbtable {
2510+ struct list_head list;
2511+ rwlock_t lock;
2512+ uint16_t counter;
4bf12011 2513+};
2514+
2515+/*
10d56c87 2516+ * Cluster node (per node in cluster)
4bf12011 2517+ */
2518+
2519+struct dlm_node {
2520+ struct list_head list;
2521+ uint32_t nodeid;
2522+ int refcount; /* num csb's referencing */
4bf12011 2523+};
2524+
2525+/*
10d56c87 2526+ * Cluster System Block (per node in a ls)
4bf12011 2527+ */
2528+
2529+struct dlm_csb {
2530+ struct list_head list; /* per-lockspace node list */
2531+ struct dlm_node * node; /* global node structure */
2532+ int gone_event; /* event id when node removed */
4bf12011 2533+
10d56c87 2534+ /* recovery stats for debugging */
4bf12011 2535+
2536+ uint32_t names_send_count;
2537+ uint32_t names_send_msgid;
2538+ uint32_t names_recv_count;
2539+ uint32_t names_recv_msgid;
2540+ uint32_t locks_send_count;
2541+ uint32_t locks_send_msgid;
2542+ uint32_t locks_recv_count;
2543+ uint32_t locks_recv_msgid;
4bf12011 2544+};
2545+
2546+/*
10d56c87 2547+ * Used to save and manage recovery state for a lockspace.
4bf12011 2548+ */
2549+
2550+struct dlm_recover {
2551+ struct list_head list;
2552+ uint32_t * nodeids;
2553+ int node_count;
2554+ int event_id;
4bf12011 2555+};
2556+
2557+/*
10d56c87 2558+ * Elements in the range array
4bf12011 2559+ */
2560+
2561+#define GR_RANGE_START (0)
2562+#define GR_RANGE_END (1)
2563+#define RQ_RANGE_START (2)
2564+#define RQ_RANGE_END (3)
4bf12011 2565+
2566+/*
2567+ * Lockspace structure
2568+ */
2569+
2570+#define LSFL_WORK (0)
2571+#define LSFL_LS_RUN (1)
2572+#define LSFL_LS_STOP (2)
2573+#define LSFL_LS_START (3)
2574+#define LSFL_LS_FINISH (4)
2575+#define LSFL_RECCOMM_WAIT (5)
2576+#define LSFL_RECCOMM_READY (6)
2577+#define LSFL_NOTIMERS (7)
2578+#define LSFL_FINISH_RECOVERY (8)
2579+#define LSFL_RESDIR_VALID (9)
2580+#define LSFL_ALL_RESDIR_VALID (10)
2581+#define LSFL_NODES_VALID (11)
2582+#define LSFL_ALL_NODES_VALID (12)
2583+#define LSFL_REQUEST_WARN (13)
2584+#define LSFL_NOCONVGRANT (14)
2585+
2586+#define LSST_NONE (0)
2587+#define LSST_INIT (1)
2588+#define LSST_INIT_DONE (2)
2589+#define LSST_CLEAR (3)
2590+#define LSST_WAIT_START (4)
2591+#define LSST_RECONFIG_DONE (5)
2592+
2593+struct dlm_ls {
2594+ struct list_head ls_list; /* list of lockspaces */
2595+ uint32_t ls_local_id; /* local unique lockspace ID */
2596+ uint32_t ls_global_id; /* global unique lockspace ID */
2597+ int ls_allocation; /* Memory allocation policy */
2598+ unsigned long ls_flags; /* LSFL_ */
2599+
2600+ struct dlm_rsbtable * ls_rsbtbl;
2601+ uint32_t ls_rsbtbl_size;
2602+
2603+ struct dlm_lkbtable * ls_lkbtbl;
2604+ uint32_t ls_lkbtbl_size;
2605+
2606+ struct dlm_dirtable * ls_dirtbl;
2607+ uint32_t ls_dirtbl_size;
2608+
2609+ struct list_head ls_nodes; /* current nodes in RC */
2610+ struct list_head ls_nodes_gone; /* dead node list, recovery */
2611+ uint32_t ls_num_nodes; /* number of nodes in RC */
2612+ uint32_t ls_nodes_mask;
2613+ uint32_t ls_low_nodeid;
2614+
2615+ struct rw_semaphore ls_unlock_sem; /* To prevent unlock on a
2616+ parent lock racing with a
2617+ new child lock */
2618+
2619+ struct list_head ls_deadlockq; /* List of locks in conversion
2620+ ordered by duetime. for
2621+ deadlock detection */
2622+
2623+ /* recovery related */
2624+
2625+ struct list_head ls_recover; /* dlm_recover structs */
2626+ spinlock_t ls_recover_lock;
2627+ int ls_last_stop;
2628+ int ls_last_start;
2629+ int ls_last_finish;
2630+ int ls_state; /* recovery states */
2631+
2632+ struct rw_semaphore ls_in_recovery; /* block local requests */
2633+ struct list_head ls_requestqueue;/* queue remote requests */
2634+
2635+ struct dlm_rcom * ls_rcom; /* recovery comms */
2636+ uint32_t ls_rcom_msgid;
2637+ struct semaphore ls_rcom_lock;
2638+
2639+ struct list_head ls_recover_list;
2640+ spinlock_t ls_recover_list_lock;
2641+ int ls_recover_list_count;
2642+ wait_queue_head_t ls_wait_general;
2643+
2644+ struct list_head ls_rootres; /* List of root resources */
2645+
2646+ struct rw_semaphore ls_rec_rsblist; /* To prevent incoming recovery
2647+ operations happening while
2648+ we are purging */
2649+
2650+ struct rw_semaphore ls_gap_rsblist; /* To protect rootres list
2651+ in grant_after_purge() which
2652+ runs outside recovery */
2653+
2654+ struct list_head ls_rebuild_rootrsb_list; /* Root of lock trees
2655+ we are
2656+ deserialising */
2657+ int ls_namelen;
2658+ char ls_name[1];
4bf12011 2659+};
2660+
2661+/*
2662+ * Resource block
2663+ */
2664+
2665+#define RESFL_NEW_MASTER (0)
2666+#define RESFL_RECOVER_LIST (1)
2667+#define RESFL_MASTER (2)
4bf12011 2668+
2669+struct dlm_rsb {
2670+ struct list_head res_hashchain;
2671+ uint32_t res_bucket;
4bf12011 2672+
10d56c87 2673+ struct dlm_ls * res_ls; /* The owning lockspace */
4bf12011 2674+
10d56c87 2675+ struct list_head res_rootlist; /* List of root rsb's */
4bf12011 2676+
2677+ struct list_head res_subreslist; /* List of all sub-resources
2678+ for this root rsb */
4bf12011 2679+
2680+ uint8_t res_depth; /* Depth in resource tree */
2681+ unsigned long res_flags; /* Flags, RESFL_ */
4bf12011 2682+
2683+ struct list_head res_grantqueue;
2684+ struct list_head res_convertqueue;
2685+ struct list_head res_waitqueue;
4bf12011 2686+
10d56c87 2687+ uint32_t res_nodeid; /* nodeid of master node */
4bf12011 2688+
2689+ struct dlm_rsb * res_root; /* root rsb if a subresource */
2690+ struct dlm_rsb * res_parent; /* parent rsb (if any) */
4bf12011 2691+
2692+ atomic_t res_ref; /* Number of lkb's */
2693+ uint16_t res_remasterid; /* ID used during remaster */
4bf12011 2694+
2695+ struct list_head res_recover_list; /* General list for use
2696+ during recovery */
2697+ int res_recover_msgid;
2698+ int res_newlkid_expect;
4bf12011 2699+
10d56c87 2700+ struct rw_semaphore res_lock;
4bf12011 2701+
10d56c87 2702+ char * res_lvbptr; /* Lock value block */
4bf12011 2703+
2704+ uint8_t res_length;
2705+ char res_name[1]; /* <res_length> bytes */
4bf12011 2706+};
2707+
2708+/*
2709+ * Lock block. To avoid confusion, where flags mirror the
2710+ * public flags, they should have the same value.
2711+ */
2712+
2713+#define GDLM_LKSTS_NEW (0)
2714+#define GDLM_LKSTS_WAITING (1)
2715+#define GDLM_LKSTS_GRANTED (2)
2716+#define GDLM_LKSTS_CONVERT (3)
4bf12011 2717+
2718+#define GDLM_LKFLG_VALBLK (0x00000008)
2719+#define GDLM_LKFLG_PERSISTENT (0x00000080) /* Don't unlock when process exits */
2720+#define GDLM_LKFLG_NODLCKWT (0x00000100) /* Don't do deadlock detection */
2721+#define GDLM_LKFLG_EXPEDITE (0x00000400) /* Move to head of convert queue */
4bf12011 2722+
2723+/* Internal flags */
10d56c87 2724+#define GDLM_LKFLG_RANGE (0x00001000) /* Range field is present
5cdbd17b 2725+ (remote protocol only) */
2726+#define GDLM_LKFLG_MSTCPY (0x00002000)
2727+#define GDLM_LKFLG_DELETED (0x00004000) /* LKB is being deleted */
2728+#define GDLM_LKFLG_LQCONVERT (0x00008000)
2729+#define GDLM_LKFLG_LQRESEND (0x00010000) /* LKB on lockqueue must be resent */
2730+#define GDLM_LKFLG_DEMOTED (0x00020000)
2731+#define GDLM_LKFLG_RESENT (0x00040000)
2732+#define GDLM_LKFLG_NOREBUILD (0x00080000)
4bf12011 2733+
2734+#define AST_COMP (1)
2735+#define AST_BAST (2)
2736+#define AST_DEL (4)
4bf12011 2737+
2738+struct dlm_lkb {
2739+ uint32_t lkb_flags;
2740+ uint16_t lkb_status; /* grant, wait, convert */
2741+ int8_t lkb_rqmode; /* requested lock mode */
2742+ int8_t lkb_grmode; /* granted lock mode */
2743+ uint32_t lkb_retstatus; /* status to return in lksb */
2744+ uint32_t lkb_id; /* our lock ID */
2745+ struct dlm_lksb * lkb_lksb; /* status block of caller */
2746+ struct list_head lkb_idtbl_list; /* lockidtbl */
2747+ struct list_head lkb_statequeue; /* rsb's g/c/w queue */
10d56c87 2748+ struct dlm_rsb * lkb_resource;
2749+ struct list_head lkb_ownerqueue; /* list of locks owned by a
2750+ process */
2751+ struct dlm_lkb * lkb_parent; /* parent lock if any */
2752+ atomic_t lkb_childcnt; /* number of children */
2753+
2754+ struct list_head lkb_lockqueue; /* queue of locks waiting
2755+ for remote reply */
2756+ int lkb_lockqueue_state; /* reason on lockqueue */
2757+ int lkb_lockqueue_flags; /* as passed into
2758+ lock/unlock */
2759+ unsigned long lkb_lockqueue_time; /* time lkb went on the
2760+ lockqueue */
10d56c87 2761+ unsigned long lkb_duetime; /* for deadlock detection */
2762+
2763+ uint32_t lkb_remid; /* id on remote partner */
2764+ uint32_t lkb_nodeid; /* id of remote partner */
2765+
2766+ void * lkb_astaddr;
2767+ void * lkb_bastaddr;
2768+ long lkb_astparam;
2769+ struct list_head lkb_astqueue; /* locks with asts to deliver */
2770+ uint16_t lkb_astflags; /* COMP, BAST, DEL */
2771+ uint8_t lkb_bastmode; /* requested mode */
2772+ uint8_t lkb_highbast; /* highest mode bast sent for */
4bf12011 2773+
10d56c87 2774+ struct dlm_request * lkb_request;
4bf12011 2775+
5cdbd17b 2776+ struct list_head lkb_deadlockq; /* ls_deadlockq list */
4bf12011 2777+
2778+ char * lkb_lvbptr; /* points to lksb lvb on local
2779+ lock, allocated lvb on
2780+ on remote lock */
2781+ uint64_t * lkb_range; /* Points to an array of 64 bit
2782+ numbers that represent the
2783+ requested and granted ranges
10d56c87 2784+ of the lock. NULL implies
5cdbd17b 2785+ 0-ffffffffffffffff */
4bf12011 2786+};
2787+
2788+/*
4bf12011 2789+ * Header part of the mid-level comms system. All packets start with
2790+ * this header so we can identify them. The comms packet can
2791+ * contain many of these structs, but they are split into individual
2792+ * work units before being passed to the lockqueue routines.
2793+ * Below this are the structs for which this is the header.
2794+ */
2795+
2796+struct dlm_header {
2797+ uint8_t rh_cmd; /* What we are */
2798+ uint8_t rh_flags; /* maybe just a pad */
2799+ uint16_t rh_length; /* Length of struct (so we can
2800+ send many in 1 message) */
2801+ uint32_t rh_lkid; /* Lock ID tag: ie the local
2802+ (requesting) lock ID */
2803+ uint32_t rh_lockspace; /* Lockspace ID */
4bf12011 2804+};
2805+
2806+/*
2807+ * This is the struct used in a remote lock/unlock/convert request
2808+ * The mid-level comms API should turn this into native byte order.
2809+ * Most "normal" lock operations will use these two structs for
2810+ * communications. Recovery operations use their own structs
2811+ * but still with the gd_req_header on the front.
2812+ */
2813+
2814+struct dlm_request {
2815+ struct dlm_header rr_header;
2816+ uint32_t rr_remlkid; /* Remote lock ID */
2817+ uint32_t rr_remparid; /* Parent's remote lock ID */
2818+ uint32_t rr_flags; /* Flags from lock/convert req*/
2819+ uint64_t rr_range_start; /* Yes, these are in the right
2820+ place... */
2821+ uint64_t rr_range_end;
2822+ uint32_t rr_status; /* Status to return if this is
2823+ an AST request */
2824+ uint8_t rr_rqmode; /* Requested lock mode */
2825+ uint8_t rr_asts; /* Whether the LKB has ASTs */
2826+ char rr_lvb[DLM_LVB_LEN];
2827+ char rr_name[1]; /* As long as needs be. Only
2828+ used for directory lookups.
2829+ The length of this can be
2830+ worked out from the packet
2831+ length */
4bf12011 2832+};
2833+
2834+/*
2835+ * This is the struct returned by a remote lock/unlock/convert request
2836+ * The mid-level comms API should turn this into native byte order.
2837+ */
2838+
2839+struct dlm_reply {
2840+ struct dlm_header rl_header;
2841+ uint32_t rl_lockstate; /* Whether request was
2842+ queued/granted/waiting */
2843+ uint32_t rl_nodeid; /* nodeid of lock master */
2844+ uint32_t rl_status; /* Status to return to caller */
2845+ uint32_t rl_lkid; /* Remote lkid */
2846+ char rl_lvb[DLM_LVB_LEN];
4bf12011 2847+};
2848+
2849+/*
2850+ * Recovery comms message
2851+ */
2852+
2853+struct dlm_rcom {
2854+ struct dlm_header rc_header; /* 32 byte aligned */
2855+ uint32_t rc_msgid;
2856+ uint16_t rc_datalen;
2857+ uint8_t rc_expanded;
2858+ uint8_t rc_subcmd; /* secondary command */
2859+ char rc_buf[1]; /* first byte of data goes here
2860+ and extends beyond here for
2861+ another datalen - 1 bytes.
2862+ rh_length is set to sizeof
2863+ dlm_rcom + datalen - 1 */
4bf12011 2864+};
2865+
2866+
2867+/* A remote query: GDLM_REMCMD_QUERY */
4bf12011 2868+
2869+struct dlm_query_request {
2870+ struct dlm_header rq_header;
2871+ uint32_t rq_mstlkid; /* LockID on master node */
2872+ uint32_t rq_query; /* query from the user */
2873+ uint32_t rq_maxlocks; /* max number of locks we can
2874+ cope with */
4bf12011 2875+};
2876+
2877+/* First block of a query reply. cmd = GDLM_REMCMD_QUERYREPLY */
2878+/* There may be subsequent blocks of
2879+ lock info in GDLM_REMCMD_QUERYCONT messages which just have
2880+ a normal header. The last of these will have rh_flags set to
2881+ GDLM_REMFLAG_ENDQUERY
2882+ */
4bf12011 2883+
2884+struct dlm_query_reply {
2885+ struct dlm_header rq_header;
2886+ uint32_t rq_numlocks; /* Number of locks in reply */
2887+ uint32_t rq_startlock; /* Which lock this block starts
2888+ at (for multi-block replies) */
2889+ uint32_t rq_status;
2890+
2891+ /* Resource information */
2892+ uint32_t rq_grantcount; /* No. of nodes on grantqueue */
2893+ uint32_t rq_convcount; /* No. of nodes on convertq */
2894+ uint32_t rq_waitcount; /* No. of nodes on waitqueue */
2895+ char rq_valblk[DLM_LVB_LEN]; /* Master's LVB
2896+ contents, if
2897+ applicable */
4bf12011 2898+};
2899+
2900+/*
2901+ * Lockqueue wait lock states
2902+ */
2903+
2904+#define GDLM_LQSTATE_WAIT_RSB 1
2905+#define GDLM_LQSTATE_WAIT_CONVERT 2
2906+#define GDLM_LQSTATE_WAIT_CONDGRANT 3
2907+#define GDLM_LQSTATE_WAIT_UNLOCK 4
4bf12011 2908+
2909+/* Commands sent across the comms link */
2910+#define GDLM_REMCMD_LOOKUP 1
2911+#define GDLM_REMCMD_LOCKREQUEST 2
2912+#define GDLM_REMCMD_UNLOCKREQUEST 3
2913+#define GDLM_REMCMD_CONVREQUEST 4
2914+#define GDLM_REMCMD_LOCKREPLY 5
2915+#define GDLM_REMCMD_LOCKGRANT 6
2916+#define GDLM_REMCMD_SENDBAST 7
2917+#define GDLM_REMCMD_SENDCAST 8
2918+#define GDLM_REMCMD_REM_RESDATA 9
2919+#define GDLM_REMCMD_RECOVERMESSAGE 20
2920+#define GDLM_REMCMD_RECOVERREPLY 21
2921+#define GDLM_REMCMD_QUERY 30
2922+#define GDLM_REMCMD_QUERYREPLY 31
4bf12011 2923+
2924+/* Set in rh_flags when this is the last block of
2925+ query information. Note this could also be the first
2926+ block */
2927+#define GDLM_REMFLAG_ENDQUERY 1
2928+
4bf12011 2929+#ifndef BUG_ON
2930+#define BUG_ON(x)
2931+#endif
2932+
10d56c87 2933+void dlm_debug_log(struct dlm_ls *ls, const char *fmt, ...);
4bf12011 2934+void dlm_debug_dump(void);
10d56c87 2935+void dlm_locks_dump(void);
4bf12011 2936+
2937+#endif /* __DLM_INTERNAL_DOT_H__ */
2938diff -urN linux-orig/cluster/dlm/lkb.c linux-patched/cluster/dlm/lkb.c
2939--- linux-orig/cluster/dlm/lkb.c 1970-01-01 07:30:00.000000000 +0730
2940+++ linux-patched/cluster/dlm/lkb.c 2004-07-13 18:57:22.000000000 +0800
2941@@ -0,0 +1,181 @@
4bf12011 2942+/******************************************************************************
2943+*******************************************************************************
2944+**
2945+** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
2946+** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
2947+**
2948+** This copyrighted material is made available to anyone wishing to use,
2949+** modify, copy, or redistribute it subject to the terms and conditions
2950+** of the GNU General Public License v.2.
2951+**
2952+*******************************************************************************
2953+******************************************************************************/
2954+
2955+/*
2956+ * lkb.c
2957+ *
2958+ * Allocate and free locks on the lock ID table.
2959+ *
2960+ * This is slightly naff but I don't really like the
2961+ * VMS lockidtbl stuff as it uses a realloced array
2962+ * to hold the locks in. I think this is slightly better
2963+ * in some ways.
2964+ *
2965+ * Any better suggestions gratefully received. Patrick
2966+ *
2967+ */
2968+
2969+#include "dlm_internal.h"
2970+#include "lockqueue.h"
2971+#include "lkb.h"
2972+#include "config.h"
2973+#include "rsb.h"
2974+#include "memory.h"
2975+#include "lockspace.h"
2976+#include "util.h"
2977+
2978+/*
2979+ * Internal find lock by ID. Must be called with the lockidtbl spinlock held.
2980+ */
2981+
10d56c87 2982+static struct dlm_lkb *__find_lock_by_id(struct dlm_ls *ls, uint32_t lkid)
4bf12011 2983+{
2984+ uint16_t bucket = lkid & 0xFFFF;
2985+ struct dlm_lkb *lkb;
4bf12011 2986+
10d56c87 2987+ if (bucket >= ls->ls_lkbtbl_size)
4bf12011 2988+ goto out;
2989+
10d56c87 2990+ list_for_each_entry(lkb, &ls->ls_lkbtbl[bucket].list, lkb_idtbl_list){
4bf12011 2991+ if (lkb->lkb_id == lkid)
2992+ return lkb;
2993+ }
10d56c87 2994+ out:
4bf12011 2995+ return NULL;
2996+}
2997+
2998+/*
4bf12011 2999+ * LKB lkid's are 32 bits and have two 16 bit parts. The bottom 16 bits are a
3000+ * random number between 0 and lockidtbl_size-1. This random number specifies
3001+ * the "bucket" for the lkb in lockidtbl. The upper 16 bits are a sequentially
3002+ * assigned per-bucket id.
3003+ *
3004+ * Because the 16 bit id's per bucket can roll over, a new lkid must be checked
3005+ * against the lkid of all lkb's in the bucket to avoid duplication.
3006+ *
3007+ */
3008+
10d56c87 3009+struct dlm_lkb *create_lkb(struct dlm_ls *ls)
4bf12011 3010+{
10d56c87 3011+ struct dlm_lkb *lkb;
4bf12011 3012+ uint32_t lkid;
3013+ uint16_t bucket;
3014+
3015+ lkb = allocate_lkb(ls);
3016+ if (!lkb)
3017+ goto out;
3018+
3019+ retry:
3020+ get_random_bytes(&bucket, sizeof(bucket));
3021+ bucket &= (ls->ls_lkbtbl_size - 1);
4bf12011 3022+
10d56c87 3023+ write_lock(&ls->ls_lkbtbl[bucket].lock);
4bf12011 3024+
3025+ lkid = bucket | (ls->ls_lkbtbl[bucket].counter++ << 16);
3026+
3027+ if (__find_lock_by_id(ls, lkid)) {
3028+ write_unlock(&ls->ls_lkbtbl[bucket].lock);
3029+ goto retry;
3030+ }
3031+
3032+ lkb->lkb_id = lkid;
3033+ list_add(&lkb->lkb_idtbl_list, &ls->ls_lkbtbl[bucket].list);
3034+ write_unlock(&ls->ls_lkbtbl[bucket].lock);
3035+ out:
4bf12011 3036+ return lkb;
3037+}
3038+
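Going the other way, the two halves of an lkid built above can be pulled
apart as follows (a sketch; the masks match __find_lock_by_id() and
release_lkb()):

    static inline uint16_t lkid_bucket(uint32_t lkid)
    {
            return lkid & 0xFFFF;   /* index into ls->ls_lkbtbl[] */
    }

    static inline uint16_t lkid_seq(uint32_t lkid)
    {
            return lkid >> 16;      /* per-bucket counter value at creation */
    }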
3039+/*
3040+ * Free LKB and remove it from the lockidtbl.
3041+ * NB - this always frees the lkb whereas release_rsb doesn't free an
3042+ * rsb unless its reference count is zero.
3043+ */
3044+
10d56c87 3045+void release_lkb(struct dlm_ls *ls, struct dlm_lkb *lkb)
4bf12011 3046+{
3047+ uint16_t bucket = lkb->lkb_id & 0xFFFF;
3048+
4bf12011 3049+ if (lkb->lkb_status) {
3050+ log_error(ls, "release lkb with status %u", lkb->lkb_status);
3051+ print_lkb(lkb);
3052+ return;
3053+ }
3054+
3055+ if (lkb->lkb_parent)
3056+ atomic_dec(&lkb->lkb_parent->lkb_childcnt);
3057+
10d56c87 3058+ write_lock(&ls->ls_lkbtbl[bucket].lock);
4bf12011 3059+ list_del(&lkb->lkb_idtbl_list);
10d56c87 3060+ write_unlock(&ls->ls_lkbtbl[bucket].lock);
4bf12011 3061+
3062+ /* if this is not a master copy then lvbptr points into the user's
3063+ * lksb, so don't free it */
3064+ if (lkb->lkb_lvbptr && lkb->lkb_flags & GDLM_LKFLG_MSTCPY)
3065+ free_lvb(lkb->lkb_lvbptr);
3066+
3067+ if (lkb->lkb_range)
3068+ free_range(lkb->lkb_range);
3069+
3070+ free_lkb(lkb);
3071+}
3072+
10d56c87 3073+struct dlm_lkb *find_lock_by_id(struct dlm_ls *ls, uint32_t lkid)
4bf12011 3074+{
3075+ struct dlm_lkb *lkb;
3076+ uint16_t bucket = lkid & 0xFFFF;
4bf12011 3077+
10d56c87 3078+ read_lock(&ls->ls_lkbtbl[bucket].lock);
4bf12011 3079+ lkb = __find_lock_by_id(ls, lkid);
10d56c87 3080+ read_unlock(&ls->ls_lkbtbl[bucket].lock);
4bf12011 3081+
3082+ return lkb;
3083+}
3084+
10d56c87 3085+struct dlm_lkb *dlm_get_lkb(void *ls, uint32_t lkid)
4bf12011 3086+{
10d56c87 3087+ struct dlm_ls *lspace = find_lockspace_by_local_id(ls);
4bf12011 3088+ return find_lock_by_id(lspace, lkid);
3089+}
3090+
3091+/*
3092+ * Initialise the range parts of an LKB.
3093+ */
3094+
10d56c87 3095+int lkb_set_range(struct dlm_ls *lspace, struct dlm_lkb *lkb, uint64_t start, uint64_t end)
4bf12011 3096+{
3097+ int ret = -ENOMEM;
3098+
3099+ /*
3100+ * if this wasn't already a range lock, make it one
3101+ */
3102+ if (!lkb->lkb_range) {
3103+ lkb->lkb_range = allocate_range(lspace);
3104+ if (!lkb->lkb_range)
3105+ goto out;
3106+
3107+ /*
3108+ * This is needed for conversions that contain ranges where the
3109+ * original lock didn't but it's harmless for new locks too.
3110+ */
3111+ lkb->lkb_range[GR_RANGE_START] = 0LL;
3112+ lkb->lkb_range[GR_RANGE_END] = 0xffffffffffffffffULL;
3113+ }
3114+
3115+ lkb->lkb_range[RQ_RANGE_START] = start;
3116+ lkb->lkb_range[RQ_RANGE_END] = end;
3117+
3118+ ret = 0;
3119+
3120+ out:
3121+ return ret;
3122+}
3123diff -urN linux-orig/cluster/dlm/lkb.h linux-patched/cluster/dlm/lkb.h
3124--- linux-orig/cluster/dlm/lkb.h 1970-01-01 07:30:00.000000000 +0730
3125+++ linux-patched/cluster/dlm/lkb.h 2004-07-13 18:57:22.000000000 +0800
3126@@ -0,0 +1,23 @@
4bf12011 3127+/******************************************************************************
3128+*******************************************************************************
3129+**
3130+** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3131+** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
3132+**
3133+** This copyrighted material is made available to anyone wishing to use,
3134+** modify, copy, or redistribute it subject to the terms and conditions
3135+** of the GNU General Public License v.2.
3136+**
3137+*******************************************************************************
3138+******************************************************************************/
3139+
3140+#ifndef __LKB_DOT_H__
3141+#define __LKB_DOT_H__
3142+
3143+struct dlm_lkb *find_lock_by_id(struct dlm_ls *ls, uint32_t lkid);
3144+struct dlm_lkb *create_lkb(struct dlm_ls *ls);
3145+void release_lkb(struct dlm_ls *ls, struct dlm_lkb *lkb);
3146+struct dlm_lkb *dlm_get_lkb(void *ls, uint32_t lkid);
3147+int lkb_set_range(struct dlm_ls *lspace, struct dlm_lkb *lkb, uint64_t start, uint64_t end);
4bf12011 3148+
3149+#endif /* __LKB_DOT_H__ */
3150diff -urN linux-orig/cluster/dlm/locking.c linux-patched/cluster/dlm/locking.c
3151--- linux-orig/cluster/dlm/locking.c 1970-01-01 07:30:00.000000000 +0730
3152+++ linux-patched/cluster/dlm/locking.c 2004-07-13 18:57:22.000000000 +0800
3153@@ -0,0 +1,1307 @@
4bf12011 3154+/******************************************************************************
3155+*******************************************************************************
3156+**
3157+** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3158+** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
10d56c87 3159+**
4bf12011 3160+** This copyrighted material is made available to anyone wishing to use,
3161+** modify, copy, or redistribute it subject to the terms and conditions
3162+** of the GNU General Public License v.2.
3163+**
3164+*******************************************************************************
3165+******************************************************************************/
3166+
10d56c87 3167+/*
4bf12011 3168+ * locking.c
3169+ *
3170+ * This is where the main work of the DLM goes on
3171+ *
3172+ */
3173+
3174+#include "dlm_internal.h"
3175+#include "lockqueue.h"
3176+#include "locking.h"
3177+#include "lockspace.h"
3178+#include "lkb.h"
3179+#include "nodes.h"
3180+#include "dir.h"
3181+#include "ast.h"
3182+#include "memory.h"
3183+#include "rsb.h"
3184+#include "util.h"
3185+
3186+extern struct list_head lslist;
4bf12011 3187+
3188+#define MAX(a, b) (((a) > (b)) ? (a) : (b))
3189+
10d56c87 3190+/*
4bf12011 3191+ * Lock compatibilty matrix - thanks Steve
3192+ * UN = Unlocked state. Not really a state, used as a flag
3193+ * PD = Padding. Used to make the matrix a nice power of two in size
3194+ * Other states are the same as the VMS DLM.
3195+ * Usage: matrix[grmode+1][rqmode+1] (although m[rq+1][gr+1] is the same)
3196+ */
3197+
3198+#define modes_compat(gr, rq) \
3199+ __dlm_compat_matrix[(gr)->lkb_grmode + 1][(rq)->lkb_rqmode + 1]
3200+
3201+const int __dlm_compat_matrix[8][8] = {
3202+ /* UN NL CR CW PR PW EX PD */
3203+ {1, 1, 1, 1, 1, 1, 1, 0}, /* UN */
3204+ {1, 1, 1, 1, 1, 1, 1, 0}, /* NL */
3205+ {1, 1, 1, 1, 1, 1, 0, 0}, /* CR */
3206+ {1, 1, 1, 1, 0, 0, 0, 0}, /* CW */
3207+ {1, 1, 1, 0, 1, 0, 0, 0}, /* PR */
3208+ {1, 1, 1, 0, 0, 0, 0, 0}, /* PW */
3209+ {1, 1, 0, 0, 0, 0, 0, 0}, /* EX */
3210+ {0, 0, 0, 0, 0, 0, 0, 0} /* PD */
3211+};
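To read the matrix concretely: a granted PR lock against a requested CW
lock is row PR+1, column CW+1, which is 0, so the modes conflict, while PR
against a requested PR is 1. A sketch using the dlm_lkb fields that the
modes_compat() macro expects:

    static int pr_blocks_cw_example(void)
    {
            struct dlm_lkb gr = { .lkb_grmode = DLM_LOCK_PR };
            struct dlm_lkb rq = { .lkb_rqmode = DLM_LOCK_CW };

            /* expands to __dlm_compat_matrix[PR + 1][CW + 1], i.e. 0 */
            return modes_compat(&gr, &rq);
    }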
3212+
10d56c87 3213+/*
4bf12011 3214+ * Compatibility matrix for conversions with QUECVT set.
3215+ * Granted mode is the row; requested mode is the column.
3216+ * Usage: matrix[grmode+1][rqmode+1]
3217+ */
3218+
3219+const int __quecvt_compat_matrix[8][8] = {
3220+ /* UN NL CR CW PR PW EX PD */
3221+ {0, 0, 0, 0, 0, 0, 0, 0}, /* UN */
3222+ {0, 0, 1, 1, 1, 1, 1, 0}, /* NL */
3223+ {0, 0, 0, 1, 1, 1, 1, 0}, /* CR */
3224+ {0, 0, 0, 0, 1, 1, 1, 0}, /* CW */
3225+ {0, 0, 0, 1, 0, 1, 1, 0}, /* PR */
3226+ {0, 0, 0, 0, 0, 0, 1, 0}, /* PW */
3227+ {0, 0, 0, 0, 0, 0, 0, 0}, /* EX */
3228+ {0, 0, 0, 0, 0, 0, 0, 0} /* PD */
3229+};
3230+
10d56c87 3231+/*
4bf12011 3232+ * This defines the direction of transfer of LVB data.
3233+ * Granted mode is the row; requested mode is the column.
3234+ * Usage: matrix[grmode+1][rqmode+1]
3235+ * 1 = LVB is returned to the caller
3236+ * 0 = LVB is written to the resource
3237+ * -1 = nothing happens to the LVB
3238+ */
3239+
3240+const int __lvb_operations[8][8] = {
3241+ /* UN NL CR CW PR PW EX PD*/
3242+ { -1, 1, 1, 1, 1, 1, 1, -1 }, /* UN */
3243+ { -1, 1, 1, 1, 1, 1, 1, 0 }, /* NL */
3244+ { -1, -1, 1, 1, 1, 1, 1, 0 }, /* CR */
3245+ { -1, -1, -1, 1, 1, 1, 1, 0 }, /* CW */
3246+ { -1, -1, -1, -1, 1, 1, 1, 0 }, /* PR */
3247+ { -1, 0, 0, 0, 0, 0, 1, 0 }, /* PW */
3248+ { -1, 0, 0, 0, 0, 0, 0, 0 }, /* EX */
3249+ { -1, 0, 0, 0, 0, 0, 0, 0 } /* PD */
3250+};
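Read concretely: a conversion down from EX to NL gives
__lvb_operations[EX+1][NL+1] == 0, so the caller's LVB is written to the
resource, while a conversion up from NL to EX gives 1, so the resource's
LVB is returned to the caller.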
3251+
3252+static void grant_lock(struct dlm_lkb * lkb, int send_remote);
3253+static void send_blocking_asts(struct dlm_rsb * rsb, struct dlm_lkb * lkb);
3254+static void send_blocking_asts_all(struct dlm_rsb *rsb, struct dlm_lkb *lkb);
3255+static int convert_lock(struct dlm_ls * ls, int mode, struct dlm_lksb *lksb,
4bf12011 3256+ int flags, void *ast, void *astarg, void *bast,
3257+ struct dlm_range *range);
10d56c87 3258+static int dlm_lock_stage1(struct dlm_ls * lspace, struct dlm_lkb * lkb, int flags,
4bf12011 3259+ char *name, int namelen);
3260+
3261+
10d56c87 3262+static inline int first_in_list(struct dlm_lkb *lkb, struct list_head *head)
4bf12011 3263+{
10d56c87 3264+ struct dlm_lkb *first = list_entry(head->next, struct dlm_lkb, lkb_statequeue);
4bf12011 3265+
3266+ if (lkb->lkb_id == first->lkb_id)
3267+ return 1;
3268+
3269+ return 0;
3270+}
3271+
10d56c87 3272+/*
4bf12011 3273+ * Return 1 if the locks' ranges overlap
3274+ * If the lkb has no range then it is assumed to cover 0-ffffffff.ffffffff
3275+ */
3276+
10d56c87 3277+static inline int ranges_overlap(struct dlm_lkb *lkb1, struct dlm_lkb *lkb2)
4bf12011 3278+{
3279+ if (!lkb1->lkb_range || !lkb2->lkb_range)
3280+ return 1;
3281+
3282+ if (lkb1->lkb_range[RQ_RANGE_END] < lkb2->lkb_range[GR_RANGE_START] ||
3283+ lkb1->lkb_range[RQ_RANGE_START] > lkb2->lkb_range[GR_RANGE_END])
3284+ return 0;
3285+
3286+ return 1;
3287+}
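For example, a request over [5,10] overlaps a grant over [0,7] but not one
over [11,20]; an lkb with no range array is treated as covering the whole
64-bit space and therefore overlaps everything.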
3288+
3289+/*
3290+ * Resolve conversion deadlock by changing to NL the granted mode of deadlocked
3291+ * locks on the convert queue. One of the deadlocked locks is allowed to
3292+ * retain its original granted state (we choose the lkb provided although it
3293+ * shouldn't matter which.) We do not change the granted mode on locks without
3294+ * the CONVDEADLK flag. If any of these exist (there shouldn't if the app uses
3295+ * the flag consistently) the false return value is used.
3296+ */
3297+
10d56c87 3298+static int conversion_deadlock_resolve(struct dlm_rsb *rsb, struct dlm_lkb *lkb)
4bf12011 3299+{
10d56c87 3300+ struct dlm_lkb *this;
4bf12011 3301+ int rv = TRUE;
3302+
3303+ list_for_each_entry(this, &rsb->res_convertqueue, lkb_statequeue) {
3304+ if (this == lkb)
3305+ continue;
3306+
3307+ if (!ranges_overlap(lkb, this))
3308+ continue;
3309+
3310+ if (!modes_compat(this, lkb) && !modes_compat(lkb, this)) {
3311+
3312+ if (!(this->lkb_lockqueue_flags & DLM_LKF_CONVDEADLK)){
3313+ rv = FALSE;
3314+ continue;
3315+ }
3316+ this->lkb_grmode = DLM_LOCK_NL;
3317+ this->lkb_flags |= GDLM_LKFLG_DEMOTED;
3318+ }
3319+ }
3320+ return rv;
3321+}
3322+
3323+/*
3324+ * "A conversion deadlock arises with a pair of lock requests in the converting
3325+ * queue for one resource. The granted mode of each lock blocks the requested
3326+ * mode of the other lock."
3327+ */
3328+
10d56c87 3329+static int conversion_deadlock_detect(struct dlm_rsb *rsb, struct dlm_lkb *lkb)
4bf12011 3330+{
10d56c87 3331+ struct dlm_lkb *this;
4bf12011 3332+
3333+ list_for_each_entry(this, &rsb->res_convertqueue, lkb_statequeue) {
3334+ if (this == lkb)
3335+ continue;
3336+
3337+ if (!ranges_overlap(lkb, this))
3338+ continue;
3339+
3340+ if (!modes_compat(this, lkb) && !modes_compat(lkb, this))
3341+ return TRUE;
3342+ }
3343+ return FALSE;
3344+}
3345+
3346+/*
3347+ * Check if the given lkb conflicts with another lkb on the queue.
3348+ */
3349+
10d56c87 3350+static int queue_conflict(struct list_head *head, struct dlm_lkb *lkb)
4bf12011 3351+{
10d56c87 3352+ struct dlm_lkb *this;
4bf12011 3353+
3354+ list_for_each_entry(this, head, lkb_statequeue) {
3355+ if (this == lkb)
3356+ continue;
3357+ if (ranges_overlap(lkb, this) && !modes_compat(this, lkb))
3358+ return TRUE;
3359+ }
3360+ return FALSE;
3361+}
3362+
3363+/*
3364+ * Deadlock can arise when using the QUECVT flag if the requested mode of the
3365+ * first converting lock is incompatible with the granted mode of another
3366+ * converting lock further down the queue. To prevent this deadlock, a
3367+ * requested QUEUECVT lock is granted immediately if adding it to the end of
3368+ * the queue would prevent a lock ahead of it from being granted.
3369+ */
3370+
10d56c87 3371+static int queuecvt_deadlock_detect(struct dlm_rsb *rsb, struct dlm_lkb *lkb)
4bf12011 3372+{
10d56c87 3373+ struct dlm_lkb *this;
4bf12011 3374+
3375+ list_for_each_entry(this, &rsb->res_convertqueue, lkb_statequeue) {
3376+ if (this == lkb)
3377+ break;
3378+
3379+ if (ranges_overlap(lkb, this) && !modes_compat(lkb, this))
3380+ return TRUE;
3381+ }
3382+ return FALSE;
3383+}
3384+
10d56c87 3385+/*
4bf12011 3386+ * Return 1 if the lock can be granted, 0 otherwise.
3387+ * Also detect and resolve conversion deadlocks.
3388+ */
3389+
10d56c87 3390+static int can_be_granted(struct dlm_rsb *rsb, struct dlm_lkb *lkb)
4bf12011 3391+{
3392+ if (test_bit(LSFL_NOCONVGRANT, &rsb->res_ls->ls_flags) &&
3393+ lkb->lkb_grmode == DLM_LOCK_IV &&
3394+ !list_empty(&rsb->res_convertqueue))
3395+ return FALSE;
3396+
3397+ if (lkb->lkb_rqmode == DLM_LOCK_NL)
4bf12011 3398+ return TRUE;
3399+
3400+ if (lkb->lkb_rqmode == lkb->lkb_grmode)
3401+ return TRUE;
3402+
3403+ if (queue_conflict(&rsb->res_grantqueue, lkb))
3404+ return FALSE;
3405+
3406+ if (!queue_conflict(&rsb->res_convertqueue, lkb)) {
3407+ if (!(lkb->lkb_lockqueue_flags & DLM_LKF_QUECVT))
3408+ return TRUE;
3409+
3410+ if (list_empty(&rsb->res_convertqueue) ||
3411+ first_in_list(lkb, &rsb->res_convertqueue) ||
3412+ queuecvt_deadlock_detect(rsb, lkb))
3413+ return TRUE;
3414+ else
3415+ return FALSE;
3416+ }
3417+
3418+ /* there *is* a conflict between this lkb and a converting lock so
3419+ we return false unless conversion deadlock resolution is permitted
3420+ (only conversion requests will have the CONVDEADLK flag set) */
3421+
3422+ if (!(lkb->lkb_lockqueue_flags & DLM_LKF_CONVDEADLK))
3423+ return FALSE;
3424+
3425+ if (!conversion_deadlock_detect(rsb, lkb))
3426+ return FALSE;
3427+
3428+ if (conversion_deadlock_resolve(rsb, lkb))
3429+ return TRUE;
3430+
3431+ return FALSE;
3432+}
3433+
3434+int dlm_lock(void *lockspace,
3435+ uint32_t mode,
3436+ struct dlm_lksb *lksb,
3437+ uint32_t flags,
3438+ void *name,
3439+ unsigned int namelen,
3440+ uint32_t parent,
3441+ void (*ast) (void *astarg),
3442+ void *astarg,
3443+ void (*bast) (void *astarg, int mode),
3444+ struct dlm_range *range)
3445+{
3446+ struct dlm_ls *lspace;
3447+ struct dlm_lkb *lkb = NULL, *parent_lkb = NULL;
4bf12011 3448+ int ret = -EINVAL;
3449+
3450+ lspace = find_lockspace_by_local_id(lockspace);
3451+ if (!lspace)
3452+ goto out;
3453+
3454+ if (mode < 0 || mode > DLM_LOCK_EX)
3455+ goto out;
3456+
3457+ if (namelen > DLM_RESNAME_MAXLEN)
3458+ goto out;
3459+
3460+ if (flags & DLM_LKF_CANCEL)
3461+ goto out;
3462+
3463+ if (flags & DLM_LKF_QUECVT && !(flags & DLM_LKF_CONVERT))
3464+ goto out;
3465+
3466+ if (flags & DLM_LKF_EXPEDITE && !(flags & DLM_LKF_CONVERT))
3467+ goto out;
3468+
3469+ if (flags & DLM_LKF_EXPEDITE && flags & DLM_LKF_QUECVT)
3470+ goto out;
3471+
3472+ if (flags & DLM_LKF_EXPEDITE && flags & DLM_LKF_NOQUEUE)
3473+ goto out;
3474+
3475+ if (!ast || !lksb)
3476+ goto out;
3477+
3478+ if ((flags & DLM_LKF_VALBLK) && !lksb->sb_lvbptr)
3479+ goto out;
3483+
10d56c87 3484+ /*
4bf12011 3485+ * Take conversion path.
3486+ */
3487+
3488+ if (flags & DLM_LKF_CONVERT) {
3489+ ret = convert_lock(lspace, mode, lksb, flags, ast, astarg,
3490+ bast, range);
3491+ goto out;
3492+ }
3493+
10d56c87 3494+ /*
4bf12011 3495+ * Take new lock path.
3496+ */
3497+
3498+ if (parent) {
3499+ down_read(&lspace->ls_unlock_sem);
3500+
3501+ parent_lkb = find_lock_by_id(lspace, parent);
3502+
3503+ if (!parent_lkb ||
3504+ parent_lkb->lkb_flags & GDLM_LKFLG_DELETED ||
3505+ parent_lkb->lkb_flags & GDLM_LKFLG_MSTCPY ||
3506+ parent_lkb->lkb_status != GDLM_LKSTS_GRANTED) {
3507+ up_read(&lspace->ls_unlock_sem);
3508+ goto out;
3509+ }
3510+
3511+ atomic_inc(&parent_lkb->lkb_childcnt);
3512+ up_read(&lspace->ls_unlock_sem);
3513+ }
3514+
3515+ down_read(&lspace->ls_in_recovery);
3516+
3517+ ret = -ENOMEM;
3518+
3519+ lkb = create_lkb(lspace);
3520+ if (!lkb)
3521+ goto fail_dec;
3522+ lkb->lkb_astaddr = ast;
3523+ lkb->lkb_astparam = (long) astarg;
3524+ lkb->lkb_bastaddr = bast;
3525+ lkb->lkb_rqmode = mode;
3526+ lkb->lkb_grmode = DLM_LOCK_IV;
10d56c87 3527+ lkb->lkb_nodeid = -1;
4bf12011 3528+ lkb->lkb_lksb = lksb;
3529+ lkb->lkb_parent = parent_lkb;
3530+ lkb->lkb_lockqueue_flags = flags;
3531+ lkb->lkb_lvbptr = lksb->sb_lvbptr;
3532+
3533+ /* Copy the range if appropriate */
3534+ if (range) {
3535+ if (range->ra_start > range->ra_end) {
3536+ ret = -EINVAL;
3537+ goto fail_free;
3538+ }
3539+
3540+ if (lkb_set_range(lspace, lkb, range->ra_start, range->ra_end))
3541+ goto fail_free;
3542+ }
3543+
3544+ /* Convert relevant flags to internal numbers */
3545+ if (flags & DLM_LKF_VALBLK)
3546+ lkb->lkb_flags |= GDLM_LKFLG_VALBLK;
3547+ if (flags & DLM_LKF_PERSISTENT)
3548+ lkb->lkb_flags |= GDLM_LKFLG_PERSISTENT;
3549+ if (flags & DLM_LKF_NODLCKWT)
3550+ lkb->lkb_flags |= GDLM_LKFLG_NODLCKWT;
3551+
3552+ lksb->sb_lkid = lkb->lkb_id;
3553+
3554+ ret = dlm_lock_stage1(lspace, lkb, flags, name, namelen);
3555+ if (ret)
3556+ goto fail_free;
3557+
3558+ up_read(&lspace->ls_in_recovery);
3559+
3560+ wake_astd();
3561+
3562+ return 0;
3563+
3564+ fail_free:
3565+ release_lkb(lspace, lkb);
3566+ goto fail_unlock;
3567+
3568+ fail_dec:
3569+ if (parent_lkb)
3570+ atomic_dec(&parent_lkb->lkb_childcnt);
3571+
3572+ fail_unlock:
3573+ up_read(&lspace->ls_in_recovery);
3574+
3575+ out:
3576+ return ret;
3577+}
3578+
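A minimal caller-side sketch of the API above, assuming a lockspace handle
obtained elsewhere and omitting error handling; the completion ast is
mandatory, the blocking ast and range are optional:

    static struct dlm_lksb my_lksb;

    static void my_ast(void *astarg)
    {
            /* my_lksb.sb_status now holds the result of the request */
    }

    static int take_ex_lock(void *lockspace)
    {
            return dlm_lock(lockspace, DLM_LOCK_EX, &my_lksb, 0,
                            "my resource", 11,  /* name, name length */
                            0,                  /* no parent lock */
                            my_ast, NULL,       /* completion ast, astarg */
                            NULL,               /* no blocking ast */
                            NULL);              /* no range */
    }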
10d56c87 3579+int dlm_lock_stage1(struct dlm_ls *ls, struct dlm_lkb *lkb, int flags, char *name,
4bf12011 3580+ int namelen)
3581+{
3582+ struct dlm_rsb *rsb, *parent_rsb = NULL;
3583+ struct dlm_lkb *parent_lkb = lkb->lkb_parent;
4bf12011 3584+ uint32_t nodeid;
3585+ int error;
3586+
3587+ if (parent_lkb)
3588+ parent_rsb = parent_lkb->lkb_resource;
3589+
3590+ error = find_or_create_rsb(ls, parent_rsb, name, namelen, 1, &rsb);
3591+ if (error)
3592+ goto out;
4bf12011 3593+ lkb->lkb_resource = rsb;
4bf12011 3594+
3595+ log_debug(ls, "rq %u %x \"%s\"", lkb->lkb_rqmode, lkb->lkb_id,
3596+ rsb->res_name);
3597+ /*
4bf12011 3598+ * Next stage, do we need to find the master or can
3599+ * we get on with the real locking work?
3600+ */
3601+
3602+ if (rsb->res_nodeid == -1) {
3603+ if (get_directory_nodeid(rsb) != our_nodeid()) {
3604+ error = remote_stage(lkb, GDLM_LQSTATE_WAIT_RSB);
3605+ goto out;
3606+ }
3607+
3608+ error = dlm_dir_lookup(ls, our_nodeid(), rsb->res_name,
3609+ rsb->res_length, &nodeid);
4bf12011 3610+ if (error)
3611+ goto out;
3612+
3613+ if (nodeid == our_nodeid()) {
3614+ set_bit(RESFL_MASTER, &rsb->res_flags);
4bf12011 3615+ nodeid = 0;
3616+ } else
3617+ clear_bit(RESFL_MASTER, &rsb->res_flags);
4bf12011 3618+ rsb->res_nodeid = nodeid;
4bf12011 3619+ }
3620+
3621+ lkb->lkb_nodeid = rsb->res_nodeid;
3622+
4bf12011 3623+ error = dlm_lock_stage2(ls, lkb, rsb, flags);
3624+
3625+ out:
3626+ if (error)
3627+ release_rsb(rsb);
3628+
3629+ return error;
3630+}
3631+
10d56c87 3632+/*
4bf12011 3633+ * Locking routine called after we have an RSB, either a copy of a remote one
3634+ * or a local one, or perhaps a shiny new one all of our very own
3635+ */
3636+
10d56c87 3637+int dlm_lock_stage2(struct dlm_ls *ls, struct dlm_lkb *lkb, struct dlm_rsb *rsb, int flags)
4bf12011 3638+{
3639+ int error = 0;
3640+
3641+ DLM_ASSERT(rsb->res_nodeid != -1, print_lkb(lkb); print_rsb(rsb););
3642+
4bf12011 3643+ if (rsb->res_nodeid) {
3644+ res_lkb_enqueue(rsb, lkb, GDLM_LKSTS_WAITING);
3645+ error = remote_stage(lkb, GDLM_LQSTATE_WAIT_CONDGRANT);
3646+ } else {
3647+ dlm_lock_stage3(lkb);
3648+ }
3649+
3650+ return error;
3651+}
3652+
10d56c87 3653+/*
4bf12011 3654+ * Called on an RSB's master node to do stage2 locking for a remote lock
3655+ * request. Returns a proper lkb with rsb ready for lock processing.
3656+ * This is analogous to sections of dlm_lock() and dlm_lock_stage1().
3657+ */
3658+
3659+struct dlm_lkb *remote_stage2(int remote_nodeid, struct dlm_ls *ls,
3660+ struct dlm_request *freq)
4bf12011 3661+{
3662+ struct dlm_rsb *rsb = NULL, *parent_rsb = NULL;
3663+ struct dlm_lkb *lkb = NULL, *parent_lkb = NULL;
4bf12011 3664+ int error, namelen;
3665+
3666+ if (freq->rr_remparid) {
3667+ parent_lkb = find_lock_by_id(ls, freq->rr_remparid);
3668+ if (!parent_lkb)
3669+ goto fail;
3670+
3671+ atomic_inc(&parent_lkb->lkb_childcnt);
3672+ parent_rsb = parent_lkb->lkb_resource;
3673+ }
3674+
10d56c87 3675+ /*
4bf12011 3676+ * A new MSTCPY lkb. Initialize lkb fields including the real lkid and
3677+ * node actually holding the (non-MSTCPY) lkb. AST addresses are just
3678+ * flags in the master copy.
3679+ */
3680+
3681+ lkb = create_lkb(ls);
3682+ if (!lkb)
3683+ goto fail_dec;
3684+ lkb->lkb_grmode = DLM_LOCK_IV;
3685+ lkb->lkb_rqmode = freq->rr_rqmode;
3686+ lkb->lkb_parent = parent_lkb;
3687+ lkb->lkb_astaddr = (void *) (long) (freq->rr_asts & AST_COMP);
3688+ lkb->lkb_bastaddr = (void *) (long) (freq->rr_asts & AST_BAST);
4bf12011 3689+ lkb->lkb_nodeid = remote_nodeid;
3690+ lkb->lkb_remid = freq->rr_header.rh_lkid;
3691+ lkb->lkb_flags = GDLM_LKFLG_MSTCPY;
3692+ lkb->lkb_lockqueue_flags = freq->rr_flags;
3693+
3694+ if (lkb->lkb_lockqueue_flags & DLM_LKF_VALBLK) {
3695+ lkb->lkb_flags |= GDLM_LKFLG_VALBLK;
3696+ allocate_and_copy_lvb(ls, &lkb->lkb_lvbptr, freq->rr_lvb);
3697+ if (!lkb->lkb_lvbptr)
3698+ goto fail_free;
3699+ }
3700+
3701+ if (lkb->lkb_lockqueue_flags & GDLM_LKFLG_RANGE) {
3702+ error = lkb_set_range(ls, lkb, freq->rr_range_start,
3703+ freq->rr_range_end);
3704+ if (error)
3705+ goto fail_free;
3706+ }
3707+
10d56c87 3708+ /*
4bf12011 3709+ * Get the RSB which this lock is for. Create a new RSB if this is a
3710+ * new lock on a new resource. We must be the master of any new rsb.
3711+ */
3712+
3713+ namelen = freq->rr_header.rh_length - sizeof(*freq) + 1;
3714+
10d56c87 3715+ error = find_or_create_rsb(ls, parent_rsb, freq->rr_name, namelen, 0,
4bf12011 3716+ &rsb);
3717+ if (error)
3718+ goto fail_free;
3719+
3720+ if (!rsb || rsb->res_nodeid == -1) {
3721+ log_debug(ls, "inval rsb to %u", remote_nodeid);
3722+ lkb->lkb_retstatus = -EINVAL;
3723+ goto out;
3724+ }
3725+
4bf12011 3726+ lkb->lkb_resource = rsb;
4bf12011 3727+
3728+ log_debug(ls, "rq %u from %u %x \"%s\"", lkb->lkb_rqmode, remote_nodeid,
3729+ lkb->lkb_id, rsb->res_name);
3730+
3731+ DLM_ASSERT(rsb->res_nodeid == 0,
3732+ print_lkb(lkb);
3733+ print_request(freq);
3734+ printk("nodeid %u\n", remote_nodeid););
4bf12011 3735+
3736+ out:
3737+ return lkb;
4bf12011 3738+
3739+ fail_free:
3740+ /* release_lkb handles parent */
3741+ release_lkb(ls, lkb);
3742+ parent_lkb = NULL;
3743+
3744+ fail_dec:
3745+ if (parent_lkb)
3746+ atomic_dec(&parent_lkb->lkb_childcnt);
3747+ fail:
3748+ return NULL;
3749+}
3750+
10d56c87 3751+/*
4bf12011 3752+ * The final bit of lock request processing on the master node. Here the lock
3753+ * is granted and the completion ast is queued, or the lock is put on the
3754+ * waitqueue and blocking asts are sent.
3755+ */
3756+
10d56c87 3757+void dlm_lock_stage3(struct dlm_lkb *lkb)
4bf12011 3758+{
10d56c87 3759+ struct dlm_rsb *rsb = lkb->lkb_resource;
4bf12011 3760+
10d56c87 3761+ /*
4bf12011 3762+ * This is a locally mastered lock on a resource that already exists,
3763+ * see if it can be granted or if it must wait. When this function is
3764+ * called for a remote lock request (process_cluster_request,
3765+ * REMCMD_LOCKREQUEST), the result from grant_lock is returned to the
3766+ * requesting node at the end of process_cluster_request, not at the
3767+ * end of grant_lock.
3768+ */
3769+
3770+ down_write(&rsb->res_lock);
3771+
3772+ if (can_be_granted(rsb, lkb)) {
3773+ grant_lock(lkb, 0);
3774+ goto out;
3775+ }
3776+
10d56c87 3777+ /*
4bf12011 3778+ * This request is not a conversion, so the lkb didn't exist other than
3779+ * for this request and should be freed after EAGAIN is returned in the
3780+ * ast.
3781+ */
3782+
3783+ if (lkb->lkb_lockqueue_flags & DLM_LKF_NOQUEUE) {
4bf12011 3784+ lkb->lkb_retstatus = -EAGAIN;
4bf12011 3785+ if (lkb->lkb_lockqueue_flags & DLM_LKF_NOQUEUEBAST)
3786+ send_blocking_asts_all(rsb, lkb);
5cdbd17b 3787+ queue_ast(lkb, AST_COMP | AST_DEL, 0);
4bf12011 3788+ goto out;
3789+ }
3790+
10d56c87 3791+ /*
4bf12011 3792+ * The requested lkb must wait. Because the rsb of the requested lkb
3793+ * is mastered here, send blocking asts for the lkb's blocking the
3794+ * request.
3795+ */
3796+
3797+ lkb->lkb_retstatus = 0;
3798+ lkb_enqueue(rsb, lkb, GDLM_LKSTS_WAITING);
3799+
3800+ send_blocking_asts(rsb, lkb);
3801+
3802+ out:
3803+ up_write(&rsb->res_lock);
3804+}
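
# Aside: the -EAGAIN path above is what gives DLM_LKF_NOQUEUE its trylock
# semantics. A minimal sketch of a caller-side completion AST (illustration
# only; it assumes the caller passed its own lksb as astarg, and that AST
# delivery in ast.c copies lkb_retstatus into sb_status):

static void demo_try_ast(void *astarg)
{
	struct dlm_lksb *lksb = astarg;

	if (lksb->sb_status == -EAGAIN)
		return;		/* resource busy; back off and retry later */

	/* otherwise the request was granted; on the -EAGAIN path the lkb
	   is freed for us, since it was queued with AST_COMP | AST_DEL */
}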
3805+
3806+int dlm_unlock(void *lockspace,
3807+ uint32_t lkid,
3808+ uint32_t flags,
3809+ struct dlm_lksb *lksb,
3810+ void *astarg)
3811+{
3812+ struct dlm_ls *ls = find_lockspace_by_local_id(lockspace);
3813+ struct dlm_lkb *lkb;
3814+ struct dlm_rsb *rsb;
4bf12011 3815+ int ret = -EINVAL;
3816+
3817+ if (!ls)
3818+ goto out;
3819+
3820+ lkb = find_lock_by_id(ls, lkid);
3821+ if (!lkb)
3822+ goto out;
3823+
3824+ /* Can't dequeue a master copy (a remote node's mastered lock) */
3825+ if (lkb->lkb_flags & GDLM_LKFLG_MSTCPY)
3826+ goto out;
3827+
3828+ /* Already waiting for a remote lock operation */
3829+ if (lkb->lkb_lockqueue_state) {
3830+ ret = -EBUSY;
3831+ goto out;
3832+ }
3833+
3834+ /* Can only cancel WAITING or CONVERTing locks.
3835+ * This is just a quick check - it is also checked in unlock_stage2()
3836+ * (which may be on the master) under the semaphore.
3837+ */
3838+ if ((flags & DLM_LKF_CANCEL) &&
3839+ (lkb->lkb_status == GDLM_LKSTS_GRANTED))
3840+ goto out;
3841+
3842+ /* "Normal" unlocks must operate on a granted lock */
3843+ if (!(flags & DLM_LKF_CANCEL) &&
3844+ (lkb->lkb_status != GDLM_LKSTS_GRANTED))
3845+ goto out;
3846+
3847+ down_write(&ls->ls_unlock_sem);
4bf12011 3848+ /* Can't dequeue a lock with sublocks */
3849+ if (atomic_read(&lkb->lkb_childcnt)) {
3850+ up_write(&ls->ls_unlock_sem);
3851+ ret = -ENOTEMPTY;
3852+ goto out;
3853+ }
4bf12011 3854+ /* Mark it as deleted so we can't use it as a parent in dlm_lock() */
3855+ if (!(flags & DLM_LKF_CANCEL))
3856+ lkb->lkb_flags |= GDLM_LKFLG_DELETED;
3857+ up_write(&ls->ls_unlock_sem);
3858+
3859+ down_read(&ls->ls_in_recovery);
3860+ rsb = find_rsb_to_unlock(ls, lkb);
3861+
3862+ log_debug(ls, "un %x ref %u flg %x nodeid %d/%d \"%s\"", lkb->lkb_id,
3863+ atomic_read(&rsb->res_ref), rsb->res_flags,
3864+ lkb->lkb_nodeid, rsb->res_nodeid, rsb->res_name);
3865+
4bf12011 3866+ /* Save any new params */
3867+ if (lksb)
3868+ lkb->lkb_lksb = lksb;
3869+ if (astarg)
3870+ lkb->lkb_astparam = (long) astarg;
4bf12011 3871+ lkb->lkb_lockqueue_flags = flags;
3872+
10d56c87 3873+ if (lkb->lkb_nodeid)
4bf12011 3874+ ret = remote_stage(lkb, GDLM_LQSTATE_WAIT_UNLOCK);
3875+ else
10d56c87 3876+ ret = dlm_unlock_stage2(lkb, rsb, flags);
4bf12011 3877+ up_read(&ls->ls_in_recovery);
3878+
3879+ wake_astd();
3880+
3881+ out:
3882+ return ret;
3883+}
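
# Aside: a minimal caller sketch for the two entry points above (illustration
# only). It assumes the dlm_lock() prototype order from this patch series'
# dlm.h: (lockspace, mode, lksb, flags, name, namelen, parent lkid,
# completion ast, astarg, blocking ast, range); dlm_unlock() is as defined
# above, and sb_lkid is filled in by dlm_lock() before stage1 runs.

static struct dlm_lksb demo_lksb;

static void demo_ast(void *astarg)
{
	/* demo_lksb.sb_status carries the result: 0, -EAGAIN, -DLM_EUNLOCK... */
}

static int demo_acquire(void *ls)
{
	return dlm_lock(ls, DLM_LOCK_EX, &demo_lksb, 0, "demo", 4,
			0, demo_ast, NULL, NULL, NULL);
}

static int demo_release(void *ls)
{
	return dlm_unlock(ls, demo_lksb.sb_lkid, 0, NULL, NULL);
}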
3884+
10d56c87 3885+int dlm_unlock_stage2(struct dlm_lkb *lkb, struct dlm_rsb *rsb, uint32_t flags)
4bf12011 3886+{
4bf12011 3887+ int remote = lkb->lkb_flags & GDLM_LKFLG_MSTCPY;
10d56c87 3888+ int old_status;
4bf12011 3889+
3890+ down_write(&rsb->res_lock);
3891+
3892+ /* Can only cancel WAITING or CONVERTing locks */
3893+ if ((flags & DLM_LKF_CANCEL) &&
3894+ (lkb->lkb_status == GDLM_LKSTS_GRANTED)) {
3895+ lkb->lkb_retstatus = -EINVAL;
5cdbd17b 3896+ queue_ast(lkb, AST_COMP, 0);
4bf12011 3897+ goto out;
3898+ }
3899+
3900+ old_status = lkb_dequeue(lkb);
3901+
10d56c87 3902+ /*
4bf12011 3903+ * If it was granted, grant any converting or waiting locks.
3904+ */
3905+
3906+ if (old_status == GDLM_LKSTS_GRANTED)
3907+ grant_pending_locks(rsb);
3908+
10d56c87 3909+ /*
4bf12011 3910+ * Cancelling a conversion
3911+ */
3912+
3913+ if ((old_status == GDLM_LKSTS_CONVERT) && (flags & DLM_LKF_CANCEL)) {
3914+ /* VMS semantics say we should send blocking ASTs again here */
3915+ send_blocking_asts(rsb, lkb);
3916+
3917+ /* Remove from deadlock detection */
3918+ if (lkb->lkb_duetime)
3919+ remove_from_deadlockqueue(lkb);
3920+
3921+ /* Stick it back on the granted queue */
3922+ lkb_enqueue(rsb, lkb, GDLM_LKSTS_GRANTED);
3923+ lkb->lkb_rqmode = lkb->lkb_grmode;
3924+
3925+ /* Was it blocking any other locks? */
3926+ if (first_in_list(lkb, &rsb->res_convertqueue))
3927+ grant_pending_locks(rsb);
3928+
3929+ lkb->lkb_retstatus = -DLM_ECANCEL;
5cdbd17b 3930+ queue_ast(lkb, AST_COMP, 0);
4bf12011 3931+ goto out;
3932+ }
3933+
10d56c87 3934+ /*
4bf12011 3935+ * The lvb can be saved or cleared on unlock.
3936+ */
3937+
3938+ if (rsb->res_lvbptr && (lkb->lkb_grmode >= DLM_LOCK_PW)) {
3939+ if ((flags & DLM_LKF_VALBLK) && lkb->lkb_lvbptr)
3940+ memcpy(rsb->res_lvbptr, lkb->lkb_lvbptr, DLM_LVB_LEN);
3941+ if (flags & DLM_LKF_IVVALBLK)
3942+ memset(rsb->res_lvbptr, 0, DLM_LVB_LEN);
3943+ }
3944+
5cdbd17b 3945+ lkb->lkb_retstatus = flags & DLM_LKF_CANCEL ? -DLM_ECANCEL:-DLM_EUNLOCK;
4bf12011 3946+
3947+ if (!remote)
3948+ queue_ast(lkb, AST_COMP | AST_DEL, 0);
3949+
3950+ /*
4bf12011 3951+ * Only free the LKB if we are the master copy. Otherwise the AST
10d56c87 3952+ * delivery routine will free it after delivery.
4bf12011 3953+ */
3954+
3955+ if (remote) {
3956+ up_write(&rsb->res_lock);
3957+ release_lkb(rsb->res_ls, lkb);
3958+ release_rsb(rsb);
3959+ goto out2;
3960+ }
3961+
3962+ out:
3963+ up_write(&rsb->res_lock);
3964+ out2:
3965+ wake_astd();
3966+ return 0;
3967+}
3968+
10d56c87 3969+/*
4bf12011 3970+ * Lock conversion
3971+ */
3972+
10d56c87 3973+static int convert_lock(struct dlm_ls *ls, int mode, struct dlm_lksb *lksb,
4bf12011 3974+ int flags, void *ast, void *astarg, void *bast,
3975+ struct dlm_range *range)
3976+{
3977+ struct dlm_lkb *lkb;
3978+ struct dlm_rsb *rsb;
4bf12011 3979+ int ret = -EINVAL;
3980+
3981+ lkb = find_lock_by_id(ls, lksb->sb_lkid);
3982+ if (!lkb) {
3983+ goto out;
3984+ }
3985+
3986+ if (lkb->lkb_status != GDLM_LKSTS_GRANTED) {
3987+ ret = -EBUSY;
3988+ goto out;
3989+ }
3990+
3991+ if (lkb->lkb_flags & GDLM_LKFLG_MSTCPY) {
3992+ goto out;
3993+ }
3994+
3995+ if ((flags & DLM_LKF_QUECVT) &&
3996+ !__quecvt_compat_matrix[lkb->lkb_grmode + 1][mode + 1]) {
3997+ goto out;
3998+ }
3999+
4000+ if (!lksb->sb_lvbptr && (flags & DLM_LKF_VALBLK)) {
4001+ goto out;
4002+ }
4007+
4008+ /* Set up the ranges as appropriate */
4009+ if (range) {
4010+ if (range->ra_start > range->ra_end)
4011+ goto out;
4012+
4013+ if (lkb_set_range(ls, lkb, range->ra_start, range->ra_end)) {
4014+ ret = -ENOMEM;
4015+ goto out;
4016+ }
4017+ }
4018+
4019+ rsb = lkb->lkb_resource;
4020+ down_read(&ls->ls_in_recovery);
4021+
4022+ log_debug(ls, "cv %u %x \"%s\"", mode, lkb->lkb_id, rsb->res_name);
4bf12011 4023+
4024+ lkb->lkb_flags &= ~GDLM_LKFLG_VALBLK;
4025+ lkb->lkb_flags &= ~GDLM_LKFLG_DEMOTED;
4026+
4027+ if (flags & DLM_LKF_NODLCKWT)
4028+ lkb->lkb_flags |= GDLM_LKFLG_NODLCKWT;
4029+ if (ast)
4030+ lkb->lkb_astaddr = ast;
4031+ if (astarg)
4032+ lkb->lkb_astparam = (long) astarg;
4033+ if (bast)
4034+ lkb->lkb_bastaddr = bast;
4035+ lkb->lkb_rqmode = mode;
4036+ lkb->lkb_lockqueue_flags = flags;
4037+ lkb->lkb_flags |= (flags & DLM_LKF_VALBLK) ? GDLM_LKFLG_VALBLK : 0;
4038+ lkb->lkb_lvbptr = lksb->sb_lvbptr;
4039+
4040+ if (rsb->res_nodeid) {
4041+ res_lkb_swqueue(rsb, lkb, GDLM_LKSTS_CONVERT);
4042+ ret = remote_stage(lkb, GDLM_LQSTATE_WAIT_CONVERT);
4043+ } else {
4044+ ret = dlm_convert_stage2(lkb, FALSE);
4045+ }
4046+
10d56c87 4047+ up_read(&ls->ls_in_recovery);
4bf12011 4048+
4049+ wake_astd();
4050+
4051+ out:
4052+ return ret;
4053+}
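
# Aside: continuing the hypothetical caller sketched earlier, a conversion
# reuses the same lksb; convert_lock() locates the lock through
# lksb->sb_lkid, so no resource name is needed (illustration only, same
# assumed prototype as before):

static int demo_demote(void *ls)
{
	return dlm_lock(ls, DLM_LOCK_PR, &demo_lksb, DLM_LKF_CONVERT,
			NULL, 0, 0, demo_ast, NULL, NULL, NULL);
}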
4054+
10d56c87 4055+/*
4bf12011 4056+ * For local conversion requests on locally mastered locks this is called
4057+ * directly from dlm_lock/convert_lock. This function is also called for
4058+ * remote conversion requests of MSTCPY locks (from process_cluster_request).
4059+ */
4060+
10d56c87 4061+int dlm_convert_stage2(struct dlm_lkb *lkb, int do_ast)
4bf12011 4062+{
10d56c87 4063+ struct dlm_rsb *rsb = lkb->lkb_resource;
4bf12011 4064+ int ret = 0;
4065+
4066+ down_write(&rsb->res_lock);
4067+
4068+ if (can_be_granted(rsb, lkb)) {
4069+ grant_lock(lkb, 0);
4070+ grant_pending_locks(rsb);
4071+ goto out;
4072+ }
4073+
10d56c87 4074+ /*
4bf12011 4075+ * Remove lkb from granted queue.
4076+ */
4077+
4078+ lkb_dequeue(lkb);
4079+
10d56c87 4080+ /*
4bf12011 4081+ * The user won't wait, so stick it back on the grant queue.
4082+ */
4083+
4084+ if (lkb->lkb_lockqueue_flags & DLM_LKF_NOQUEUE) {
4085+ lkb_enqueue(rsb, lkb, GDLM_LKSTS_GRANTED);
4086+ ret = lkb->lkb_retstatus = -EAGAIN;
4087+ if (do_ast)
5cdbd17b 4088+ queue_ast(lkb, AST_COMP, 0);
4bf12011 4089+ if (lkb->lkb_lockqueue_flags & DLM_LKF_NOQUEUEBAST)
4090+ send_blocking_asts_all(rsb, lkb);
4091+ goto out;
4092+ }
4093+
10d56c87 4094+ /*
4bf12011 4095+ * The lkb's status tells which queue it's on. Put back on convert
4096+ * queue. (QUECVT requests added at end of the queue, all others in
4097+ * order.)
4098+ */
4099+
4100+ lkb->lkb_retstatus = 0;
4101+ lkb_enqueue(rsb, lkb, GDLM_LKSTS_CONVERT);
4102+
10d56c87 4103+ /*
4bf12011 4104+ * The request can't be granted; send blocking ASTs to the locks in its way.
4105+ */
4106+
4107+ send_blocking_asts(rsb, lkb);
4108+
4109+ if (!(lkb->lkb_flags & GDLM_LKFLG_NODLCKWT))
4110+ add_to_deadlockqueue(lkb);
4111+
4112+ out:
4113+ up_write(&rsb->res_lock);
4114+ return ret;
4115+}
4116+
10d56c87 4117+/*
4bf12011 4118+ * Remove lkb from any queue it's on, add it to the granted queue, and queue a
4119+ * completion ast. rsb res_lock must be held in write when this is called.
4120+ */
4121+
10d56c87 4122+static void grant_lock(struct dlm_lkb *lkb, int send_remote)
4bf12011 4123+{
10d56c87 4124+ struct dlm_rsb *rsb = lkb->lkb_resource;
4bf12011 4125+
4126+ if (lkb->lkb_duetime)
4127+ remove_from_deadlockqueue(lkb);
4128+
4129+ if (lkb->lkb_flags & GDLM_LKFLG_VALBLK) {
4130+ int b;
10d56c87 4131+ DLM_ASSERT(lkb->lkb_lvbptr,);
4bf12011 4132+
4133+ if (!rsb->res_lvbptr)
4134+ rsb->res_lvbptr = allocate_lvb(rsb->res_ls);
4135+
4136+ b = __lvb_operations[lkb->lkb_grmode + 1][lkb->lkb_rqmode + 1];
4137+ if (b)
4138+ memcpy(lkb->lkb_lvbptr, rsb->res_lvbptr, DLM_LVB_LEN);
4139+ else
4140+ memcpy(rsb->res_lvbptr, lkb->lkb_lvbptr, DLM_LVB_LEN);
4141+ }
4142+
4143+ if (lkb->lkb_range) {
4144+ lkb->lkb_range[GR_RANGE_START] = lkb->lkb_range[RQ_RANGE_START];
4145+ lkb->lkb_range[GR_RANGE_END] = lkb->lkb_range[RQ_RANGE_END];
4146+ }
4147+
4148+ lkb->lkb_grmode = lkb->lkb_rqmode;
4149+ lkb->lkb_rqmode = DLM_LOCK_IV;
4150+ lkb_swqueue(rsb, lkb, GDLM_LKSTS_GRANTED);
4151+
4152+ lkb->lkb_highbast = 0;
4153+ lkb->lkb_retstatus = 0;
5cdbd17b 4154+ queue_ast(lkb, AST_COMP, 0);
4bf12011 4155+
10d56c87 4156+ /*
4bf12011 4157+ * A remote request has been granted, either immediately or after
4158+ * waiting a while. If it was granted right away as part of processing
4159+ * the initial request, send_remote is 0 and reply_and_grant() sends a
4160+ * single message combining the request reply with the grant. If it
4161+ * was granted later, "out of band" (i.e. by another lock being
4162+ * converted or unlocked), send_remote is 1 and remote_grant() sends a
4163+ * separate grant message.
4164+ */
4170+
4171+ if ((lkb->lkb_flags & GDLM_LKFLG_MSTCPY) && lkb->lkb_nodeid) {
4172+ if (send_remote)
4173+ remote_grant(lkb);
4174+ else if (lkb->lkb_request)
4175+ reply_and_grant(lkb);
4176+ }
4177+
4178+}
4179+
10d56c87 4180+static void send_bast_queue(struct list_head *head, struct dlm_lkb *lkb)
4bf12011 4181+{
10d56c87 4182+ struct dlm_lkb *gr;
4bf12011 4183+
4184+ list_for_each_entry(gr, head, lkb_statequeue) {
4185+ if (gr->lkb_bastaddr &&
4186+ gr->lkb_highbast < lkb->lkb_rqmode &&
4187+ ranges_overlap(lkb, gr) && !modes_compat(gr, lkb)) {
5cdbd17b 4188+ queue_ast(gr, AST_BAST, lkb->lkb_rqmode);
4bf12011 4189+ gr->lkb_highbast = lkb->lkb_rqmode;
4190+ }
4191+ }
4192+}
4193+
10d56c87 4194+/*
4bf12011 4195+ * Notify granted locks if they are blocking a newly forced-to-wait lock.
4196+ */
4197+
10d56c87 4198+static void send_blocking_asts(struct dlm_rsb *rsb, struct dlm_lkb *lkb)
4bf12011 4199+{
4200+ send_bast_queue(&rsb->res_grantqueue, lkb);
4201+ /* check if the following improves performance */
4202+ /* send_bast_queue(&rsb->res_convertqueue, lkb); */
4203+}
4204+
10d56c87 4205+static void send_blocking_asts_all(struct dlm_rsb *rsb, struct dlm_lkb *lkb)
4bf12011 4206+{
4207+ send_bast_queue(&rsb->res_grantqueue, lkb);
4208+ send_bast_queue(&rsb->res_convertqueue, lkb);
4209+}
4210+
10d56c87 4211+/*
4bf12011 4212+ * Called when a lock has been dequeued. Look for any locks to grant that are
4213+ * waiting for conversion or waiting to be granted.
4214+ * The rsb res_lock must be held in write when this function is called.
4215+ */
4216+
10d56c87 4217+int grant_pending_locks(struct dlm_rsb *rsb)
4bf12011 4218+{
10d56c87 4219+ struct dlm_lkb *lkb;
4bf12011 4220+ struct list_head *list;
4221+ struct list_head *temp;
4222+ int8_t high = DLM_LOCK_IV;
4223+
4224+ list_for_each_safe(list, temp, &rsb->res_convertqueue) {
10d56c87 4225+ lkb = list_entry(list, struct dlm_lkb, lkb_statequeue);
4bf12011 4226+
4227+ if (can_be_granted(rsb, lkb))
4228+ grant_lock(lkb, 1);
4229+ else
4230+ high = MAX(lkb->lkb_rqmode, high);
4231+ }
4232+
4233+ list_for_each_safe(list, temp, &rsb->res_waitqueue) {
10d56c87 4234+ lkb = list_entry(list, struct dlm_lkb, lkb_statequeue);
4bf12011 4235+
4236+ if (can_be_granted(rsb, lkb))
4237+ grant_lock(lkb, 1);
4238+ else
4239+ high = MAX(lkb->lkb_rqmode, high);
4240+ }
4241+
10d56c87 4242+ /*
4bf12011 4243+ * If there are locks left on the wait/convert queue then send blocking
4244+ * ASTs to the granted locks that are blocking them.
4245+ *
4246+ * FIXME: This might generate some spurious blocking ASTs for range
4247+ * locks.
4248+ */
4249+
4250+ if (high > DLM_LOCK_IV) {
4251+ list_for_each_safe(list, temp, &rsb->res_grantqueue) {
10d56c87 4252+ lkb = list_entry(list, struct dlm_lkb, lkb_statequeue);
4bf12011 4253+
4254+ if (lkb->lkb_bastaddr &&
4255+ (lkb->lkb_highbast < high) &&
4256+ !__dlm_compat_matrix[lkb->lkb_grmode+1][high+1]) {
4257+
5cdbd17b 4258+ queue_ast(lkb, AST_BAST, high);
4bf12011 4259+ lkb->lkb_highbast = high;
4260+ }
4261+ }
4262+ }
4263+
4264+ return 0;
4265+}
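
# Aside: the bast test above, !__dlm_compat_matrix[grmode+1][high+1], uses
# the conventional VMS mode-compatibility table; the +1 offsets exist because
# DLM_LOCK_IV is -1. A standalone copy is shown below for reference (an
# illustration of the standard table, not the array defined in this patch):

static const int demo_compat_matrix[8][8] = {
	/*      IV NL CR CW PR PW EX PD */
	/*IV*/ { 1, 1, 1, 1, 1, 1, 1, 0},
	/*NL*/ { 1, 1, 1, 1, 1, 1, 1, 0},
	/*CR*/ { 1, 1, 1, 1, 1, 1, 0, 0},
	/*CW*/ { 1, 1, 1, 1, 0, 0, 0, 0},
	/*PR*/ { 1, 1, 1, 0, 1, 0, 0, 0},
	/*PW*/ { 1, 1, 1, 0, 0, 0, 0, 0},
	/*EX*/ { 1, 1, 0, 0, 0, 0, 0, 0},
	/*PD*/ { 0, 0, 0, 0, 0, 0, 0, 0},
};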
4266+
10d56c87 4267+/*
4bf12011 4268+ * Called to cancel a locking operation that failed due to some internal
4269+ * reason.
4270+ *
4271+ * Waiting locks will be removed, converting locks will be reverted to their
4272+ * granted status, unlocks will be left where they are.
4273+ *
4274+ * A completion AST will be delivered to the caller.
4275+ */
4276+
10d56c87 4277+int cancel_lockop(struct dlm_lkb *lkb, int status)
4bf12011 4278+{
4279+ int state = lkb->lkb_lockqueue_state;
5cdbd17b 4280+ uint16_t astflags = AST_COMP;
4bf12011 4281+
4282+ lkb->lkb_lockqueue_state = 0;
4283+
4284+ switch (state) {
4285+ case GDLM_LQSTATE_WAIT_RSB:
5cdbd17b 4286+ astflags |= AST_DEL;
4bf12011 4287+ break;
4288+
4289+ case GDLM_LQSTATE_WAIT_CONDGRANT:
4290+ res_lkb_dequeue(lkb);
5cdbd17b 4291+ astflags |= AST_DEL;
4bf12011 4292+ break;
4293+
4294+ case GDLM_LQSTATE_WAIT_CONVERT:
4295+ res_lkb_swqueue(lkb->lkb_resource, lkb, GDLM_LKSTS_GRANTED);
4296+
4297+ /* Remove from deadlock detection */
4298+ if (lkb->lkb_duetime) {
4299+ remove_from_deadlockqueue(lkb);
4300+ }
4301+ break;
4302+
4303+ case GDLM_LQSTATE_WAIT_UNLOCK:
4304+ /* We can leave this. I think.... */
4305+ break;
4306+ }
4307+
4308+ lkb->lkb_retstatus = status;
5cdbd17b 4309+ queue_ast(lkb, astflags, 0);
4bf12011 4310+
4311+ return 0;
4312+}
4313+
10d56c87 4314+/*
4bf12011 4315+ * Check for conversion deadlock. If a deadlock is found, return the
4316+ * lkb to kill; otherwise return NULL.
4317+ */
4318+
10d56c87 4319+struct dlm_lkb *conversion_deadlock_check(struct dlm_lkb *lkb)
4bf12011 4320+{
10d56c87 4321+ struct dlm_rsb *rsb = lkb->lkb_resource;
4bf12011 4322+ struct list_head *entry;
4323+
10d56c87 4324+ DLM_ASSERT(lkb->lkb_status == GDLM_LKSTS_CONVERT,);
4bf12011 4325+
4326+ /* Work our way up to the head of the queue looking for locks that
4327+ * conflict with us */
4328+
4329+ down_read(&rsb->res_lock);
4330+
4331+ entry = lkb->lkb_statequeue.prev;
4332+ while (entry != &rsb->res_convertqueue) {
10d56c87 4333+ struct dlm_lkb *lkb2 = list_entry(entry, struct dlm_lkb, lkb_statequeue);
4bf12011 4334+
4335+ if (ranges_overlap(lkb, lkb2) && !modes_compat(lkb2, lkb)) {
4336+ up_read(&rsb->res_lock);
4337+ return lkb;
4338+ }
4339+ entry = entry->prev;
4340+ }
4341+ up_read(&rsb->res_lock);
4342+
4343+ return NULL;
4344+}
4345+
10d56c87 4346+/*
4bf12011 4347+ * Conversion operation was cancelled by us (not the user).
4348+ * ret contains the return code to pass on to the user.
4349+ */
4350+
10d56c87 4351+void cancel_conversion(struct dlm_lkb *lkb, int ret)
4bf12011 4352+{
10d56c87 4353+ struct dlm_rsb *rsb = lkb->lkb_resource;
4bf12011 4354+
4355+ /* Stick it back on the granted queue */
4356+ res_lkb_swqueue(rsb, lkb, GDLM_LKSTS_GRANTED);
4357+ lkb->lkb_rqmode = lkb->lkb_grmode;
4358+
4359+ remove_from_deadlockqueue(lkb);
4360+
4361+ lkb->lkb_retstatus = ret;
5cdbd17b 4362+ queue_ast(lkb, AST_COMP, 0);
4bf12011 4363+ wake_astd();
4364+}
4365+
10d56c87 4366+/*
4bf12011 4367+ * As new master of the rsb for this lkb, we need to handle these requests
4368+ * removed from the lockqueue and originating from local processes:
4369+ * GDLM_LQSTATE_WAIT_RSB, GDLM_LQSTATE_WAIT_CONDGRANT,
4370+ * GDLM_LQSTATE_WAIT_UNLOCK, GDLM_LQSTATE_WAIT_CONVERT.
4371+ */
4372+
10d56c87 4373+void process_remastered_lkb(struct dlm_ls *ls, struct dlm_lkb *lkb, int state)
4bf12011 4374+{
4375+ struct dlm_rsb *rsb;
4376+
4bf12011 4377+ switch (state) {
4378+ case GDLM_LQSTATE_WAIT_RSB:
4379+ dlm_lock_stage1(lkb->lkb_resource->res_ls, lkb,
4380+ lkb->lkb_lockqueue_flags,
4381+ lkb->lkb_resource->res_name,
4382+ lkb->lkb_resource->res_length);
4383+ break;
4384+
4385+ case GDLM_LQSTATE_WAIT_CONDGRANT:
4386+ res_lkb_dequeue(lkb);
4387+ dlm_lock_stage3(lkb);
4388+ break;
4389+
4390+ case GDLM_LQSTATE_WAIT_UNLOCK:
4391+ rsb = find_rsb_to_unlock(ls, lkb);
4392+ dlm_unlock_stage2(lkb, rsb, lkb->lkb_lockqueue_flags);
4bf12011 4393+ break;
4394+
4395+ case GDLM_LQSTATE_WAIT_CONVERT:
4396+ dlm_convert_stage2(lkb, TRUE);
4397+ break;
4398+
4399+ default:
10d56c87 4400+ DLM_ASSERT(0,);
4bf12011 4401+ }
4402+}
4403+
4404+static void dump_queue(struct list_head *head)
4405+{
4406+ struct dlm_lkb *lkb;
4407+
4408+ list_for_each_entry(lkb, head, lkb_statequeue) {
4409+ printk("%08x gr %d rq %d flg %x sts %u node %u remid %x "
4410+ "lq %d,%x\n",
4411+ lkb->lkb_id,
4412+ lkb->lkb_grmode,
4413+ lkb->lkb_rqmode,
4414+ lkb->lkb_flags,
4415+ lkb->lkb_status,
4416+ lkb->lkb_nodeid,
4417+ lkb->lkb_remid,
4418+ lkb->lkb_lockqueue_state,
4419+ lkb->lkb_lockqueue_flags);
4420+ }
4421+}
4422+
4423+static void dump_rsb(struct dlm_rsb *rsb)
4424+{
4425+ printk("name \"%s\" flags %lx nodeid %u ref %u\n",
4426+ rsb->res_name, rsb->res_flags, rsb->res_nodeid,
4427+ atomic_read(&rsb->res_ref));
4428+
4429+ if (!list_empty(&rsb->res_grantqueue)) {
4430+ printk("grant queue\n");
4431+ dump_queue(&rsb->res_grantqueue);
4432+ }
4433+
4434+ if (!list_empty(&rsb->res_convertqueue)) {
4435+ printk("convert queue\n");
4436+ dump_queue(&rsb->res_convertqueue);
4437+ }
4438+
4439+ if (!list_empty(&rsb->res_waitqueue)) {
4440+ printk("wait queue\n");
4441+ dump_queue(&rsb->res_waitqueue);
4442+ }
4443+}
4444+
4445+void dlm_locks_dump(void)
4446+{
4447+ struct dlm_ls *ls;
4448+ struct dlm_rsb *rsb;
4449+ struct list_head *head;
4450+ int i;
4451+
4452+ list_for_each_entry(ls, &lslist, ls_list) {
4453+ for (i = 0; i < ls->ls_rsbtbl_size; i++) {
4454+ head = &ls->ls_rsbtbl[i].list;
4455+ list_for_each_entry(rsb, head, res_hashchain)
4456+ dump_rsb(rsb);
4457+ }
4458+ }
4459+}
4460+
4bf12011 4461diff -urN linux-orig/cluster/dlm/locking.h linux-patched/cluster/dlm/locking.h
4462--- linux-orig/cluster/dlm/locking.h 1970-01-01 07:30:00.000000000 +0730
4463+++ linux-patched/cluster/dlm/locking.h 2004-07-13 18:57:22.000000000 +0800
4464@@ -0,0 +1,32 @@
4bf12011 4465+/******************************************************************************
4466+*******************************************************************************
4467+**
4468+** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
4469+** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
4470+**
4471+** This copyrighted material is made available to anyone wishing to use,
4472+** modify, copy, or redistribute it subject to the terms and conditions
4473+** of the GNU General Public License v.2.
4474+**
4475+*******************************************************************************
4476+******************************************************************************/
4477+
4478+#ifndef __LOCKING_DOT_H__
4479+#define __LOCKING_DOT_H__
4480+
4481+void process_remastered_lkb(struct dlm_ls *ls, struct dlm_lkb *lkb, int state);
4482+void dlm_lock_stage3(struct dlm_lkb *lkb);
4483+int dlm_convert_stage2(struct dlm_lkb *lkb, int do_ast);
4484+int dlm_unlock_stage2(struct dlm_lkb *lkb, struct dlm_rsb *rsb, uint32_t flags);
4485+int dlm_lock_stage2(struct dlm_ls *lspace, struct dlm_lkb *lkb, struct dlm_rsb *rsb, int flags);
4486+struct dlm_rsb *create_rsb(struct dlm_ls *lspace, struct dlm_lkb *lkb, char *name, int namelen);
4487+int free_rsb_if_unused(struct dlm_rsb *rsb);
4488+struct dlm_lkb *remote_stage2(int remote_nodeid, struct dlm_ls *lspace,
4489+ struct dlm_request *freq);
4490+int cancel_lockop(struct dlm_lkb *lkb, int status);
4491+int dlm_remove_lock(struct dlm_lkb *lkb, uint32_t flags);
4492+int grant_pending_locks(struct dlm_rsb *rsb);
4493+void cancel_conversion(struct dlm_lkb *lkb, int ret);
4494+struct dlm_lkb *conversion_deadlock_check(struct dlm_lkb *lkb);
4bf12011 4495+
4496+#endif /* __LOCKING_DOT_H__ */
4497diff -urN linux-orig/cluster/dlm/lockqueue.c linux-patched/cluster/dlm/lockqueue.c
4498--- linux-orig/cluster/dlm/lockqueue.c 1970-01-01 07:30:00.000000000 +0730
4499+++ linux-patched/cluster/dlm/lockqueue.c 2004-07-13 18:57:22.000000000 +0800
4500@@ -0,0 +1,1092 @@
4bf12011 4501+/******************************************************************************
4502+*******************************************************************************
4503+**
4504+** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
4505+** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
4506+**
4507+** This copyrighted material is made available to anyone wishing to use,
4508+** modify, copy, or redistribute it subject to the terms and conditions
4509+** of the GNU General Public License v.2.
4510+**
4511+*******************************************************************************
4512+******************************************************************************/
4513+
4514+/*
4515+ * lockqueue.c
4516+ *
4517+ * This controls the lock queue, which is where locks
4518+ * come when they need to wait for a remote operation
4519+ * to complete.
4520+ *
4521+ * This could also be thought of as the "high-level" comms
4522+ * layer.
4523+ *
4524+ */
4525+
4526+#include "dlm_internal.h"
4527+#include "lockqueue.h"
4528+#include "dir.h"
4529+#include "locking.h"
4530+#include "lkb.h"
4531+#include "lowcomms.h"
4532+#include "midcomms.h"
4533+#include "reccomms.h"
4534+#include "nodes.h"
4535+#include "lockspace.h"
4536+#include "ast.h"
4537+#include "memory.h"
4538+#include "rsb.h"
4539+#include "queries.h"
10d56c87 4540+#include "util.h"
4bf12011 4541+
4542+static void add_reply_lvb(struct dlm_lkb * lkb, struct dlm_reply *reply);
4543+static void add_request_lvb(struct dlm_lkb * lkb, struct dlm_request *req);
4bf12011 4544+
4545+/*
4546+ * format of an entry on the request queue
4547+ */
4548+struct rq_entry {
4549+ struct list_head rqe_list;
4550+ uint32_t rqe_nodeid;
4551+ char rqe_request[1];
4552+};
4553+
4554+/*
4555+ * Add a new request (if appropriate) to the request queue and send the remote
4556+ * request out. Runs in the context of the locking caller.
4557+ *
4558+ * Recovery of a remote_stage request if the remote end fails while the lkb
4559+ * is still on the lockqueue:
4560+ *
4561+ * o lkbs on the lockqueue are flagged with GDLM_LKFLG_LQRESEND in
4562+ * lockqueue_lkb_mark() at the start of recovery.
4563+ *
4564+ * o Some lkb's will be rebuilt on new master rsb's during recovery.
4565+ * (depends on the type of request, see below).
4566+ *
4567+ * o At the end of recovery, resend_cluster_requests() looks at these
4568+ * LQRESEND lkb's and either:
4569+ *
4570+ * i) resends the request to the new master for the rsb where the
4571+ * request is processed as usual. The lkb remains on the lockqueue until
4572+ * the new master replies and we run process_lockqueue_reply().
4573+ *
4574+ * ii) if we've become the rsb master, remove the lkb from the lockqueue
4575+ * and processes the request locally via process_remastered_lkb().
4576+ *
4577+ * GDLM_LQSTATE_WAIT_RSB (1) - these lockqueue lkb's are not on any rsb queue
4578+ * and the request should be resent if dest node is failed.
4579+ *
4580+ * GDLM_LQSTATE_WAIT_CONDGRANT (3) - this lockqueue lkb is on a local rsb's
4581+ * wait queue. Don't rebuild this lkb on a new master rsb (the NOREBUILD flag
4582+ * makes send_lkb_queue() skip it). Resend this request to the new master.
4583+ *
4584+ * GDLM_LQSTATE_WAIT_UNLOCK (4) - this lkb is on a local rsb's queue. It will
4585+ * be rebuilt on the rsb on the new master (restbl_lkb_send/send_lkb_queue).
4586+ * Resend this request to the new master.
4587+ *
4588+ * GDLM_LQSTATE_WAIT_CONVERT (2) - this lkb is on a local rsb convert queue.
4589+ * It will be rebuilt on the new master rsb's granted queue. Resend this
4590+ * request to the new master.
4591+ */
4592+
10d56c87 4593+int remote_stage(struct dlm_lkb *lkb, int state)
4bf12011 4594+{
4595+ int error;
4596+
4597+ lkb->lkb_lockqueue_state = state;
4598+ add_to_lockqueue(lkb);
4599+
4600+ error = send_cluster_request(lkb, state);
4601+ if (error < 0) {
4602+ log_print("remote_stage error sending request %d", error);
4603+
4604+ /* Leave on lockqueue, it will be resent to correct node during
4605+ * recovery. */
4606+
4607+ /*
4608+ lkb->lkb_lockqueue_state = 0;
4609+ remove_from_lockqueue(lkb);
4610+ return -ENOTCONN;
4611+ */
4612+ }
4613+ return 0;
4614+}
4615+
4616+/*
4617+ * Requests received while the lockspace is in recovery get added to the
4618+ * request queue and processed when recovery is complete.
4619+ */
4620+
10d56c87 4621+void add_to_requestqueue(struct dlm_ls *ls, int nodeid, char *request, int length)
4bf12011 4622+{
4623+ struct rq_entry *entry;
4624+
4625+ if (in_nodes_gone(ls, nodeid))
4626+ return;
4627+
4628+ entry = kmalloc(sizeof(struct rq_entry) + length, GFP_KERNEL);
4629+ if (!entry) {
4630+ // TODO something better
4631+ printk("dlm: add_to_requestqueue: out of memory\n");
4632+ return;
4633+ }
4634+
4635+ log_debug(ls, "add_to_requestqueue %d", nodeid);
4636+ entry->rqe_nodeid = nodeid;
4637+ memcpy(entry->rqe_request, request, length);
4638+ list_add_tail(&entry->rqe_list, &ls->ls_requestqueue);
4639+}
4640+
10d56c87 4641+int process_requestqueue(struct dlm_ls *ls)
4bf12011 4642+{
4643+ int error = 0, count = 0;
4644+ struct rq_entry *entry, *safe;
10d56c87 4645+ struct dlm_header *req;
4bf12011 4646+
4647+ log_all(ls, "process held requests");
4648+
4649+ list_for_each_entry_safe(entry, safe, &ls->ls_requestqueue, rqe_list) {
10d56c87 4650+ req = (struct dlm_header *) entry->rqe_request;
4bf12011 4651+ log_debug(ls, "process_requestqueue %u", entry->rqe_nodeid);
4652+
4653+ if (!test_bit(LSFL_LS_RUN, &ls->ls_flags)) {
4654+ log_debug(ls, "process_requestqueue aborted");
4655+ error = -EINTR;
4656+ break;
4657+ }
4658+
4659+ error = process_cluster_request(entry->rqe_nodeid, req, TRUE);
4660+ if (error == -EINTR) {
4661+ log_debug(ls, "process_requestqueue interrupted");
4662+ break;
4663+ }
4664+
4665+ list_del(&entry->rqe_list);
4666+ kfree(entry);
4667+ count++;
4668+ error = 0;
4669+ }
4670+
4671+ log_all(ls, "processed %d requests", count);
4672+ return error;
4673+}
4674+
10d56c87 4675+void wait_requestqueue(struct dlm_ls *ls)
4bf12011 4676+{
4677+ while (!list_empty(&ls->ls_requestqueue) &&
4678+ test_bit(LSFL_LS_RUN, &ls->ls_flags))
4679+ schedule();
4680+}
4681+
4682+/*
4683+ * Resdir requests (lookup or remove) and replies from before recovery are
4684+ * invalid since the resdir was rebuilt. Clear them. Requests from nodes now
4685+ * gone are also invalid.
4686+ */
4687+
10d56c87 4688+void purge_requestqueue(struct dlm_ls *ls)
4bf12011 4689+{
4690+ int count = 0;
4691+ struct rq_entry *entry, *safe;
4692+ struct dlm_header *req;
4693+ struct dlm_request *freq;
4694+ struct dlm_lkb *lkb;
4bf12011 4695+
4696+ log_all(ls, "purge requests");
4697+
4698+ list_for_each_entry_safe(entry, safe, &ls->ls_requestqueue, rqe_list) {
4699+ req = (struct dlm_header *) entry->rqe_request;
4700+ freq = (struct dlm_request *) req;
4bf12011 4701+
4702+ if (req->rh_cmd == GDLM_REMCMD_REM_RESDATA ||
4703+ req->rh_cmd == GDLM_REMCMD_LOOKUP ||
4704+ in_nodes_gone(ls, entry->rqe_nodeid)) {
4705+
4706+ list_del(&entry->rqe_list);
4707+ kfree(entry);
4708+ count++;
4709+
4710+ } else if (req->rh_cmd == GDLM_REMCMD_LOCKREPLY) {
4711+
4712+ /*
4713+ * Replies to resdir lookups are invalid and must be
4714+ * purged. The lookup requests are marked in
4715+ * lockqueue_lkb_mark and will be resent in
4716+ * resend_cluster_requests. The only way to check if
4717+ * this is a lookup reply is to look at the
4718+ * lockqueue_state of the lkb.
4719+ */
4720+
4721+ lkb = find_lock_by_id(ls, freq->rr_header.rh_lkid);
10d56c87 4722+ DLM_ASSERT(lkb,);
4bf12011 4723+ if (lkb->lkb_lockqueue_state == GDLM_LQSTATE_WAIT_RSB) {
4724+ list_del(&entry->rqe_list);
4725+ kfree(entry);
4726+ count++;
4727+ }
4728+ }
4729+ }
4730+
4731+ log_all(ls, "purged %d requests", count);
4732+}
4733+
4734+/*
4735+ * Check if there's a reply for the given lkid in the requestqueue.
4736+ */
4737+
10d56c87 4738+int reply_in_requestqueue(struct dlm_ls *ls, int lkid)
4bf12011 4739+{
4740+ int rv = FALSE;
4741+ struct rq_entry *entry, *safe;
4742+ struct dlm_header *req;
4743+ struct dlm_request *freq;
4bf12011 4744+
4745+ list_for_each_entry_safe(entry, safe, &ls->ls_requestqueue, rqe_list) {
4746+ req = (struct dlm_header *) entry->rqe_request;
4747+ freq = (struct dlm_request *) req;
4bf12011 4748+
4749+ if (req->rh_cmd == GDLM_REMCMD_LOCKREPLY &&
4750+ freq->rr_header.rh_lkid == lkid) {
4751+ rv = TRUE;
4752+ break;
4753+ }
4754+ }
4755+
4756+ return rv;
4757+}
4758+
10d56c87 4759+void allocate_and_copy_lvb(struct dlm_ls *ls, char **lvbptr, char *src)
4bf12011 4760+{
4761+ if (!*lvbptr)
4762+ *lvbptr = allocate_lvb(ls);
4763+ if (*lvbptr)
4764+ memcpy(*lvbptr, src, DLM_LVB_LEN);
4765+}
4766+
4767+/*
4768+ * Process a lockqueue LKB after its remote processing is complete and it has
4769+ * been pulled from the lockqueue. Runs in the context of the DLM recvd thread
4770+ * on the machine that requested the lock.
4bf12011 4771+ */
4772+
4773+static void process_lockqueue_reply(struct dlm_lkb *lkb,
4774+ struct dlm_reply *reply,
4775+ uint32_t nodeid)
4bf12011 4776+{
4777+ struct dlm_rsb *rsb = lkb->lkb_resource;
4778+ struct dlm_ls *ls = rsb->res_ls;
4779+ int oldstate, state = lkb->lkb_lockqueue_state;
4bf12011 4780+
4781+ lkb->lkb_lockqueue_state = 0;
4782+ if (state)
4783+ remove_from_lockqueue(lkb);
4784+
4785+ switch (state) {
4786+ case GDLM_LQSTATE_WAIT_RSB:
4787+
4788+ DLM_ASSERT(reply->rl_status == 0,
4789+ print_lkb(lkb);
4790+ print_rsb(rsb);
4791+ print_reply(reply););
4792+
4793+ DLM_ASSERT(rsb->res_nodeid == -1 ||
4794+ rsb->res_nodeid == 0,
4795+ print_lkb(lkb);
4796+ print_rsb(rsb);
4797+ print_reply(reply););
4798+
4799+ if (reply->rl_nodeid == our_nodeid()) {
4800+ if (rsb->res_nodeid == -1) {
4801+ set_bit(RESFL_MASTER, &rsb->res_flags);
4802+ rsb->res_nodeid = 0;
4803+ } else {
4804+ log_all(ls, "ignore master reply %x %u",
4805+ lkb->lkb_id, nodeid);
4806+ }
4807+ } else {
4808+ DLM_ASSERT(rsb->res_nodeid == -1,
4809+ print_lkb(lkb);
4810+ print_rsb(rsb);
4811+ print_reply(reply););
4bf12011 4812+
10d56c87 4813+ clear_bit(RESFL_MASTER, &rsb->res_flags);
4bf12011 4814+ rsb->res_nodeid = reply->rl_nodeid;
10d56c87 4815+ }
4bf12011 4816+
4817+ log_debug(ls, "lookup reply %x %u", lkb->lkb_id,
4818+ rsb->res_nodeid);
4bf12011 4819+
4820+ lkb->lkb_nodeid = rsb->res_nodeid;
4821+ dlm_lock_stage2(ls, lkb, rsb, lkb->lkb_lockqueue_flags);
4bf12011 4822+ break;
4823+
4824+ case GDLM_LQSTATE_WAIT_CONVERT:
4825+ case GDLM_LQSTATE_WAIT_CONDGRANT:
4826+
4827+ /*
4828+ * After a remote lock/conversion/grant request we put the lock
4829+ * on the right queue and send an AST if appropriate. Any lock
4830+ * shuffling (eg newly granted locks because this one was
4831+ * converted downwards) will be dealt with in separate messages
4832+ * (which may be in the same network message)
4833+ */
4834+
4835+
4836+ /* the destination wasn't the master */
4837+ if (reply->rl_status == -EINVAL) {
4838+ int master_nodeid;
4839+
4840+ log_debug(ls, "resend lookup");
4841+ lkb_dequeue(lkb);
4842+ rsb->res_nodeid = -1;
4843+ lkb->lkb_nodeid = -1;
4844+ if (get_directory_nodeid(rsb) != our_nodeid())
4845+ remote_stage(lkb, GDLM_LQSTATE_WAIT_RSB);
4846+ else {
4847+ dlm_dir_lookup(ls, our_nodeid(), rsb->res_name,
4848+ rsb->res_length, &master_nodeid);
4849+
4850+ if (master_nodeid == our_nodeid()) {
4851+ set_bit(RESFL_MASTER, &rsb->res_flags);
4852+ master_nodeid = 0;
4853+ }
4854+ else
4855+ clear_bit(RESFL_MASTER,&rsb->res_flags);
4856+ rsb->res_nodeid = master_nodeid;
4857+ lkb->lkb_nodeid = master_nodeid;
4858+ dlm_lock_stage2(ls, lkb, rsb,
4859+ lkb->lkb_lockqueue_flags);
4860+ }
4861+ break;
4862+ }
4863+
4bf12011 4864+ if (!lkb->lkb_remid)
4865+ lkb->lkb_remid = reply->rl_lkid;
4866+
4867+ /*
4868+ * The remote request failed (we assume because of NOQUEUE).
4869+ * If this is a new request (non-conv) the lkb was created just
4870+ * for it so the lkb should be freed. If this was a
4871+ * conversion, the lkb already existed so we should put it back
4872+ * on the grant queue.
4873+ */
4874+
4875+ if (reply->rl_status != 0) {
10d56c87 4876+ DLM_ASSERT(reply->rl_status == -EAGAIN,);
4bf12011 4877+
4878+ if (state == GDLM_LQSTATE_WAIT_CONDGRANT) {
4879+ res_lkb_dequeue(lkb);
4880+ lkb->lkb_retstatus = reply->rl_status;
4881+ queue_ast(lkb, AST_COMP | AST_DEL, 0);
4882+ } else {
4bf12011 4883+ res_lkb_swqueue(rsb, lkb, GDLM_LKSTS_GRANTED);
4884+ lkb->lkb_retstatus = reply->rl_status;
4885+ queue_ast(lkb, AST_COMP, 0);
4886+ }
4bf12011 4887+ break;
4888+ }
4889+
4890+ /*
4891+ * The remote request was successful in granting the request or
4892+ * queuing it to be granted later. Add the lkb to the
4893+ * appropriate rsb queue.
4894+ */
4895+
4896+ switch (reply->rl_lockstate) {
4897+ case GDLM_LKSTS_GRANTED:
4898+
4899+ /* Compact version of grant_lock(). */
4900+
4901+ down_write(&rsb->res_lock);
4902+ if (lkb->lkb_flags & GDLM_LKFLG_VALBLK)
4903+ memcpy(lkb->lkb_lvbptr, reply->rl_lvb,
4904+ DLM_LVB_LEN);
4905+
4906+ lkb->lkb_grmode = lkb->lkb_rqmode;
4907+ lkb->lkb_rqmode = DLM_LOCK_IV;
4908+ lkb_swqueue(rsb, lkb, GDLM_LKSTS_GRANTED);
4909+
4910+ if (lkb->lkb_range) {
4911+ lkb->lkb_range[GR_RANGE_START] =
4912+ lkb->lkb_range[RQ_RANGE_START];
4913+ lkb->lkb_range[GR_RANGE_END] =
4914+ lkb->lkb_range[RQ_RANGE_END];
4915+ }
4916+ up_write(&rsb->res_lock);
4917+
4918+ lkb->lkb_retstatus = 0;
5cdbd17b 4919+ queue_ast(lkb, AST_COMP, 0);
4bf12011 4920+ break;
4921+
4922+ case GDLM_LKSTS_WAITING:
4923+
4924+ if (lkb->lkb_status != GDLM_LKSTS_GRANTED)
4925+ res_lkb_swqueue(rsb, lkb, GDLM_LKSTS_WAITING);
4926+ else
4927+ log_error(ls, "wait reply for granted %x %u",
4928+ lkb->lkb_id, lkb->lkb_nodeid);
4929+ break;
4930+
4931+ case GDLM_LKSTS_CONVERT:
4932+
4933+ if (lkb->lkb_status != GDLM_LKSTS_GRANTED)
4934+ res_lkb_swqueue(rsb, lkb, GDLM_LKSTS_CONVERT);
4935+ else
4936+ log_error(ls, "convert reply for granted %x %u",
4937+ lkb->lkb_id, lkb->lkb_nodeid);
4938+ break;
4939+
4940+ default:
4941+ log_error(ls, "process_lockqueue_reply state %d",
4942+ reply->rl_lockstate);
4943+ }
4944+
4945+ break;
4946+
4947+ case GDLM_LQSTATE_WAIT_UNLOCK:
4948+
4949+ /*
4950+ * Unlocks should never fail. Update local lock info. This always
4951+ * sends a completion AST with the status in the lksb.
4952+ */
4953+
10d56c87 4954+ DLM_ASSERT(reply->rl_status == 0,);
4bf12011 4955+ oldstate = res_lkb_dequeue(lkb);
4956+
4957+ /* Differentiate between unlocks and conversion cancellations */
4958+ if (lkb->lkb_lockqueue_flags & DLM_LKF_CANCEL &&
4959+ oldstate == GDLM_LKSTS_CONVERT) {
4960+ res_lkb_enqueue(lkb->lkb_resource, lkb,
4961+ GDLM_LKSTS_GRANTED);
4962+ lkb->lkb_retstatus = -DLM_ECANCEL;
5cdbd17b 4963+ queue_ast(lkb, AST_COMP, 0);
4bf12011 4964+ } else {
4bf12011 4965+ lkb->lkb_retstatus = -DLM_EUNLOCK;
5cdbd17b 4966+ queue_ast(lkb, AST_COMP | AST_DEL, 0);
4bf12011 4967+ }
4bf12011 4968+ break;
4969+
4970+ default:
4971+ log_error(ls, "process_lockqueue_reply id %x state %d",
4972+ lkb->lkb_id, state);
4973+ }
4974+}
4975+
4976+/*
4977+ * Tell a remote node to grant a lock. This happens when we are the master
4978+ * copy for a lock that is actually held on a remote node. The remote end is
4979+ * also responsible for sending the completion AST.
4980+ */
4981+
10d56c87 4982+void remote_grant(struct dlm_lkb *lkb)
4bf12011 4983+{
4984+ struct writequeue_entry *e;
10d56c87 4985+ struct dlm_request *req;
4bf12011 4986+
4987+ // TODO Error handling
4988+ e = lowcomms_get_buffer(lkb->lkb_nodeid,
10d56c87 4989+ sizeof(struct dlm_request),
4bf12011 4990+ lkb->lkb_resource->res_ls->ls_allocation,
4991+ (char **) &req);
4992+ if (!e)
4993+ return;
4994+
4995+ req->rr_header.rh_cmd = GDLM_REMCMD_LOCKGRANT;
10d56c87 4996+ req->rr_header.rh_length = sizeof(struct dlm_request);
4bf12011 4997+ req->rr_header.rh_flags = 0;
4998+ req->rr_header.rh_lkid = lkb->lkb_id;
4999+ req->rr_header.rh_lockspace = lkb->lkb_resource->res_ls->ls_global_id;
5000+ req->rr_remlkid = lkb->lkb_remid;
5001+ req->rr_flags = 0;
5002+
5003+ if (lkb->lkb_flags & GDLM_LKFLG_DEMOTED) {
5004+ /* This is a confusing non-standard use of rr_flags which is
5005+ * usually used to pass lockqueue_flags. */
5006+ req->rr_flags |= GDLM_LKFLG_DEMOTED;
5007+ }
5008+
5009+ add_request_lvb(lkb, req);
5010+ midcomms_send_buffer(&req->rr_header, e);
5011+}
5012+
10d56c87 5013+void reply_and_grant(struct dlm_lkb *lkb)
4bf12011 5014+{
5015+ struct dlm_request *req = lkb->lkb_request;
5016+ struct dlm_reply *reply;
4bf12011 5017+ struct writequeue_entry *e;
5018+
5019+ // TODO Error handling
5020+ e = lowcomms_get_buffer(lkb->lkb_nodeid,
10d56c87 5021+ sizeof(struct dlm_reply),
4bf12011 5022+ lkb->lkb_resource->res_ls->ls_allocation,
5023+ (char **) &reply);
5024+ if (!e)
5025+ return;
5026+
5027+ reply->rl_header.rh_cmd = GDLM_REMCMD_LOCKREPLY;
5028+ reply->rl_header.rh_flags = 0;
10d56c87 5029+ reply->rl_header.rh_length = sizeof(struct dlm_reply);
4bf12011 5030+ reply->rl_header.rh_lkid = req->rr_header.rh_lkid;
5031+ reply->rl_header.rh_lockspace = req->rr_header.rh_lockspace;
5032+
5033+ reply->rl_status = lkb->lkb_retstatus;
5034+ reply->rl_lockstate = lkb->lkb_status;
5035+ reply->rl_lkid = lkb->lkb_id;
5036+
10d56c87 5037+ DLM_ASSERT(!(lkb->lkb_flags & GDLM_LKFLG_DEMOTED),);
4bf12011 5038+
5039+ lkb->lkb_request = NULL;
5040+
5041+ add_reply_lvb(lkb, reply);
5042+ midcomms_send_buffer(&reply->rl_header, e);
5043+}
5044+
5045+/*
5046+ * Request removal of a dead entry in the resource directory
5047+ */
5048+
5049+void remote_remove_resdata(struct dlm_ls *ls, int nodeid, char *name,
5050+ int namelen)
4bf12011 5051+{
5052+ struct writequeue_entry *e;
10d56c87 5053+ struct dlm_request *req;
4bf12011 5054+
5055+ if (!test_bit(LSFL_LS_RUN, &ls->ls_flags)) {
10d56c87 5056+ struct dlm_rcom *rc = allocate_rcom_buffer(ls);
4bf12011 5057+
5058+ memcpy(rc->rc_buf, name, namelen);
5059+ rc->rc_datalen = namelen;
5060+
5061+ rcom_send_message(ls, nodeid, RECCOMM_REMRESDATA, rc, 0);
5062+
5063+ free_rcom_buffer(rc);
5064+ return;
5065+ }
5066+ // TODO Error handling
5067+ e = lowcomms_get_buffer(nodeid,
10d56c87 5068+ sizeof(struct dlm_request) + namelen - 1,
4bf12011 5069+ ls->ls_allocation, (char **) &req);
5070+ if (!e)
5071+ return;
5072+
10d56c87 5073+ memset(req, 0, sizeof(struct dlm_request) + namelen - 1);
4bf12011 5074+ req->rr_header.rh_cmd = GDLM_REMCMD_REM_RESDATA;
5075+ req->rr_header.rh_length =
10d56c87 5076+ sizeof(struct dlm_request) + namelen - 1;
4bf12011 5077+ req->rr_header.rh_flags = 0;
5078+ req->rr_header.rh_lkid = 0;
5079+ req->rr_header.rh_lockspace = ls->ls_global_id;
5080+ req->rr_remlkid = 0;
4bf12011 5081+ memcpy(req->rr_name, name, namelen);
5082+
5083+ midcomms_send_buffer(&req->rr_header, e);
5084+}
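
# Aside: the variable-length messages here all use the pre-C99 trailing-array
# idiom: struct dlm_request ends in a one-byte rr_name[], a request carrying
# an N-byte name is allocated as sizeof(struct dlm_request) + N - 1, and the
# receiver recovers N as rh_length - sizeof(struct dlm_request) + 1. A
# self-contained sketch of the pattern (hypothetical struct and helper):

#include <linux/slab.h>
#include <linux/string.h>

struct demo_msg {
	int  len;		/* plays the role of rh_length */
	char name[1];		/* really namelen bytes long */
};

static struct demo_msg *demo_msg_alloc(const char *name, int namelen)
{
	struct demo_msg *m;

	m = kmalloc(sizeof(struct demo_msg) + namelen - 1, GFP_KERNEL);
	if (!m)
		return NULL;
	m->len = sizeof(struct demo_msg) + namelen - 1;
	memcpy(m->name, name, namelen);
	return m;		/* receiver: namelen = len - sizeof(*m) + 1 */
}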
5085+
5086+/*
5087+ * Send remote cluster request to directory or master node before the request
5088+ * is put on the lock queue. Runs in the context of the locking caller.
5089+ */
5090+
10d56c87 5091+int send_cluster_request(struct dlm_lkb *lkb, int state)
4bf12011 5092+{
5093+ uint32_t target_nodeid;
5094+ struct dlm_rsb *rsb = lkb->lkb_resource;
5095+ struct dlm_ls *ls = rsb->res_ls;
5096+ struct dlm_request *req;
4bf12011 5097+ struct writequeue_entry *e;
5098+
4bf12011 5099+ if (state == GDLM_LQSTATE_WAIT_RSB)
5100+ target_nodeid = get_directory_nodeid(rsb);
5101+ else
5102+ target_nodeid = lkb->lkb_nodeid;
5103+
5104+ /* during recovery it's valid for target_nodeid to equal our own;
5105+ resend_cluster_requests does this to get requests back on track */
4bf12011 5106+
5107+ DLM_ASSERT(target_nodeid && target_nodeid != -1,
5108+ print_lkb(lkb);
5109+ print_rsb(rsb);
5110+ printk("target_nodeid %u\n", target_nodeid););
4bf12011 5111+
5112+ if (!test_bit(LSFL_LS_RUN, &ls->ls_flags)) {
5113+ /* this may happen when called by resend_cluster_request */
5114+ log_error(ls, "send_cluster_request to %u state %d recovery",
5115+ target_nodeid, state);
5116+ }
5117+
5118+ e = lowcomms_get_buffer(target_nodeid,
10d56c87 5119+ sizeof(struct dlm_request) +
4bf12011 5120+ rsb->res_length - 1, ls->ls_allocation,
5121+ (char **) &req);
5122+ if (!e)
5123+ return -ENOBUFS;
10d56c87 5124+ memset(req, 0, sizeof(struct dlm_request) + rsb->res_length - 1);
4bf12011 5125+
5126+ /* Common stuff, some are just defaults */
5127+
5128+ if (lkb->lkb_bastaddr)
5cdbd17b 5129+ req->rr_asts = AST_BAST;
4bf12011 5130+ if (lkb->lkb_astaddr)
5cdbd17b 5131+ req->rr_asts |= AST_COMP;
4bf12011 5132+ if (lkb->lkb_parent)
5133+ req->rr_remparid = lkb->lkb_parent->lkb_remid;
5134+
5135+ req->rr_flags = lkb->lkb_lockqueue_flags;
5136+ req->rr_rqmode = lkb->lkb_rqmode;
5137+ req->rr_remlkid = lkb->lkb_remid;
5138+ req->rr_header.rh_length =
10d56c87 5139+ sizeof(struct dlm_request) + rsb->res_length - 1;
4bf12011 5140+ req->rr_header.rh_flags = 0;
5141+ req->rr_header.rh_lkid = lkb->lkb_id;
5142+ req->rr_header.rh_lockspace = ls->ls_global_id;
5143+
5144+ switch (state) {
5145+
5146+ case GDLM_LQSTATE_WAIT_RSB:
5147+
5148+ DLM_ASSERT(!lkb->lkb_parent,
5149+ print_lkb(lkb);
5150+ print_rsb(rsb););
5151+
5152+ DLM_ASSERT(rsb->res_nodeid == -1,
5153+ print_lkb(lkb);
5154+ print_rsb(rsb););
5155+
5156+ log_debug(ls, "send lu %x to %u", lkb->lkb_id, target_nodeid);
4bf12011 5157+
5158+ req->rr_header.rh_cmd = GDLM_REMCMD_LOOKUP;
5159+ memcpy(req->rr_name, rsb->res_name, rsb->res_length);
5160+ break;
5161+
5162+ case GDLM_LQSTATE_WAIT_CONVERT:
5163+
5164+ DLM_ASSERT(lkb->lkb_nodeid == rsb->res_nodeid,
5165+ print_lkb(lkb);
5166+ print_rsb(rsb););
5167+
5168+ log_debug(ls, "send cv %x to %u", lkb->lkb_id, target_nodeid);
5169+
4bf12011 5170+ req->rr_header.rh_cmd = GDLM_REMCMD_CONVREQUEST;
5171+ if (lkb->lkb_range) {
5172+ req->rr_flags |= GDLM_LKFLG_RANGE;
5173+ req->rr_range_start = lkb->lkb_range[RQ_RANGE_START];
5174+ req->rr_range_end = lkb->lkb_range[RQ_RANGE_END];
5175+ }
5176+ break;
5177+
5178+ case GDLM_LQSTATE_WAIT_CONDGRANT:
5179+
5180+ DLM_ASSERT(lkb->lkb_nodeid == rsb->res_nodeid,
5181+ print_lkb(lkb);
5182+ print_rsb(rsb););
5183+
5184+ log_debug(ls, "send rq %x to %u", lkb->lkb_id, target_nodeid);
5185+
4bf12011 5186+ req->rr_header.rh_cmd = GDLM_REMCMD_LOCKREQUEST;
4bf12011 5187+ memcpy(req->rr_name, rsb->res_name, rsb->res_length);
5188+ if (lkb->lkb_range) {
5189+ req->rr_flags |= GDLM_LKFLG_RANGE;
5190+ req->rr_range_start = lkb->lkb_range[RQ_RANGE_START];
5191+ req->rr_range_end = lkb->lkb_range[RQ_RANGE_END];
5192+ }
5193+ break;
5194+
5195+ case GDLM_LQSTATE_WAIT_UNLOCK:
5196+
5197+ log_debug(ls, "send un %x to %u", lkb->lkb_id, target_nodeid);
5198+
5199+ if (rsb->res_nodeid != -1)
5200+ log_all(ls, "un %x to %u rsb nodeid %u", lkb->lkb_id,
5201+ target_nodeid, rsb->res_nodeid);
5202+
4bf12011 5203+ req->rr_header.rh_cmd = GDLM_REMCMD_UNLOCKREQUEST;
5204+ break;
5205+
5206+ default:
10d56c87 5207+ DLM_ASSERT(0, printk("Unknown cluster request\n"););
4bf12011 5208+ }
5209+
5210+ add_request_lvb(lkb, req);
5211+ midcomms_send_buffer(&req->rr_header, e);
5212+
5213+ return 0;
5214+}
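
# Aside: the switch above implements a one-to-one mapping from lockqueue wait
# state to wire command (all taken from the code above):
#
#   GDLM_LQSTATE_WAIT_RSB        -> GDLM_REMCMD_LOOKUP         (directory node)
#   GDLM_LQSTATE_WAIT_CONDGRANT  -> GDLM_REMCMD_LOCKREQUEST    (master node)
#   GDLM_LQSTATE_WAIT_CONVERT    -> GDLM_REMCMD_CONVREQUEST    (master node)
#   GDLM_LQSTATE_WAIT_UNLOCK     -> GDLM_REMCMD_UNLOCKREQUEST  (master node)
#
# Replies to all four come back as GDLM_REMCMD_LOCKREPLY and are routed
# through process_cluster_request() below to process_lockqueue_reply().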
5215+
5216+/*
5217+ * We got a request from another cluster node, process it and return an info
5218+ * structure with the lock state/LVB etc as required. Executes in the DLM's
5219+ * recvd thread.
5220+ */
5221+
10d56c87 5222+int process_cluster_request(int nodeid, struct dlm_header *req, int recovery)
4bf12011 5223+{
5224+ struct dlm_ls *lspace;
5225+ struct dlm_lkb *lkb = NULL;
5226+ struct dlm_rsb *rsb;
4bf12011 5227+ int send_reply = 0, status = 0, namelen;
5228+ struct dlm_request *freq = (struct dlm_request *) req;
5229+ struct dlm_reply *rp = (struct dlm_reply *) req;
5230+ struct dlm_reply reply;
4bf12011 5231+
5232+ lspace = find_lockspace_by_global_id(req->rh_lockspace);
5233+
5234+ if (!lspace) {
5235+ log_print("process_cluster_request invalid lockspace %x "
5236+ "from %d req %u", req->rh_lockspace, nodeid,
5237+ req->rh_cmd);
5238+ status = -EINVAL;
5239+ goto out;
5240+ }
5241+
5242+ /* wait for recoverd to drain requestqueue */
5243+ if (!recovery)
5244+ wait_requestqueue(lspace);
5245+
5246+ /*
5247+ * If we're in recovery then queue the request for later. Otherwise,
5248+ * we still need to get the "in_recovery" lock to make sure the
5249+ * recovery itself doesn't start until we are done.
5250+ */
5251+ retry:
5252+ if (!test_bit(LSFL_LS_RUN, &lspace->ls_flags)) {
5253+ if (test_bit(LSFL_REQUEST_WARN, &lspace->ls_flags))
5254+ log_error(lspace, "process_cluster_request warning %u",
5255+ nodeid);
5256+ add_to_requestqueue(lspace, nodeid, (char *) req,
5257+ req->rh_length);
5258+ log_debug(lspace, "process_cluster_request queue %d from %u",
5259+ req->rh_cmd, nodeid);
4bf12011 5260+ status = -EINTR;
5261+ goto out;
5262+ }
5263+ if (!down_read_trylock(&lspace->ls_in_recovery)) {
5264+ schedule();
5265+ goto retry;
5266+ }
5267+
5268+
5269+ /*
5270+ * Process the request.
5271+ */
5272+
5273+ switch (req->rh_cmd) {
5274+
5275+ case GDLM_REMCMD_LOOKUP:
5276+ {
10d56c87 5277+ uint32_t dir_nodeid, r_nodeid;
4bf12011 5278+ int status;
4bf12011 5279+
5280+ namelen = freq->rr_header.rh_length - sizeof(*freq) + 1;
5281+
5282+ dir_nodeid = name_to_directory_nodeid(lspace,
5283+ freq->rr_name,
5284+ namelen);
5285+ if (dir_nodeid != our_nodeid())
5286+ log_debug(lspace, "ignoring directory lookup");
5287+
5288+ status = dlm_dir_lookup(lspace, nodeid, freq->rr_name,
5289+ namelen, &r_nodeid);
4bf12011 5290+ if (status)
5291+ status = -ENOMEM;
5292+
5293+ reply.rl_status = status;
5294+ reply.rl_lockstate = 0;
10d56c87 5295+ reply.rl_nodeid = r_nodeid;
4bf12011 5296+ }
5297+ send_reply = 1;
5298+ break;
5299+
5300+ case GDLM_REMCMD_REM_RESDATA:
5301+
5302+ namelen = freq->rr_header.rh_length - sizeof(*freq) + 1;
10d56c87 5303+ remove_resdata(lspace, nodeid, freq->rr_name, namelen);
4bf12011 5304+ break;
5305+
5306+ case GDLM_REMCMD_LOCKREQUEST:
5307+
5308+ lkb = remote_stage2(nodeid, lspace, freq);
5309+ if (lkb) {
5310+ lkb->lkb_request = freq;
5311+ if (lkb->lkb_retstatus != -EINVAL)
5312+ dlm_lock_stage3(lkb);
4bf12011 5313+
5314+ /*
5315+ * If the request was granted in lock_stage3, then a
5316+ * reply message was already sent in combination with
5317+ * the grant message and lkb_request is NULL.
5318+ */
5319+
5320+ if (lkb->lkb_request) {
5321+ lkb->lkb_request = NULL;
5322+ send_reply = 1;
5323+ reply.rl_status = lkb->lkb_retstatus;
5324+ reply.rl_lockstate = lkb->lkb_status;
5325+ reply.rl_lkid = lkb->lkb_id;
5326+
5327+ /*
5328+ * If the request could not be granted and the
5329+ * user won't wait, then free up the LKB
5330+ */
5331+
5cdbd17b 5332+ if (lkb->lkb_retstatus == -EAGAIN) {
4bf12011 5333+ rsb = lkb->lkb_resource;
5334+ release_lkb(lspace, lkb);
5335+ release_rsb(rsb);
5336+ lkb = NULL;
5337+ }
5338+ else if (lkb->lkb_retstatus == -EINVAL) {
5339+ release_lkb(lspace, lkb);
5340+ lkb = NULL;
5341+ }
4bf12011 5342+ }
5343+ } else {
5344+ reply.rl_status = -ENOMEM;
5345+ send_reply = 1;
5346+ }
5347+ break;
5348+
5349+ case GDLM_REMCMD_CONVREQUEST:
5350+
5351+ lkb = find_lock_by_id(lspace, freq->rr_remlkid);
5352+
5353+ DLM_ASSERT(lkb,
5354+ print_request(freq);
5355+ printk("nodeid %u\n", nodeid););
4bf12011 5356+
5357+ rsb = lkb->lkb_resource;
5358+
5359+ DLM_ASSERT(rsb,
5360+ print_lkb(lkb);
5361+ print_request(freq);
5362+ printk("nodeid %u\n", nodeid););
5363+
5364+ DLM_ASSERT(!rsb->res_nodeid,
5365+ print_lkb(lkb);
5366+ print_rsb(rsb);
5367+ print_request(freq);
5368+ printk("nodeid %u\n", nodeid););
5369+
5370+ DLM_ASSERT(lkb->lkb_flags & GDLM_LKFLG_MSTCPY,
5371+ print_lkb(lkb);
5372+ print_rsb(rsb);
5373+ print_request(freq);
5374+ printk("nodeid %u\n", nodeid););
5375+
5376+ DLM_ASSERT(lkb->lkb_status == GDLM_LKSTS_GRANTED,
5377+ print_lkb(lkb);
5378+ print_rsb(rsb);
5379+ print_request(freq);
5380+ printk("nodeid %u\n", nodeid););
4bf12011 5381+
5382+ lkb->lkb_rqmode = freq->rr_rqmode;
5383+ lkb->lkb_lockqueue_flags = freq->rr_flags;
5384+ lkb->lkb_request = freq;
5385+ lkb->lkb_flags &= ~GDLM_LKFLG_DEMOTED;
5386+
5387+ if (lkb->lkb_flags & GDLM_LKFLG_VALBLK ||
5388+ freq->rr_flags & DLM_LKF_VALBLK) {
4bf12011 5389+ lkb->lkb_flags |= GDLM_LKFLG_VALBLK;
5390+ allocate_and_copy_lvb(lspace, &lkb->lkb_lvbptr,
5391+ freq->rr_lvb);
5392+ }
5393+
5394+ if (freq->rr_flags & GDLM_LKFLG_RANGE) {
5395+ if (lkb_set_range(lspace, lkb, freq->rr_range_start,
5396+ freq->rr_range_end)) {
5397+ reply.rl_status = -ENOMEM;
5398+ send_reply = 1;
5399+ goto out;
5400+ }
5401+ }
5402+
5403+ log_debug(lspace, "cv %u from %u %x \"%s\"", lkb->lkb_rqmode,
5404+ nodeid, lkb->lkb_id, rsb->res_name);
5405+
4bf12011 5406+ dlm_convert_stage2(lkb, FALSE);
5407+
5408+ /*
5409+ * If the conv request was granted in stage2, then a reply
5410+ * message was already sent in combination with the grant
5411+ * message.
5412+ */
5413+
5414+ if (lkb->lkb_request) {
5415+ lkb->lkb_request = NULL;
5416+ send_reply = 1;
5417+ reply.rl_status = lkb->lkb_retstatus;
5418+ reply.rl_lockstate = lkb->lkb_status;
5419+ reply.rl_lkid = lkb->lkb_id;
5420+ }
5421+ break;
5422+
5423+ case GDLM_REMCMD_LOCKREPLY:
5424+
5425+ lkb = find_lock_by_id(lspace, req->rh_lkid);
5426+
5427+ DLM_ASSERT(lkb,
5428+ print_reply(rp);
5429+ printk("nodeid %u\n", nodeid););
4bf12011 5430+
5431+ DLM_ASSERT(!(lkb->lkb_flags & GDLM_LKFLG_MSTCPY),
5432+ print_lkb(lkb);
5433+ print_reply(rp);
5434+ printk("nodeid %u\n", nodeid););
4bf12011 5435+
10d56c87 5436+ process_lockqueue_reply(lkb, rp, nodeid);
4bf12011 5437+ break;
5438+
5439+ case GDLM_REMCMD_LOCKGRANT:
5440+
5441+ /*
5442+ * Remote lock has been granted asynchronously. Do a compact
5443+ * version of what grant_lock() does.
5444+ */
5445+
5446+ lkb = find_lock_by_id(lspace, freq->rr_remlkid);
5447+
5448+ DLM_ASSERT(lkb,
5449+ print_request(freq);
5450+ printk("nodeid %u\n", nodeid););
4bf12011 5451+
5452+ rsb = lkb->lkb_resource;
5453+
5454+ DLM_ASSERT(rsb,
5455+ print_lkb(lkb);
5456+ print_request(freq);
5457+ printk("nodeid %u\n", nodeid););
5458+
5459+ DLM_ASSERT(rsb->res_nodeid,
5460+ print_lkb(lkb);
5461+ print_rsb(rsb);
5462+ print_request(freq);
5463+ printk("nodeid %u\n", nodeid););
5464+
5465+ DLM_ASSERT(!(lkb->lkb_flags & GDLM_LKFLG_MSTCPY),
5466+ print_lkb(lkb);
5467+ print_rsb(rsb);
5468+ print_request(freq);
5469+ printk("nodeid %u\n", nodeid););
5470+
5471+ if (lkb->lkb_lockqueue_state) {
5472+ log_error(rsb->res_ls, "granting lock on lockqueue");
5473+ print_lkb(lkb);
5474+ }
4bf12011 5475+
5476+ down_write(&rsb->res_lock);
5477+
5478+ if (lkb->lkb_flags & GDLM_LKFLG_VALBLK)
5479+ memcpy(lkb->lkb_lvbptr, freq->rr_lvb, DLM_LVB_LEN);
5480+
5481+ lkb->lkb_grmode = lkb->lkb_rqmode;
5482+ lkb->lkb_rqmode = DLM_LOCK_IV;
5483+
5484+ if (lkb->lkb_range) {
5485+ lkb->lkb_range[GR_RANGE_START] =
5486+ lkb->lkb_range[RQ_RANGE_START];
5487+ lkb->lkb_range[GR_RANGE_END] =
5488+ lkb->lkb_range[RQ_RANGE_END];
5489+ }
5490+
5491+ lkb_swqueue(rsb, lkb, GDLM_LKSTS_GRANTED);
5492+ up_write(&rsb->res_lock);
5493+
5494+ if (freq->rr_flags & GDLM_LKFLG_DEMOTED)
5495+ lkb->lkb_flags |= GDLM_LKFLG_DEMOTED;
5496+
5497+ lkb->lkb_retstatus = 0;
5cdbd17b 5498+ queue_ast(lkb, AST_COMP, 0);
4bf12011 5499+ break;
5500+
5501+ case GDLM_REMCMD_SENDBAST:
5502+
5503+ lkb = find_lock_by_id(lspace, freq->rr_remlkid);
5504+
5505+ DLM_ASSERT(lkb,
5506+ print_request(freq);
5507+ printk("nodeid %u\n", nodeid););
4bf12011 5508+
5509+ if (lkb->lkb_status == GDLM_LKSTS_GRANTED)
5cdbd17b 5510+ queue_ast(lkb, AST_BAST, freq->rr_rqmode);
4bf12011 5511+ break;
5512+
5513+ case GDLM_REMCMD_SENDCAST:
5514+
5515+ /* This is only used for some error completion ASTs */
5516+
5517+ lkb = find_lock_by_id(lspace, freq->rr_remlkid);
5518+
5519+ DLM_ASSERT(lkb,
5520+ print_request(freq);
5521+ printk("nodeid %u\n", nodeid););
4bf12011 5522+
5523+ /* Return the lock to granted status */
5524+ res_lkb_swqueue(lkb->lkb_resource, lkb, GDLM_LKSTS_GRANTED);
4bf12011 5525+ lkb->lkb_retstatus = freq->rr_status;
5cdbd17b 5526+ queue_ast(lkb, AST_COMP, 0);
4bf12011 5527+ break;
5528+
5529+ case GDLM_REMCMD_UNLOCKREQUEST:
5530+
5531+ lkb = find_lock_by_id(lspace, freq->rr_remlkid);
5532+
5533+ DLM_ASSERT(lkb,
5534+ print_request(freq);
5535+ printk("nodeid %u\n", nodeid););
5536+
5537+ DLM_ASSERT(lkb->lkb_flags & GDLM_LKFLG_MSTCPY,
5538+ print_lkb(lkb);
5539+ print_request(freq);
5540+ printk("nodeid %u\n", nodeid););
5541+
5542+ rsb = find_rsb_to_unlock(lspace, lkb);
5543+
5544+ log_debug(lspace, "un from %u %x \"%s\"", nodeid, lkb->lkb_id,
5545+ rsb->res_name);
4bf12011 5546+
10d56c87 5547+ reply.rl_status = dlm_unlock_stage2(lkb, rsb, freq->rr_flags);
4bf12011 5548+ send_reply = 1;
5549+ break;
5550+
5551+ case GDLM_REMCMD_QUERY:
5552+ remote_query(nodeid, lspace, req);
5553+ break;
5554+
5555+ case GDLM_REMCMD_QUERYREPLY:
5556+ remote_query_reply(nodeid, lspace, req);
5557+ break;
5558+
5559+ default:
5560+ log_error(lspace, "process_cluster_request cmd %d",req->rh_cmd);
5561+ }
5562+
5563+ up_read(&lspace->ls_in_recovery);
5564+
5565+ out:
5566+ if (send_reply) {
5567+ reply.rl_header.rh_cmd = GDLM_REMCMD_LOCKREPLY;
5568+ reply.rl_header.rh_flags = 0;
5569+ reply.rl_header.rh_length = sizeof(reply);
5570+ reply.rl_header.rh_lkid = freq->rr_header.rh_lkid;
5571+ reply.rl_header.rh_lockspace = freq->rr_header.rh_lockspace;
5572+
5573+ status = midcomms_send_message(nodeid, &reply.rl_header,
5574+ GFP_KERNEL);
5575+ }
5576+
5577+ wake_astd();
5578+
5579+ return status;
5580+}
5581+
10d56c87 5582+static void add_reply_lvb(struct dlm_lkb *lkb, struct dlm_reply *reply)
4bf12011 5583+{
5584+ if (lkb->lkb_flags & GDLM_LKFLG_VALBLK)
5585+ memcpy(reply->rl_lvb, lkb->lkb_lvbptr, DLM_LVB_LEN);
5586+}
5587+
10d56c87 5588+static void add_request_lvb(struct dlm_lkb *lkb, struct dlm_request *req)
4bf12011 5589+{
5590+ if (lkb->lkb_flags & GDLM_LKFLG_VALBLK)
5591+ memcpy(req->rr_lvb, lkb->lkb_lvbptr, DLM_LVB_LEN);
5592+}
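
The send_reply path in process_cluster_request() above always mirrors the requester's rh_lkid and rh_lockspace back into the reply header; that is what lets the originating node match an incoming GDLM_REMCMD_LOCKREPLY to the request it has outstanding. A minimal user-space model of that matching pattern (editorial sketch, not part of the patch; the structs are simplified stand-ins for dlm_header/dlm_request/dlm_reply):

#include <stdint.h>
#include <stdio.h>

/* Simplified stand-ins for the DLM wire structs. */
struct header  { uint8_t cmd; uint32_t lkid; uint32_t lockspace; };
struct request { struct header rr_header; };
struct reply   { struct header rl_header; int rl_status; };

/* Echo the requester's identifiers, as the send_reply block does. */
static struct reply make_reply(const struct request *req, int status)
{
	struct reply r;

	r.rl_header.cmd = 1;	/* stands in for GDLM_REMCMD_LOCKREPLY */
	r.rl_header.lkid = req->rr_header.lkid;
	r.rl_header.lockspace = req->rr_header.lockspace;
	r.rl_status = status;
	return r;
}

int main(void)
{
	struct request req = { { 2, 0xabcd, 7 } };
	struct reply rep = make_reply(&req, 0);

	printf("reply matches lkid 0x%x in lockspace %u, status %d\n",
	       rep.rl_header.lkid, rep.rl_header.lockspace, rep.rl_status);
	return 0;
}
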
5593diff -urN linux-orig/cluster/dlm/lockqueue.h linux-patched/cluster/dlm/lockqueue.h
5594--- linux-orig/cluster/dlm/lockqueue.h 1970-01-01 07:30:00.000000000 +0730
5595+++ linux-patched/cluster/dlm/lockqueue.h 2004-07-13 18:57:22.000000000 +0800
5596@@ -0,0 +1,28 @@
4bf12011 5597+/******************************************************************************
5598+*******************************************************************************
5599+**
5600+** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
5601+** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
5602+**
5603+** This copyrighted material is made available to anyone wishing to use,
5604+** modify, copy, or redistribute it subject to the terms and conditions
5605+** of the GNU General Public License v.2.
5606+**
5607+*******************************************************************************
5608+******************************************************************************/
5609+
5610+#ifndef __LOCKQUEUE_DOT_H__
5611+#define __LOCKQUEUE_DOT_H__
5612+
5613+void remote_grant(struct dlm_lkb * lkb);
5614+void reply_and_grant(struct dlm_lkb * lkb);
5615+int remote_stage(struct dlm_lkb * lkb, int state);
5616+int process_cluster_request(int csid, struct dlm_header *req, int recovery);
5617+int send_cluster_request(struct dlm_lkb * lkb, int state);
5618+void purge_requestqueue(struct dlm_ls * ls);
5619+int process_requestqueue(struct dlm_ls * ls);
5620+int reply_in_requestqueue(struct dlm_ls * ls, int lkid);
5621+void remote_remove_resdata(struct dlm_ls * ls, int nodeid, char *name, int namelen);
5622+void allocate_and_copy_lvb(struct dlm_ls * ls, char **lvbptr, char *src);
4bf12011 5623+
5624+#endif /* __LOCKQUEUE_DOT_H__ */
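
As declared above, requests that arrive while a lockspace is in recovery are parked on the per-lockspace requestqueue (the -EINTR branch at the top of process_cluster_request) and replayed by process_requestqueue() once recovery finishes. A small user-space sketch of that park-and-replay pattern (editorial illustration, not part of the patch; the types are simplified stand-ins for struct dlm_ls and its queue entries):

#include <stdio.h>
#include <stdlib.h>

struct qentry { struct qentry *next; int nodeid; char msg[32]; };
struct lockspace { int in_recovery; struct qentry *head, **tail; };

static void process_request(int nodeid, const char *msg)
{
	printf("processing \"%s\" from node %d\n", msg, nodeid);
}

/* Park a request that arrived mid-recovery (cf. add_to_requestqueue). */
static void park_request(struct lockspace *ls, int nodeid, const char *msg)
{
	struct qentry *e = malloc(sizeof(*e));

	if (!e)
		return;
	e->next = NULL;
	e->nodeid = nodeid;
	snprintf(e->msg, sizeof(e->msg), "%s", msg);
	*ls->tail = e;
	ls->tail = &e->next;
}

/* Replay parked requests once recovery ends (cf. process_requestqueue). */
static void replay_requests(struct lockspace *ls)
{
	struct qentry *e = ls->head;

	while (e) {
		struct qentry *next = e->next;
		process_request(e->nodeid, e->msg);
		free(e);
		e = next;
	}
	ls->head = NULL;
	ls->tail = &ls->head;
}

int main(void)
{
	struct lockspace ls = { .in_recovery = 1, .head = NULL };

	ls.tail = &ls.head;
	park_request(&ls, 2, "lockrequest");	/* arrived during recovery */
	park_request(&ls, 3, "convrequest");
	ls.in_recovery = 0;			/* recovery complete */
	replay_requests(&ls);
	return 0;
}
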
5625diff -urN linux-orig/cluster/dlm/lockspace.c linux-patched/cluster/dlm/lockspace.c
5626--- linux-orig/cluster/dlm/lockspace.c 1970-01-01 07:30:00.000000000 +0730
5627+++ linux-patched/cluster/dlm/lockspace.c 2004-07-13 18:57:22.000000000 +0800
5628@@ -0,0 +1,699 @@
4bf12011 5629+/******************************************************************************
5630+*******************************************************************************
5631+**
5632+** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
5633+** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
5634+**
5635+** This copyrighted material is made available to anyone wishing to use,
5636+** modify, copy, or redistribute it subject to the terms and conditions
5637+** of the GNU General Public License v.2.
5638+**
5639+*******************************************************************************
5640+******************************************************************************/
5641+
5642+#include <linux/module.h>
5643+
5644+#include "dlm_internal.h"
5645+#include "recoverd.h"
5646+#include "ast.h"
5647+#include "lkb.h"
5648+#include "nodes.h"
5649+#include "dir.h"
5650+#include "lowcomms.h"
5651+#include "config.h"
5652+#include "memory.h"
5653+#include "lockspace.h"
5654+#include "device.h"
5655+
5656+#define GDST_NONE (0)
5657+#define GDST_RUNNING (1)
5658+
5659+static int dlmstate;
5660+static int dlmcount;
5661+static struct semaphore dlmstate_lock;
4bf12011 5662+struct list_head lslist;
5663+spinlock_t lslist_lock;
5664+struct kcl_service_ops ls_ops;
5665+
5666+static int new_lockspace(char *name, int namelen, void **lockspace, int flags);
5667+
5668+
5669+void dlm_lockspace_init(void)
5670+{
5671+ dlmstate = GDST_NONE;
5672+ dlmcount = 0;
5673+ init_MUTEX(&dlmstate_lock);
4bf12011 5674+ INIT_LIST_HEAD(&lslist);
5675+ spin_lock_init(&lslist_lock);
5676+}
5677+
10d56c87 5678+struct dlm_ls *find_lockspace_by_global_id(uint32_t id)
4bf12011 5679+{
10d56c87 5680+ struct dlm_ls *ls;
4bf12011 5681+
5682+ spin_lock(&lslist_lock);
5683+
5684+ list_for_each_entry(ls, &lslist, ls_list) {
5685+ if (ls->ls_global_id == id)
5686+ goto out;
5687+ }
5688+ ls = NULL;
5689+ out:
5690+ spin_unlock(&lslist_lock);
5691+ return ls;
5692+}
5693+
5694+/* TODO: make this more efficient */
10d56c87 5695+struct dlm_ls *find_lockspace_by_local_id(void *id)
4bf12011 5696+{
10d56c87 5697+ struct dlm_ls *ls;
4bf12011 5698+
5699+ spin_lock(&lslist_lock);
5700+
5701+ list_for_each_entry(ls, &lslist, ls_list) {
5702+ if (ls->ls_local_id == (uint32_t)(long)id)
5703+ goto out;
5704+ }
5705+ ls = NULL;
5706+ out:
5707+ spin_unlock(&lslist_lock);
5708+ return ls;
5709+}
5710+
10d56c87 5711+struct dlm_ls *find_lockspace_by_name(char *name, int namelen)
4bf12011 5712+{
10d56c87 5713+ struct dlm_ls *ls;
4bf12011 5714+
5715+ spin_lock(&lslist_lock);
5716+
5717+ list_for_each_entry(ls, &lslist, ls_list) {
5718+ if (ls->ls_namelen == namelen &&
5719+ memcmp(ls->ls_name, name, namelen) == 0)
5720+ goto out;
5721+ }
5722+ ls = NULL;
5723+ out:
5724+ spin_unlock(&lslist_lock);
5725+ return ls;
5726+}
5727+
5728+/*
5729+ * Called from dlm_init. These are the general threads which are not
10d56c87 5730+ * lockspace-specific and work for all dlm lockspaces.
4bf12011 5731+ */
5732+
5733+static int threads_start(void)
5734+{
5735+ int error;
5736+
5737+ /* Thread which interacts with cman for all ls's */
10d56c87 5738+ error = dlm_recoverd_start();
4bf12011 5739+ if (error) {
5740+ log_print("cannot start recovery thread %d", error);
5741+ goto fail;
5742+ }
5743+
5744+ /* Thread which processes lock requests for all ls's */
5745+ error = astd_start();
5746+ if (error) {
5747+ log_print("cannot start ast thread %d", error);
5748+ goto recoverd_fail;
5749+ }
5750+
5751+ /* Thread for sending/receiving messages for all ls's */
5752+ error = lowcomms_start();
5753+ if (error) {
5754+ log_print("cannot start lowcomms %d", error);
5755+ goto astd_fail;
5756+ }
5757+
5758+ return 0;
5759+
5760+ astd_fail:
5761+ astd_stop();
5762+
5763+ recoverd_fail:
10d56c87 5764+ dlm_recoverd_stop();
4bf12011 5765+
5766+ fail:
5767+ return error;
5768+}
5769+
5770+static void threads_stop(void)
5771+{
5772+ lowcomms_stop();
5773+ astd_stop();
10d56c87 5774+ dlm_recoverd_stop();
4bf12011 5775+}
5776+
5777+static int init_internal(void)
5778+{
5779+ int error = 0;
5780+
5781+ if (dlmstate == GDST_RUNNING)
5782+ dlmcount++;
4bf12011 5783+ else {
5784+ error = threads_start();
5785+ if (error)
5786+ goto out;
5787+
5788+ dlmstate = GDST_RUNNING;
5789+ dlmcount = 1;
4bf12011 5790+ }
5791+
5792+ out:
5793+ return error;
5794+}
5795+
5796+
5797+/*
10d56c87 5798+ * Called after dlm module is loaded and before any lockspaces are created.
4bf12011 5799+ * Starts and initializes global threads and structures. These global entities
5800+ * are shared by and independent of all lockspaces.
5801+ *
10d56c87 5802+ * There should be a dlm-specific user command which a person can run which
4bf12011 5803+ * calls this function. If a user hasn't run that command and something
5804+ * creates a new lockspace, this is called first.
5805+ *
5806+ * This also starts the default lockspace.
5807+ */
5808+
5809+int dlm_init(void)
5810+{
5811+ int error;
5812+
10d56c87 5813+ down(&dlmstate_lock);
4bf12011 5814+ error = init_internal();
10d56c87 5815+ up(&dlmstate_lock);
4bf12011 5816+
5817+ return error;
5818+}
5819+
5820+int dlm_release(void)
5821+{
5822+ int error = 0;
5823+
10d56c87 5824+ down(&dlmstate_lock);
4bf12011 5825+
10d56c87 5826+ if (dlmstate == GDST_NONE)
4bf12011 5827+ goto out;
5828+
5829+ if (dlmcount)
5830+ dlmcount--;
4bf12011 5831+
10d56c87 5832+ if (dlmcount)
4bf12011 5833+ goto out;
5834+
5835+ spin_lock(&lslist_lock);
5836+ if (!list_empty(&lslist)) {
5837+ spin_unlock(&lslist_lock);
5838+ log_print("cannot stop threads, lockspaces still exist");
5839+ goto out;
5840+ }
5841+ spin_unlock(&lslist_lock);
5842+
5843+ threads_stop();
10d56c87 5844+ dlmstate = GDST_NONE;
4bf12011 5845+
5846+ out:
10d56c87 5847+ up(&dlmstate_lock);
4bf12011 5848+
5849+ return error;
5850+}
5851+
10d56c87 5852+struct dlm_ls *allocate_ls(int namelen)
4bf12011 5853+{
10d56c87 5854+ struct dlm_ls *ls;
4bf12011 5855+
5856+ /* FIXME: use appropriate malloc type */
5857+
10d56c87 5858+ ls = kmalloc(sizeof(struct dlm_ls) + namelen, GFP_KERNEL);
4bf12011 5859+ if (ls)
10d56c87 5860+ memset(ls, 0, sizeof(struct dlm_ls) + namelen);
4bf12011 5861+
5862+ return ls;
5863+}
5864+
4bf12011 5865+static int new_lockspace(char *name, int namelen, void **lockspace, int flags)
5866+{
5867+ struct dlm_ls *ls;
5868+ int i, size, error = -ENOMEM;
4bf12011 5869+ uint32_t local_id = 0;
5870+
5871+ if (!try_module_get(THIS_MODULE))
5872+ return -EINVAL;
5873+
5874+ if (namelen > MAX_SERVICE_NAME_LEN)
5875+ return -EINVAL;
5876+
5877+ if ((ls = find_lockspace_by_name(name, namelen))) {
10d56c87 5878+ *lockspace = (void *)(long)ls->ls_local_id;
4bf12011 5879+ return -EEXIST;
5880+ }
5881+
5882+ /*
5883+ * Initialize ls fields
5884+ */
5885+
5886+ ls = allocate_ls(namelen);
5887+ if (!ls)
5888+ goto out;
5889+
5890+ memcpy(ls->ls_name, name, namelen);
5891+ ls->ls_namelen = namelen;
5892+
5893+ ls->ls_allocation = GFP_KERNEL;
10d56c87 5894+ ls->ls_flags = 0;
4bf12011 5895+
5896+ size = dlm_config.rsbtbl_size;
5897+ ls->ls_rsbtbl_size = size;
5898+
5899+ ls->ls_rsbtbl = kmalloc(sizeof(struct dlm_rsbtable) * size, GFP_KERNEL);
5900+ if (!ls->ls_rsbtbl)
4bf12011 5901+ goto out_lsfree;
5902+ for (i = 0; i < size; i++) {
5903+ INIT_LIST_HEAD(&ls->ls_rsbtbl[i].list);
5904+ rwlock_init(&ls->ls_rsbtbl[i].lock);
5905+ }
4bf12011 5906+
5907+ size = dlm_config.lkbtbl_size;
5908+ ls->ls_lkbtbl_size = size;
4bf12011 5909+
5910+ ls->ls_lkbtbl = kmalloc(sizeof(struct dlm_lkbtable) * size, GFP_KERNEL);
5911+ if (!ls->ls_lkbtbl)
5912+ goto out_rsbfree;
5913+ for (i = 0; i < size; i++) {
5914+ INIT_LIST_HEAD(&ls->ls_lkbtbl[i].list);
5915+ rwlock_init(&ls->ls_lkbtbl[i].lock);
5916+ ls->ls_lkbtbl[i].counter = 1;
5917+ }
4bf12011 5918+
5919+ size = dlm_config.dirtbl_size;
5920+ ls->ls_dirtbl_size = size;
5921+
5922+ ls->ls_dirtbl = kmalloc(sizeof(struct dlm_dirtable) * size, GFP_KERNEL);
5923+ if (!ls->ls_dirtbl)
5924+ goto out_lkbfree;
5925+ for (i = 0; i < size; i++) {
5926+ INIT_LIST_HEAD(&ls->ls_dirtbl[i].list);
5927+ rwlock_init(&ls->ls_dirtbl[i].lock);
5928+ }
4bf12011 5929+
5930+ INIT_LIST_HEAD(&ls->ls_nodes);
4bf12011 5931+ INIT_LIST_HEAD(&ls->ls_nodes_gone);
10d56c87 5932+ ls->ls_num_nodes = 0;
4bf12011 5933+ INIT_LIST_HEAD(&ls->ls_recover);
5934+ spin_lock_init(&ls->ls_recover_lock);
5935+ INIT_LIST_HEAD(&ls->ls_recover_list);
5936+ ls->ls_recover_list_count = 0;
5937+ spin_lock_init(&ls->ls_recover_list_lock);
5938+ init_waitqueue_head(&ls->ls_wait_general);
10d56c87 5939+ INIT_LIST_HEAD(&ls->ls_rootres);
4bf12011 5940+ INIT_LIST_HEAD(&ls->ls_requestqueue);
5941+ INIT_LIST_HEAD(&ls->ls_rebuild_rootrsb_list);
5942+ ls->ls_last_stop = 0;
5943+ ls->ls_last_start = 0;
5944+ ls->ls_last_finish = 0;
5945+ ls->ls_rcom_msgid = 0;
5946+ init_MUTEX(&ls->ls_rcom_lock);
5947+ init_rwsem(&ls->ls_in_recovery);
5948+ init_rwsem(&ls->ls_unlock_sem);
5949+ init_rwsem(&ls->ls_rec_rsblist);
5950+ init_rwsem(&ls->ls_gap_rsblist);
5951+ down_write(&ls->ls_in_recovery);
5952+
4bf12011 5953+ if (flags & DLM_LSF_NOTIMERS)
5954+ set_bit(LSFL_NOTIMERS, &ls->ls_flags);
5955+ if (flags & DLM_LSF_NOCONVGRANT)
5956+ set_bit(LSFL_NOCONVGRANT, &ls->ls_flags);
4bf12011 5957+
5958+ /*
5959+ * Connect this lockspace with the cluster manager
5960+ */
5961+
5962+ error = kcl_register_service(name, namelen, SERVICE_LEVEL_GDLM,
5963+ &ls_ops, TRUE, (void *) ls, &local_id);
5964+ if (error)
10d56c87 5965+ goto out_dirfree;
4bf12011 5966+
5967+ ls->ls_state = LSST_INIT;
5968+ ls->ls_local_id = local_id;
5969+
5970+ spin_lock(&lslist_lock);
5971+ list_add(&ls->ls_list, &lslist);
5972+ spin_unlock(&lslist_lock);
5973+
5974+ error = kcl_join_service(local_id);
5975+ if (error) {
5976+ log_error(ls, "service manager join error %d", error);
5977+ goto out_reg;
5978+ }
5979+
5980+ /* The ls isn't actually running until it receives a start() from CMAN.
10d56c87 5981+ Neither does it have a global ls id until started. */
4bf12011 5982+
5983+ /* Return the local ID as the lockspace handle. I've left this
5984+ cast to a void* as it allows us to replace it with pretty much
5985+ anything at a future date without breaking clients. But returning
5986+ the address of the lockspace is a bad idea as it could get
5987+ forcibly removed, leaving the client with a dangling pointer */
10d56c87 5988+ *lockspace = (void *)(long)local_id;
4bf12011 5989+
5990+ return 0;
5991+
10d56c87 5992+ out_reg:
4bf12011 5993+ kcl_unregister_service(ls->ls_local_id);
5994+ out_dirfree:
5995+ kfree(ls->ls_dirtbl);
5996+ out_lkbfree:
5997+ kfree(ls->ls_lkbtbl);
5998+ out_rsbfree:
5999+ kfree(ls->ls_rsbtbl);
6000+ out_lsfree:
6001+ kfree(ls);
6002+ out:
4bf12011 6003+ return error;
6004+}
6005+
6006+/*
6007+ * Called by a system like GFS which wants independent lock spaces.
6008+ */
6009+
6010+int dlm_new_lockspace(char *name, int namelen, void **lockspace, int flags)
6011+{
6012+ int error = -ENOSYS;
6013+
10d56c87 6014+ down(&dlmstate_lock);
4bf12011 6015+ error = init_internal();
6016+ if (error)
6017+ goto out;
6018+
6019+ error = new_lockspace(name, namelen, lockspace, flags);
6020+ out:
6021+ up(&dlmstate_lock);
4bf12011 6022+ return error;
6023+}
6024+
6025+/* Return 1 if the lockspace still has active remote locks,
6026+ * 2 if the lockspace still has active local locks.
6027+ */
6028+static int lockspace_busy(struct dlm_ls *ls)
6029+{
6030+ int i, lkb_found = 0;
6031+ struct dlm_lkb *lkb;
6032+
6033+ /* NOTE: We check the lockidtbl here rather than the resource table.
6034+ This is because there may be LKBs queued as ASTs that have been
6035+ unlinked from their RSBs and are pending deletion once the AST has
6036+ been delivered */
6037+
6038+ for (i = 0; i < ls->ls_lkbtbl_size; i++) {
6039+ read_lock(&ls->ls_lkbtbl[i].lock);
6040+ if (!list_empty(&ls->ls_lkbtbl[i].list)) {
6041+ lkb_found = 1;
6042+ list_for_each_entry(lkb, &ls->ls_lkbtbl[i].list,
6043+ lkb_idtbl_list) {
6044+ if (!lkb->lkb_nodeid) {
6045+ read_unlock(&ls->ls_lkbtbl[i].lock);
6046+ return 2;
6047+ }
6048+ }
4bf12011 6049+ }
10d56c87 6050+ read_unlock(&ls->ls_lkbtbl[i].lock);
4bf12011 6051+ }
10d56c87 6052+ return lkb_found;
4bf12011 6053+}
6054+
10d56c87 6055+static int release_lockspace(struct dlm_ls *ls, int force)
4bf12011 6056+{
6057+ struct dlm_lkb *lkb;
6058+ struct dlm_rsb *rsb;
6059+ struct dlm_recover *rv;
6060+ struct dlm_csb *csb;
4bf12011 6061+ struct list_head *head;
6062+ int i;
6063+ int busy = lockspace_busy(ls);
6064+
6065+ /* Don't destroy a busy lockspace */
6066+ if (busy > force)
6067+ return -EBUSY;
6068+
6069+ if (force < 3) {
6070+ kcl_leave_service(ls->ls_local_id);
6071+ kcl_unregister_service(ls->ls_local_id);
6072+ }
6073+
6074+ spin_lock(&lslist_lock);
6075+ list_del(&ls->ls_list);
6076+ spin_unlock(&lslist_lock);
6077+
6078+ /*
6079+ * Free resdata structs.
6080+ */
6081+
6082+ dlm_dir_clear(ls);
6083+ kfree(ls->ls_dirtbl);
4bf12011 6084+
6085+ /*
10d56c87 6086+ * Free all lkb's on lkbtbl[] lists.
4bf12011 6087+ */
6088+
6089+ for (i = 0; i < ls->ls_lkbtbl_size; i++) {
6090+ head = &ls->ls_lkbtbl[i].list;
4bf12011 6091+ while (!list_empty(head)) {
6092+ lkb = list_entry(head->next, struct dlm_lkb,
6093+ lkb_idtbl_list);
4bf12011 6094+ list_del(&lkb->lkb_idtbl_list);
6095+
6096+ if (lkb->lkb_lockqueue_state)
6097+ remove_from_lockqueue(lkb);
6098+
5cdbd17b 6099+ if (lkb->lkb_astflags & (AST_COMP | AST_BAST))
4bf12011 6100+ list_del(&lkb->lkb_astqueue);
6101+
10d56c87 6102+ if (lkb->lkb_lvbptr && lkb->lkb_flags & GDLM_LKFLG_MSTCPY)
4bf12011 6103+ free_lvb(lkb->lkb_lvbptr);
6104+
6105+ free_lkb(lkb);
6106+ }
6107+ }
6108+
10d56c87 6109+ kfree(ls->ls_lkbtbl);
4bf12011 6110+
6111+ /*
10d56c87 6112+ * Free all rsb's on rsbtbl[] lists
4bf12011 6113+ */
6114+
6115+ for (i = 0; i < ls->ls_rsbtbl_size; i++) {
6116+ head = &ls->ls_rsbtbl[i].list;
4bf12011 6117+ while (!list_empty(head)) {
6118+ rsb = list_entry(head->next, struct dlm_rsb,
6119+ res_hashchain);
4bf12011 6120+ list_del(&rsb->res_hashchain);
6121+
6122+ if (rsb->res_lvbptr)
6123+ free_lvb(rsb->res_lvbptr);
6124+
6125+ free_rsb(rsb);
6126+ }
6127+ }
6128+
10d56c87 6129+ kfree(ls->ls_rsbtbl);
4bf12011 6130+
6131+ /*
6132+ * Free structures on any other lists
6133+ */
6134+
6135+ head = &ls->ls_recover;
6136+ while (!list_empty(head)) {
6137+ rv = list_entry(head->next, struct dlm_recover, list);
6138+ list_del(&rv->list);
6139+ kfree(rv);
4bf12011 6140+ }
6141+
6142+ head = &ls->ls_nodes;
6143+ while (!list_empty(head)) {
6144+ csb = list_entry(head->next, struct dlm_csb, list);
6145+ list_del(&csb->list);
4bf12011 6146+ release_csb(csb);
6147+ }
6148+
6149+ head = &ls->ls_nodes_gone;
6150+ while (!list_empty(head)) {
6151+ csb = list_entry(head->next, struct dlm_csb, list);
6152+ list_del(&csb->list);
4bf12011 6153+ release_csb(csb);
6154+ }
6155+
10d56c87 6156+ kfree(ls);
4bf12011 6157+
6158+ dlm_release();
6159+
6160+ module_put(THIS_MODULE);
6161+ return 0;
6162+}
6163+
6164+
6165+/*
6166+ * Called when a system has released all its locks and is not going to use the
6167+ * lockspace any longer. We blindly free everything we're managing for this
6168+ * lockspace. Remaining nodes will go through the recovery process as if we'd
6169+ * died. The lockspace must continue to function as usual, participating in
6170+ * recoveries, until kcl_leave_service returns.
6171+ *
6172+ * Force has 4 possible values:
6173+ * 0 - don't destroy lockspace if it has any LKBs
6174+ * 1 - destroy lockspace if it has remote LKBs but not if it has local LKBs
6175+ * 2 - destroy lockspace regardless of LKBs
6176+ * 3 - destroy lockspace as part of a forced shutdown
6177+ */
6178+
6179+int dlm_release_lockspace(void *lockspace, int force)
6180+{
10d56c87 6181+ struct dlm_ls *ls;
4bf12011 6182+
6183+ ls = find_lockspace_by_local_id(lockspace);
6184+ if (!ls)
10d56c87 6185+ return -EINVAL;
4bf12011 6186+
6187+ return release_lockspace(ls, force);
6188+}
6189+
6190+
6191+/* Called when the cluster is being shut down dirtily */
6192+void dlm_emergency_shutdown()
6193+{
6194+ struct dlm_ls *ls;
6195+ struct dlm_ls *tmp;
4bf12011 6196+
6197+ /* Shut lowcomms down to prevent any socket activity */
6198+ lowcomms_stop_accept();
6199+
6200+ /* Delete the devices that belong to the userland
6201+ lockspaces to be deleted. */
6202+ dlm_device_free_devices();
6203+
6204+ /* Now try to clean the lockspaces */
6205+ spin_lock(&lslist_lock);
6206+
6207+ list_for_each_entry_safe(ls, tmp, &lslist, ls_list) {
6208+ spin_unlock(&lslist_lock);
6209+ release_lockspace(ls, 3);
6210+ spin_lock(&lslist_lock);
6211+ }
6212+
6213+ spin_unlock(&lslist_lock);
6214+}
6215+
10d56c87 6216+struct dlm_recover *allocate_dlm_recover(void)
4bf12011 6217+{
10d56c87 6218+ struct dlm_recover *rv;
4bf12011 6219+
6220+ rv = kmalloc(sizeof(struct dlm_recover), GFP_KERNEL);
6221+ if (rv)
6222+ memset(rv, 0, sizeof(struct dlm_recover));
6223+ return rv;
4bf12011 6224+}
6225+
6226+/*
6227+ * Called by CMAN on a specific ls. "stop" means set a flag which, while set,
6228+ * causes all new requests to the ls to be queued and not submitted until the
6229+ * flag is cleared. A stop on a ls also needs to cancel any prior starts on it.
6230+ * The recoverd thread carries out any work called for by this event.
6231+ */
6232+
6233+static int dlm_ls_stop(void *servicedata)
6234+{
10d56c87 6235+ struct dlm_ls *ls = (struct dlm_ls *) servicedata;
4bf12011 6236+ int new;
6237+
6238+ spin_lock(&ls->ls_recover_lock);
6239+ ls->ls_last_stop = ls->ls_last_start;
6240+ set_bit(LSFL_LS_STOP, &ls->ls_flags);
6241+ new = test_and_clear_bit(LSFL_LS_RUN, &ls->ls_flags);
6242+ spin_unlock(&ls->ls_recover_lock);
6243+
6244+ /*
6245+ * This in_recovery lock does two things:
6246+ *
6247+ * 1) Keeps this function from returning until all threads are out
6248+ * of locking routines and locking is truly stopped.
6249+ * 2) Keeps any new requests from being processed until it's unlocked
6250+ * when recovery is complete.
6251+ */
6252+
6253+ if (new)
6254+ down_write(&ls->ls_in_recovery);
6255+
6256+ clear_bit(LSFL_RESDIR_VALID, &ls->ls_flags);
6257+ clear_bit(LSFL_ALL_RESDIR_VALID, &ls->ls_flags);
6258+ clear_bit(LSFL_NODES_VALID, &ls->ls_flags);
6259+ clear_bit(LSFL_ALL_NODES_VALID, &ls->ls_flags);
6260+
10d56c87 6261+ dlm_recoverd_kick(ls);
4bf12011 6262+
6263+ return 0;
6264+}
6265+
6266+/*
6267+ * Called by CMAN on a specific ls. "start" means enable the lockspace to do
6268+ * request processing which first requires that the recovery procedure be
6269+ * stepped through with all nodes sharing the lockspace (nodeids). The first
6270+ * start on the ls after it's created is a special case and requires some extra
6271+ * work like figuring out our own local nodeid. We can't do all this in the
6272+ * calling CMAN context, so we must pass this work off to the recoverd thread
10d56c87 6273+ * which was created in dlm_init(). The recoverd thread carries out any work
4bf12011 6274+ * called for by this event.
6275+ */
6276+
6277+static int dlm_ls_start(void *servicedata, uint32_t *nodeids, int count,
6278+ int event_id, int type)
6279+{
6280+ struct dlm_ls *ls = (struct dlm_ls *) servicedata;
6281+ struct dlm_recover *rv;
4bf12011 6282+ int error = -ENOMEM;
6283+
6284+ rv = allocate_dlm_recover();
6285+ if (!rv)
4bf12011 6286+ goto out;
6287+
6288+ rv->nodeids = nodeids;
6289+ rv->node_count = count;
6290+ rv->event_id = event_id;
4bf12011 6291+
6292+ spin_lock(&ls->ls_recover_lock);
6293+ ls->ls_last_start = event_id;
10d56c87 6294+ list_add_tail(&rv->list, &ls->ls_recover);
4bf12011 6295+ set_bit(LSFL_LS_START, &ls->ls_flags);
6296+ spin_unlock(&ls->ls_recover_lock);
6297+
10d56c87 6298+ dlm_recoverd_kick(ls);
4bf12011 6299+ error = 0;
6300+
6301+ out:
6302+ return error;
6303+}
6304+
6305+/*
6306+ * Called by CMAN on a specific ls. "finish" means that all nodes which
6307+ * received a "start" have completed the start and called kcl_start_done.
6308+ * The recoverd thread carries out any work called for by this event.
6309+ */
6310+
6311+static void dlm_ls_finish(void *servicedata, int event_id)
6312+{
10d56c87 6313+ struct dlm_ls *ls = (struct dlm_ls *) servicedata;
4bf12011 6314+
6315+ spin_lock(&ls->ls_recover_lock);
6316+ ls->ls_last_finish = event_id;
6317+ set_bit(LSFL_LS_FINISH, &ls->ls_flags);
6318+ spin_unlock(&ls->ls_recover_lock);
6319+
10d56c87 6320+ dlm_recoverd_kick(ls);
4bf12011 6321+}
6322+
6323+struct kcl_service_ops ls_ops = {
6324+ .stop = dlm_ls_stop,
6325+ .start = dlm_ls_start,
6326+ .finish = dlm_ls_finish
6327+};
6328diff -urN linux-orig/cluster/dlm/lockspace.h linux-patched/cluster/dlm/lockspace.h
6329--- linux-orig/cluster/dlm/lockspace.h 1970-01-01 07:30:00.000000000 +0730
6330+++ linux-patched/cluster/dlm/lockspace.h 2004-07-13 18:57:22.000000000 +0800
6331@@ -0,0 +1,27 @@
4bf12011 6332+/******************************************************************************
6333+*******************************************************************************
6334+**
6335+** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
6336+** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
6337+**
6338+** This copyrighted material is made available to anyone wishing to use,
6339+** modify, copy, or redistribute it subject to the terms and conditions
6340+** of the GNU General Public License v.2.
6341+**
6342+*******************************************************************************
6343+******************************************************************************/
6344+
6345+#ifndef __LOCKSPACE_DOT_H__
6346+#define __LOCKSPACE_DOT_H__
6347+
6348+void dlm_lockspace_init(void);
6349+int dlm_init(void);
6350+int dlm_release(void);
6351+int dlm_new_lockspace(char *name, int namelen, void **ls, int flags);
6352+int dlm_release_lockspace(void *ls, int force);
6353+struct dlm_ls *find_lockspace_by_global_id(uint32_t id);
6354+struct dlm_ls *find_lockspace_by_local_id(void *id);
6355+struct dlm_ls *find_lockspace_by_name(char *name, int namelen);
4bf12011 6356+void dlm_emergency_shutdown(void);
6357+
6358+#endif /* __LOCKSPACE_DOT_H__ */
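
Taken together with lockspace.c above, the expected call pattern for a kernel client is: dlm_new_lockspace() to create or attach to a lockspace, then dlm_release_lockspace() with an appropriate force level at teardown. A minimal sketch (illustrative only; it assumes the patched tree's cluster/dlm headers, and the name "example_ls" is arbitrary):

#include <linux/module.h>
#include <linux/string.h>

#include "lockspace.h"	/* cluster/dlm/lockspace.h from this patch */

static void *example_ls;

static int __init example_init(void)
{
	/* Flags may include DLM_LSF_NOTIMERS or DLM_LSF_NOCONVGRANT.
	 * On -EEXIST the existing lockspace handle is still returned. */
	int error = dlm_new_lockspace("example_ls", strlen("example_ls"),
				      &example_ls, 0);
	if (error && error != -EEXIST)
		return error;
	return 0;
}

static void __exit example_exit(void)
{
	/* force=0: return -EBUSY rather than destroy a lockspace that
	 * still holds any LKBs (see release_lockspace above). */
	dlm_release_lockspace(example_ls, 0);
}

module_init(example_init);
module_exit(example_exit);
MODULE_LICENSE("GPL");
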
6359diff -urN linux-orig/cluster/dlm/lowcomms.c linux-patched/cluster/dlm/lowcomms.c
6360--- linux-orig/cluster/dlm/lowcomms.c 1970-01-01 07:30:00.000000000 +0730
10d56c87 6361+++ linux-patched/cluster/dlm/lowcomms.c 2004-07-13 18:57:22.000000000 +0800
4bf12011 6362@@ -0,0 +1,1354 @@
6363+/******************************************************************************
6364+*******************************************************************************
6365+**
6366+** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
6367+** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
6368+**
6369+** This copyrighted material is made available to anyone wishing to use,
6370+** modify, copy, or redistribute it subject to the terms and conditions
6371+** of the GNU General Public License v.2.
6372+**
6373+*******************************************************************************
6374+******************************************************************************/
6375+
6376+/*
6377+ * lowcomms.c
6378+ *
6379+ * This is the "low-level" comms layer.
6380+ *
6381+ * It is responsible for sending/receiving messages
6382+ * from other nodes in the cluster.
6383+ *
6384+ * Cluster nodes are referred to by their nodeids. nodeids are
6385+ * simply 32 bit numbers to the locking module - if they need to
6386+ * be expanded for the cluster infrastructure then that is its
6387+ * responsibility. It is this layer's
6388+ * responsibility to resolve these into IP addresses or
6389+ * whatever it needs for inter-node communication.
6390+ *
6391+ * The comms level is two kernel threads that deal mainly with
6392+ * the receiving of messages from other nodes and passing them
6393+ * up to the mid-level comms layer (which understands the
6394+ * message format) for execution by the locking core, and
6395+ * a send thread which does all the setting up of connections
6396+ * to remote nodes and the sending of data. Threads are not allowed
6397+ * to send their own data because it may cause them to wait in times
6398+ * of high load. Also, this way, the sending thread can collect together
6399+ * messages bound for one node and send them in one block.
6400+ *
6401+ * I don't see any problem with the recv thread executing the locking
6402+ * code on behalf of remote processes as the locking code is
6403+ * short, efficient and never waits.
6404+ *
6405+ */
6406+
6407+
6408+#include <asm/ioctls.h>
6409+#include <net/sock.h>
6410+#include <net/tcp.h>
6411+#include <linux/pagemap.h>
6412+#include <cluster/cnxman.h>
6413+
6414+#include "dlm_internal.h"
6415+#include "lowcomms.h"
6416+#include "midcomms.h"
6417+#include "config.h"
6418+
6419+struct cbuf {
6420+ unsigned base;
6421+ unsigned len;
6422+ unsigned mask;
6423+};
6424+
6425+#define CBUF_INIT(cb, size) do { (cb)->base = (cb)->len = 0; (cb)->mask = ((size)-1); } while(0)
6426+#define CBUF_ADD(cb, n) do { (cb)->len += (n); } while(0)
6427+#define CBUF_EMPTY(cb) ((cb)->len == 0)
6428+#define CBUF_MAY_ADD(cb, n) (((cb)->len + (n)) < ((cb)->mask + 1))
6429+#define CBUF_EAT(cb, n) do { (cb)->len -= (n); \
6430+ (cb)->base += (n); (cb)->base &= (cb)->mask; } while(0)
6431+#define CBUF_DATA(cb) (((cb)->base + (cb)->len) & (cb)->mask)
6432+
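
The CBUF macros above implement a power-of-two ring buffer over a single page: base is the read offset, len is the number of unconsumed bytes, and the mask stands in for modulo arithmetic. A self-contained user-space exercise of the same macros (editorial illustration, not part of the patch):

#include <stdio.h>

struct cbuf { unsigned base, len, mask; };

#define CBUF_INIT(cb, size) do { (cb)->base = (cb)->len = 0; (cb)->mask = ((size)-1); } while(0)
#define CBUF_ADD(cb, n) do { (cb)->len += (n); } while(0)
#define CBUF_EAT(cb, n) do { (cb)->len -= (n); \
			     (cb)->base += (n); (cb)->base &= (cb)->mask; } while(0)
#define CBUF_DATA(cb) (((cb)->base + (cb)->len) & (cb)->mask)

int main(void)
{
	struct cbuf cb;

	CBUF_INIT(&cb, 16);	/* the size must be a power of two */
	CBUF_ADD(&cb, 10);	/* receiver appended 10 bytes at offset 0 */
	CBUF_EAT(&cb, 6);	/* consumer ate 6; base advances and wraps via the mask */

	/* Next write offset is (base + len) & mask = (6 + 4) & 15 = 10. */
	printf("write offset %u, %u bytes still pending\n", CBUF_DATA(&cb), cb.len);
	return 0;
}
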
6433+struct connection {
6434+ struct socket *sock; /* NULL if not connected */
6435+ uint32_t nodeid; /* So we know who we are in the list */
6436+ struct rw_semaphore sock_sem; /* Stop connect races */
6437+ struct list_head read_list; /* On this list when ready for reading */
6438+ struct list_head write_list; /* On this list when ready for writing */
6439+ struct list_head state_list; /* On this list when ready to connect */
6440+ unsigned long flags; /* bit 1,2 = We are on the read/write lists */
6441+#define CF_READ_PENDING 1
6442+#define CF_WRITE_PENDING 2
6443+#define CF_CONNECT_PENDING 3
6444+#define CF_IS_OTHERSOCK 4
6445+ struct list_head writequeue; /* List of outgoing writequeue_entries */
6446+ struct list_head listenlist; /* List of allocated listening sockets */
6447+ spinlock_t writequeue_lock;
6448+ int (*rx_action) (struct connection *); /* What to do when active */
6449+ struct page *rx_page;
6450+ struct cbuf cb;
6451+ int retries;
6452+#define MAX_CONNECT_RETRIES 3
6453+ struct connection *othersock;
6454+};
6455+#define sock2con(x) ((struct connection *)(x)->sk_user_data)
6456+#define nodeid2con(x) (&connections[(x)])
6457+
6458+/* An entry waiting to be sent */
6459+struct writequeue_entry {
6460+ struct list_head list;
6461+ struct page *page;
6462+ int offset;
6463+ int len;
6464+ int end;
6465+ int users;
6466+ struct connection *con;
6467+};
6468+
6469+/* "Template" structure for IPv4 and IPv6 used to fill
6470+ * in the missing bits when converting between cman (which knows
6471+ * nothing about sockaddr structs) and real life where we actually
6472+ * have to connect to these addresses. Also one of these structs
6473+ * will hold the cached "us" address.
6474+ *
6475+ * It's an in6 sockaddr just so there's enough space for anything
6476+ * we're likely to see here.
6477+ */
6478+static struct sockaddr_in6 local_addr;
6479+
6480+/* Manage daemons */
6481+static struct semaphore thread_lock;
6482+static struct completion thread_completion;
6483+static atomic_t send_run;
6484+static atomic_t recv_run;
6485+
6486+/* An array of connections, indexed by NODEID */
6487+static struct connection *connections;
6488+static int conn_array_size;
6489+static atomic_t writequeue_length;
6490+static atomic_t accepting;
6491+
6492+static wait_queue_t lowcomms_send_waitq_head;
6493+static wait_queue_head_t lowcomms_send_waitq;
6494+
6495+static wait_queue_t lowcomms_recv_waitq_head;
6496+static wait_queue_head_t lowcomms_recv_waitq;
6497+
6498+/* List of sockets that have reads pending */
6499+static struct list_head read_sockets;
6500+static spinlock_t read_sockets_lock;
6501+
6502+/* List of sockets which have writes pending */
6503+static struct list_head write_sockets;
6504+static spinlock_t write_sockets_lock;
6505+
6506+/* List of sockets which have connects pending */
6507+static struct list_head state_sockets;
6508+static spinlock_t state_sockets_lock;
6509+
6510+/* List of allocated listen sockets */
6511+static struct list_head listen_sockets;
6512+
6513+static int lowcomms_ipaddr_from_nodeid(int nodeid, struct sockaddr *retaddr);
6514+static int lowcomms_nodeid_from_ipaddr(struct sockaddr *addr, int addr_len);
6515+
6516+
6517+/* Data available on socket or listen socket received a connect */
6518+static void lowcomms_data_ready(struct sock *sk, int count_unused)
6519+{
6520+ struct connection *con = sock2con(sk);
6521+
6522+ if (test_and_set_bit(CF_READ_PENDING, &con->flags))
6523+ return;
6524+
6525+ spin_lock_bh(&read_sockets_lock);
6526+ list_add_tail(&con->read_list, &read_sockets);
6527+ spin_unlock_bh(&read_sockets_lock);
6528+
6529+ wake_up_interruptible(&lowcomms_recv_waitq);
6530+}
6531+
6532+static void lowcomms_write_space(struct sock *sk)
6533+{
6534+ struct connection *con = sock2con(sk);
6535+
6536+ if (test_and_set_bit(CF_WRITE_PENDING, &con->flags))
6537+ return;
6538+
6539+ spin_lock_bh(&write_sockets_lock);
6540+ list_add_tail(&con->write_list, &write_sockets);
6541+ spin_unlock_bh(&write_sockets_lock);
6542+
6543+ wake_up_interruptible(&lowcomms_send_waitq);
6544+}
6545+
6546+static inline void lowcomms_connect_sock(struct connection *con)
6547+{
6548+ if (test_and_set_bit(CF_CONNECT_PENDING, &con->flags))
6549+ return;
6550+ if (!atomic_read(&accepting))
6551+ return;
6552+
6553+ spin_lock_bh(&state_sockets_lock);
6554+ list_add_tail(&con->state_list, &state_sockets);
6555+ spin_unlock_bh(&state_sockets_lock);
6556+
6557+ wake_up_interruptible(&lowcomms_send_waitq);
6558+}
6559+
6560+static void lowcomms_state_change(struct sock *sk)
6561+{
6562+/* struct connection *con = sock2con(sk); */
6563+
6564+ switch (sk->sk_state) {
6565+ case TCP_ESTABLISHED:
6566+ lowcomms_write_space(sk);
6567+ break;
6568+
6569+ case TCP_FIN_WAIT1:
6570+ case TCP_FIN_WAIT2:
6571+ case TCP_TIME_WAIT:
6572+ case TCP_CLOSE:
6573+ case TCP_CLOSE_WAIT:
6574+ case TCP_LAST_ACK:
6575+ case TCP_CLOSING:
6576+ /* FIXME: I think this causes more trouble than it solves.
6577+ lowcomms will reconnect anyway when there is something to
6578+ send. This just attempts reconnection if a node goes down!
6579+ */
6580+ /* lowcomms_connect_sock(con); */
6581+ break;
6582+
6583+ default:
6584+ printk("dlm: lowcomms_state_change: state=%d\n", sk->sk_state);
6585+ break;
6586+ }
6587+}
6588+
6589+/* Make a socket active */
6590+static int add_sock(struct socket *sock, struct connection *con)
6591+{
6592+ con->sock = sock;
6593+
6594+ /* Install a data_ready callback */
6595+ con->sock->sk->sk_data_ready = lowcomms_data_ready;
6596+ con->sock->sk->sk_write_space = lowcomms_write_space;
6597+ con->sock->sk->sk_state_change = lowcomms_state_change;
6598+
6599+ return 0;
6600+}
6601+
6602+/* Add the port number to an IPv6 or IPv4 sockaddr and return the address
6603+ length */
6604+static void make_sockaddr(struct sockaddr_in6 *saddr, uint16_t port,
6605+ int *addr_len)
6606+{
6607+ saddr->sin6_family = local_addr.sin6_family;
6608+ if (local_addr.sin6_family == AF_INET) {
6609+ struct sockaddr_in *in4_addr = (struct sockaddr_in *)saddr;
6610+ in4_addr->sin_port = cpu_to_be16(port);
6611+ *addr_len = sizeof(struct sockaddr_in);
6612+ }
6613+ else {
6614+ saddr->sin6_port = cpu_to_be16(port);
6615+ *addr_len = sizeof(struct sockaddr_in6);
6616+ }
6617+}
6618+
6619+/* Close a remote connection and tidy up */
6620+static void close_connection(struct connection *con)
6621+{
6622+ if (test_bit(CF_IS_OTHERSOCK, &con->flags))
6623+ return;
6624+
6625+ down_write(&con->sock_sem);
6626+
6627+ if (con->sock) {
6628+ sock_release(con->sock);
6629+ con->sock = NULL;
6630+ if (con->othersock) {
6631+ down_write(&con->othersock->sock_sem);
6632+ sock_release(con->othersock->sock);
6633+ con->othersock->sock = NULL;
6634+ up_write(&con->othersock->sock_sem);
6635+ kfree(con->othersock);
6636+ con->othersock = NULL;
6637+ }
6638+ }
6639+ if (con->rx_page) {
6640+ __free_page(con->rx_page);
6641+ con->rx_page = NULL;
6642+ }
6643+ up_write(&con->sock_sem);
6644+}
6645+
6646+/* Data received from remote end */
6647+static int receive_from_sock(struct connection *con)
6648+{
6649+ int ret = 0;
6650+ struct msghdr msg;
6651+ struct iovec iov[2];
6652+ mm_segment_t fs;
6653+ unsigned len;
6654+ int r;
6655+ int call_again_soon = 0;
6656+
6657+ down_read(&con->sock_sem);
6658+
6659+ if (con->sock == NULL)
6660+ goto out;
6661+ if (con->rx_page == NULL) {
6662+ /*
6663+ * This doesn't need to be atomic, but I think it should
6664+ * improve performance if it is.
6665+ */
6666+ con->rx_page = alloc_page(GFP_ATOMIC);
6667+ if (con->rx_page == NULL)
6668+ goto out_resched;
6669+ CBUF_INIT(&con->cb, PAGE_CACHE_SIZE);
6670+ }
6671+ /*
6672+ * To avoid doing too many short reads, we will reschedule for
6673+ * another time if there are fewer than 32 bytes left in the buffer.
6674+ */
6675+ if (!CBUF_MAY_ADD(&con->cb, 32))
6676+ goto out_resched;
6677+
6678+ msg.msg_control = NULL;
6679+ msg.msg_controllen = 0;
6680+ msg.msg_iovlen = 1;
6681+ msg.msg_iov = iov;
6682+ msg.msg_name = NULL;
6683+ msg.msg_namelen = 0;
6684+ msg.msg_flags = 0;
6685+
6686+ /*
6687+ * iov[0] is the bit of the circular buffer between the current end
6688+ * point (cb.base + cb.len) and the end of the buffer.
6689+ */
6690+ iov[0].iov_len = con->cb.base - CBUF_DATA(&con->cb);
6691+ iov[0].iov_base = page_address(con->rx_page) + CBUF_DATA(&con->cb);
6692+ iov[1].iov_len = 0;
6693+
6694+ /*
6695+ * iov[1] is the bit of the circular buffer between the start of the
6696+ * buffer and the start of the currently used section (cb.base)
6697+ */
6698+ if (CBUF_DATA(&con->cb) >= con->cb.base) {
6699+ iov[0].iov_len = PAGE_CACHE_SIZE - CBUF_DATA(&con->cb);
6700+ iov[1].iov_len = con->cb.base;
6701+ iov[1].iov_base = page_address(con->rx_page);
6702+ msg.msg_iovlen = 2;
6703+ }
6704+ len = iov[0].iov_len + iov[1].iov_len;
6705+
6706+ fs = get_fs();
6707+ set_fs(get_ds());
6708+ r = ret = sock_recvmsg(con->sock, &msg, len,
6709+ MSG_DONTWAIT | MSG_NOSIGNAL);
6710+ set_fs(fs);
6711+
6712+ if (ret <= 0)
6713+ goto out_close;
6714+ if (ret == len)
6715+ call_again_soon = 1;
6716+ CBUF_ADD(&con->cb, ret);
6717+ ret = midcomms_process_incoming_buffer(con->nodeid,
6718+ page_address(con->rx_page),
6719+ con->cb.base, con->cb.len,
6720+ PAGE_CACHE_SIZE);
6721+ if (ret == -EBADMSG) {
6722+ printk(KERN_INFO "dlm: lowcomms: addr=%p, base=%u, len=%u, "
6723+ "iov_len=%u, iov_base[0]=%p, read=%d\n",
6724+ page_address(con->rx_page), con->cb.base, con->cb.len,
6725+ len, iov[0].iov_base, r);
6726+ }
6727+ if (ret < 0)
6728+ goto out_close;
6729+ CBUF_EAT(&con->cb, ret);
6730+
6731+ if (CBUF_EMPTY(&con->cb) && !call_again_soon) {
6732+ __free_page(con->rx_page);
6733+ con->rx_page = NULL;
6734+ }
6735+ out:
6736+ if (call_again_soon)
6737+ goto out_resched;
6738+ up_read(&con->sock_sem);
6739+ ret = 0;
6740+ goto out_ret;
6741+
6742+ out_resched:
6743+ lowcomms_data_ready(con->sock->sk, 0);
6744+ up_read(&con->sock_sem);
6745+ ret = 0;
6746+ goto out_ret;
6747+
6748+ out_close:
6749+ up_read(&con->sock_sem);
6750+ if (ret != -EAGAIN && !test_bit(CF_IS_OTHERSOCK, &con->flags)) {
6751+ close_connection(con);
6752+ lowcomms_connect_sock(con);
6753+ }
6754+
6755+ out_ret:
6756+ return ret;
6757+}
6758+
6759+/* Listening socket is busy, accept a connection */
6760+static int accept_from_sock(struct connection *con)
6761+{
6762+ int result;
6763+ struct sockaddr_in6 peeraddr;
6764+ struct socket *newsock;
6765+ int len;
6766+ int nodeid;
6767+ struct connection *newcon;
6768+
6769+ memset(&peeraddr, 0, sizeof(peeraddr));
6770+ newsock = sock_alloc();
6771+ if (!newsock)
6772+ return -ENOMEM;
6773+
6774+ down_read(&con->sock_sem);
6775+
6776+ result = -ENOTCONN;
6777+ if (con->sock == NULL)
6778+ goto accept_err;
6779+
6780+ newsock->type = con->sock->type;
6781+ newsock->ops = con->sock->ops;
6782+
6783+ result = con->sock->ops->accept(con->sock, newsock, O_NONBLOCK);
6784+ if (result < 0)
6785+ goto accept_err;
6786+
6787+ /* Get the connected socket's peer */
6788+ if (newsock->ops->getname(newsock, (struct sockaddr *)&peeraddr,
6789+ &len, 2)) {
6790+ result = -ECONNABORTED;
6791+ goto accept_err;
6792+ }
6793+
6794+ /* Get the new node's NODEID */
6795+ nodeid = lowcomms_nodeid_from_ipaddr((struct sockaddr *)&peeraddr, len);
6796+ if (nodeid == 0) {
6797+ printk("dlm: connect from non cluster node\n");
6798+ sock_release(newsock);
6799+ up_read(&con->sock_sem);
6800+ return -1;
6801+ }
6802+
6803+ log_print("got connection from %d", nodeid);
6804+
6805+ /* Check to see if we already have a connection to this node. This
6806+ * could happen if the two nodes initiate a connection at roughly
6807+ * the same time and the connections cross on the wire.
6808+ * TEMPORARY FIX:
6809+ * In this case we store the incoming one in "othersock"
6810+ */
6811+ newcon = nodeid2con(nodeid);
6812+ down_write(&newcon->sock_sem);
6813+ if (newcon->sock) {
6814+ struct connection *othercon;
6815+
6816+ othercon = kmalloc(sizeof(struct connection), GFP_KERNEL);
6817+ if (!othercon) {
6818+ printk("dlm: failed to allocate incoming socket\n");
6819+ sock_release(newsock);
6820+ up_write(&newcon->sock_sem);
6821+ up_read(&con->sock_sem);
6822+ goto accept_out;
6823+ }
6824+ memset(othercon, 0, sizeof(*othercon));
6825+ newcon->othersock = othercon;
6826+ othercon->nodeid = nodeid;
6827+ othercon->sock = newsock;
6828+ othercon->rx_action = receive_from_sock;
6829+ add_sock(newsock, othercon);
6830+ init_rwsem(&othercon->sock_sem);
6831+ set_bit(CF_IS_OTHERSOCK, &othercon->flags);
6832+ newsock->sk->sk_user_data = othercon;
6833+
6834+ up_write(&newcon->sock_sem);
6835+ lowcomms_data_ready(newsock->sk, 0);
6836+ up_read(&con->sock_sem);
6837+ goto accept_out;
6838+ }
6839+
6840+ newsock->sk->sk_user_data = newcon;
6841+ newcon->rx_action = receive_from_sock;
6842+ add_sock(newsock, newcon);
6843+ up_write(&newcon->sock_sem);
6844+
6845+ /*
6846+ * Add it to the active queue in case we got data
6847+ * between processing the accept and adding the socket
6848+ * to the read_sockets list
6849+ */
6850+ lowcomms_data_ready(newsock->sk, 0);
6851+
6852+ up_read(&con->sock_sem);
6853+
6854+ accept_out:
6855+ return 0;
6856+
6857+ accept_err:
6858+ up_read(&con->sock_sem);
6859+ sock_release(newsock);
6860+
6861+ printk("dlm: error accepting connection from node: %d\n", result);
6862+ return result;
6863+}
6864+
6865+/* Connect a new socket to its peer */
6866+static int connect_to_sock(struct connection *con)
6867+{
6868+ int result = -EHOSTUNREACH;
6869+ struct sockaddr_in6 saddr;
6870+ int addr_len;
6871+ struct socket *sock;
6872+
6873+ if (con->nodeid == 0) {
6874+ log_print("attempt to connect sock 0 foiled");
6875+ return 0;
6876+ }
6877+
6878+ down_write(&con->sock_sem);
6879+ if (con->retries++ > MAX_CONNECT_RETRIES)
6880+ goto out;
6881+
6882+ // FIXME not sure this should happen, let alone like this.
6883+ if (con->sock) {
6884+ sock_release(con->sock);
6885+ con->sock = NULL;
6886+ }
6887+
6888+ /* Create a socket to communicate with */
6889+ result = sock_create_kern(local_addr.sin6_family, SOCK_STREAM, IPPROTO_TCP, &sock);
6890+ if (result < 0)
6891+ goto out_err;
6892+
6893+ if (lowcomms_ipaddr_from_nodeid(con->nodeid, (struct sockaddr *)&saddr) < 0)
6894+ goto out_err;
6895+
6896+ sock->sk->sk_user_data = con;
6897+ con->rx_action = receive_from_sock;
6898+
6899+ make_sockaddr(&saddr, dlm_config.tcp_port, &addr_len);
6900+
6901+ add_sock(sock, con);
6902+ result =
6903+ sock->ops->connect(sock, (struct sockaddr *) &saddr, addr_len,
6904+ O_NONBLOCK);
6905+ if (result == -EINPROGRESS)
6906+ result = 0;
6907+ if (result != 0)
6908+ goto out_err;
6909+
6910+ out:
6911+ up_write(&con->sock_sem);
6912+ /*
6913+ * Returning an error here means we've given up trying to connect to
6914+ * a remote node, otherwise we return 0 and reschedule the connection
6915+ * attempt
6916+ */
6917+ return result;
6918+
6919+ out_err:
6920+ if (con->sock) {
6921+ sock_release(con->sock);
6922+ con->sock = NULL;
6923+ }
6924+ /*
6925+ * Some errors are fatal and this list might need adjusting. For other
6926+ * errors we try again until the max number of retries is reached.
6927+ */
6928+ if (result != -EHOSTUNREACH && result != -ENETUNREACH &&
6929+ result != -ENETDOWN && result != -EINVAL
6930+ && result != -EPROTONOSUPPORT) {
6931+ lowcomms_connect_sock(con);
6932+ result = 0;
6933+ }
6934+ goto out;
6935+}
6936+
6937+static struct socket *create_listen_sock(struct connection *con, char *addr, int addr_len)
6938+{
6939+ struct socket *sock = NULL;
6940+ mm_segment_t fs;
6941+ int result = 0;
6942+ int one = 1;
6943+ struct sockaddr_in6 *saddr = (struct sockaddr_in6 *)addr;
6944+
6945+ /* Create a socket to communicate with */
6946+ result = sock_create_kern(local_addr.sin6_family, SOCK_STREAM, IPPROTO_TCP, &sock);
6947+ if (result < 0) {
6948+ printk("dlm: Can't create listening comms socket\n");
6949+ goto create_out;
6950+ }
6951+
6952+ fs = get_fs();
6953+ set_fs(get_ds());
6954+ result = sock_setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, (char *)&one, sizeof(one));
6955+ set_fs(fs);
6956+ if (result < 0) {
6957+ printk("dlm: Failed to set SO_REUSEADDR on socket: result=%d\n",result);
6958+ }
6959+ sock->sk->sk_user_data = con;
6960+ con->rx_action = accept_from_sock;
6961+ con->sock = sock;
6962+
6963+ /* Bind to our port */
6964+ make_sockaddr(saddr, dlm_config.tcp_port, &addr_len);
6965+ result = sock->ops->bind(sock, (struct sockaddr *) saddr, addr_len);
6966+ if (result < 0) {
6967+ printk("dlm: Can't bind to port %d\n", dlm_config.tcp_port);
6968+ sock_release(sock);
6969+ sock = NULL;
6970+ goto create_out;
6971+ }
6972+
6973+ fs = get_fs();
6974+ set_fs(get_ds());
6975+
6976+ result = sock_setsockopt(sock, SOL_SOCKET, SO_KEEPALIVE, (char *)&one, sizeof(one));
6977+ set_fs(fs);
6978+ if (result < 0) {
6979+ printk("dlm: Set keepalive failed: %d\n", result);
6980+ }
6981+
6982+ result = sock->ops->listen(sock, 5);
6983+ if (result < 0) {
6984+ printk("dlm: Can't listen on port %d\n", dlm_config.tcp_port);
6985+ sock_release(sock);
6986+ sock = NULL;
6987+ goto create_out;
6988+ }
6989+
6990+ create_out:
6991+ return sock;
6992+}
6993+
6994+
6995+/* Listen on all interfaces */
6996+static int listen_for_all(void)
6997+{
6998+ int result = 0;
6999+ int nodeid;
7000+ struct socket *sock = NULL;
7001+ struct list_head *addr_list;
7002+ struct connection *con = nodeid2con(0);
7003+ struct cluster_node_addr *node_addr;
7004+ char local_addr[sizeof(struct sockaddr_in6)];
7005+
7006+ /* This will also fill in local_addr */
7007+ nodeid = lowcomms_our_nodeid();
7008+
7009+ addr_list = kcl_get_node_addresses(nodeid);
7010+ if (!addr_list) {
7011+ printk("dlm: cannot initialise comms layer\n");
7012+ result = -ENOTCONN;
7013+ goto create_out;
7014+ }
7015+
7016+ list_for_each_entry(node_addr, addr_list, list) {
7017+
7018+ if (!con) {
7019+ con = kmalloc(sizeof(struct connection), GFP_KERNEL);
7020+ if (!con) {
7021+ printk("dlm: failed to allocate listen socket\n");
7022+ goto create_out;
7023+ }
7024+ memset(con, 0, sizeof(*con));
7025+ init_rwsem(&con->sock_sem);
7026+ spin_lock_init(&con->writequeue_lock);
7027+ INIT_LIST_HEAD(&con->writequeue);
7028+ set_bit(CF_IS_OTHERSOCK, &con->flags);
7029+ }
7030+
7031+ memcpy(local_addr, node_addr->addr, node_addr->addr_len);
7032+ sock = create_listen_sock(con, local_addr,
7033+ node_addr->addr_len);
7034+ if (sock) {
7035+ add_sock(sock, con);
7036+ }
7037+ else {
7038+ kfree(con);
7039+ }
7040+
7041+ /* Keep a list of dynamically allocated listening sockets
7042+ so we can free them at shutdown */
7043+ if (test_bit(CF_IS_OTHERSOCK, &con->flags)) {
7044+ list_add_tail(&con->listenlist, &listen_sockets);
7045+ }
7046+ con = NULL;
7047+ }
7048+
7049+ create_out:
7050+ return result;
7051+}
7052+
7053+
7054+
7055+static struct writequeue_entry *new_writequeue_entry(struct connection *con,
7056+ int allocation)
7057+{
7058+ struct writequeue_entry *entry;
7059+
7060+ entry = kmalloc(sizeof(struct writequeue_entry), allocation);
7061+ if (!entry)
7062+ return NULL;
7063+
7064+ entry->page = alloc_page(allocation);
7065+ if (!entry->page) {
7066+ kfree(entry);
7067+ return NULL;
7068+ }
7069+
7070+ entry->offset = 0;
7071+ entry->len = 0;
7072+ entry->end = 0;
7073+ entry->users = 0;
7074+ entry->con = con;
7075+
7076+ return entry;
7077+}
7078+
7079+struct writequeue_entry *lowcomms_get_buffer(int nodeid, int len,
7080+ int allocation, char **ppc)
7081+{
7082+ struct connection *con = nodeid2con(nodeid);
7083+ struct writequeue_entry *e;
7084+ int offset = 0;
7085+ int users = 0;
7086+
7087+ if (!atomic_read(&accepting))
7088+ return NULL;
7089+
7090+ spin_lock(&con->writequeue_lock);
7091+ e = list_entry(con->writequeue.prev, struct writequeue_entry, list);
7092+ if (((struct list_head *) e == &con->writequeue) ||
7093+ (PAGE_CACHE_SIZE - e->end < len)) {
7094+ e = NULL;
7095+ } else {
7096+ offset = e->end;
7097+ e->end += len;
7098+ users = e->users++;
7099+ }
7100+ spin_unlock(&con->writequeue_lock);
7101+
7102+ if (e) {
7103+ got_one:
7104+ if (users == 0)
7105+ kmap(e->page);
7106+ *ppc = page_address(e->page) + offset;
7107+ return e;
7108+ }
7109+
7110+ e = new_writequeue_entry(con, allocation);
7111+ if (e) {
7112+ spin_lock(&con->writequeue_lock);
7113+ offset = e->end;
7114+ e->end += len;
7115+ users = e->users++;
7116+ list_add_tail(&e->list, &con->writequeue);
7117+ spin_unlock(&con->writequeue_lock);
7118+ atomic_inc(&writequeue_length);
7119+ goto got_one;
7120+ }
7121+ return NULL;
7122+}
7123+
7124+void lowcomms_commit_buffer(struct writequeue_entry *e)
7125+{
7126+ struct connection *con = e->con;
7127+ int users;
7128+
7129+ if (!atomic_read(&accepting))
7130+ return;
7131+
7132+ spin_lock(&con->writequeue_lock);
7133+ users = --e->users;
7134+ if (users)
7135+ goto out;
7136+ e->len = e->end - e->offset;
7137+ kunmap(e->page);
7138+ spin_unlock(&con->writequeue_lock);
7139+
7140+ if (test_and_set_bit(CF_WRITE_PENDING, &con->flags) == 0) {
7141+ spin_lock_bh(&write_sockets_lock);
7142+ list_add_tail(&con->write_list, &write_sockets);
7143+ spin_unlock_bh(&write_sockets_lock);
7144+
7145+ wake_up_interruptible(&lowcomms_send_waitq);
7146+ }
7147+ return;
7148+
7149+ out:
7150+ spin_unlock(&con->writequeue_lock);
7151+ return;
7152+}
7153+
7154+static void free_entry(struct writequeue_entry *e)
7155+{
7156+ __free_page(e->page);
7157+ kfree(e);
7158+ atomic_dec(&writequeue_length);
7159+}
7160+
7161+/* Send a message */
7162+static int send_to_sock(struct connection *con)
7163+{
7164+ int ret = 0;
7165+ ssize_t(*sendpage) (struct socket *, struct page *, int, size_t, int);
7166+ const int msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL;
7167+ struct writequeue_entry *e;
7168+ int len, offset;
7169+
7170+ down_read(&con->sock_sem);
7171+ if (con->sock == NULL)
7172+ goto out_connect;
7173+
7174+ sendpage = con->sock->ops->sendpage;
7175+
7176+ spin_lock(&con->writequeue_lock);
7177+ for (;;) {
7178+ e = list_entry(con->writequeue.next, struct writequeue_entry,
7179+ list);
7180+ if ((struct list_head *) e == &con->writequeue)
7181+ break;
7182+
7183+ len = e->len;
7184+ offset = e->offset;
7185+ BUG_ON(len == 0 && e->users == 0);
7186+ spin_unlock(&con->writequeue_lock);
7187+
7188+ ret = 0;
7189+ if (len) {
7190+ ret = sendpage(con->sock, e->page, offset, len,
7191+ msg_flags);
7192+ if (ret == -EAGAIN || ret == 0)
7193+ goto out;
7194+ if (ret <= 0)
7195+ goto send_error;
7196+ }
7197+
7198+ spin_lock(&con->writequeue_lock);
7199+ e->offset += ret;
7200+ e->len -= ret;
7201+
7202+ if (e->len == 0 && e->users == 0) {
7203+ list_del(&e->list);
7204+ free_entry(e);
7205+ continue;
7206+ }
7207+ }
7208+ spin_unlock(&con->writequeue_lock);
7209+ out:
7210+ up_read(&con->sock_sem);
7211+ return ret;
7212+
7213+ send_error:
7214+ up_read(&con->sock_sem);
7215+ close_connection(con);
7216+ lowcomms_connect_sock(con);
7217+ return ret;
7218+
7219+ out_connect:
7220+ up_read(&con->sock_sem);
7221+ lowcomms_connect_sock(con);
7222+ return 0;
7223+}
7224+
7225+/* Called from recoverd when it knows that a node has
7226+ left the cluster */
7227+int lowcomms_close(int nodeid)
7228+{
7229+ struct connection *con;
7230+
7231+ if (!connections)
7232+ goto out;
7233+
7234+ con = nodeid2con(nodeid);
7235+ if (con->sock) {
7236+ close_connection(con);
7237+ return 0;
7238+ }
7239+
7240+ out:
7241+ return -1;
7242+}
7243+
7244+/* API send message call, may queue the request */
7245+/* N.B. This is the old interface - use the new one for new calls */
7246+int lowcomms_send_message(int nodeid, char *buf, int len, int allocation)
7247+{
7248+ struct writequeue_entry *e;
7249+ char *b;
7250+
10d56c87 7251+ DLM_ASSERT(nodeid < dlm_config.max_connections,
4bf12011 7252+ printk("nodeid=%u\n", nodeid););
7253+
7254+ e = lowcomms_get_buffer(nodeid, len, allocation, &b);
7255+ if (e) {
7256+ memcpy(b, buf, len);
7257+ lowcomms_commit_buffer(e);
7258+ return 0;
7259+ }
7260+ return -ENOBUFS;
7261+}
7262+
7263+/* Look for activity on active sockets */
7264+static void process_sockets(void)
7265+{
7266+ struct list_head *list;
7267+ struct list_head *temp;
7268+
7269+ spin_lock_bh(&read_sockets_lock);
7270+ list_for_each_safe(list, temp, &read_sockets) {
7271+ struct connection *con =
7272+ list_entry(list, struct connection, read_list);
7273+ list_del(&con->read_list);
7274+ clear_bit(CF_READ_PENDING, &con->flags);
7275+
7276+ spin_unlock_bh(&read_sockets_lock);
7277+
7278+ con->rx_action(con);
7279+
7280+ /* Don't starve out everyone else */
7281+ schedule();
7282+ spin_lock_bh(&read_sockets_lock);
7283+ }
7284+ spin_unlock_bh(&read_sockets_lock);
7285+}
7286+
7287+/* Try to send any messages that are pending
7288+ */
7289+static void process_output_queue(void)
7290+{
7291+ struct list_head *list;
7292+ struct list_head *temp;
7293+ int ret;
7294+
7295+ spin_lock_bh(&write_sockets_lock);
7296+ list_for_each_safe(list, temp, &write_sockets) {
7297+ struct connection *con =
7298+ list_entry(list, struct connection, write_list);
7299+ list_del(&con->write_list);
7300+ clear_bit(CF_WRITE_PENDING, &con->flags);
7301+
7302+ spin_unlock_bh(&write_sockets_lock);
7303+
7304+ ret = send_to_sock(con);
7305+ if (ret < 0) {
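+ /* nothing to do on error: send_to_sock() has already
+ closed the connection and started a reconnect */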
7306+ }
7307+ spin_lock_bh(&write_sockets_lock);
7308+ }
7309+ spin_unlock_bh(&write_sockets_lock);
7310+}
7311+
7312+static void process_state_queue(void)
7313+{
7314+ struct list_head *list;
7315+ struct list_head *temp;
7316+ int ret;
7317+
7318+ spin_lock_bh(&state_sockets_lock);
7319+ list_for_each_safe(list, temp, &state_sockets) {
7320+ struct connection *con =
7321+ list_entry(list, struct connection, state_list);
7322+ list_del(&con->state_list);
7323+ clear_bit(CF_CONNECT_PENDING, &con->flags);
7324+ spin_unlock_bh(&state_sockets_lock);
7325+
7326+ ret = connect_to_sock(con);
7327+ if (ret < 0) {
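+ /* connect errors are deliberately ignored here */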
7328+ }
7329+ spin_lock_bh(&state_sockets_lock);
7330+ }
7331+ spin_unlock_bh(&state_sockets_lock);
7332+}
7333+
7334+/* Discard all entries on the write queues */
7335+static void clean_writequeues(void)
7336+{
7337+ struct list_head *list;
7338+ struct list_head *temp;
7339+ int nodeid;
7340+
7341+ for (nodeid = 1; nodeid < dlm_config.max_connections; nodeid++) {
7342+ struct connection *con = nodeid2con(nodeid);
7343+
7344+ spin_lock(&con->writequeue_lock);
7345+ list_for_each_safe(list, temp, &con->writequeue) {
7346+ struct writequeue_entry *e =
7347+ list_entry(list, struct writequeue_entry, list);
7348+ list_del(&e->list);
7349+ free_entry(e);
7350+ }
7351+ spin_unlock(&con->writequeue_lock);
7352+ }
7353+}
7354+
7355+static int read_list_empty(void)
7356+{
7357+ int status;
7358+
7359+ spin_lock_bh(&read_sockets_lock);
7360+ status = list_empty(&read_sockets);
7361+ spin_unlock_bh(&read_sockets_lock);
7362+
7363+ return status;
7364+}
7365+
7366+/* DLM Transport comms receive daemon */
7367+static int dlm_recvd(void *data)
7368+{
7369+ daemonize("dlm_recvd");
7370+ atomic_set(&recv_run, 1);
7371+
7372+ init_waitqueue_head(&lowcomms_recv_waitq);
7373+ init_waitqueue_entry(&lowcomms_recv_waitq_head, current);
7374+ add_wait_queue(&lowcomms_recv_waitq, &lowcomms_recv_waitq_head);
7375+
7376+ complete(&thread_completion);
7377+
7378+ while (atomic_read(&recv_run)) {
7379+
7380+ set_task_state(current, TASK_INTERRUPTIBLE);
7381+
7382+ if (read_list_empty())
7383+ schedule();
7384+
7385+ set_task_state(current, TASK_RUNNING);
7386+
7387+ process_sockets();
7388+ }
7389+
7390+ down(&thread_lock);
7391+ up(&thread_lock);
7392+
7393+ complete(&thread_completion);
7394+
7395+ return 0;
7396+}
7397+
7398+static int write_and_state_lists_empty(void)
7399+{
7400+ int status;
7401+
7402+ spin_lock_bh(&write_sockets_lock);
7403+ status = list_empty(&write_sockets);
7404+ spin_unlock_bh(&write_sockets_lock);
7405+
7406+ spin_lock_bh(&state_sockets_lock);
7407+ if (list_empty(&state_sockets) == 0)
7408+ status = 0;
7409+ spin_unlock_bh(&state_sockets_lock);
7410+
7411+ return status;
7412+}
7413+
7414+/* DLM Transport send daemon */
7415+static int dlm_sendd(void *data)
7416+{
7417+ daemonize("dlm_sendd");
7418+ atomic_set(&send_run, 1);
7419+
7420+ init_waitqueue_head(&lowcomms_send_waitq);
7421+ init_waitqueue_entry(&lowcomms_send_waitq_head, current);
7422+ add_wait_queue(&lowcomms_send_waitq, &lowcomms_send_waitq_head);
7423+
7424+ complete(&thread_completion);
7425+
7426+ while (atomic_read(&send_run)) {
7427+
7428+ set_task_state(current, TASK_INTERRUPTIBLE);
7429+
7430+ if (write_and_state_lists_empty())
7431+ schedule();
7432+
7433+ set_task_state(current, TASK_RUNNING);
7434+
7435+ process_state_queue();
7436+ process_output_queue();
7437+ }
7438+
7439+ down(&thread_lock);
7440+ up(&thread_lock);
7441+
7442+ complete(&thread_completion);
7443+
7444+ return 0;
7445+}
7446+
7447+static void daemons_stop(void)
7448+{
7449+ if (atomic_read(&recv_run)) {
7450+ down(&thread_lock);
7451+ atomic_set(&recv_run, 0);
7452+ wake_up_interruptible(&lowcomms_recv_waitq);
7453+ up(&thread_lock);
7454+ wait_for_completion(&thread_completion);
7455+ }
7456+
7457+ if (atomic_read(&send_run)) {
7458+ down(&thread_lock);
7459+ atomic_set(&send_run, 0);
7460+ wake_up_interruptible(&lowcomms_send_waitq);
7461+ up(&thread_lock);
7462+ wait_for_completion(&thread_completion);
7463+ }
7464+}
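+/*
+ * A note on the shutdown ordering above: daemons_stop() clears the
+ * run flag and wakes the daemon while holding thread_lock. The
+ * daemon's down()/up() pair on thread_lock after its loop means it
+ * cannot signal thread_completion until daemons_stop() has released
+ * the lock, so wait_for_completion() returns only after the daemon
+ * has genuinely left its work loop.
+ */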
7465+
7466+static int daemons_start(void)
7467+{
7468+ int error;
7469+
7470+ error = kernel_thread(dlm_recvd, NULL, 0);
7471+ if (error < 0) {
7472+ log_print("can't start recvd thread: %d", error);
7473+ goto out;
7474+ }
7475+ wait_for_completion(&thread_completion);
7476+
7477+ error = kernel_thread(dlm_sendd, NULL, 0);
7478+ if (error < 0) {
7479+ log_print("can't start sendd thread: %d", error);
7480+ daemons_stop();
7481+ goto out;
7482+ }
7483+ wait_for_completion(&thread_completion);
7484+
7485+ error = 0;
7486+ out:
7487+ return error;
7488+}
7489+
7490+/*
7491+ * Return the largest buffer size we can cope with.
7492+ */
7493+int lowcomms_max_buffer_size(void)
7494+{
7495+ return PAGE_CACHE_SIZE;
7496+}
7497+
7498+void lowcomms_stop(void)
7499+{
7500+ int i;
7501+ struct connection *temp;
7502+ struct connection *lcon;
7503+
7504+ atomic_set(&accepting, 0);
7505+
7506+ /* Set all the activity flags to prevent any
7507+ socket activity.
7508+ */
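+ /* (0x7 presumably sets the three CF_*_PENDING bits) */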
7509+ for (i = 0; i < conn_array_size; i++) {
7510+ connections[i].flags = 0x7;
7511+ }
7512+ daemons_stop();
7513+ clean_writequeues();
7514+
7515+ for (i = 0; i < conn_array_size; i++) {
7516+ close_connection(nodeid2con(i));
7517+ }
7518+
7519+ kfree(connections);
7520+ connections = NULL;
7521+
7522+ /* Free up any dynamically allocated listening sockets */
7523+ list_for_each_entry_safe(lcon, temp, &listen_sockets, listenlist) {
7524+ sock_release(lcon->sock);
7525+ kfree(lcon);
7526+ }
7527+
7528+ kcl_releaseref_cluster();
7529+}
7530+
7531+/* This is quite likely to sleep... */
7532+int lowcomms_start(void)
7533+{
7534+ int error = 0;
7535+ int i;
7536+
7537+ INIT_LIST_HEAD(&read_sockets);
7538+ INIT_LIST_HEAD(&write_sockets);
7539+ INIT_LIST_HEAD(&state_sockets);
7540+ INIT_LIST_HEAD(&listen_sockets);
7541+
7542+ spin_lock_init(&read_sockets_lock);
7543+ spin_lock_init(&write_sockets_lock);
7544+ spin_lock_init(&state_sockets_lock);
7545+
7546+ init_completion(&thread_completion);
7547+ init_MUTEX(&thread_lock);
7548+ atomic_set(&send_run, 0);
7549+ atomic_set(&recv_run, 0);
7550+
7551+ error = -ENOTCONN;
7552+ if (kcl_addref_cluster())
7553+ goto out;
7554+
7555+ /*
7556+ * Temporarily initialise the waitq head so that lowcomms_send_message
7557+ * doesn't crash if it gets called before the thread is fully
7558+ * initialised
7559+ */
7560+ init_waitqueue_head(&lowcomms_send_waitq);
7561+
7562+ error = -ENOMEM;
7563+
7564+ connections = kmalloc(sizeof(struct connection) *
7565+ dlm_config.max_connections, GFP_KERNEL);
7566+ if (!connections)
7567+ goto out;
7568+
7569+ memset(connections, 0,
7570+ sizeof(struct connection) * dlm_config.max_connections);
7571+ for (i = 0; i < dlm_config.max_connections; i++) {
7572+ connections[i].nodeid = i;
7573+ init_rwsem(&connections[i].sock_sem);
7574+ INIT_LIST_HEAD(&connections[i].writequeue);
7575+ spin_lock_init(&connections[i].writequeue_lock);
7576+ }
7577+ conn_array_size = dlm_config.max_connections;
7578+
7579+ /* Start listening */
7580+ error = listen_for_all();
7581+ if (error)
7582+ goto fail_free_conn;
7583+
7584+ error = daemons_start();
7585+ if (error)
7586+ goto fail_free_conn;
7587+
7588+ atomic_set(&accepting, 1);
7589+
7590+ return 0;
7591+
7592+ fail_free_conn:
7593+ kfree(connections);
7594+
7595+ out:
7596+ return error;
7597+}
7598+
7599+/* Don't accept any more outgoing work */
7600+void lowcomms_stop_accept(void)
7601+{
7602+ atomic_set(&accepting, 0);
7603+}
7604+
7605+/* Cluster Manager interface functions for looking up
7606+ nodeids and IP addresses by each other
7607+*/
7608+
7609+/* Return the IP address of a node given its NODEID */
7610+static int lowcomms_ipaddr_from_nodeid(int nodeid, struct sockaddr *retaddr)
7611+{
7612+ struct list_head *addrs;
7613+ struct cluster_node_addr *node_addr;
7614+ struct cluster_node_addr *current_addr = NULL;
7615+ struct sockaddr_in6 *saddr;
7616+ int interface;
7617+ int i;
7618+
7619+ addrs = kcl_get_node_addresses(nodeid);
7620+ if (!addrs)
7621+ return -1;
7622+
7623+ interface = kcl_get_current_interface();
7624+
7625+ /* Look for address number <interface> */
7626+ i=0; /* i/f numbers start at 1 */
7627+ list_for_each_entry(node_addr, addrs, list) {
7628+ if (interface == ++i) {
7629+ current_addr = node_addr;
7630+ break;
7631+ }
7632+ }
7633+
7634+ /* If that failed then just use the first one */
7635+ if (!current_addr)
7636+ current_addr = (struct cluster_node_addr *)addrs->next;
7637+
7638+ saddr = (struct sockaddr_in6 *)current_addr->addr;
7639+
7640+ /* Extract the IP address */
7641+ if (saddr->sin6_family == AF_INET) {
7642+ struct sockaddr_in *in4 = (struct sockaddr_in *)saddr;
7643+ struct sockaddr_in *ret4 = (struct sockaddr_in *)retaddr;
7644+ ret4->sin_addr.s_addr = in4->sin_addr.s_addr;
7645+ }
7646+ else {
7647+ struct sockaddr_in6 *ret6 = (struct sockaddr_in6 *)retaddr;
7648+ memcpy(&ret6->sin6_addr, &saddr->sin6_addr, sizeof(saddr->sin6_addr));
7649+ }
7650+
7651+ return 0;
7652+}
7653+
7654+/* Return the NODEID for a node given its sockaddr */
7655+static int lowcomms_nodeid_from_ipaddr(struct sockaddr *addr, int addr_len)
7656+{
7657+ struct kcl_cluster_node node;
7658+ struct sockaddr_in6 ipv6_addr;
7659+ struct sockaddr_in ipv4_addr;
7660+
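+ /* Build the lookup key from the "template" local address saved by
+ lowcomms_our_nodeid(), substituting only the peer's IP so the
+ remaining sockaddr fields match what the cluster manager holds */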
7661+ if (addr->sa_family == AF_INET) {
7662+ struct sockaddr_in *in4 = (struct sockaddr_in *)addr;
7663+ memcpy(&ipv4_addr, &local_addr, addr_len);
7664+ memcpy(&ipv4_addr.sin_addr, &in4->sin_addr, sizeof(ipv4_addr.sin_addr));
7665+
7666+ addr = (struct sockaddr *)&ipv4_addr;
7667+ }
7668+ else {
7669+ struct sockaddr_in6 *in6 = (struct sockaddr_in6 *)addr;
7670+ memcpy(&ipv6_addr, &local_addr, addr_len);
7671+ memcpy(&ipv6_addr.sin6_addr, &in6->sin6_addr, sizeof(ipv6_addr.sin6_addr));
7672+
7673+ addr = (struct sockaddr *)&ipv6_addr;
7674+ }
7675+
7676+ if (kcl_get_node_by_addr((char *)addr, addr_len, &node) == 0)
7677+ return node.node_id;
7678+ else
7679+ return 0;
7680+}
7681+
7682+int lowcomms_our_nodeid(void)
7683+{
7684+ struct kcl_cluster_node node;
7685+ struct list_head *addrs;
7686+ struct cluster_node_addr *first_addr;
7687+ static int our_nodeid = 0;
7688+
7689+ if (our_nodeid)
7690+ return our_nodeid;
7691+
7692+ if (kcl_get_node_by_nodeid(0, &node) == -1)
7693+ return 0;
7694+
7695+ our_nodeid = node.node_id;
7696+
7697+ /* Fill in the "template" structure */
7698+ addrs = kcl_get_node_addresses(our_nodeid);
7699+ if (!addrs)
7700+ return 0;
7701+
7702+ first_addr = (struct cluster_node_addr *) addrs->next;
7703+ memcpy(&local_addr, &first_addr->addr, first_addr->addr_len);
7704+
7705+ return node.node_id;
7706+}
7707+/*
7708+ * Overrides for Emacs so that we follow Linus's tabbing style.
7709+ * Emacs will notice this stuff at the end of the file and automatically
7710+ * adjust the settings for this buffer only. This must remain at the end
7711+ * of the file.
7712+ * ---------------------------------------------------------------------------
7713+ * Local variables:
7714+ * c-file-style: "linux"
7715+ * End:
7716+ */
7717diff -urN linux-orig/cluster/dlm/lowcomms.h linux-patched/cluster/dlm/lowcomms.h
7718--- linux-orig/cluster/dlm/lowcomms.h 1970-01-01 07:30:00.000000000 +0730
10d56c87 7719+++ linux-patched/cluster/dlm/lowcomms.h 2004-07-13 18:57:22.000000000 +0800
4bf12011 7720@@ -0,0 +1,34 @@
7721+/******************************************************************************
7722+*******************************************************************************
7723+**
7724+** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
7725+** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
7726+**
7727+** This copyrighted material is made available to anyone wishing to use,
7728+** modify, copy, or redistribute it subject to the terms and conditions
7729+** of the GNU General Public License v.2.
7730+**
7731+*******************************************************************************
7732+******************************************************************************/
7733+
7734+#ifndef __LOWCOMMS_DOT_H__
7735+#define __LOWCOMMS_DOT_H__
7736+
7737+/* The old interface */
7738+int lowcomms_send_message(int csid, char *buf, int len, int allocation);
7739+
7740+/* The new interface */
7741+struct writequeue_entry;
7742+extern struct writequeue_entry *lowcomms_get_buffer(int nodeid, int len,
7743+ int allocation, char **ppc);
7744+extern void lowcomms_commit_buffer(struct writequeue_entry *e);
7745+
7746+int lowcomms_start(void);
7747+void lowcomms_stop(void);
7748+void lowcomms_stop_accept(void);
7749+int lowcomms_close(int nodeid);
7750+int lowcomms_max_buffer_size(void);
7751+
7752+int lowcomms_our_nodeid(void);
7753+
7754+#endif /* __LOWCOMMS_DOT_H__ */
7755diff -urN linux-orig/cluster/dlm/main.c linux-patched/cluster/dlm/main.c
7756--- linux-orig/cluster/dlm/main.c 1970-01-01 07:30:00.000000000 +0730
10d56c87 7757+++ linux-patched/cluster/dlm/main.c 2004-07-13 18:57:22.000000000 +0800
4bf12011 7758@@ -0,0 +1,98 @@
7759+/******************************************************************************
7760+*******************************************************************************
7761+**
7762+** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
7763+** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
7764+**
7765+** This copyrighted material is made available to anyone wishing to use,
7766+** modify, copy, or redistribute it subject to the terms and conditions
7767+** of the GNU General Public License v.2.
7768+**
7769+*******************************************************************************
7770+******************************************************************************/
7771+
7772+#define EXPORT_SYMTAB
7773+
7774+#include <linux/init.h>
7775+#include <linux/proc_fs.h>
7776+#include <linux/ctype.h>
4bf12011 7777+#include <linux/module.h>
7778+#include <net/sock.h>
7779+
7780+#include <cluster/cnxman.h>
7781+
7782+#include "dlm_internal.h"
7783+#include "lockspace.h"
7784+#include "recoverd.h"
7785+#include "ast.h"
7786+#include "lkb.h"
7787+#include "nodes.h"
7788+#include "locking.h"
7789+#include "config.h"
7790+#include "memory.h"
7791+#include "recover.h"
7792+#include "lowcomms.h"
7793+
7794+int dlm_device_init(void);
7795+void dlm_device_exit(void);
7796+void dlm_proc_init(void);
7797+void dlm_proc_exit(void);
7798+
7799+
7800+/* Cluster manager callbacks, we want to know if a node dies
7801+ N.B. this is independent of lockspace-specific event callbacks from SM */
7802+
7803+static void cman_callback(kcl_callback_reason reason, long arg)
7804+{
7805+ if (reason == DIED) {
7806+ lowcomms_close((int) arg);
7807+ }
7808+
7809+ /* This is unconditional. so do what we can to tidy up */
7810+ if (reason == LEAVING) {
7811+ dlm_emergency_shutdown();
7812+ }
7813+}
7814+
7815+int __init init_dlm(void)
7816+{
7817+ dlm_proc_init();
7818+ dlm_lockspace_init();
7819+ dlm_recoverd_init();
7820+ dlm_nodes_init();
7821+ dlm_device_init();
7822+ dlm_memory_init();
7823+ dlm_config_init();
7824+
7825+ kcl_add_callback(cman_callback);
7826+
7827+ printk("DLM %s (built %s %s) installed\n",
7828+ DLM_RELEASE_NAME, __DATE__, __TIME__);
7829+
7830+ return 0;
7831+}
7832+
7833+void __exit exit_dlm(void)
7834+{
7835+ kcl_remove_callback(cman_callback);
7836+
7837+ dlm_device_exit();
7838+ dlm_memory_exit();
7839+ dlm_config_exit();
7840+ dlm_proc_exit();
7841+}
7842+
7843+MODULE_DESCRIPTION("Distributed Lock Manager " DLM_RELEASE_NAME);
7844+MODULE_AUTHOR("Red Hat, Inc.");
7845+MODULE_LICENSE("GPL");
7846+
7847+module_init(init_dlm);
7848+module_exit(exit_dlm);
7849+
7850+EXPORT_SYMBOL(dlm_init);
7851+EXPORT_SYMBOL(dlm_release);
7852+EXPORT_SYMBOL(dlm_new_lockspace);
7853+EXPORT_SYMBOL(dlm_release_lockspace);
7854+EXPORT_SYMBOL(dlm_lock);
7855+EXPORT_SYMBOL(dlm_unlock);
10d56c87 7856+EXPORT_SYMBOL(dlm_debug_dump);
4bf12011 7857diff -urN linux-orig/cluster/dlm/memory.c linux-patched/cluster/dlm/memory.c
7858--- linux-orig/cluster/dlm/memory.c 1970-01-01 07:30:00.000000000 +0730
10d56c87 7859+++ linux-patched/cluster/dlm/memory.c 2004-07-13 18:57:22.000000000 +0800
4bf12011 7860@@ -0,0 +1,238 @@
7861+/******************************************************************************
7862+*******************************************************************************
7863+**
7864+** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
7865+** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
7866+**
7867+** This copyrighted material is made available to anyone wishing to use,
7868+** modify, copy, or redistribute it subject to the terms and conditions
7869+** of the GNU General Public License v.2.
7870+**
7871+*******************************************************************************
7872+******************************************************************************/
7873+
7874+/* memory.c
7875+ *
7876+ * memory allocation routines
7877+ *
7878+ */
7879+
7880+#include "dlm_internal.h"
7881+#include "memory.h"
7882+#include "config.h"
7883+
7884+/* as the man says...Shouldn't this be in a header file somewhere? */
7885+#define BYTES_PER_WORD sizeof(void *)
7886+
7887+static kmem_cache_t *rsb_cache_small;
7888+static kmem_cache_t *rsb_cache_large;
7889+static kmem_cache_t *lkb_cache;
7890+static kmem_cache_t *lvb_cache;
7891+static kmem_cache_t *resdir_cache_large;
7892+static kmem_cache_t *resdir_cache_small;
7893+
7894+/* The thresholds above which we allocate large RSBs/resdatas rather than small
7895+ * ones. This must make the resultant structure end on a word boundary */
7896+#define LARGE_RSB_NAME 28
7897+#define LARGE_RES_NAME 28
7898+
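+/*
+ * Worked example of the rounding used for the small caches below:
+ * (size + BYTES_PER_WORD-1) & ~(BYTES_PER_WORD-1) rounds size up to
+ * a whole number of machine words, e.g. with 8-byte words a 29-byte
+ * object becomes 32 bytes ((29 + 7) & ~7 == 32).
+ */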
7899+int dlm_memory_init()
7900+{
7901+ int ret = -ENOMEM;
7902+
7903+
7904+ rsb_cache_small =
7905+ kmem_cache_create("dlm_rsb(small)",
7906+ (sizeof(struct dlm_rsb) + LARGE_RSB_NAME + BYTES_PER_WORD-1) & ~(BYTES_PER_WORD-1),
7907+ __alignof__(struct dlm_rsb), 0, NULL, NULL);
4bf12011 7908+ if (!rsb_cache_small)
7909+ goto out;
7910+
7911+ rsb_cache_large =
7912+ kmem_cache_create("dlm_rsb(large)",
7913+ sizeof(struct dlm_rsb) + DLM_RESNAME_MAXLEN,
7914+ __alignof__(struct dlm_rsb), 0, NULL, NULL);
4bf12011 7915+ if (!rsb_cache_large)
7916+ goto out_free_rsbs;
7917+
7918+ lkb_cache = kmem_cache_create("dlm_lkb", sizeof(struct dlm_lkb),
7919+ __alignof__(struct dlm_lkb), 0, NULL, NULL);
4bf12011 7920+ if (!lkb_cache)
7921+ goto out_free_rsbl;
7922+
7923+ resdir_cache_large =
7924+ kmem_cache_create("dlm_resdir(l)",
7925+ sizeof(struct dlm_direntry) + DLM_RESNAME_MAXLEN,
7926+ __alignof__(struct dlm_direntry), 0, NULL, NULL);
4bf12011 7927+ if (!resdir_cache_large)
7928+ goto out_free_lkb;
7929+
7930+ resdir_cache_small =
7931+ kmem_cache_create("dlm_resdir(s)",
7932+ (sizeof(struct dlm_direntry) + LARGE_RES_NAME + BYTES_PER_WORD-1) & ~(BYTES_PER_WORD-1),
7933+ __alignof__(struct dlm_direntry), 0, NULL, NULL);
4bf12011 7934+ if (!resdir_cache_small)
7935+ goto out_free_resl;
7936+
7937+ /* LVB cache also holds ranges, so should be 64bit aligned */
7938+ lvb_cache = kmem_cache_create("dlm_lvb/range", DLM_LVB_LEN,
7939+ __alignof__(uint64_t), 0, NULL, NULL);
7940+ if (!lvb_cache)
7941+ goto out_free_ress;
7942+
7943+ ret = 0;
7944+ goto out;
7945+
7946+ out_free_ress:
7947+ kmem_cache_destroy(resdir_cache_small);
7948+
7949+ out_free_resl:
7950+ kmem_cache_destroy(resdir_cache_large);
7951+
7952+ out_free_lkb:
7953+ kmem_cache_destroy(lkb_cache);
7954+
7955+ out_free_rsbl:
7956+ kmem_cache_destroy(rsb_cache_large);
7957+
7958+ out_free_rsbs:
7959+ kmem_cache_destroy(rsb_cache_small);
7960+
7961+ out:
7962+ return ret;
7963+}
7964+
7965+void dlm_memory_exit()
7966+{
7967+ kmem_cache_destroy(rsb_cache_large);
7968+ kmem_cache_destroy(rsb_cache_small);
7969+ kmem_cache_destroy(lkb_cache);
7970+ kmem_cache_destroy(resdir_cache_small);
7971+ kmem_cache_destroy(resdir_cache_large);
7972+ kmem_cache_destroy(lvb_cache);
7973+}
7974+
10d56c87 7975+struct dlm_rsb *allocate_rsb(struct dlm_ls *ls, int namelen)
4bf12011 7976+{
10d56c87 7977+ struct dlm_rsb *r;
4bf12011 7978+
10d56c87 7979+ DLM_ASSERT(namelen <= DLM_RESNAME_MAXLEN,);
4bf12011 7980+
7981+ if (namelen >= LARGE_RSB_NAME)
7982+ r = kmem_cache_alloc(rsb_cache_large, ls->ls_allocation);
7983+ else
7984+ r = kmem_cache_alloc(rsb_cache_small, ls->ls_allocation);
7985+
7986+ if (r)
10d56c87 7987+ memset(r, 0, sizeof(struct dlm_rsb) + namelen);
4bf12011 7988+
7989+ return r;
7990+}
7991+
10d56c87 7992+void free_rsb(struct dlm_rsb *r)
4bf12011 7993+{
7994+ int length = r->res_length;
7995+
7996+#ifdef POISON
10d56c87 7997+ memset(r, 0x55, sizeof(struct dlm_rsb) + r->res_length);
4bf12011 7998+#endif
7999+
8000+ if (length >= LARGE_RSB_NAME)
8001+ kmem_cache_free(rsb_cache_large, r);
8002+ else
8003+ kmem_cache_free(rsb_cache_small, r);
8004+}
8005+
10d56c87 8006+struct dlm_lkb *allocate_lkb(struct dlm_ls *ls)
4bf12011 8007+{
10d56c87 8008+ struct dlm_lkb *l;
4bf12011 8009+
8010+ l = kmem_cache_alloc(lkb_cache, ls->ls_allocation);
8011+ if (l)
10d56c87 8012+ memset(l, 0, sizeof(struct dlm_lkb));
4bf12011 8013+
8014+ return l;
8015+}
8016+
10d56c87 8017+void free_lkb(struct dlm_lkb *l)
4bf12011 8018+{
8019+#ifdef POISON
10d56c87 8020+ memset(l, 0xAA, sizeof(struct dlm_lkb));
4bf12011 8021+#endif
8022+ kmem_cache_free(lkb_cache, l);
8023+}
8024+
10d56c87 8025+struct dlm_direntry *allocate_resdata(struct dlm_ls *ls, int namelen)
4bf12011 8026+{
10d56c87 8027+ struct dlm_direntry *rd;
4bf12011 8028+
10d56c87 8029+ DLM_ASSERT(namelen <= DLM_RESNAME_MAXLEN,);
4bf12011 8030+
8031+ if (namelen >= LARGE_RES_NAME)
8032+ rd = kmem_cache_alloc(resdir_cache_large, ls->ls_allocation);
8033+ else
8034+ rd = kmem_cache_alloc(resdir_cache_small, ls->ls_allocation);
8035+
8036+ if (rd)
10d56c87 8037+ memset(rd, 0, sizeof(struct dlm_direntry));
4bf12011 8038+
8039+ return rd;
8040+}
8041+
10d56c87 8042+void free_resdata(struct dlm_direntry *de)
4bf12011 8043+{
8044+ if (de->length >= LARGE_RES_NAME)
8045+ kmem_cache_free(resdir_cache_large, de);
4bf12011 8046+ else
10d56c87 8047+ kmem_cache_free(resdir_cache_small, de);
4bf12011 8048+}
8049+
10d56c87 8050+char *allocate_lvb(struct dlm_ls *ls)
4bf12011 8051+{
8052+ char *l;
8053+
8054+ l = kmem_cache_alloc(lvb_cache, ls->ls_allocation);
8055+ if (l)
8056+ memset(l, 0, DLM_LVB_LEN);
8057+
8058+ return l;
8059+}
8060+
8061+void free_lvb(char *l)
8062+{
8063+ kmem_cache_free(lvb_cache, l);
8064+}
8065+
8066+/* Ranges are allocated from the LVB cache as they are the same size (4x64
8067+ * bits) */
10d56c87 8068+uint64_t *allocate_range(struct dlm_ls * ls)
4bf12011 8069+{
8070+ uint64_t *l;
8071+
8072+ l = kmem_cache_alloc(lvb_cache, ls->ls_allocation);
8073+ if (l)
8074+ memset(l, 0, DLM_LVB_LEN);
8075+
8076+ return l;
8077+}
8078+
8079+void free_range(uint64_t *l)
8080+{
8081+ kmem_cache_free(lvb_cache, l);
8082+}
8083+
10d56c87 8084+struct dlm_rcom *allocate_rcom_buffer(struct dlm_ls *ls)
4bf12011 8085+{
10d56c87 8086+ struct dlm_rcom *rc;
4bf12011 8087+
8088+ rc = kmalloc(dlm_config.buffer_size, ls->ls_allocation);
8089+ if (rc)
8090+ memset(rc, 0, dlm_config.buffer_size);
8091+
8092+ return rc;
8093+}
8094+
10d56c87 8095+void free_rcom_buffer(struct dlm_rcom *rc)
4bf12011 8096+{
8097+ kfree(rc);
8098+}
8099diff -urN linux-orig/cluster/dlm/memory.h linux-patched/cluster/dlm/memory.h
8100--- linux-orig/cluster/dlm/memory.h 1970-01-01 07:30:00.000000000 +0730
10d56c87 8101+++ linux-patched/cluster/dlm/memory.h 2004-07-13 18:57:22.000000000 +0800
4bf12011 8102@@ -0,0 +1,32 @@
8103+/******************************************************************************
8104+*******************************************************************************
8105+**
8106+** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
8107+** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
8108+**
8109+** This copyrighted material is made available to anyone wishing to use,
8110+** modify, copy, or redistribute it subject to the terms and conditions
8111+** of the GNU General Public License v.2.
8112+**
8113+*******************************************************************************
8114+******************************************************************************/
8115+
8116+#ifndef __MEMORY_DOT_H__
8117+#define __MEMORY_DOT_H__
8118+
8119+int dlm_memory_init(void);
8120+void dlm_memory_exit(void);
8121+struct dlm_rsb *allocate_rsb(struct dlm_ls *ls, int namelen);
8122+void free_rsb(struct dlm_rsb *r);
8123+struct dlm_lkb *allocate_lkb(struct dlm_ls *ls);
8124+void free_lkb(struct dlm_lkb *l);
8125+struct dlm_direntry *allocate_resdata(struct dlm_ls *ls, int namelen);
8126+void free_resdata(struct dlm_direntry *de);
8127+char *allocate_lvb(struct dlm_ls *ls);
4bf12011 8128+void free_lvb(char *l);
8129+struct dlm_rcom *allocate_rcom_buffer(struct dlm_ls *ls);
8130+void free_rcom_buffer(struct dlm_rcom *rc);
8131+uint64_t *allocate_range(struct dlm_ls *ls);
8132+void free_range(uint64_t *l);
4bf12011 8133+
8134+#endif /* __MEMORY_DOT_H__ */
8135diff -urN linux-orig/cluster/dlm/midcomms.c linux-patched/cluster/dlm/midcomms.c
8136--- linux-orig/cluster/dlm/midcomms.c 1970-01-01 07:30:00.000000000 +0730
10d56c87 8137+++ linux-patched/cluster/dlm/midcomms.c 2004-07-13 18:57:22.000000000 +0800
4bf12011 8138@@ -0,0 +1,351 @@
8139+/******************************************************************************
8140+*******************************************************************************
8141+**
8142+** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
8143+** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
8144+**
8145+** This copyrighted material is made available to anyone wishing to use,
8146+** modify, copy, or redistribute it subject to the terms and conditions
8147+** of the GNU General Public License v.2.
8148+**
8149+*******************************************************************************
8150+******************************************************************************/
8151+
8152+/*
8153+ * midcomms.c
8154+ *
8155+ * This is the appallingly named "mid-level" comms layer.
8156+ *
8157+ * Its purpose is to take buffers from the "real" comms layer,
8158+ * split them up into complete messages and pass them to the
8159+ * interested part of the locking mechanism.
8160+ *
8161+ * It also takes messages from the locking layer, formats them
8162+ * into packets and sends them to the comms layer.
8163+ *
8164+ * It knows the format of the mid-level messages and the nodeids,
8165+ * but it does not know how to resolve a nodeid into an IP address
8166+ * or any of the comms channel details.
8167+ *
8168+ */
8169+
8170+#include "dlm_internal.h"
8171+#include "lowcomms.h"
8172+#include "midcomms.h"
8173+#include "lockqueue.h"
8174+#include "nodes.h"
8175+#include "reccomms.h"
8176+#include "config.h"
8177+
8178+/* Byteorder routines */
8179+
8180+static void host_to_network(void *msg)
8181+{
8182+ struct dlm_header *head = msg;
8183+ struct dlm_request *req = msg;
8184+ struct dlm_reply *rep = msg;
8185+ struct dlm_query_request *qreq = msg;
8186+ struct dlm_query_reply *qrep= msg;
8187+ struct dlm_rcom *rc = msg;
4bf12011 8188+
8189+ /* Force into network byte order */
8190+
8191+ /*
8192+ * Do the common header first
8193+ */
8194+
8195+ head->rh_length = cpu_to_le16(head->rh_length);
8196+ head->rh_lockspace = cpu_to_le32(head->rh_lockspace);
8197+ /* Leave the lkid alone as it is transparent at the remote end */
8198+
8199+ /*
8200+ * Do the fields in the remlockrequest or remlockreply structs
8201+ */
8202+
8203+ switch (req->rr_header.rh_cmd) {
8204+
8205+ case GDLM_REMCMD_LOCKREQUEST:
8206+ case GDLM_REMCMD_CONVREQUEST:
8207+ req->rr_range_start = cpu_to_le64(req->rr_range_start);
8208+ req->rr_range_end = cpu_to_le64(req->rr_range_end);
8209+ /* Deliberate fall through */
8210+ case GDLM_REMCMD_UNLOCKREQUEST:
8211+ case GDLM_REMCMD_LOOKUP:
8212+ case GDLM_REMCMD_LOCKGRANT:
8213+ case GDLM_REMCMD_SENDBAST:
8214+ case GDLM_REMCMD_SENDCAST:
8215+ case GDLM_REMCMD_REM_RESDATA:
8216+ req->rr_flags = cpu_to_le32(req->rr_flags);
8217+ req->rr_status = cpu_to_le32(req->rr_status);
8218+ break;
8219+
8220+ case GDLM_REMCMD_LOCKREPLY:
8221+ rep->rl_lockstate = cpu_to_le32(rep->rl_lockstate);
8222+ rep->rl_nodeid = cpu_to_le32(rep->rl_nodeid);
8223+ rep->rl_status = cpu_to_le32(rep->rl_status);
4bf12011 8224+ break;
8225+
8226+ case GDLM_REMCMD_RECOVERMESSAGE:
8227+ case GDLM_REMCMD_RECOVERREPLY:
8228+ rc->rc_msgid = cpu_to_le32(rc->rc_msgid);
8229+ rc->rc_datalen = cpu_to_le16(rc->rc_datalen);
8230+ break;
8231+
8232+ case GDLM_REMCMD_QUERY:
8233+ qreq->rq_mstlkid = cpu_to_le32(qreq->rq_mstlkid);
8234+ qreq->rq_query = cpu_to_le32(qreq->rq_query);
8235+ qreq->rq_maxlocks = cpu_to_le32(qreq->rq_maxlocks);
4bf12011 8236+ break;
8237+
8238+ case GDLM_REMCMD_QUERYREPLY:
8239+ qrep->rq_numlocks = cpu_to_le32(qrep->rq_numlocks);
8240+ qrep->rq_status = cpu_to_le32(qrep->rq_status);
8241+ qrep->rq_grantcount = cpu_to_le32(qrep->rq_grantcount);
8242+ qrep->rq_waitcount = cpu_to_le32(qrep->rq_waitcount);
8243+ qrep->rq_convcount = cpu_to_le32(qrep->rq_convcount);
4bf12011 8244+ break;
8245+
8246+ default:
8247+ printk("dlm: warning, unknown REMCMD type %u\n",
8248+ req->rr_header.rh_cmd);
8249+ }
8250+}
8251+
8252+static void network_to_host(void *msg)
8253+{
8254+ struct dlm_header *head = msg;
8255+ struct dlm_request *req = msg;
8256+ struct dlm_reply *rep = msg;
8257+ struct dlm_query_request *qreq = msg;
8258+ struct dlm_query_reply *qrep = msg;
8259+ struct dlm_rcom *rc = msg;
4bf12011 8260+
8261+ /* Force into host byte order */
8262+
8263+ /*
8264+ * Do the common header first
8265+ */
8266+
8267+ head->rh_length = le16_to_cpu(head->rh_length);
8268+ head->rh_lockspace = le32_to_cpu(head->rh_lockspace);
8269+ /* Leave the lkid alone as it is transparent at the remote end */
8270+
8271+ /*
8272+ * Do the fields in the remlockrequest or remlockreply structs
8273+ */
8274+
8275+ switch (req->rr_header.rh_cmd) {
8276+
8277+ case GDLM_REMCMD_LOCKREQUEST:
8278+ case GDLM_REMCMD_CONVREQUEST:
8279+ req->rr_range_start = le64_to_cpu(req->rr_range_start);
8280+ req->rr_range_end = le64_to_cpu(req->rr_range_end);
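+ /* Deliberate fall through (as in host_to_network above) */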
8281+ case GDLM_REMCMD_LOOKUP:
8282+ case GDLM_REMCMD_UNLOCKREQUEST:
8283+ case GDLM_REMCMD_LOCKGRANT:
8284+ case GDLM_REMCMD_SENDBAST:
8285+ case GDLM_REMCMD_SENDCAST:
8286+ case GDLM_REMCMD_REM_RESDATA:
8287+ /* Actually, not much to do here as the remote lock IDs are
8288+ * transparent too */
8289+ req->rr_flags = le32_to_cpu(req->rr_flags);
8290+ req->rr_status = le32_to_cpu(req->rr_status);
8291+ break;
8292+
8293+ case GDLM_REMCMD_LOCKREPLY:
8294+ rep->rl_lockstate = le32_to_cpu(rep->rl_lockstate);
8295+ rep->rl_nodeid = le32_to_cpu(rep->rl_nodeid);
8296+ rep->rl_status = le32_to_cpu(rep->rl_status);
4bf12011 8297+ break;
8298+
8299+ case GDLM_REMCMD_RECOVERMESSAGE:
8300+ case GDLM_REMCMD_RECOVERREPLY:
8301+ rc->rc_msgid = le32_to_cpu(rc->rc_msgid);
8302+ rc->rc_datalen = le16_to_cpu(rc->rc_datalen);
8303+ break;
8304+
8305+
8306+ case GDLM_REMCMD_QUERY:
8307+ qreq->rq_mstlkid = le32_to_cpu(qreq->rq_mstlkid);
8308+ qreq->rq_query = le32_to_cpu(qreq->rq_query);
8309+ qreq->rq_maxlocks = le32_to_cpu(qreq->rq_maxlocks);
4bf12011 8310+ break;
8311+
8312+ case GDLM_REMCMD_QUERYREPLY:
8313+ qrep->rq_numlocks = le32_to_cpu(qrep->rq_numlocks);
8314+ qrep->rq_status = le32_to_cpu(qrep->rq_status);
8315+ qrep->rq_grantcount = le32_to_cpu(qrep->rq_grantcount);
8316+ qrep->rq_waitcount = le32_to_cpu(qrep->rq_waitcount);
8317+ qrep->rq_convcount = le32_to_cpu(qrep->rq_convcount);
4bf12011 8318+ break;
8319+
8320+ default:
8321+ printk("dlm: warning, unknown REMCMD type %u\n",
8322+ req->rr_header.rh_cmd);
8323+ }
8324+}
8325+
8326+static void copy_from_cb(void *dst, const void *base, unsigned offset,
8327+ unsigned len, unsigned limit)
8328+{
8329+ unsigned copy = len;
8330+
8331+ if ((copy + offset) > limit)
8332+ copy = limit - offset;
8333+ memcpy(dst, base + offset, copy);
8334+ len -= copy;
8335+ if (len)
8336+ memcpy(dst + copy, base, len);
8337+}
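+/*
+ * copy_from_cb() above treats "base" as a circular buffer of "limit"
+ * bytes: it copies up to the end of the buffer and wraps the rest
+ * round to the start. E.g. with limit 4096, offset 4090 and len 16
+ * it takes 6 bytes from the tail and the remaining 10 from base[0]
+ * onwards. Callers keep limit a power of two and mask offsets with
+ * limit - 1, as midcomms_process_incoming_buffer() does below.
+ */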
8338+
8339+static void khexdump(const unsigned char *c, int len)
8340+{
8341+ while (len > 16) {
8342+ printk(KERN_INFO
8343+ "%02x %02x %02x %02x %02x %02x %02x %02x-%02x %02x %02x %02x %02x %02x %02x %02x\n",
8344+ c[0], c[1], c[2], c[3], c[4], c[5], c[6], c[7], c[8],
8345+ c[9], c[10], c[11], c[12], c[13], c[14], c[15]);
8346+ len -= 16;
8347+ }
8348+ while (len > 4) {
8349+ printk(KERN_INFO "%02x %02x %02x %02x\n", c[0], c[1], c[2],
8350+ c[3]);
8351+ len -= 4;
8352+ }
8353+ while (len > 0) {
8354+ printk(KERN_INFO "%02x\n", c[0]);
8355+ len--;
8356+ }
8357+}
8358+
8359+/*
8360+ * Called from the low-level comms layer to process a buffer of
8361+ * commands.
8362+ *
8363+ * Only complete messages are processed here; any "spare" bytes from
8364+ * the end of a buffer are saved and tacked onto the front of the next
8365+ * message that comes in. I doubt this will happen very often but we
8366+ * need to be able to cope with it and I don't want the task to be waiting
8367+ * for packets to come in when there is useful work to be done.
8368+ *
8369+ */
8370+int midcomms_process_incoming_buffer(int nodeid, const void *base,
8371+ unsigned offset, unsigned len,
8372+ unsigned limit)
8373+{
8374+ unsigned char __tmp[sizeof(struct dlm_header) + 64];
8375+ struct dlm_header *msg = (struct dlm_header *) __tmp;
4bf12011 8376+ int ret = 0;
8377+ int err = 0;
8378+ unsigned msglen;
8379+ __u32 id, space;
8380+
10d56c87 8381+ while (len > sizeof(struct dlm_header)) {
4bf12011 8382+ /* Get message header and check it over */
10d56c87 8383+ copy_from_cb(msg, base, offset, sizeof(struct dlm_header),
4bf12011 8384+ limit);
8385+ msglen = le16_to_cpu(msg->rh_length);
8386+ id = msg->rh_lkid;
8387+ space = msg->rh_lockspace;
8388+
8389+ /* Check message size */
8390+ err = -EINVAL;
10d56c87 8391+ if (msglen < sizeof(struct dlm_header))
4bf12011 8392+ break;
8393+ err = -E2BIG;
8394+ if (msglen > dlm_config.buffer_size) {
8395+ printk("dlm: message size too big %d\n", msglen);
8396+ break;
8397+ }
8398+ err = 0;
8399+
8400+ /* Not enough in buffer yet? wait for some more */
8401+ if (msglen > len)
8402+ break;
8403+
8404+ /* Make sure our temp buffer is large enough */
8405+ if (msglen > sizeof(__tmp) &&
10d56c87 8406+ msg == (struct dlm_header *) __tmp) {
4bf12011 8407+ msg = kmalloc(dlm_config.buffer_size, GFP_KERNEL);
8408+ if (msg == NULL)
8409+ return ret;
8410+ }
8411+
8412+ copy_from_cb(msg, base, offset, msglen, limit);
8413+ BUG_ON(id != msg->rh_lkid);
8414+ BUG_ON(space != msg->rh_lockspace);
8415+ ret += msglen;
8416+ offset += msglen;
8417+ offset &= (limit - 1);
8418+ len -= msglen;
8419+ network_to_host(msg);
8420+
8421+ if ((msg->rh_cmd > 32) ||
8422+ (msg->rh_cmd == 0) ||
10d56c87 8423+ (msg->rh_length < sizeof(struct dlm_header)) ||
4bf12011 8424+ (msg->rh_length > dlm_config.buffer_size)) {
8425+
8426+ printk("dlm: midcomms: cmd=%u, flags=%u, length=%hu, "
8427+ "lkid=%u, lockspace=%u\n",
8428+ msg->rh_cmd, msg->rh_flags, msg->rh_length,
8429+ msg->rh_lkid, msg->rh_lockspace);
8430+
8431+ printk("dlm: midcomms: base=%p, offset=%u, len=%u, "
8432+ "ret=%u, limit=%08x newbuf=%d\n",
8433+ base, offset, len, ret, limit,
10d56c87 8434+ ((struct dlm_header *) __tmp == msg));
4bf12011 8435+
8436+ khexdump((const unsigned char *) msg, msg->rh_length);
8437+
8438+ return -EBADMSG;
8439+ }
8440+
8441+ switch (msg->rh_cmd) {
8442+ case GDLM_REMCMD_RECOVERMESSAGE:
8443+ case GDLM_REMCMD_RECOVERREPLY:
8444+ process_recovery_comm(nodeid, msg);
8445+ break;
8446+ default:
8447+ process_cluster_request(nodeid, msg, FALSE);
8448+ }
8449+ }
8450+
10d56c87 8451+ if (msg != (struct dlm_header *) __tmp)
4bf12011 8452+ kfree(msg);
8453+
8454+ return err ? err : ret;
8455+}
8456+
8457+/*
8458+ * Send a lowcomms buffer
8459+ */
8460+
10d56c87 8461+void midcomms_send_buffer(struct dlm_header *msg, struct writequeue_entry *e)
4bf12011 8462+{
8463+ host_to_network(msg);
8464+ lowcomms_commit_buffer(e);
8465+}
8466+
8467+/*
8468+ * Make the message into network byte order and send it
8469+ */
8470+
10d56c87 8471+int midcomms_send_message(uint32_t nodeid, struct dlm_header *msg,
4bf12011 8472+ int allocation)
8473+{
8474+ int len = msg->rh_length;
8475+
8476+ host_to_network(msg);
8477+
8478+ /*
8479+ * Loopback. In fact, the locking code pretty much prevents this from
8480+ * being needed but it can happen when the directory node is also the
8481+ * local node.
8482+ */
8483+
8484+ if (nodeid == our_nodeid())
8485+ return midcomms_process_incoming_buffer(nodeid, (char *) msg, 0,
8486+ len, len);
8487+
8488+ return lowcomms_send_message(nodeid, (char *) msg, len, allocation);
8489+}
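+/*
+ * Illustrative sketch (assumed usage, not from the original sources):
+ * how a caller combines lowcomms_get_buffer() with the
+ * midcomms_send_buffer() above - reserve space in the node's
+ * writequeue, build the message in place, then commit it and let
+ * midcomms do the byteorder conversion. The rh_cmd line stands in
+ * for whatever real request is being built.
+ */
+static int example_new_interface_send(uint32_t nodeid, int len)
+{
+ struct writequeue_entry *e;
+ struct dlm_header *msg;
+ char *b;
+
+ e = lowcomms_get_buffer(nodeid, len, GFP_KERNEL, &b);
+ if (!e)
+ return -ENOBUFS;
+
+ msg = (struct dlm_header *) b;
+ memset(msg, 0, len);
+ msg->rh_length = len;
+ /* ... fill in msg->rh_cmd, rh_lkid, rh_lockspace and the body ... */
+
+ midcomms_send_buffer(msg, e); /* endian-swap and queue for dlm_sendd */
+ return 0;
+}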
8490diff -urN linux-orig/cluster/dlm/midcomms.h linux-patched/cluster/dlm/midcomms.h
8491--- linux-orig/cluster/dlm/midcomms.h 1970-01-01 07:30:00.000000000 +0730
10d56c87 8492+++ linux-patched/cluster/dlm/midcomms.h 2004-07-13 18:57:22.000000000 +0800
4bf12011 8493@@ -0,0 +1,24 @@
8494+/******************************************************************************
8495+*******************************************************************************
8496+**
8497+** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
8498+** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
8499+**
8500+** This copyrighted material is made available to anyone wishing to use,
8501+** modify, copy, or redistribute it subject to the terms and conditions
8502+** of the GNU General Public License v.2.
8503+**
8504+*******************************************************************************
8505+******************************************************************************/
8506+
8507+#ifndef __MIDCOMMS_DOT_H__
8508+#define __MIDCOMMS_DOT_H__
8509+
10d56c87 8510+int midcomms_send_message(uint32_t csid, struct dlm_header *msg,
4bf12011 8511+ int allocation);
8512+int midcomms_process_incoming_buffer(int csid, const void *buf, unsigned offset,
8513+ unsigned len, unsigned limit);
10d56c87 8514+void midcomms_send_buffer(struct dlm_header *msg,
4bf12011 8515+ struct writequeue_entry *e);
8516+
8517+#endif /* __MIDCOMMS_DOT_H__ */
8518diff -urN linux-orig/cluster/dlm/nodes.c linux-patched/cluster/dlm/nodes.c
8519--- linux-orig/cluster/dlm/nodes.c 1970-01-01 07:30:00.000000000 +0730
10d56c87 8520+++ linux-patched/cluster/dlm/nodes.c 2004-07-13 18:57:22.000000000 +0800
4bf12011 8521@@ -0,0 +1,325 @@
8522+/******************************************************************************
8523+*******************************************************************************
8524+**
8525+** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
8526+** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
8527+**
8528+** This copyrighted material is made available to anyone wishing to use,
8529+** modify, copy, or redistribute it subject to the terms and conditions
8530+** of the GNU General Public License v.2.
8531+**
8532+*******************************************************************************
8533+******************************************************************************/
8534+
8535+#include <net/sock.h>
8536+#include <cluster/cnxman.h>
8537+
8538+#include "dlm_internal.h"
8539+#include "lowcomms.h"
8540+#include "nodes.h"
8541+#include "recover.h"
8542+#include "reccomms.h"
8543+#include "util.h"
8544+
8545+static struct list_head cluster_nodes;
8546+static spinlock_t node_lock;
8547+static uint32_t local_nodeid;
8548+static struct semaphore local_init_lock;
8549+
8550+
8551+void dlm_nodes_init(void)
8552+{
8553+ INIT_LIST_HEAD(&cluster_nodes);
8554+ spin_lock_init(&node_lock);
8555+ local_nodeid = 0;
8556+ init_MUTEX(&local_init_lock);
8557+}
8558+
10d56c87 8559+static struct dlm_node *search_node(uint32_t nodeid)
4bf12011 8560+{
10d56c87 8561+ struct dlm_node *node;
4bf12011 8562+
8563+ list_for_each_entry(node, &cluster_nodes, list) {
8564+ if (node->nodeid == nodeid)
4bf12011 8565+ goto out;
8566+ }
8567+ node = NULL;
8568+ out:
8569+ return node;
8570+}
8571+
10d56c87 8572+static void put_node(struct dlm_node *node)
4bf12011 8573+{
8574+ spin_lock(&node_lock);
8575+ node->refcount--;
8576+ if (node->refcount == 0) {
8577+ list_del(&node->list);
4bf12011 8578+ spin_unlock(&node_lock);
8579+ kfree(node);
8580+ return;
8581+ }
8582+ spin_unlock(&node_lock);
8583+}
8584+
10d56c87 8585+static int get_node(uint32_t nodeid, struct dlm_node **ndp)
4bf12011 8586+{
10d56c87 8587+ struct dlm_node *node, *node2;
4bf12011 8588+ int error = -ENOMEM;
8589+
8590+ spin_lock(&node_lock);
8591+ node = search_node(nodeid);
8592+ if (node)
10d56c87 8593+ node->refcount++;
4bf12011 8594+ spin_unlock(&node_lock);
8595+
8596+ if (node)
8597+ goto out;
8598+
10d56c87 8599+ node = (struct dlm_node *) kmalloc(sizeof(struct dlm_node), GFP_KERNEL);
4bf12011 8600+ if (!node)
8601+ goto fail;
8602+
8603+ memset(node, 0, sizeof(struct dlm_node));
8604+ node->nodeid = nodeid;
4bf12011 8605+
8606+ spin_lock(&node_lock);
8607+ node2 = search_node(nodeid);
8608+ if (node2) {
10d56c87 8609+ node2->refcount++;
4bf12011 8610+ spin_unlock(&node_lock);
8611+ kfree(node);
8612+ node = node2;
8613+ goto out;
8614+ }
8615+
8616+ node->refcount = 1;
8617+ list_add_tail(&node->list, &cluster_nodes);
4bf12011 8618+ spin_unlock(&node_lock);
8619+
8620+ out:
8621+ *ndp = node;
8622+ return 0;
8623+
8624+ fail:
8625+ return error;
8626+}
8627+
10d56c87 8628+int init_new_csb(uint32_t nodeid, struct dlm_csb **ret_csb)
4bf12011 8629+{
8630+ struct dlm_csb *csb;
8631+ struct dlm_node *node;
4bf12011 8632+ int error = -ENOMEM;
8633+
10d56c87 8634+ csb = (struct dlm_csb *) kmalloc(sizeof(struct dlm_csb), GFP_KERNEL);
4bf12011 8635+ if (!csb)
8636+ goto fail;
8637+
10d56c87 8638+ memset(csb, 0, sizeof(struct dlm_csb));
4bf12011 8639+
8640+ error = get_node(nodeid, &node);
8641+ if (error)
8642+ goto fail_free;
8643+
10d56c87 8644+ csb->node = node;
4bf12011 8645+
8646+ down(&local_init_lock);
8647+
8648+ if (!local_nodeid) {
8649+ if (nodeid == our_nodeid()) {
10d56c87 8650+ local_nodeid = node->nodeid;
4bf12011 8651+ }
8652+ }
8653+ up(&local_init_lock);
8654+
8655+ *ret_csb = csb;
8656+ return 0;
8657+
8658+ fail_free:
8659+ kfree(csb);
8660+ fail:
8661+ return error;
8662+}
8663+
10d56c87 8664+void release_csb(struct dlm_csb *csb)
4bf12011 8665+{
10d56c87 8666+ put_node(csb->node);
4bf12011 8667+ kfree(csb);
8668+}
8669+
8670+uint32_t our_nodeid(void)
8671+{
8672+ return lowcomms_our_nodeid();
8673+}
8674+
10d56c87 8675+int nodes_reconfig_wait(struct dlm_ls *ls)
4bf12011 8676+{
8677+ int error;
8678+
8679+ if (ls->ls_low_nodeid == our_nodeid()) {
10d56c87 8680+ error = dlm_wait_status_all(ls, NODES_VALID);
4bf12011 8681+ if (!error)
8682+ set_bit(LSFL_ALL_NODES_VALID, &ls->ls_flags);
8683+
8684+ /* Experimental: this delay should allow any final messages
8685+ * from the previous node to be received before beginning
8686+ * recovery. */
8687+
8688+ if (ls->ls_num_nodes == 1) {
8689+ current->state = TASK_UNINTERRUPTIBLE;
8690+ schedule_timeout(2 * HZ);
8691+ }
8692+
8693+ } else
10d56c87 8694+ error = dlm_wait_status_low(ls, NODES_ALL_VALID);
4bf12011 8695+
8696+ return error;
8697+}
8698+
10d56c87 8699+static void add_ordered_node(struct dlm_ls *ls, struct dlm_csb *new)
4bf12011 8700+{
10d56c87 8701+ struct dlm_csb *csb = NULL;
4bf12011 8702+ struct list_head *tmp;
10d56c87 8703+ struct list_head *newlist = &new->list;
4bf12011 8704+ struct list_head *head = &ls->ls_nodes;
8705+
8706+ list_for_each(tmp, head) {
10d56c87 8707+ csb = list_entry(tmp, struct dlm_csb, list);
4bf12011 8708+
10d56c87 8709+ if (new->node->nodeid < csb->node->nodeid)
4bf12011 8710+ break;
8711+ }
8712+
8713+ if (!csb)
8714+ list_add_tail(newlist, head);
8715+ else {
8716+ /* FIXME: can use list macro here */
8717+ newlist->prev = tmp->prev;
8718+ newlist->next = tmp;
8719+ tmp->prev->next = newlist;
8720+ tmp->prev = newlist;
8721+ }
8722+}
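+/*
+ * Note on the FIXME above: the open-coded splice is equivalent to
+ * inserting "newlist" immediately before "tmp" with the standard
+ * helper, i.e.
+ *
+ * list_add_tail(newlist, tmp);
+ *
+ * which is presumably the list macro the comment has in mind.
+ */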
8723+
10d56c87 8724+int ls_nodes_reconfig(struct dlm_ls *ls, struct dlm_recover *rv, int *neg_out)
4bf12011 8725+{
10d56c87 8726+ struct dlm_csb *csb, *safe;
4bf12011 8727+ int error, i, found, pos = 0, neg = 0;
8728+ uint32_t low = (uint32_t) (-1);
8729+
8730+ /*
8731+ * Remove (and save) departed nodes from lockspace's nodes list
8732+ */
8733+
10d56c87 8734+ list_for_each_entry_safe(csb, safe, &ls->ls_nodes, list) {
4bf12011 8735+ found = FALSE;
8736+ for (i = 0; i < rv->node_count; i++) {
8737+ if (csb->node->nodeid == rv->nodeids[i]) {
4bf12011 8738+ found = TRUE;
8739+ break;
8740+ }
8741+ }
8742+
8743+ if (!found) {
8744+ neg++;
8745+ csb->gone_event = rv->event_id;
8746+ list_del(&csb->list);
8747+ list_add_tail(&csb->list, &ls->ls_nodes_gone);
4bf12011 8748+ ls->ls_num_nodes--;
10d56c87 8749+ log_all(ls, "remove node %u", csb->node->nodeid);
4bf12011 8750+ }
8751+ }
8752+
8753+ /*
8754+ * Add new nodes to lockspace's nodes list
8755+ */
8756+
10d56c87 8757+ for (i = 0; i < rv->node_count; i++) {
4bf12011 8758+ found = FALSE;
8759+ list_for_each_entry(csb, &ls->ls_nodes, list) {
8760+ if (csb->node->nodeid == rv->nodeids[i]) {
4bf12011 8761+ found = TRUE;
8762+ break;
8763+ }
8764+ }
8765+
8766+ if (!found) {
8767+ pos++;
8768+
8769+ error = init_new_csb(rv->nodeids[i], &csb);
8770+ DLM_ASSERT(!error,);
4bf12011 8771+
8772+ add_ordered_node(ls, csb);
8773+ ls->ls_num_nodes++;
10d56c87 8774+ log_all(ls, "add node %u", csb->node->nodeid);
4bf12011 8775+ }
8776+ }
8777+
8778+ list_for_each_entry(csb, &ls->ls_nodes, list) {
8779+ if (csb->node->nodeid < low)
8780+ low = csb->node->nodeid;
4bf12011 8781+ }
8782+
8783+ rcom_log_clear(ls);
8784+ ls->ls_low_nodeid = low;
10d56c87 8785+ ls->ls_nodes_mask = dlm_next_power2(ls->ls_num_nodes) - 1;
4bf12011 8786+ set_bit(LSFL_NODES_VALID, &ls->ls_flags);
8787+ *neg_out = neg;
8788+
8789+ error = nodes_reconfig_wait(ls);
8790+
8791+ log_all(ls, "total nodes %d", ls->ls_num_nodes);
8792+
8793+ return error;
8794+}
8795+
10d56c87 8796+int ls_nodes_init(struct dlm_ls *ls, struct dlm_recover *rv)
4bf12011 8797+{
10d56c87 8798+ struct dlm_csb *csb;
4bf12011 8799+ int i, error;
8800+ uint32_t low = (uint32_t) (-1);
8801+
8802+ log_all(ls, "add nodes");
8803+
8804+ for (i = 0; i < rv->node_count; i++) {
8805+ error = init_new_csb(rv->nodeids[i], &csb);
4bf12011 8806+ if (error)
8807+ goto fail;
8808+
8809+ add_ordered_node(ls, csb);
8810+ ls->ls_num_nodes++;
8811+
8812+ if (csb->node->nodeid < low)
8813+ low = csb->node->nodeid;
4bf12011 8814+ }
8815+
8816+ ls->ls_low_nodeid = low;
10d56c87 8817+ ls->ls_nodes_mask = dlm_next_power2(ls->ls_num_nodes) - 1;
4bf12011 8818+ set_bit(LSFL_NODES_VALID, &ls->ls_flags);
8819+
8820+ error = nodes_reconfig_wait(ls);
8821+
8822+ log_all(ls, "total nodes %d", ls->ls_num_nodes);
8823+
8824+ return error;
8825+
8826+ fail:
8827+ while (!list_empty(&ls->ls_nodes)) {
8828+ csb = list_entry(ls->ls_nodes.next, struct dlm_csb, list);
8829+ list_del(&csb->list);
4bf12011 8830+ release_csb(csb);
8831+ }
8832+ ls->ls_num_nodes = 0;
8833+
8834+ return error;
8835+}
8836+
10d56c87 8837+int in_nodes_gone(struct dlm_ls *ls, uint32_t nodeid)
4bf12011 8838+{
10d56c87 8839+ struct dlm_csb *csb;
4bf12011 8840+
8841+ list_for_each_entry(csb, &ls->ls_nodes_gone, list) {
8842+ if (csb->node->nodeid == nodeid)
4bf12011 8843+ return TRUE;
8844+ }
8845+ return FALSE;
8846+}
8847diff -urN linux-orig/cluster/dlm/nodes.h linux-patched/cluster/dlm/nodes.h
8848--- linux-orig/cluster/dlm/nodes.h 1970-01-01 07:30:00.000000000 +0730
10d56c87 8849+++ linux-patched/cluster/dlm/nodes.h 2004-07-13 18:57:22.000000000 +0800
4bf12011 8850@@ -0,0 +1,25 @@
8851+/******************************************************************************
8852+*******************************************************************************
8853+**
8854+** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
8855+** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
8856+**
8857+** This copyrighted material is made available to anyone wishing to use,
8858+** modify, copy, or redistribute it subject to the terms and conditions
8859+** of the GNU General Public License v.2.
8860+**
8861+*******************************************************************************
8862+******************************************************************************/
8863+
8864+#ifndef __NODES_DOT_H__
8865+#define __NODES_DOT_H__
8866+
8867+void dlm_nodes_init(void);
8868+int init_new_csb(uint32_t nodeid, struct dlm_csb ** ret_csb);
8869+void release_csb(struct dlm_csb * csb);
4bf12011 8870+uint32_t our_nodeid(void);
8871+int ls_nodes_reconfig(struct dlm_ls * ls, struct dlm_recover * gr, int *neg);
8872+int ls_nodes_init(struct dlm_ls * ls, struct dlm_recover * gr);
8873+int in_nodes_gone(struct dlm_ls * ls, uint32_t nodeid);
4bf12011 8874+
8875+#endif /* __NODES_DOT_H__ */
8876diff -urN linux-orig/cluster/dlm/proc.c linux-patched/cluster/dlm/proc.c
8877--- linux-orig/cluster/dlm/proc.c 1970-01-01 07:30:00.000000000 +0730
8878+++ linux-patched/cluster/dlm/proc.c 2004-07-13 18:57:22.000000000 +0800
8879@@ -0,0 +1,473 @@
4bf12011 8880+/******************************************************************************
8881+*******************************************************************************
8882+**
8883+** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
8884+** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
8885+**
8886+** This copyrighted material is made available to anyone wishing to use,
8887+** modify, copy, or redistribute it subject to the terms and conditions
8888+** of the GNU General Public License v.2.
8889+**
8890+*******************************************************************************
8891+******************************************************************************/
8892+
8893+#include <linux/init.h>
8894+#include <linux/proc_fs.h>
8895+#include <linux/ctype.h>
8896+#include <linux/seq_file.h>
8897+#include <linux/module.h>
8898+
8899+#include "dlm_internal.h"
8900+#include "lockspace.h"
8901+
8902+#if defined(DLM_DEBUG)
8903+#define DLM_DEBUG_SIZE (1024)
8904+#define MAX_DEBUG_MSG_LEN (64)
8905+#else
8906+#define DLM_DEBUG_SIZE (0)
8907+#define MAX_DEBUG_MSG_LEN (0)
8908+#endif
8909+
8910+static char * debug_buf;
8911+static unsigned int debug_size;
8912+static unsigned int debug_point;
8913+static int debug_wrap;
8914+static spinlock_t debug_lock;
8915+static struct proc_dir_entry * debug_proc_entry = NULL;
8916+static struct proc_dir_entry * rcom_proc_entry = NULL;
8917+static char proc_ls_name[255] = "";
8918+
8919+#ifdef CONFIG_CLUSTER_DLM_PROCLOCKS
8920+static struct proc_dir_entry * locks_proc_entry = NULL;
8921+static struct seq_operations locks_info_op;
8922+
8923+
8924+static int locks_open(struct inode *inode, struct file *file)
8925+{
8926+ return seq_open(file, &locks_info_op);
8927+}
8928+
8929+/* Write simply sets the lockspace to use */
8930+static ssize_t locks_write(struct file *file, const char *buf,
8931+ size_t count, loff_t * ppos)
8932+{
8933+ if (count < sizeof(proc_ls_name)) {
8934+ if (copy_from_user(proc_ls_name, buf, count))
+ return -EFAULT;
8935+ proc_ls_name[count] = '\0';
8936+
8937+ /* Remove any trailing LF so that lazy users
8938+ can just echo "lsname" > /proc/cluster/dlm_locks */
8939+ if (count && proc_ls_name[count - 1] == '\n')
8940+ proc_ls_name[count - 1] = '\0';
8941+
8942+ return count;
8943+ }
8944+ return 0;
8945+}
8946+
8947+static struct file_operations locks_fops = {
8948+ open:locks_open,
8949+ write:locks_write,
8950+ read:seq_read,
8951+ llseek:seq_lseek,
8952+ release:seq_release,
8953+};
8954+
8955+struct ls_dumpinfo {
8956+ int entry;
8957+ struct list_head *next;
8958+ struct dlm_ls *ls;
8959+ struct dlm_rsb *rsb;
4bf12011 8960+};
8961+
10d56c87 8962+static int print_resource(struct dlm_rsb * res, struct seq_file *s);
4bf12011 8963+
8964+static struct ls_dumpinfo *next_rsb(struct ls_dumpinfo *di)
8965+{
8966+ int i;
8967+
4bf12011 8968+ if (!di->next) {
8969+ /* Find the next non-empty hash bucket */
8970+ for (i = di->entry; i < di->ls->ls_rsbtbl_size; i++) {
8971+ read_lock(&di->ls->ls_rsbtbl[i].lock);
8972+ if (!list_empty(&di->ls->ls_rsbtbl[i].list)) {
8973+ di->next = di->ls->ls_rsbtbl[i].list.next;
8974+ read_unlock(&di->ls->ls_rsbtbl[i].lock);
8975+ break;
8976+ }
8977+ read_unlock(&di->ls->ls_rsbtbl[i].lock);
4bf12011 8978+ }
10d56c87 8979+ di->entry = i;
4bf12011 8980+
8981+ if (di->entry >= di->ls->ls_rsbtbl_size)
8982+ return NULL; /* End of hash list */
4bf12011 8983+ } else { /* Find the next entry in the list */
8984+ i = di->entry;
8985+ read_lock(&di->ls->ls_rsbtbl[i].lock);
4bf12011 8986+ di->next = di->next->next;
10d56c87 8987+ if (di->next->next == di->ls->ls_rsbtbl[i].list.next) {
4bf12011 8988+ /* End of list - move to next bucket */
8989+ di->next = NULL;
8990+ di->entry++;
10d56c87 8991+ read_unlock(&di->ls->ls_rsbtbl[i].lock);
4bf12011 8992+ return next_rsb(di); /* do the top half of this conditional */
8993+ }
10d56c87 8994+ read_unlock(&di->ls->ls_rsbtbl[i].lock);
4bf12011 8995+ }
10d56c87 8996+ di->rsb = list_entry(di->next, struct dlm_rsb, res_hashchain);
4bf12011 8997+
8998+ return di;
8999+}
9000+
9001+static void *s_start(struct seq_file *m, loff_t * pos)
9002+{
9003+ struct ls_dumpinfo *di;
10d56c87 9004+ struct dlm_ls *ls;
4bf12011 9005+ int i;
9006+
9007+ ls = find_lockspace_by_name(proc_ls_name, strlen(proc_ls_name));
9008+ if (!ls)
9009+ return NULL;
9010+
9011+ di = kmalloc(sizeof(struct ls_dumpinfo), GFP_KERNEL);
9012+ if (!di)
9013+ return NULL;
9014+
9015+ if (*pos == 0)
9016+ seq_printf(m, "DLM lockspace '%s'\n", proc_ls_name);
9017+
9018+ di->entry = 0;
9019+ di->next = NULL;
9020+ di->ls = ls;
9021+
9022+ for (i = 0; i < *pos; i++)
9023+ if (next_rsb(di) == NULL) {
+ kfree(di);
9024+ return NULL;
+ }
9025+
9026+ return next_rsb(di);
9027+}
9028+
9029+static void *s_next(struct seq_file *m, void *p, loff_t * pos)
9030+{
9031+ struct ls_dumpinfo *di = p;
9032+
9033+ *pos += 1;
9034+
9035+ return next_rsb(di);
9036+}
9037+
9038+static int s_show(struct seq_file *m, void *p)
9039+{
9040+ struct ls_dumpinfo *di = p;
9041+ return print_resource(di->rsb, m);
9042+}
9043+
9044+static void s_stop(struct seq_file *m, void *p)
9045+{
9046+ kfree(p);
9047+}
9048+
9049+static struct seq_operations locks_info_op = {
9050+ start:s_start,
9051+ next:s_next,
9052+ stop:s_stop,
9053+ show:s_show
9054+};
9055+
9056+static char *print_lockmode(int mode)
9057+{
9058+ switch (mode) {
9059+ case DLM_LOCK_IV:
9060+ return "--";
9061+ case DLM_LOCK_NL:
9062+ return "NL";
9063+ case DLM_LOCK_CR:
9064+ return "CR";
9065+ case DLM_LOCK_CW:
9066+ return "CW";
9067+ case DLM_LOCK_PR:
9068+ return "PR";
9069+ case DLM_LOCK_PW:
9070+ return "PW";
9071+ case DLM_LOCK_EX:
9072+ return "EX";
9073+ default:
9074+ return "??";
9075+ }
9076+}
9077+
10d56c87 9078+static void print_lock(struct seq_file *s, struct dlm_lkb * lkb, struct dlm_rsb * res)
4bf12011 9079+{
9080+
9081+ seq_printf(s, "%08x %s", lkb->lkb_id, print_lockmode(lkb->lkb_grmode));
9082+
9083+ if (lkb->lkb_status == GDLM_LKSTS_CONVERT
9084+ || lkb->lkb_status == GDLM_LKSTS_WAITING)
9085+ seq_printf(s, " (%s)", print_lockmode(lkb->lkb_rqmode));
9086+
9087+ if (lkb->lkb_range) {
9088+ /* This warns on Alpha. Tough. Only I see it */
9089+ if (lkb->lkb_status == GDLM_LKSTS_CONVERT
9090+ || lkb->lkb_status == GDLM_LKSTS_GRANTED)
9091+ seq_printf(s, " %" PRIx64 "-%" PRIx64,
9092+ lkb->lkb_range[GR_RANGE_START],
9093+ lkb->lkb_range[GR_RANGE_END]);
9094+ if (lkb->lkb_status == GDLM_LKSTS_CONVERT
9095+ || lkb->lkb_status == GDLM_LKSTS_WAITING)
9096+ seq_printf(s, " (%" PRIx64 "-%" PRIx64 ")",
9097+ lkb->lkb_range[RQ_RANGE_START],
9098+ lkb->lkb_range[RQ_RANGE_END]);
9099+ }
9100+
9101+ if (lkb->lkb_nodeid) {
9102+ if (lkb->lkb_nodeid != res->res_nodeid)
9103+ seq_printf(s, " Remote: %3d %08x", lkb->lkb_nodeid,
9104+ lkb->lkb_remid);
9105+ else
9106+ seq_printf(s, " Master: %08x", lkb->lkb_remid);
9107+ }
9108+
9109+ if (lkb->lkb_status != GDLM_LKSTS_GRANTED)
9110+ seq_printf(s, " LQ: %d", lkb->lkb_lockqueue_state);
9111+
9112+ seq_printf(s, "\n");
9113+}
9114+
10d56c87 9115+static int print_resource(struct dlm_rsb *res, struct seq_file *s)
4bf12011 9116+{
9117+ int i;
9118+ struct list_head *locklist;
9119+
9120+ seq_printf(s, "\nResource %p (parent %p). Name (len=%d) \"", res,
9121+ res->res_parent, res->res_length);
9122+ for (i = 0; i < res->res_length; i++) {
9123+ if (isprint(res->res_name[i]))
9124+ seq_printf(s, "%c", res->res_name[i]);
9125+ else
9126+ seq_printf(s, "%c", '.');
9127+ }
9128+ if (res->res_nodeid)
9129+ seq_printf(s, "\" \nLocal Copy, Master is node %d\n",
9130+ res->res_nodeid);
9131+ else
9132+ seq_printf(s, "\" \nMaster Copy\n");
9133+
9134+ /* Print the LVB: */
9135+ if (res->res_lvbptr) {
9136+ seq_printf(s, "LVB: ");
9137+ for (i = 0; i < DLM_LVB_LEN; i++) {
9138+ if (i == DLM_LVB_LEN / 2)
9139+ seq_printf(s, "\n ");
9140+ seq_printf(s, "%02x ",
9141+ (unsigned char) res->res_lvbptr[i]);
9142+ }
9143+ seq_printf(s, "\n");
9144+ }
9145+
9146+ /* Print the locks attached to this resource */
9147+ seq_printf(s, "Granted Queue\n");
9148+ list_for_each(locklist, &res->res_grantqueue) {
9149+ struct dlm_lkb *this_lkb =
9150+ list_entry(locklist, struct dlm_lkb, lkb_statequeue);
4bf12011 9151+ print_lock(s, this_lkb, res);
9152+ }
9153+
9154+ seq_printf(s, "Conversion Queue\n");
9155+ list_for_each(locklist, &res->res_convertqueue) {
9156+ struct dlm_lkb *this_lkb =
9157+ list_entry(locklist, struct dlm_lkb, lkb_statequeue);
4bf12011 9158+ print_lock(s, this_lkb, res);
9159+ }
9160+
9161+ seq_printf(s, "Waiting Queue\n");
9162+ list_for_each(locklist, &res->res_waitqueue) {
9163+ struct dlm_lkb *this_lkb =
9164+ list_entry(locklist, struct dlm_lkb, lkb_statequeue);
4bf12011 9165+ print_lock(s, this_lkb, res);
9166+ }
9167+ return 0;
9168+}
9169+#endif /* CONFIG_CLUSTER_DLM_PROCLOCKS */
9170+
10d56c87 9171+void dlm_debug_log(struct dlm_ls *ls, const char *fmt, ...)
4bf12011 9172+{
9173+ va_list va;
9174+ int i, n, size, len;
9175+ char buf[MAX_DEBUG_MSG_LEN+1];
9176+
9177+ spin_lock(&debug_lock);
9178+
9179+ if (!debug_buf)
9180+ goto out;
9181+
9182+ size = MAX_DEBUG_MSG_LEN;
9183+ memset(buf, 0, size+1);
9184+
9185+	n = snprintf(buf, size, "%s ", ls->ls_name);
+	/* snprintf returns the untruncated length; clamp it so that size
+	   cannot go negative for a very long lockspace name */
+	if (n > size)
+		n = size;
9186+	size -= n;
9187+
9188+	va_start(va, fmt);
9189+	vsnprintf(buf+n, size, fmt, va);
9190+	va_end(va);
9191+
9192+	len = strlen(buf);
9193+	if (len > MAX_DEBUG_MSG_LEN-1)
9194+		len = MAX_DEBUG_MSG_LEN-1;
9195+	buf[len] = '\n';
9196+	buf[len+1] = '\0';
9197+
9198+	for (i = 0; i < len + 1; i++) {
9199+ debug_buf[debug_point++] = buf[i];
9200+
9201+ if (debug_point == debug_size) {
9202+ debug_point = 0;
9203+ debug_wrap = 1;
9204+ }
9205+ }
9206+ out:
9207+ spin_unlock(&debug_lock);
9208+}
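+
+/*
+ * Worked example of the ring buffer above: with debug_size = 8, logging
+ * the ten bytes "abcdefghij" leaves debug_buf = "ijcdefgh", debug_point = 2
+ * and debug_wrap = 1.  dlm_debug_dump() below then prints "cdefgh"
+ * followed by "ij", i.e. the oldest surviving bytes first.
+ */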
9209+
9210+void dlm_debug_dump(void)
9211+{
9212+ int i;
9213+
9214+ spin_lock(&debug_lock);
9215+ if (debug_wrap) {
9216+ for (i = debug_point; i < debug_size; i++)
9217+ printk("%c", debug_buf[i]);
9218+ }
9219+ for (i = 0; i < debug_point; i++)
9220+ printk("%c", debug_buf[i]);
9221+ spin_unlock(&debug_lock);
9222+}
9223+
9224+void dlm_debug_setup(int size)
9225+{
9226+ char *b = NULL;
9227+
9228+ if (size > PAGE_SIZE)
9229+ size = PAGE_SIZE;
9230+ if (size)
9231+ b = kmalloc(size, GFP_KERNEL);
9232+
9233+	spin_lock(&debug_lock);
9234+	if (debug_buf)
9235+		kfree(debug_buf);
+	/* clear the old state now so dlm_debug_log() can never see a freed
+	   buffer if we are disabling or the allocation above failed */
+	debug_buf = NULL;
+	debug_size = 0;
9236+	if (!size || !b)
9237+		goto out;
9238+ debug_size = size;
9239+ debug_point = 0;
9240+ debug_wrap = 0;
9241+ debug_buf = b;
9242+ memset(debug_buf, 0, debug_size);
9243+ out:
9244+ spin_unlock(&debug_lock);
9245+}
9246+
9247+static void dlm_debug_init(void)
9248+{
9249+ debug_buf = NULL;
9250+ debug_size = 0;
9251+ debug_point = 0;
9252+ debug_wrap = 0;
9253+ spin_lock_init(&debug_lock);
9254+
9255+ dlm_debug_setup(DLM_DEBUG_SIZE);
9256+}
9257+
9258+#ifdef CONFIG_PROC_FS
9259+int dlm_debug_info(char *b, char **start, off_t offset, int length)
9260+{
9261+ int i, n = 0;
9262+
9263+ spin_lock(&debug_lock);
9264+
9265+ if (debug_wrap) {
9266+ for (i = debug_point; i < debug_size; i++)
9267+ n += sprintf(b + n, "%c", debug_buf[i]);
9268+ }
9269+ for (i = 0; i < debug_point; i++)
9270+ n += sprintf(b + n, "%c", debug_buf[i]);
9271+
9272+ spin_unlock(&debug_lock);
9273+
9274+ return n;
9275+}
9276+
9277+int dlm_rcom_info(char *b, char **start, off_t offset, int length)
9278+{
9279+ struct dlm_ls *ls;
9280+ struct dlm_csb *csb;
4bf12011 9281+ int n = 0;
9282+
9283+ ls = find_lockspace_by_name(proc_ls_name, strlen(proc_ls_name));
9284+ if (!ls)
9285+ return 0;
9286+
9287+ n += sprintf(b + n, "nodeid names_send_count names_send_msgid "
9288+ "names_recv_count names_recv_msgid "
9289+ "locks_send_count locks_send_msgid "
9290+ "locks_recv_count locks_recv_msgid\n");
9291+
10d56c87 9292+ list_for_each_entry(csb, &ls->ls_nodes, list) {
4bf12011 9293+ n += sprintf(b + n, "%u %u %u %u %u %u %u %u %u\n",
9294+ csb->node->nodeid,
9295+ csb->names_send_count,
9296+ csb->names_send_msgid,
9297+ csb->names_recv_count,
9298+ csb->names_recv_msgid,
9299+ csb->locks_send_count,
9300+ csb->locks_send_msgid,
9301+ csb->locks_recv_count,
9302+ csb->locks_recv_msgid);
4bf12011 9303+ }
9304+ return n;
9305+}
9306+#endif
9307+
9308+void dlm_proc_init(void)
9309+{
9310+#ifdef CONFIG_PROC_FS
9311+ debug_proc_entry = create_proc_entry("cluster/dlm_debug", S_IRUGO,
9312+ NULL);
9313+ if (!debug_proc_entry)
9314+ return;
9315+
9316+ debug_proc_entry->get_info = &dlm_debug_info;
9317+
9318+ rcom_proc_entry = create_proc_entry("cluster/dlm_rcom", S_IRUGO, NULL);
9319+ if (!rcom_proc_entry)
9320+ return;
9321+
9322+ rcom_proc_entry->get_info = &dlm_rcom_info;
9323+#endif
9324+ dlm_debug_init();
9325+
9326+#ifdef CONFIG_CLUSTER_DLM_PROCLOCKS
9327+ locks_proc_entry = create_proc_read_entry("cluster/dlm_locks",
9328+ S_IFREG | 0400,
9329+ NULL, NULL, NULL);
9330+ if (!locks_proc_entry)
9331+ return;
9332+ locks_proc_entry->proc_fops = &locks_fops;
9333+#endif
9334+}
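+
+/*
+ * The resulting userland interface, from a shell (the lockspace name
+ * "mylockspace" is only an example):
+ *
+ *	echo mylockspace > /proc/cluster/dlm_locks     select a lockspace
+ *	cat /proc/cluster/dlm_locks                    dump its resources/locks
+ *	cat /proc/cluster/dlm_debug                    debug ring buffer
+ *	cat /proc/cluster/dlm_rcom                     recovery message counts
+ */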
9335+
9336+void dlm_proc_exit(void)
9337+{
9338+#ifdef CONFIG_PROC_FS
9339+	if (debug_proc_entry)
9340+		remove_proc_entry("cluster/dlm_debug", NULL);
+
+	/* free the debug buffer even if the proc entry was never created */
9341+	dlm_debug_setup(0);
9343+
9344+ if (rcom_proc_entry)
9345+ remove_proc_entry("cluster/dlm_rcom", NULL);
9346+#endif
9347+
9348+#ifdef CONFIG_CLUSTER_DLM_PROCLOCKS
9349+ if (locks_proc_entry)
9350+ remove_proc_entry("cluster/dlm_locks", NULL);
9351+#endif
9352+}
9353diff -urN linux-orig/cluster/dlm/queries.c linux-patched/cluster/dlm/queries.c
9354--- linux-orig/cluster/dlm/queries.c 1970-01-01 07:30:00.000000000 +0730
10d56c87 9355+++ linux-patched/cluster/dlm/queries.c 2004-07-13 18:57:22.000000000 +0800
5cdbd17b 9356@@ -0,0 +1,696 @@
4bf12011 9357+/******************************************************************************
9358+*******************************************************************************
9359+**
9360+** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
9361+** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
9362+**
9363+** This copyrighted material is made available to anyone wishing to use,
9364+** modify, copy, or redistribute it subject to the terms and conditions
9365+** of the GNU General Public License v.2.
9366+**
9367+*******************************************************************************
9368+******************************************************************************/
9369+
9370+/*
9371+ * queries.c
9372+ *
9373+ * This file provides the kernel query interface to the DLM.
9374+ *
9375+ */
9376+
9377+#define EXPORT_SYMTAB
9378+#include <linux/module.h>
9379+
9380+#include "dlm_internal.h"
5cdbd17b 9381+#include "lockspace.h"
4bf12011 9382+#include "lockqueue.h"
9383+#include "locking.h"
9384+#include "lkb.h"
9385+#include "nodes.h"
9386+#include "dir.h"
9387+#include "ast.h"
9388+#include "memory.h"
9389+#include "lowcomms.h"
9390+#include "midcomms.h"
9391+#include "rsb.h"
9392+
9393+static int query_resource(struct dlm_rsb *rsb, struct dlm_resinfo *resinfo);
9394+static int query_locks(int query, struct dlm_lkb *lkb, struct dlm_queryinfo *qinfo);
4bf12011 9395+
9396+/*
9397+ * API entry point.
9398+ */
9399+int dlm_query(void *lockspace,
9400+ struct dlm_lksb *lksb,
9401+ int query,
9402+ struct dlm_queryinfo *qinfo,
9403+ void (ast_routine(void *)),
9404+ void *astarg)
9405+{
9406+ int status = -EINVAL;
9407+ struct dlm_lkb *target_lkb;
9408+ struct dlm_lkb *query_lkb = NULL; /* Our temporary LKB */
9409+ struct dlm_ls *ls = (struct dlm_ls *) find_lockspace_by_local_id(lockspace);
4bf12011 9410+
9411+
9412+ if (!qinfo)
9413+ goto out;
9414+ if (!ls)
9415+ goto out;
9416+ if (!ast_routine)
9417+ goto out;
9418+ if (!lksb)
9419+ goto out;
9420+
9421+ if (!qinfo->gqi_lockinfo)
9422+ qinfo->gqi_locksize = 0;
9423+
9424+ /* Find the lkid */
9425+ target_lkb = find_lock_by_id(ls, lksb->sb_lkid);
9426+ if (!target_lkb)
9427+ goto out;
9428+
9429+	/* If the user wants a list of locks that are blocking or
9430+	   not blocking this lock, then the lock must be waiting
9431+	   for something.
9432+	*/
9433+ if (((query & DLM_QUERY_MASK) == DLM_QUERY_LOCKS_BLOCKING ||
9434+ (query & DLM_QUERY_MASK) == DLM_QUERY_LOCKS_NOTBLOCK) &&
9435+ target_lkb->lkb_status == GDLM_LKSTS_GRANTED)
9436+ return -EINVAL;
9437+
9438+ /* We now allocate an LKB for our own use (so we can hang
9439+ * things like the AST routine and the lksb from it) */
9440+ lksb->sb_status = -EBUSY;
9441+ query_lkb = create_lkb(ls);
9442+ if (!query_lkb) {
9443+ status = -ENOMEM;
9444+ goto out;
9445+ }
9446+ query_lkb->lkb_astaddr = ast_routine;
9447+ query_lkb->lkb_astparam = (long)astarg;
9448+ query_lkb->lkb_resource = target_lkb->lkb_resource;
9449+ query_lkb->lkb_lksb = lksb;
9450+
9451+ /* Don't free the resource while we are querying it. This ref
9452+ * will be dropped when the LKB is freed */
9453+ hold_rsb(query_lkb->lkb_resource);
9454+
9455+ /* Fill in the stuff that's always local */
9456+ if (qinfo->gqi_resinfo) {
9457+ if (target_lkb->lkb_resource->res_nodeid)
9458+ qinfo->gqi_resinfo->rsi_masternode =
9459+ target_lkb->lkb_resource->res_nodeid;
9460+ else
9461+ qinfo->gqi_resinfo->rsi_masternode = our_nodeid();
9462+ qinfo->gqi_resinfo->rsi_length =
9463+ target_lkb->lkb_resource->res_length;
9464+ memcpy(qinfo->gqi_resinfo->rsi_name,
9465+ target_lkb->lkb_resource->res_name,
9466+ qinfo->gqi_resinfo->rsi_length);
9467+ }
9468+
9469+ /* If the master is local (or the user doesn't want the overhead of a
9470+ * remote call) - fill in the details here */
9471+ if (target_lkb->lkb_resource->res_nodeid == 0 ||
9472+ (query & DLM_QUERY_LOCAL)) {
9473+
9474+ status = 0;
9475+ /* Resource info */
9476+ if (qinfo->gqi_resinfo) {
9477+ query_resource(target_lkb->lkb_resource,
9478+ qinfo->gqi_resinfo);
9479+ }
9480+
9481+ /* Lock lists */
9482+ if (qinfo->gqi_lockinfo) {
9483+ status = query_locks(query, target_lkb, qinfo);
9484+ }
9485+
9486+ query_lkb->lkb_retstatus = status;
5cdbd17b 9487+ queue_ast(query_lkb, AST_COMP | AST_DEL, 0);
4bf12011 9488+ wake_astd();
9489+
9490+ /* An AST will be delivered so we must return success here */
9491+ status = 0;
9492+ goto out;
9493+ }
9494+
9495+ /* Remote master */
9496+ if (target_lkb->lkb_resource->res_nodeid != 0)
9497+ {
10d56c87 9498+ struct dlm_query_request *remquery;
4bf12011 9499+ struct writequeue_entry *e;
9500+
9501+ /* Clear this cos the receiving end adds to it with
9502+ each incoming packet */
9503+ qinfo->gqi_lockcount = 0;
9504+
9505+ /* Squirrel a pointer to the query info struct
9506+ somewhere illegal */
10d56c87 9507+ query_lkb->lkb_request = (struct dlm_request *) qinfo;
4bf12011 9508+
9509+ e = lowcomms_get_buffer(query_lkb->lkb_resource->res_nodeid,
10d56c87 9510+ sizeof(struct dlm_query_request),
4bf12011 9511+ ls->ls_allocation,
9512+ (char **) &remquery);
9513+ if (!e) {
9514+ status = -ENOBUFS;
9515+ goto out;
9516+ }
9517+
9518+ /* Build remote packet */
10d56c87 9519+ memset(remquery, 0, sizeof(struct dlm_query_request));
4bf12011 9520+
9521+ remquery->rq_maxlocks = qinfo->gqi_locksize;
9522+ remquery->rq_query = query;
9523+ remquery->rq_mstlkid = target_lkb->lkb_remid;
9524+ if (qinfo->gqi_lockinfo)
9525+ remquery->rq_maxlocks = qinfo->gqi_locksize;
9526+
9527+ remquery->rq_header.rh_cmd = GDLM_REMCMD_QUERY;
9528+ remquery->rq_header.rh_flags = 0;
10d56c87 9529+ remquery->rq_header.rh_length = sizeof(struct dlm_query_request);
4bf12011 9530+ remquery->rq_header.rh_lkid = query_lkb->lkb_id;
9531+ remquery->rq_header.rh_lockspace = ls->ls_global_id;
9532+
9533+ midcomms_send_buffer(&remquery->rq_header, e);
9534+ status = 0;
9535+ }
9536+
9537+ out:
9538+
9539+ return status;
9540+}
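+
+/*
+ * A minimal, illustrative calling sketch (the callback name and the
+ * 16-entry array size are arbitrary choices, not part of the API): a
+ * caller that already holds the lock described by "lksb" asks for every
+ * lock on the grant queue of the same resource and learns the result in
+ * its AST routine.
+ *
+ *	static struct dlm_resinfo rinfo;
+ *	static struct dlm_lockinfo linfo[16];
+ *	static struct dlm_queryinfo qinfo = {
+ *		.gqi_resinfo  = &rinfo,
+ *		.gqi_lockinfo = linfo,
+ *		.gqi_locksize = 16,
+ *	};
+ *
+ *	static void query_done(void *arg)
+ *	{
+ *		struct dlm_queryinfo *qi = arg;
+ *		printk("query: %d locks returned\n", qi->gqi_lockcount);
+ *	}
+ *
+ *	error = dlm_query(lockspace, lksb,
+ *			  DLM_QUERY_QUEUE_GRANT | DLM_QUERY_LOCKS_ALL,
+ *			  &qinfo, query_done, &qinfo);
+ */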
9541+
9542+static inline int valid_range(struct dlm_range *r)
9543+{
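+	/* "valid" here means anything other than the default full-span
+	   range, which is treated as "no range set" */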
9544+ if (r->ra_start != 0ULL ||
9545+ r->ra_end != 0xFFFFFFFFFFFFFFFFULL)
9546+ return 1;
9547+ else
9548+ return 0;
9549+}
9550+
9551+static void put_int(int x, char *buf, int *offp)
9552+{
9553+ x = cpu_to_le32(x);
9554+ memcpy(buf + *offp, &x, sizeof(int));
9555+ *offp += sizeof(int);
9556+}
9557+
9558+static void put_int64(uint64_t x, char *buf, int *offp)
9559+{
9560+ x = cpu_to_le64(x);
9561+ memcpy(buf + *offp, &x, sizeof(uint64_t));
9562+ *offp += sizeof(uint64_t);
9563+}
9564+
9565+static int get_int(char *buf, int *offp)
9566+{
9567+ int value;
9568+ memcpy(&value, buf + *offp, sizeof(int));
9569+ *offp += sizeof(int);
9570+ return le32_to_cpu(value);
9571+}
9572+
9573+static uint64_t get_int64(char *buf, int *offp)
9574+{
9575+ uint64_t value;
9576+
9577+ memcpy(&value, buf + *offp, sizeof(uint64_t));
9578+ *offp += sizeof(uint64_t);
9579+ return le64_to_cpu(value);
9580+}
9581+
9582+#define LOCK_LEN (sizeof(int)*4 + sizeof(uint8_t)*4)
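+
+/*
+ * Wire-format note: put_int()/put_int64() always store little-endian, so
+ * put_int(0x11223344, ...) writes the bytes 44 33 22 11 and get_int()
+ * recovers 0x11223344 on any architecture.  Each lock record sent below
+ * is three single bytes (state, grmode, rqmode), four ints (lkid,
+ * mstlkid, parent, node) and a one-byte "ranges follow" flag; LOCK_LEN
+ * covers exactly that fixed part (4 ints + 4 single bytes).  When the
+ * flag is set, four 64-bit range values follow it.
+ */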
9583+
9584+/* Called from recvd to get lock info for a remote node */
10d56c87 9585+int remote_query(int nodeid, struct dlm_ls *ls, struct dlm_header *msg)
4bf12011 9586+{
9587+ struct dlm_query_request *query = (struct dlm_query_request *) msg;
9588+ struct dlm_query_reply *reply;
4bf12011 9589+ struct dlm_resinfo resinfo;
9590+ struct dlm_queryinfo qinfo;
9591+ struct writequeue_entry *e;
9592+ char *buf;
10d56c87 9593+ struct dlm_lkb *lkb;
4bf12011 9594+ int status = 0;
9595+ int bufidx;
9596+ int finished = 0;
9597+ int cur_lock = 0;
9598+ int start_lock = 0;
9599+
9600+ lkb = find_lock_by_id(ls, query->rq_mstlkid);
9601+ if (!lkb) {
9602+ status = -EINVAL;
9603+ goto send_error;
9604+ }
9605+
9606+ qinfo.gqi_resinfo = &resinfo;
9607+ qinfo.gqi_locksize = query->rq_maxlocks;
9608+
9609+ /* Get the resource bits */
9610+ query_resource(lkb->lkb_resource, &resinfo);
9611+
9612+ /* Now get the locks if wanted */
9613+ if (query->rq_maxlocks) {
9614+ qinfo.gqi_lockinfo = kmalloc(sizeof(struct dlm_lockinfo) * query->rq_maxlocks,
9615+ GFP_KERNEL);
9616+ if (!qinfo.gqi_lockinfo) {
9617+ status = -ENOMEM;
9618+ goto send_error;
9619+ }
9620+
9621+ status = query_locks(query->rq_query, lkb, &qinfo);
9622+ if (status && status != -E2BIG) {
9623+ kfree(qinfo.gqi_lockinfo);
9624+ goto send_error;
9625+ }
9626+ }
9627+ else {
9628+ qinfo.gqi_lockinfo = NULL;
9629+ qinfo.gqi_lockcount = 0;
9630+ }
9631+
9632+ /* Send as many blocks as needed for all the locks */
9633+ do {
9634+ int i;
10d56c87 9635+ int msg_len = sizeof(struct dlm_query_reply);
4bf12011 9636+ int last_msg_len = msg_len; /* keeps compiler quiet */
9637+ int last_lock;
9638+
9639+ /* First work out how many locks we can fit into a block */
9640+ for (i=cur_lock; i < qinfo.gqi_lockcount && msg_len < PAGE_SIZE; i++) {
9641+
9642+ last_msg_len = msg_len;
9643+
9644+ msg_len += LOCK_LEN;
9645+ if (valid_range(&qinfo.gqi_lockinfo[i].lki_grrange) ||
9646+ valid_range(&qinfo.gqi_lockinfo[i].lki_rqrange)) {
9647+
9648+ msg_len += sizeof(uint64_t) * 4;
9649+ }
9650+ }
9651+
9652+ /* There must be a neater way of doing this... */
9653+ if (msg_len > PAGE_SIZE) {
9654+ last_lock = i-1;
9655+ msg_len = last_msg_len;
9656+ }
9657+ else {
9658+ last_lock = i;
9659+ }
9660+
9661+ e = lowcomms_get_buffer(nodeid,
9662+ msg_len,
9663+ ls->ls_allocation,
9664+ (char **) &reply);
9665+ if (!e) {
9666+ kfree(qinfo.gqi_lockinfo);
9667+ status = -ENOBUFS;
9668+ goto out;
9669+ }
9670+
9671+ reply->rq_header.rh_cmd = GDLM_REMCMD_QUERYREPLY;
9672+ reply->rq_header.rh_length = msg_len;
9673+ reply->rq_header.rh_lkid = msg->rh_lkid;
9674+ reply->rq_header.rh_lockspace = msg->rh_lockspace;
9675+
9676+ reply->rq_status = status;
9677+ reply->rq_startlock = cur_lock;
9678+ reply->rq_grantcount = qinfo.gqi_resinfo->rsi_grantcount;
9679+ reply->rq_convcount = qinfo.gqi_resinfo->rsi_convcount;
9680+ reply->rq_waitcount = qinfo.gqi_resinfo->rsi_waitcount;
9681+ memcpy(reply->rq_valblk, qinfo.gqi_resinfo->rsi_valblk, DLM_LVB_LEN);
9682+
9683+ buf = (char *)reply;
10d56c87 9684+ bufidx = sizeof(struct dlm_query_reply);
4bf12011 9685+
9686+ for (; cur_lock < last_lock; cur_lock++) {
9687+
9688+ buf[bufidx++] = qinfo.gqi_lockinfo[cur_lock].lki_state;
9689+ buf[bufidx++] = qinfo.gqi_lockinfo[cur_lock].lki_grmode;
9690+ buf[bufidx++] = qinfo.gqi_lockinfo[cur_lock].lki_rqmode;
9691+ put_int(qinfo.gqi_lockinfo[cur_lock].lki_lkid, buf, &bufidx);
9692+ put_int(qinfo.gqi_lockinfo[cur_lock].lki_mstlkid, buf, &bufidx);
9693+ put_int(qinfo.gqi_lockinfo[cur_lock].lki_parent, buf, &bufidx);
9694+ put_int(qinfo.gqi_lockinfo[cur_lock].lki_node, buf, &bufidx);
9695+
9696+ if (valid_range(&qinfo.gqi_lockinfo[cur_lock].lki_grrange) ||
9697+ valid_range(&qinfo.gqi_lockinfo[cur_lock].lki_rqrange)) {
9698+
9699+ buf[bufidx++] = 1;
9700+ put_int64(qinfo.gqi_lockinfo[cur_lock].lki_grrange.ra_start, buf, &bufidx);
9701+ put_int64(qinfo.gqi_lockinfo[cur_lock].lki_grrange.ra_end, buf, &bufidx);
9702+ put_int64(qinfo.gqi_lockinfo[cur_lock].lki_rqrange.ra_start, buf, &bufidx);
9703+ put_int64(qinfo.gqi_lockinfo[cur_lock].lki_rqrange.ra_end, buf, &bufidx);
9704+ }
9705+ else {
9706+ buf[bufidx++] = 0;
9707+ }
9708+ }
9709+
9710+ if (cur_lock == qinfo.gqi_lockcount) {
9711+ reply->rq_header.rh_flags = GDLM_REMFLAG_ENDQUERY;
9712+ finished = 1;
9713+ }
9714+ else {
9715+ reply->rq_header.rh_flags = 0;
9716+ }
9717+
9718+ reply->rq_numlocks = cur_lock - start_lock;
9719+ start_lock = cur_lock;
9720+
9721+ midcomms_send_buffer(&reply->rq_header, e);
9722+ } while (!finished);
9723+
9724+ kfree(qinfo.gqi_lockinfo);
9725+ out:
9726+ return status;
9727+
9728+ send_error:
9729+ e = lowcomms_get_buffer(nodeid,
10d56c87 9730+ sizeof(struct dlm_query_reply),
4bf12011 9731+ ls->ls_allocation,
9732+ (char **) &reply);
9733+ if (!e) {
9734+ status = -ENOBUFS;
9735+ goto out;
9736+ }
9737+ reply->rq_header.rh_cmd = GDLM_REMCMD_QUERYREPLY;
9738+ reply->rq_header.rh_flags = GDLM_REMFLAG_ENDQUERY; /* Don't support multiple blocks yet */
10d56c87 9739+ reply->rq_header.rh_length = sizeof(struct dlm_query_reply);
4bf12011 9740+ reply->rq_header.rh_lkid = msg->rh_lkid;
9741+ reply->rq_header.rh_lockspace = msg->rh_lockspace;
9742+ reply->rq_status = status;
9743+ reply->rq_numlocks = 0;
9744+ reply->rq_startlock = 0;
9745+ reply->rq_grantcount = 0;
9746+ reply->rq_convcount = 0;
9747+ reply->rq_waitcount = 0;
9748+
9749+ midcomms_send_buffer(&reply->rq_header, e);
9750+
9751+ return status;
9752+}
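+
+/*
+ * Chunking example (sizes approximate): with a 4096-byte PAGE_SIZE and
+ * lock records of 20-52 bytes each, a resource carrying 500 locks is
+ * returned in three or four GDLM_REMCMD_QUERYREPLY blocks.  Only the
+ * final block carries GDLM_REMFLAG_ENDQUERY, which is what makes
+ * remote_query_reply() queue the completion AST on the requesting node.
+ */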
9753+
9754+/* Reply to a remote query */
10d56c87 9755+int remote_query_reply(int nodeid, struct dlm_ls *ls, struct dlm_header *msg)
4bf12011 9756+{
10d56c87 9757+ struct dlm_lkb *query_lkb;
4bf12011 9758+ struct dlm_queryinfo *qinfo;
10d56c87 9759+ struct dlm_query_reply *reply;
4bf12011 9760+ char *buf;
9761+ int i;
9762+ int bufidx;
9763+
9764+ query_lkb = find_lock_by_id(ls, msg->rh_lkid);
9765+ if (!query_lkb)
9766+ return -EINVAL;
9767+
9768+ qinfo = (struct dlm_queryinfo *) query_lkb->lkb_request;
10d56c87 9769+ reply = (struct dlm_query_reply *) msg;
4bf12011 9770+
9771+ /* Copy the easy bits first */
9772+ qinfo->gqi_lockcount += reply->rq_numlocks;
9773+ if (qinfo->gqi_resinfo) {
9774+ qinfo->gqi_resinfo->rsi_grantcount = reply->rq_grantcount;
9775+ qinfo->gqi_resinfo->rsi_convcount = reply->rq_convcount;
9776+ qinfo->gqi_resinfo->rsi_waitcount = reply->rq_waitcount;
9777+ memcpy(qinfo->gqi_resinfo->rsi_valblk, reply->rq_valblk,
9778+ DLM_LVB_LEN);
9779+ }
9780+
9781+ /* Now unpack the locks */
10d56c87 9782+ bufidx = sizeof(struct dlm_query_reply);
4bf12011 9783+ buf = (char *) msg;
9784+
10d56c87 9785+ DLM_ASSERT(reply->rq_startlock + reply->rq_numlocks <= qinfo->gqi_locksize,
4bf12011 9786+		   printk("start = %d, num = %d. Max = %d\n",
9787+ reply->rq_startlock, reply->rq_numlocks, qinfo->gqi_locksize););
9788+
9789+ for (i = reply->rq_startlock;
9790+ i < reply->rq_startlock + reply->rq_numlocks; i++) {
9791+ qinfo->gqi_lockinfo[i].lki_state = buf[bufidx++];
9792+ qinfo->gqi_lockinfo[i].lki_grmode = buf[bufidx++];
9793+ qinfo->gqi_lockinfo[i].lki_rqmode = buf[bufidx++];
9794+ qinfo->gqi_lockinfo[i].lki_lkid = get_int(buf, &bufidx);
9795+ qinfo->gqi_lockinfo[i].lki_mstlkid = get_int(buf, &bufidx);
9796+ qinfo->gqi_lockinfo[i].lki_parent = get_int(buf, &bufidx);
9797+ qinfo->gqi_lockinfo[i].lki_node = get_int(buf, &bufidx);
9798+ if (buf[bufidx++]) {
9799+ qinfo->gqi_lockinfo[i].lki_grrange.ra_start = get_int64(buf, &bufidx);
9800+ qinfo->gqi_lockinfo[i].lki_grrange.ra_end = get_int64(buf, &bufidx);
9801+ qinfo->gqi_lockinfo[i].lki_rqrange.ra_start = get_int64(buf, &bufidx);
9802+ qinfo->gqi_lockinfo[i].lki_rqrange.ra_end = get_int64(buf, &bufidx);
9803+ }
9804+ else {
9805+ qinfo->gqi_lockinfo[i].lki_grrange.ra_start = 0ULL;
9806+ qinfo->gqi_lockinfo[i].lki_grrange.ra_end = 0xFFFFFFFFFFFFFFFFULL;
9807+ qinfo->gqi_lockinfo[i].lki_rqrange.ra_start = 0ULL;
9808+ qinfo->gqi_lockinfo[i].lki_rqrange.ra_end = 0xFFFFFFFFFFFFFFFFULL;
9809+ }
9810+ }
9811+
9812+ /* If this was the last block then now tell the user */
9813+ if (msg->rh_flags & GDLM_REMFLAG_ENDQUERY) {
9814+ query_lkb->lkb_retstatus = reply->rq_status;
5cdbd17b 9815+ queue_ast(query_lkb, AST_COMP | AST_DEL, 0);
4bf12011 9816+ wake_astd();
9817+ }
9818+
9819+ return 0;
9820+}
9821+
9822+/* Aggregate resource information */
10d56c87 9823+static int query_resource(struct dlm_rsb *rsb, struct dlm_resinfo *resinfo)
4bf12011 9824+{
9825+ struct list_head *tmp;
9826+
9827+
9828+ if (rsb->res_lvbptr)
9829+ memcpy(resinfo->rsi_valblk, rsb->res_lvbptr, DLM_LVB_LEN);
9830+
9831+ resinfo->rsi_grantcount = 0;
9832+ list_for_each(tmp, &rsb->res_grantqueue) {
9833+ resinfo->rsi_grantcount++;
9834+ }
9835+
9836+ resinfo->rsi_waitcount = 0;
9837+ list_for_each(tmp, &rsb->res_waitqueue) {
9838+ resinfo->rsi_waitcount++;
9839+ }
9840+
9841+ resinfo->rsi_convcount = 0;
9842+ list_for_each(tmp, &rsb->res_convertqueue) {
9843+ resinfo->rsi_convcount++;
9844+ }
9845+
9846+ return 0;
9847+}
9848+
10d56c87 9849+static int add_lock(struct dlm_lkb *lkb, struct dlm_queryinfo *qinfo)
4bf12011 9850+{
9851+ int entry;
9852+
9853+ /* Don't fill it in if the buffer is full */
9854+ if (qinfo->gqi_lockcount == qinfo->gqi_locksize)
9855+ return -E2BIG;
9856+
9857+ /* gqi_lockcount contains the number of locks we have returned */
9858+ entry = qinfo->gqi_lockcount++;
9859+
9860+ /* Fun with master copies */
9861+ if (lkb->lkb_flags & GDLM_LKFLG_MSTCPY) {
9862+ qinfo->gqi_lockinfo[entry].lki_lkid = lkb->lkb_remid;
9863+ qinfo->gqi_lockinfo[entry].lki_mstlkid = lkb->lkb_id;
9864+ }
9865+ else {
9866+ qinfo->gqi_lockinfo[entry].lki_lkid = lkb->lkb_id;
9867+ qinfo->gqi_lockinfo[entry].lki_mstlkid = lkb->lkb_remid;
9868+ }
9869+
9870+	/* Also make sure we always have a valid nodeid in there; the
9871+	   calling end may not know which node "0" is */
9872+ if (lkb->lkb_nodeid)
9873+ qinfo->gqi_lockinfo[entry].lki_node = lkb->lkb_nodeid;
9874+ else
9875+ qinfo->gqi_lockinfo[entry].lki_node = our_nodeid();
9876+
9877+ if (lkb->lkb_parent)
9878+ qinfo->gqi_lockinfo[entry].lki_parent = lkb->lkb_parent->lkb_id;
9879+ else
9880+ qinfo->gqi_lockinfo[entry].lki_parent = 0;
9881+
9882+ qinfo->gqi_lockinfo[entry].lki_state = lkb->lkb_status;
9883+ qinfo->gqi_lockinfo[entry].lki_rqmode = lkb->lkb_rqmode;
9884+ qinfo->gqi_lockinfo[entry].lki_grmode = lkb->lkb_grmode;
9885+
9886+ if (lkb->lkb_range) {
9887+ qinfo->gqi_lockinfo[entry].lki_grrange.ra_start =
9888+ lkb->lkb_range[GR_RANGE_START];
9889+ qinfo->gqi_lockinfo[entry].lki_grrange.ra_end =
9890+ lkb->lkb_range[GR_RANGE_END];
9891+ qinfo->gqi_lockinfo[entry].lki_rqrange.ra_start =
9892+ lkb->lkb_range[RQ_RANGE_START];
9893+ qinfo->gqi_lockinfo[entry].lki_rqrange.ra_end =
9894+ lkb->lkb_range[RQ_RANGE_END];
9895+ } else {
9896+ qinfo->gqi_lockinfo[entry].lki_grrange.ra_start = 0ULL;
9897+		qinfo->gqi_lockinfo[entry].lki_grrange.ra_start = 0ULL;
9898+		qinfo->gqi_lockinfo[entry].lki_grrange.ra_end = 0xffffffffffffffffULL;
9899+		qinfo->gqi_lockinfo[entry].lki_rqrange.ra_start = 0ULL;
9900+		qinfo->gqi_lockinfo[entry].lki_rqrange.ra_end = 0xffffffffffffffffULL;
9901+ return 0;
9902+}
9903+
9904+static int query_lkb_queue(struct list_head *queue, int query,
9905+ struct dlm_queryinfo *qinfo)
9906+{
9907+ struct list_head *tmp;
9908+ int status = 0;
9909+ int mode = query & DLM_QUERY_MODE_MASK;
9910+
9911+ list_for_each(tmp, queue) {
10d56c87 9912+ struct dlm_lkb *lkb = list_entry(tmp, struct dlm_lkb, lkb_statequeue);
4bf12011 9913+ int lkmode;
9914+
9915+ if (query & DLM_QUERY_RQMODE)
9916+ lkmode = lkb->lkb_rqmode;
9917+ else
9918+ lkmode = lkb->lkb_grmode;
9919+
9920+ /* Add the LKB info to the list if it matches the criteria in
9921+ * the query bitmap */
9922+ switch (query & DLM_QUERY_MASK) {
9923+ case DLM_QUERY_LOCKS_ALL:
9924+ status = add_lock(lkb, qinfo);
9925+ break;
9926+
9927+ case DLM_QUERY_LOCKS_HIGHER:
9928+ if (lkmode > mode)
9929+ status = add_lock(lkb, qinfo);
9930+ break;
9931+
9932+ case DLM_QUERY_LOCKS_EQUAL:
9933+ if (lkmode == mode)
9934+ status = add_lock(lkb, qinfo);
9935+ break;
9936+
9937+ case DLM_QUERY_LOCKS_LOWER:
9938+ if (lkmode < mode)
9939+ status = add_lock(lkb, qinfo);
9940+ break;
9941+ }
9942+ }
9943+ return status;
9944+}
9945+
9946+/*
9947+ * Return 1 if the locks' ranges overlap
9948+ * If the lkb has no range then it is assumed to cover 0-ffffffff.ffffffff
9949+ */
10d56c87 9950+static inline int ranges_overlap(struct dlm_lkb *lkb1, struct dlm_lkb *lkb2)
4bf12011 9951+{
9952+ if (!lkb1->lkb_range || !lkb2->lkb_range)
9953+ return 1;
9954+
9955+ if (lkb1->lkb_range[RQ_RANGE_END] <= lkb2->lkb_range[GR_RANGE_START] ||
9956+ lkb1->lkb_range[RQ_RANGE_START] >= lkb2->lkb_range[GR_RANGE_END])
9957+ return 0;
9958+
9959+ return 1;
9960+}
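+
+/*
+ * Example: [0x000-0x0ff] and [0x100-0x1ff] do not overlap (a range that
+ * ends where another starts is not counted as overlapping), while
+ * [0x080-0x17f] overlaps both.  A lock with no range at all covers the
+ * whole 64-bit space and therefore overlaps everything.
+ */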
9961+extern const int __dlm_compat_matrix[8][8];
9962+
9963+
10d56c87 9964+static int get_blocking_locks(struct dlm_lkb *qlkb, struct dlm_queryinfo *qinfo)
4bf12011 9965+{
9966+ struct list_head *tmp;
9967+ int status = 0;
9968+
9969+ list_for_each(tmp, &qlkb->lkb_resource->res_grantqueue) {
10d56c87 9970+ struct dlm_lkb *lkb = list_entry(tmp, struct dlm_lkb, lkb_statequeue);
4bf12011 9971+
9972+ if (ranges_overlap(lkb, qlkb) &&
9973+ !__dlm_compat_matrix[lkb->lkb_grmode + 1][qlkb->lkb_rqmode + 1])
9974+ status = add_lock(lkb, qinfo);
9975+ }
9976+
9977+ return status;
9978+}
9979+
10d56c87 9980+static int get_nonblocking_locks(struct dlm_lkb *qlkb, struct dlm_queryinfo *qinfo)
4bf12011 9981+{
9982+ struct list_head *tmp;
9983+ int status = 0;
9984+
9985+ list_for_each(tmp, &qlkb->lkb_resource->res_grantqueue) {
10d56c87 9986+ struct dlm_lkb *lkb = list_entry(tmp, struct dlm_lkb, lkb_statequeue);
4bf12011 9987+
9988+ if (!(ranges_overlap(lkb, qlkb) &&
9989+ !__dlm_compat_matrix[lkb->lkb_grmode + 1][qlkb->lkb_rqmode + 1]))
9990+ status = add_lock(lkb, qinfo);
9991+ }
9992+
9993+ return status;
9994+}
9995+
9996+/* Gather a list of appropriate locks */
10d56c87 9997+static int query_locks(int query, struct dlm_lkb *lkb, struct dlm_queryinfo *qinfo)
4bf12011 9998+{
9999+ int status = 0;
10000+
10001+
10002+	/* Mask in the actual granted/requested mode of the lock if LOCK_THIS
10003+ * was requested as the mode
10004+ */
10005+ if ((query & DLM_QUERY_MODE_MASK) == DLM_LOCK_THIS) {
10006+ query &= ~DLM_QUERY_MODE_MASK;
10007+ if (query & DLM_QUERY_RQMODE)
10008+ query |= lkb->lkb_rqmode;
10009+ else
10010+ query |= lkb->lkb_grmode;
10011+ }
10012+
10013+ qinfo->gqi_lockcount = 0;
10014+
10015+ /* BLOCKING/NOTBLOCK only look at the granted queue */
10016+ if ((query & DLM_QUERY_MASK) == DLM_QUERY_LOCKS_BLOCKING)
10017+ return get_blocking_locks(lkb, qinfo);
10018+
10019+ if ((query & DLM_QUERY_MASK) == DLM_QUERY_LOCKS_NOTBLOCK)
10020+ return get_nonblocking_locks(lkb, qinfo);
10021+
10022+ /* Do the lock queues that were requested */
10023+ if (query & DLM_QUERY_QUEUE_GRANT) {
10024+ status = query_lkb_queue(&lkb->lkb_resource->res_grantqueue,
10025+ query, qinfo);
10026+ }
10027+
10028+ if (!status && (query & DLM_QUERY_QUEUE_CONVERT)) {
10029+ status = query_lkb_queue(&lkb->lkb_resource->res_convertqueue,
10030+ query, qinfo);
10031+ }
10032+
10033+ if (!status && (query & DLM_QUERY_QUEUE_WAIT)) {
10034+ status = query_lkb_queue(&lkb->lkb_resource->res_waitqueue,
10035+ query, qinfo);
10036+ }
10037+
10038+
10039+ return status;
10040+}
10041+
10042+EXPORT_SYMBOL(dlm_query);
10043+/*
10044+ * Overrides for Emacs so that we follow Linus's tabbing style.
10045+ * Emacs will notice this stuff at the end of the file and automatically
10046+ * adjust the settings for this buffer only. This must remain at the end
10047+ * of the file.
10048+ * ---------------------------------------------------------------------------
10049+ * Local variables:
10050+ * c-file-style: "linux"
10051+ * End:
10052+ */
10053diff -urN linux-orig/cluster/dlm/queries.h linux-patched/cluster/dlm/queries.h
10054--- linux-orig/cluster/dlm/queries.h 1970-01-01 07:30:00.000000000 +0730
10d56c87 10055+++ linux-patched/cluster/dlm/queries.h 2004-07-13 18:57:22.000000000 +0800
4bf12011 10056@@ -0,0 +1,20 @@
10057+/******************************************************************************
10058+*******************************************************************************
10059+**
10060+** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
10061+** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
10062+**
10063+** This copyrighted material is made available to anyone wishing to use,
10064+** modify, copy, or redistribute it subject to the terms and conditions
10065+** of the GNU General Public License v.2.
10066+**
10067+*******************************************************************************
10068+******************************************************************************/
10069+
10070+#ifndef __QUERIES_DOT_H__
10071+#define __QUERIES_DOT_H__
10072+
10073+extern int remote_query(int nodeid, struct dlm_ls *ls, struct dlm_header *msg);
10074+extern int remote_query_reply(int nodeid, struct dlm_ls *ls, struct dlm_header *msg);
4bf12011 10075+
10076+#endif /* __QUERIES_DOT_H__ */
10077diff -urN linux-orig/cluster/dlm/rebuild.c linux-patched/cluster/dlm/rebuild.c
10078--- linux-orig/cluster/dlm/rebuild.c 1970-01-01 07:30:00.000000000 +0730
10079+++ linux-patched/cluster/dlm/rebuild.c 2004-07-13 18:57:22.000000000 +0800
10080@@ -0,0 +1,1254 @@
4bf12011 10081+/******************************************************************************
10082+*******************************************************************************
10083+**
10084+** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
10085+** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
10086+**
10087+** This copyrighted material is made available to anyone wishing to use,
10088+** modify, copy, or redistribute it subject to the terms and conditions
10089+** of the GNU General Public License v.2.
10090+**
10091+*******************************************************************************
10092+******************************************************************************/
10093+
10094+/*
10095+ * Rebuild RSB's on new masters. Functions for transferring locks and
10096+ * subresources to new RSB masters during recovery.
10097+ */
10098+
10099+#include "dlm_internal.h"
10100+#include "reccomms.h"
10101+#include "lkb.h"
10102+#include "rsb.h"
10103+#include "nodes.h"
10104+#include "config.h"
10105+#include "memory.h"
10106+#include "recover.h"
10107+
10108+
10109+/* Types of entity serialised in remastering messages */
10110+#define REMASTER_ROOTRSB 1
10111+#define REMASTER_RSB 2
10112+#define REMASTER_LKB 3
10113+
10114+struct rcom_fill {
10115+ char * outbuf; /* Beginning of data */
10116+ int offset; /* Current offset into outbuf */
10117+ int maxlen; /* Max value of offset */
10118+ int remasterid;
10119+ int count;
10120+ struct dlm_rsb * rsb;
10121+ struct dlm_rsb * subrsb;
10122+ struct dlm_lkb * lkb;
4bf12011 10123+ struct list_head * lkbqueue;
10124+ char more;
10125+};
10126+typedef struct rcom_fill rcom_fill_t;
10127+
10128+
10129+struct rebuild_node {
10130+ struct list_head list;
10131+ int nodeid;
10d56c87 10132+ struct dlm_rsb * rootrsb;
4bf12011 10133+};
10134+typedef struct rebuild_node rebuild_node_t;
10135+
10136+
10137+/*
10138+ * Root rsb passed in for which all lkb's (own and subrsbs) will be sent to new
10139+ * master. The rsb will be "done" with recovery when the new master has
10140+ * replied with all the new remote lockid's for this rsb's lkb's.
10141+ */
10142+
10d56c87 10143+void expect_new_lkids(struct dlm_rsb *rsb)
4bf12011 10144+{
10145+ rsb->res_newlkid_expect = 0;
10146+ recover_list_add(rsb);
10147+}
10148+
10149+/*
10150+ * This function is called on root rsb or subrsb when another lkb is being sent
10151+ * to the new master for which we expect to receive a corresponding remote lkid
10152+ */
10153+
10d56c87 10154+void need_new_lkid(struct dlm_rsb *rsb)
4bf12011 10155+{
10d56c87 10156+ struct dlm_rsb *root = rsb;
4bf12011 10157+
10158+ if (rsb->res_parent)
10159+ root = rsb->res_root;
10160+
10161+ if (!root->res_newlkid_expect)
10162+ recover_list_add(root);
10163+ else
10d56c87 10164+ DLM_ASSERT(test_bit(RESFL_RECOVER_LIST, &root->res_flags),);
4bf12011 10165+
10166+ root->res_newlkid_expect++;
10167+}
10168+
10169+/*
10170+ * This function is called for each lkb for which a new remote lkid is
10171+ * received. Decrement the expected number of remote lkids expected for the
10172+ * root rsb.
10173+ */
10174+
10d56c87 10175+void have_new_lkid(struct dlm_lkb *lkb)
4bf12011 10176+{
10d56c87 10177+ struct dlm_rsb *root = lkb->lkb_resource;
4bf12011 10178+
10179+ if (root->res_parent)
10180+ root = root->res_root;
10181+
10182+ down_write(&root->res_lock);
10183+
10184+ DLM_ASSERT(root->res_newlkid_expect,
10185+ printk("newlkid_expect=%d\n", root->res_newlkid_expect););
4bf12011 10186+
10187+ root->res_newlkid_expect--;
10188+
10189+ if (!root->res_newlkid_expect) {
10190+ clear_bit(RESFL_NEW_MASTER, &root->res_flags);
10191+ recover_list_del(root);
10192+ }
10193+ up_write(&root->res_lock);
10194+}
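+
+/*
+ * Example of the counting protocol above: packing three lkb's for one
+ * root rsb calls need_new_lkid() three times, leaving res_newlkid_expect
+ * at 3 with the rsb on the recover list.  Each remote lkid that comes
+ * back is counted off by have_new_lkid(); the third one clears
+ * RESFL_NEW_MASTER and takes the rsb off the recover list, making it
+ * "done" with this phase of recovery.
+ */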
10195+
10196+/*
10197+ * Return the rebuild struct for a node - will create an entry on the rootrsb
10198+ * list if necessary.
10199+ *
10d56c87 10200+ * Currently no locking is needed here as it all happens in the dlm_recvd
4bf12011 10201+ * thread
10202+ */
10203+
10d56c87 10204+static rebuild_node_t *find_rebuild_root(struct dlm_ls *ls, int nodeid)
4bf12011 10205+{
10206+ rebuild_node_t *node = NULL;
10207+
10208+ list_for_each_entry(node, &ls->ls_rebuild_rootrsb_list, list) {
10209+ if (node->nodeid == nodeid)
10210+ return node;
10211+ }
10212+
10213+ /* Not found, add one */
10214+ node = kmalloc(sizeof(rebuild_node_t), GFP_KERNEL);
10215+ if (!node)
10216+ return NULL;
10217+
10218+ node->nodeid = nodeid;
10219+ node->rootrsb = NULL;
10220+ list_add(&node->list, &ls->ls_rebuild_rootrsb_list);
10221+
10222+ return node;
10223+}
10224+
10225+/*
10226+ * Tidy up after a rebuild run. Called when all recovery has finished
10227+ */
10228+
10d56c87 10229+void rebuild_freemem(struct dlm_ls *ls)
4bf12011 10230+{
10231+ rebuild_node_t *node = NULL, *s;
10232+
10233+ list_for_each_entry_safe(node, s, &ls->ls_rebuild_rootrsb_list, list) {
10234+ list_del(&node->list);
10235+ kfree(node);
10236+ }
10237+}
10238+
10239+static void put_int(int x, char *buf, int *offp)
10240+{
10241+ x = cpu_to_le32(x);
10242+ memcpy(buf + *offp, &x, sizeof(int));
10243+ *offp += sizeof(int);
10244+}
10245+
10246+static void put_int64(uint64_t x, char *buf, int *offp)
10247+{
10248+ x = cpu_to_le64(x);
10249+ memcpy(buf + *offp, &x, sizeof(uint64_t));
10250+ *offp += sizeof(uint64_t);
10251+}
10252+
10253+static void put_bytes(char *x, int len, char *buf, int *offp)
10254+{
10255+ put_int(len, buf, offp);
10256+ memcpy(buf + *offp, x, len);
10257+ *offp += len;
10258+}
10259+
10260+static void put_char(char x, char *buf, int *offp)
10261+{
10262+ buf[*offp] = x;
10263+ *offp += 1;
10264+}
10265+
10266+static int get_int(char *buf, int *offp)
10267+{
10268+ int value;
10269+ memcpy(&value, buf + *offp, sizeof(int));
10270+ *offp += sizeof(int);
10271+ return le32_to_cpu(value);
10272+}
10273+
10274+static uint64_t get_int64(char *buf, int *offp)
10275+{
10276+ uint64_t value;
10277+
10278+ memcpy(&value, buf + *offp, sizeof(uint64_t));
10279+ *offp += sizeof(uint64_t);
10280+ return le64_to_cpu(value);
10281+}
10282+
10283+static char get_char(char *buf, int *offp)
10284+{
10285+ char x = buf[*offp];
10286+
10287+ *offp += 1;
10288+ return x;
10289+}
10290+
10291+static void get_bytes(char *bytes, int *len, char *buf, int *offp)
10292+{
10293+ *len = get_int(buf, offp);
10294+ memcpy(bytes, buf + *offp, *len);
10295+ *offp += *len;
10296+}
10297+
10d56c87 10298+static int lkb_length(struct dlm_lkb *lkb)
4bf12011 10299+{
10300+ int len = 0;
10301+
10302+ len += sizeof(int); /* lkb_id */
10303+	len += sizeof(int);	/* lkb_resource->res_remasterid */
10304+ len += sizeof(int); /* lkb_flags */
10305+ len += sizeof(int); /* lkb_status */
10306+ len += sizeof(char); /* lkb_rqmode */
10307+ len += sizeof(char); /* lkb_grmode */
10308+ len += sizeof(int); /* lkb_childcnt */
10309+ len += sizeof(int); /* lkb_parent->lkb_id */
10310+ len += sizeof(int); /* lkb_bastaddr */
10311+
10312+ if (lkb->lkb_flags & GDLM_LKFLG_VALBLK) {
10313+ len += sizeof(int); /* number of lvb bytes */
10314+ len += DLM_LVB_LEN;
10315+ }
10316+
10317+ if (lkb->lkb_range) {
10318+ len += sizeof(uint64_t);
10319+ len += sizeof(uint64_t);
10320+ if (lkb->lkb_status == GDLM_LKSTS_CONVERT) {
10321+ len += sizeof(uint64_t);
10322+ len += sizeof(uint64_t);
10323+ }
10324+ }
10325+
10326+ return len;
10327+}
10328+
10329+/*
10330+ * It's up to the caller to be sure there's enough space in the buffer.
10331+ */
10332+
10d56c87 10333+static void serialise_lkb(struct dlm_lkb *lkb, char *buf, int *offp)
4bf12011 10334+{
10335+ int flags;
10336+
10337+ /* Need to tell the remote end if we have a range */
10338+ flags = lkb->lkb_flags;
10339+ if (lkb->lkb_range)
10340+ flags |= GDLM_LKFLG_RANGE;
10341+
10342+ /*
10343+ * See lkb_length()
10344+ * Total: 30 (no lvb) or 66 (with lvb) bytes
10345+ */
10346+
10347+ put_int(lkb->lkb_id, buf, offp);
10348+ put_int(lkb->lkb_resource->res_remasterid, buf, offp);
10349+ put_int(flags, buf, offp);
10350+ put_int(lkb->lkb_status, buf, offp);
10351+ put_char(lkb->lkb_rqmode, buf, offp);
10352+ put_char(lkb->lkb_grmode, buf, offp);
10353+ put_int(atomic_read(&lkb->lkb_childcnt), buf, offp);
10354+
10355+ if (lkb->lkb_parent)
10356+ put_int(lkb->lkb_parent->lkb_id, buf, offp);
10357+ else
10358+ put_int(0, buf, offp);
10359+
10360+ if (lkb->lkb_bastaddr)
10361+ put_int(1, buf, offp);
10362+ else
10363+ put_int(0, buf, offp);
10364+
10365+ if (lkb->lkb_flags & GDLM_LKFLG_VALBLK) {
10d56c87 10366+ DLM_ASSERT(lkb->lkb_lvbptr,);
4bf12011 10367+ put_bytes(lkb->lkb_lvbptr, DLM_LVB_LEN, buf, offp);
10368+ }
10369+
10370+ /* Only send the range we actually need */
10371+ if (lkb->lkb_range) {
10372+ switch (lkb->lkb_status) {
10373+ case GDLM_LKSTS_CONVERT:
10374+ put_int64(lkb->lkb_range[RQ_RANGE_START], buf, offp);
10375+ put_int64(lkb->lkb_range[RQ_RANGE_END], buf, offp);
10376+ put_int64(lkb->lkb_range[GR_RANGE_START], buf, offp);
10377+ put_int64(lkb->lkb_range[GR_RANGE_END], buf, offp);
10378+ break;
10379+ case GDLM_LKSTS_WAITING:
10380+ put_int64(lkb->lkb_range[RQ_RANGE_START], buf, offp);
10381+ put_int64(lkb->lkb_range[RQ_RANGE_END], buf, offp);
10382+ break;
10383+ case GDLM_LKSTS_GRANTED:
10384+ put_int64(lkb->lkb_range[GR_RANGE_START], buf, offp);
10385+ put_int64(lkb->lkb_range[GR_RANGE_END], buf, offp);
10386+ break;
10387+ default:
10d56c87 10388+ DLM_ASSERT(0,);
4bf12011 10389+ }
10390+ }
10391+}
10392+
10d56c87 10393+static int rsb_length(struct dlm_rsb *rsb)
4bf12011 10394+{
10395+ int len = 0;
10396+
10397+ len += sizeof(int); /* number of res_name bytes */
10398+ len += rsb->res_length; /* res_name */
10399+ len += sizeof(int); /* res_remasterid */
10400+ len += sizeof(int); /* res_parent->res_remasterid */
10401+
10402+ return len;
10403+}
10404+
10d56c87 10405+static inline struct dlm_rsb *next_subrsb(struct dlm_rsb *subrsb)
4bf12011 10406+{
10407+ struct list_head *tmp;
10d56c87 10408+ struct dlm_rsb *r;
4bf12011 10409+
10410+ tmp = subrsb->res_subreslist.next;
10d56c87 10411+ r = list_entry(tmp, struct dlm_rsb, res_subreslist);
4bf12011 10412+
10413+ return r;
10414+}
10415+
10d56c87 10416+static inline int last_in_list(struct dlm_rsb *r, struct list_head *head)
4bf12011 10417+{
10418+ struct dlm_rsb *last;
10419+ last = list_entry(head->prev, struct dlm_rsb, res_subreslist);
4bf12011 10420+ if (last == r)
10421+ return 1;
10422+ return 0;
10423+}
10424+
10425+/*
10426+ * Used to decide if an rsb should be rebuilt on a new master. An rsb only
10427+ * needs to be rebuilt if we have lkb's queued on it. NOREBUILD lkb's on the
10428+ * wait queue are not rebuilt.
10429+ */
10430+
10d56c87 10431+static int lkbs_to_remaster(struct dlm_rsb *r)
4bf12011 10432+{
10433+ struct dlm_lkb *lkb;
10434+ struct dlm_rsb *sub;
4bf12011 10435+
10436+ if (!list_empty(&r->res_grantqueue) ||
10437+ !list_empty(&r->res_convertqueue))
10438+ return TRUE;
10439+
10440+ list_for_each_entry(lkb, &r->res_waitqueue, lkb_statequeue) {
10441+ if (lkb->lkb_flags & GDLM_LKFLG_NOREBUILD)
10442+ continue;
10443+ return TRUE;
10444+ }
10445+
10446+ list_for_each_entry(sub, &r->res_subreslist, res_subreslist) {
10447+ if (!list_empty(&sub->res_grantqueue) ||
10448+ !list_empty(&sub->res_convertqueue))
10449+ return TRUE;
10450+
10451+ list_for_each_entry(lkb, &sub->res_waitqueue, lkb_statequeue) {
10452+ if (lkb->lkb_flags & GDLM_LKFLG_NOREBUILD)
10453+ continue;
10454+ return TRUE;
10455+ }
10456+ }
10457+
10458+ return FALSE;
10459+}
10460+
10d56c87 10461+static void serialise_rsb(struct dlm_rsb *rsb, char *buf, int *offp)
4bf12011 10462+{
10463+ /*
10464+ * See rsb_length()
10465+ * Total: 36 bytes (4 + 24 + 4 + 4)
10466+ */
10467+
10468+ put_bytes(rsb->res_name, rsb->res_length, buf, offp);
10469+ put_int(rsb->res_remasterid, buf, offp);
10470+
10471+ if (rsb->res_parent)
10472+ put_int(rsb->res_parent->res_remasterid, buf, offp);
10473+ else
10474+ put_int(0, buf, offp);
10475+
10d56c87 10476+ DLM_ASSERT(!rsb->res_lvbptr,);
4bf12011 10477+}
10478+
10479+/*
10480+ * Flatten an LKB into a buffer for sending to the new RSB master. As a
10481+ * side-effect the nodeid of the lock is set to the nodeid of the new RSB
10482+ * master.
10483+ */
10484+
10485+static int pack_one_lkb(struct dlm_rsb *r, struct dlm_lkb *lkb,
10486+ rcom_fill_t *fill)
4bf12011 10487+{
10488+ if (fill->offset + 1 + lkb_length(lkb) > fill->maxlen)
10489+ goto nospace;
10490+
10491+ lkb->lkb_nodeid = r->res_nodeid;
10492+
10493+ put_char(REMASTER_LKB, fill->outbuf, &fill->offset);
10494+ serialise_lkb(lkb, fill->outbuf, &fill->offset);
10495+
10496+ fill->count++;
10497+ need_new_lkid(r);
10498+ return 0;
10499+
10500+ nospace:
10501+ return -ENOSPC;
10502+}
10503+
10504+/*
10505+ * Pack all LKB's from a given queue, except for those with the NOREBUILD flag.
10506+ */
10507+
10d56c87 10508+static int pack_lkb_queue(struct dlm_rsb *r, struct list_head *queue,
4bf12011 10509+ rcom_fill_t *fill)
10510+{
10d56c87 10511+ struct dlm_lkb *lkb;
4bf12011 10512+ int error;
10513+
10514+ list_for_each_entry(lkb, queue, lkb_statequeue) {
10515+ if (lkb->lkb_flags & GDLM_LKFLG_NOREBUILD)
10516+ continue;
10517+
10518+ error = pack_one_lkb(r, lkb, fill);
10519+ if (error)
10520+ goto nospace;
10521+ }
10522+
10523+ return 0;
10524+
10525+ nospace:
10526+ fill->lkb = lkb;
10527+ fill->lkbqueue = queue;
10528+
10529+ return error;
10530+}
10531+
10d56c87 10532+static int pack_lkb_queues(struct dlm_rsb *r, rcom_fill_t *fill)
4bf12011 10533+{
10534+ int error;
10535+
10536+ error = pack_lkb_queue(r, &r->res_grantqueue, fill);
10537+ if (error)
10538+ goto nospace;
10539+
10540+ error = pack_lkb_queue(r, &r->res_convertqueue, fill);
10541+ if (error)
10542+ goto nospace;
10543+
10544+ error = pack_lkb_queue(r, &r->res_waitqueue, fill);
10545+
10546+ nospace:
10547+ return error;
10548+}
10549+
10550+/*
10551+ * Pack remaining lkb's for rsb or subrsb. This may include a partial lkb
10552+ * queue and full lkb queues.
10553+ */
10554+
10d56c87 10555+static int pack_lkb_remaining(struct dlm_rsb *r, rcom_fill_t *fill)
4bf12011 10556+{
10557+ struct list_head *tmp, *start, *end;
10d56c87 10558+ struct dlm_lkb *lkb;
4bf12011 10559+ int error;
10560+
10561+ /*
10562+ * Beginning with fill->lkb, pack remaining lkb's on fill->lkbqueue.
10563+ */
10564+
10565+ error = pack_one_lkb(r, fill->lkb, fill);
10566+ if (error)
10567+ goto out;
10568+
10569+ start = fill->lkb->lkb_statequeue.next;
10570+ end = fill->lkbqueue;
10571+
10572+ for (tmp = start; tmp != end; tmp = tmp->next) {
10d56c87 10573+ lkb = list_entry(tmp, struct dlm_lkb, lkb_statequeue);
4bf12011 10574+
10575+ error = pack_one_lkb(r, lkb, fill);
10576+ if (error) {
10577+ fill->lkb = lkb;
10578+ goto out;
10579+ }
10580+ }
10581+
10582+ /*
10583+ * Pack all lkb's on r's queues following fill->lkbqueue.
10584+ */
10585+
10586+ if (fill->lkbqueue == &r->res_waitqueue)
10587+ goto out;
10588+ if (fill->lkbqueue == &r->res_convertqueue)
10589+ goto skip;
10590+
10d56c87 10591+ DLM_ASSERT(fill->lkbqueue == &r->res_grantqueue,);
4bf12011 10592+
10593+ error = pack_lkb_queue(r, &r->res_convertqueue, fill);
10594+ if (error)
10595+ goto out;
10596+ skip:
10597+ error = pack_lkb_queue(r, &r->res_waitqueue, fill);
10598+
10599+ out:
10600+ return error;
10601+}
10602+
10602+
10603+static int pack_one_subrsb(struct dlm_rsb *rsb, struct dlm_rsb *subrsb,
10604+ rcom_fill_t *fill)
4bf12011 10605+{
10606+ int error;
10607+
10608+ down_write(&subrsb->res_lock);
10609+
10610+ if (fill->offset + 1 + rsb_length(subrsb) > fill->maxlen)
10611+ goto nospace;
10612+
10613+ subrsb->res_nodeid = rsb->res_nodeid;
10614+ subrsb->res_remasterid = ++fill->remasterid;
10615+
10616+ put_char(REMASTER_RSB, fill->outbuf, &fill->offset);
10617+ serialise_rsb(subrsb, fill->outbuf, &fill->offset);
10618+
10619+ error = pack_lkb_queues(subrsb, fill);
10620+ if (error)
10621+ goto nospace;
10622+
10623+ up_write(&subrsb->res_lock);
10624+
10625+ return 0;
10626+
10627+ nospace:
10628+ up_write(&subrsb->res_lock);
10629+ fill->subrsb = subrsb;
10630+
10631+ return -ENOSPC;
10632+}
10633+
10633+
10634+static int pack_subrsbs(struct dlm_rsb *rsb, struct dlm_rsb *in_subrsb,
10635+ rcom_fill_t *fill)
4bf12011 10636+{
10d56c87 10637+ struct dlm_rsb *subrsb;
4bf12011 10638+ int error = 0;
10639+
10640+ /*
10641+ * When an initial subrsb is given, we know it needs to be packed.
10642+ * When no initial subrsb is given, begin with the first (if any exist).
10643+ */
10644+
10645+ if (!in_subrsb) {
10646+ if (list_empty(&rsb->res_subreslist))
10647+ goto out;
10648+
10d56c87 10649+ subrsb = list_entry(rsb->res_subreslist.next, struct dlm_rsb,
4bf12011 10650+ res_subreslist);
10651+ } else
10652+ subrsb = in_subrsb;
10653+
10654+ for (;;) {
10655+ error = pack_one_subrsb(rsb, subrsb, fill);
10656+ if (error)
10657+ goto out;
10658+
10659+ if (last_in_list(subrsb, &rsb->res_subreslist))
10660+ break;
10661+
10662+ subrsb = next_subrsb(subrsb);
10663+ }
10664+
10665+ out:
10666+ return error;
10667+}
10668+
10669+/*
10670+ * Finish packing whatever is left in an rsb tree. If space runs out while
10671+ * finishing, save subrsb/lkb and this will be called again for the same rsb.
10672+ *
10673+ * !subrsb && lkb, we left off part way through root rsb's lkbs.
10674+ * subrsb && !lkb, we left off just before starting a new subrsb.
10675+ * subrsb && lkb, we left off part way through a subrsb's lkbs.
10676+ * !subrsb && !lkb, we shouldn't be in this function, but starting
10677+ * a new rsb in pack_rsb_tree().
10678+ */
10679+
10d56c87 10680+static int pack_rsb_tree_remaining(struct dlm_ls *ls, struct dlm_rsb *rsb,
4bf12011 10681+ rcom_fill_t *fill)
10682+{
10d56c87 10683+ struct dlm_rsb *subrsb = NULL;
4bf12011 10684+ int error = 0;
10685+
10686+ if (!fill->subrsb && fill->lkb) {
10687+ error = pack_lkb_remaining(rsb, fill);
10688+ if (error)
10689+ goto out;
10690+
10691+ error = pack_subrsbs(rsb, NULL, fill);
10692+ if (error)
10693+ goto out;
10694+ }
10695+
10696+ else if (fill->subrsb && !fill->lkb) {
10697+ error = pack_subrsbs(rsb, fill->subrsb, fill);
10698+ if (error)
10699+ goto out;
10700+ }
10701+
10702+ else if (fill->subrsb && fill->lkb) {
10703+ error = pack_lkb_remaining(fill->subrsb, fill);
10704+ if (error)
10705+ goto out;
10706+
10707+ if (last_in_list(fill->subrsb, &fill->rsb->res_subreslist))
10708+ goto out;
10709+
10710+ subrsb = next_subrsb(fill->subrsb);
10711+
10712+ error = pack_subrsbs(rsb, subrsb, fill);
10713+ if (error)
10714+ goto out;
10715+ }
10716+
10717+ fill->subrsb = NULL;
10718+ fill->lkb = NULL;
10719+
10720+ out:
10721+ return error;
10722+}
10723+
10724+/*
10725+ * Pack an RSB, all its LKB's, all its subrsb's and all their LKB's into a
10726+ * buffer. When the buffer runs out of space, save the place to restart (the
10727+ * queue+lkb, subrsb, or subrsb+queue+lkb which wouldn't fit).
10728+ */
10729+
10d56c87
AM
10730+static int pack_rsb_tree(struct dlm_ls *ls, struct dlm_rsb *rsb,
10731+ rcom_fill_t *fill)
4bf12011 10732+{
10733+ int error = -ENOSPC;
10734+
10735+ fill->remasterid = 0;
10736+
10737+ /*
10738+ * Pack the root rsb itself. A 1 byte type precedes the serialised
10739+ * rsb. Then pack the lkb's for the root rsb.
10740+ */
10741+
10742+ down_write(&rsb->res_lock);
10743+
10744+ if (fill->offset + 1 + rsb_length(rsb) > fill->maxlen)
10745+ goto out;
10746+
10747+ rsb->res_remasterid = ++fill->remasterid;
10748+ put_char(REMASTER_ROOTRSB, fill->outbuf, &fill->offset);
10749+ serialise_rsb(rsb, fill->outbuf, &fill->offset);
10750+
10751+ error = pack_lkb_queues(rsb, fill);
10752+ if (error)
10753+ goto out;
10754+
10755+ up_write(&rsb->res_lock);
10756+
10757+ /*
10758+ * Pack subrsb/lkb's under the root rsb.
10759+ */
10760+
10761+ error = pack_subrsbs(rsb, NULL, fill);
10762+
10763+ return error;
10764+
10765+ out:
10766+ up_write(&rsb->res_lock);
10767+ return error;
10768+}
10769+
10770+/*
10771+ * Given an RSB, return the next RSB that should be sent to a new master.
10772+ */
10773+
10774+static struct dlm_rsb *next_remastered_rsb(struct dlm_ls *ls,
10775+ struct dlm_rsb *rsb)
4bf12011 10776+{
10777+ struct list_head *tmp, *start, *end;
10d56c87 10778+ struct dlm_rsb *r;
4bf12011 10779+
10780+ if (!rsb)
10781+ start = ls->ls_rootres.next;
10782+ else
10783+ start = rsb->res_rootlist.next;
10784+
10785+ end = &ls->ls_rootres;
10786+
10787+ for (tmp = start; tmp != end; tmp = tmp->next) {
10d56c87 10788+ r = list_entry(tmp, struct dlm_rsb, res_rootlist);
4bf12011 10789+
10790+ if (test_bit(RESFL_NEW_MASTER, &r->res_flags)) {
10791+ if (r->res_nodeid && lkbs_to_remaster(r)) {
10792+ expect_new_lkids(r);
10793+ return r;
10794+ } else
10795+ clear_bit(RESFL_NEW_MASTER, &r->res_flags);
10796+ }
10797+ }
10798+
10799+ return NULL;
10800+}
10801+
10802+/*
10803+ * Given an rcom buffer, fill it with RSB's that need to be sent to a single
10804+ * new master node. In the case where all the data to send to one node
10805+ * requires multiple messages, this function needs to resume filling each
10806+ * successive buffer from the point where it left off when the previous buffer
10807+ * filled up.
10808+ */
10809+
10810+static void fill_rcom_buffer(struct dlm_ls *ls, rcom_fill_t *fill,
10811+ uint32_t *nodeid)
4bf12011 10812+{
10d56c87 10813+ struct dlm_rsb *rsb, *prev_rsb = fill->rsb;
4bf12011 10814+ int error;
10815+
10816+ fill->offset = 0;
10817+
10818+ if (!prev_rsb) {
10819+
10820+ /*
10821+ * The first time this function is called.
10822+ */
10823+
10824+ rsb = next_remastered_rsb(ls, NULL);
10825+ if (!rsb)
10826+ goto no_more;
10827+
10828+ } else if (fill->subrsb || fill->lkb) {
10829+
10830+ /*
10831+ * Continue packing an rsb tree that was partially packed last
10832+ * time (fill->subrsb/lkb indicates where packing of last block
10833+ * left off)
10834+ */
10835+
10836+ rsb = prev_rsb;
10837+ *nodeid = rsb->res_nodeid;
10838+
10839+ error = pack_rsb_tree_remaining(ls, rsb, fill);
10840+ if (error == -ENOSPC)
10841+ goto more;
10842+
10843+ rsb = next_remastered_rsb(ls, prev_rsb);
10844+ if (!rsb)
10845+ goto no_more;
10846+
10847+ if (rsb->res_nodeid != prev_rsb->res_nodeid)
10848+ goto more;
10849+ } else {
10850+ rsb = prev_rsb;
10851+ }
10852+
10853+ /*
10854+ * Pack rsb trees into the buffer until we run out of space, run out of
10855+ * new rsb's or hit a new nodeid.
10856+ */
10857+
10858+ *nodeid = rsb->res_nodeid;
10859+
10860+ for (;;) {
10861+ error = pack_rsb_tree(ls, rsb, fill);
10862+ if (error == -ENOSPC)
10863+ goto more;
10864+
10865+ prev_rsb = rsb;
10866+
10867+ rsb = next_remastered_rsb(ls, prev_rsb);
10868+ if (!rsb)
10869+ goto no_more;
10870+
10871+ if (rsb->res_nodeid != prev_rsb->res_nodeid)
10872+ goto more;
10873+ }
10874+
10875+ more:
10876+ fill->more = 1;
10877+ fill->rsb = rsb;
10878+ return;
10879+
10880+ no_more:
10881+ fill->more = 0;
10882+}
10883+
10884+/*
10885+ * Send lkb's (and subrsb/lkbs) for remastered root rsbs to new masters.
10886+ */
10887+
10d56c87 10888+int rebuild_rsbs_send(struct dlm_ls *ls)
4bf12011 10889+{
10d56c87 10890+ struct dlm_rcom *rc;
4bf12011 10891+ rcom_fill_t fill;
10892+ uint32_t nodeid;
10893+ int error;
10894+
10d56c87 10895+ DLM_ASSERT(recover_list_empty(ls),);
4bf12011 10896+
10897+ log_all(ls, "rebuild locks");
10898+
10899+ error = -ENOMEM;
10900+ rc = allocate_rcom_buffer(ls);
10901+ if (!rc)
10902+ goto ret;
10903+
10904+ error = 0;
10905+ memset(&fill, 0, sizeof(rcom_fill_t));
10906+ fill.outbuf = rc->rc_buf;
10d56c87 10907+ fill.maxlen = dlm_config.buffer_size - sizeof(struct dlm_rcom);
4bf12011 10908+
10909+ do {
10910+ fill_rcom_buffer(ls, &fill, &nodeid);
10911+ if (!fill.offset)
10912+ break;
10913+
10914+ rc->rc_datalen = fill.offset;
10915+ error = rcom_send_message(ls, nodeid, RECCOMM_NEWLOCKS, rc, 0);
10916+ if (error)
10917+ goto out;
10918+
10919+ schedule();
10d56c87 10920+ error = dlm_recovery_stopped(ls);
4bf12011 10921+ if (error)
10922+ goto out;
10923+	} while (fill.more);
10925+
10d56c87 10926+ error = dlm_wait_function(ls, &recover_list_empty);
4bf12011 10927+
10928+ log_all(ls, "rebuilt %d locks", fill.count);
10929+
10930+ out:
10931+ rebuild_freemem(ls);
10932+ free_rcom_buffer(rc);
10933+
10934+ ret:
10935+ return error;
10936+}
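+
+/*
+ * Flow summary for the loop above: fill_rcom_buffer() packs whole rsb
+ * trees destined for a single new master into each buffer, setting
+ * fill.more when a tree must be split across several RECCOMM_NEWLOCKS
+ * messages.  Sending continues until nothing is left, after which
+ * dlm_wait_function() blocks until every expected remote lkid has come
+ * back and the recover list is empty again.
+ */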
10937+
10938+static struct dlm_rsb *find_by_remasterid(struct dlm_ls *ls, int remasterid,
10939+ struct dlm_rsb *rootrsb)
4bf12011 10940+{
10d56c87 10941+ struct dlm_rsb *rsb;
4bf12011 10942+
10d56c87 10943+ DLM_ASSERT(rootrsb,);
4bf12011 10944+
10945+ if (rootrsb->res_remasterid == remasterid) {
10946+ rsb = rootrsb;
10947+ goto out;
10948+ }
10949+
10950+ list_for_each_entry(rsb, &rootrsb->res_subreslist, res_subreslist) {
10951+ if (rsb->res_remasterid == remasterid)
10952+ goto out;
10953+ }
10954+ rsb = NULL;
10955+
10956+ out:
10957+ return rsb;
10958+}
10959+
10960+/*
10961+ * Search a queue for the given remote lock id (remlkid).
10962+ */
10963+
10964+static struct dlm_lkb *search_remlkid(struct list_head *statequeue, int nodeid,
10965+ int remid)
4bf12011 10966+{
10d56c87 10967+ struct dlm_lkb *lkb;
4bf12011 10968+
10969+ list_for_each_entry(lkb, statequeue, lkb_statequeue) {
10970+ if (lkb->lkb_nodeid == nodeid && lkb->lkb_remid == remid) {
10971+ return lkb;
10972+ }
10973+ }
10974+
10975+ return NULL;
10976+}
10977+
10978+/*
10979+ * Given a remote lock ID (and a parent resource), return the local LKB for
10980+ * it. Hopefully we don't need to do this too often on deep lock trees; it is
10981+ * VERY suboptimal for anything but the smallest lock trees. It searches the
10982+ * lock tree for an LKB with the remote id "remid" and the node "nodeid" and
10983+ * returns the LKB address. OPTIMISATION: we should keep a list of these while
10984+ * we are building up the remastered LKBs (a sketch of this follows).
10985+ */
10986+
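/*
 * A minimal sketch (not part of the original patch) of the optimisation
 * suggested above: record each (nodeid, remid) -> lkb mapping while the
 * remastered LKBs are built so later lookups avoid walking the lock tree.
 * The rebuild_map structure and helper are hypothetical.
 */

struct rebuild_map {
	struct list_head list;
	int nodeid;
	int remid;
	struct dlm_lkb *lkb;
};

static struct dlm_lkb *rebuild_map_lookup(struct list_head *map, int nodeid,
					  int remid)
{
	struct rebuild_map *e;

	list_for_each_entry(e, map, list) {
		if (e->nodeid == nodeid && e->remid == remid)
			return e->lkb;
	}
	return NULL;
}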
10987+static struct dlm_lkb *find_by_remlkid(struct dlm_rsb *rootrsb, int nodeid,
10988+ int remid)
4bf12011 10989+{
10990+ struct dlm_lkb *lkb;
10991+ struct dlm_rsb *rsb;
4bf12011 10992+
10993+ lkb = search_remlkid(&rootrsb->res_grantqueue, nodeid, remid);
10994+ if (lkb)
10995+ goto out;
10996+
10997+ lkb = search_remlkid(&rootrsb->res_convertqueue, nodeid, remid);
10998+ if (lkb)
10999+ goto out;
11000+
11001+ lkb = search_remlkid(&rootrsb->res_waitqueue, nodeid, remid);
11002+ if (lkb)
11003+ goto out;
11004+
11005+ list_for_each_entry(rsb, &rootrsb->res_subreslist, res_subreslist) {
11006+ lkb = search_remlkid(&rsb->res_grantqueue, nodeid, remid);
11007+ if (lkb)
11008+ goto out;
11009+
11010+ lkb = search_remlkid(&rsb->res_convertqueue, nodeid, remid);
11011+ if (lkb)
11012+ goto out;
11013+
11014+ lkb = search_remlkid(&rsb->res_waitqueue, nodeid, remid);
11015+ if (lkb)
11016+ goto out;
11017+ }
11018+ lkb = NULL;
11019+
11020+ out:
11021+ return lkb;
11022+}
11023+
11024+/*
11025+ * Unpack an LKB from a remaster operation
11026+ */
11027+
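/*
 * On-wire layout implied by the unpacking below (a sketch, not part of the
 * original patch): u32 remote lkid, u32 rsb remasterid, u32 flags,
 * u32 status, u8 rqmode, u8 grmode, u32 childcnt, u32 parent remlkid,
 * u32 bast cookie, then the LVB bytes if VALBLK is set, then one u64
 * start/end pair if RANGE is set (two pairs for a converting lock).
 */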
11028+static int deserialise_lkb(struct dlm_ls *ls, int rem_nodeid,
11029+ struct dlm_rsb *rootrsb, char *buf, int *ptr,
11030+ char *outbuf, int *outoffp)
4bf12011 11031+{
11032+ struct dlm_lkb *lkb;
11033+ struct dlm_rsb *rsb;
4bf12011 11034+ int error = -ENOMEM, parentid, rsb_rmid, remote_lkid, status, temp;
11035+
11036+ remote_lkid = get_int(buf, ptr);
11037+
11038+ rsb_rmid = get_int(buf, ptr);
11039+ rsb = find_by_remasterid(ls, rsb_rmid, rootrsb);
10d56c87 11040+ DLM_ASSERT(rsb, printk("no RSB for remasterid %d\n", rsb_rmid););
4bf12011 11041+
11042+ /*
11043+ * We could have received this lkb already from a previous recovery
11044+ * that was interrupted. If so, just return the lkid to the remote
11045+ * node.
11046+ */
11047+ lkb = find_by_remlkid(rsb, rem_nodeid, remote_lkid);
11048+ if (lkb)
11049+ goto put_lkid;
11050+
11051+ lkb = create_lkb(rsb->res_ls);
11052+ if (!lkb)
11053+ goto out;
11054+
11055+ lkb->lkb_remid = remote_lkid;
11056+ lkb->lkb_flags = get_int(buf, ptr);
11057+ status = get_int(buf, ptr);
11058+ lkb->lkb_rqmode = get_char(buf, ptr);
11059+ lkb->lkb_grmode = get_char(buf, ptr);
11060+ atomic_set(&lkb->lkb_childcnt, get_int(buf, ptr));
11061+
11062+ parentid = get_int(buf, ptr);
11063+ lkb->lkb_bastaddr = (void *) (long) get_int(buf, ptr);
11064+
11065+ if (lkb->lkb_flags & GDLM_LKFLG_VALBLK) {
11066+ lkb->lkb_lvbptr = allocate_lvb(ls);
11067+ if (!lkb->lkb_lvbptr)
11068+ goto out;
11069+ get_bytes(lkb->lkb_lvbptr, &temp, buf, ptr);
11070+ }
11071+
11072+ if (lkb->lkb_flags & GDLM_LKFLG_RANGE) {
11073+ uint64_t start, end;
11074+
11075+ /* Don't need to keep the range flag, for comms use only */
11076+ lkb->lkb_flags &= ~GDLM_LKFLG_RANGE;
11077+ start = get_int64(buf, ptr);
11078+ end = get_int64(buf, ptr);
11079+
11080+ lkb->lkb_range = allocate_range(rsb->res_ls);
11081+ if (!lkb->lkb_range)
11082+ goto out;
11083+
11084+ switch (status) {
11085+ case GDLM_LKSTS_CONVERT:
11086+ lkb->lkb_range[RQ_RANGE_START] = start;
11087+ lkb->lkb_range[RQ_RANGE_END] = end;
11088+ start = get_int64(buf, ptr);
11089+ end = get_int64(buf, ptr);
11090+ lkb->lkb_range[GR_RANGE_START] = start;
11091+ lkb->lkb_range[GR_RANGE_END] = end;
11092+ break;
11093+ case GDLM_LKSTS_WAITING:
11094+ lkb->lkb_range[RQ_RANGE_START] = start;
11095+ lkb->lkb_range[RQ_RANGE_END] = end;
11096+ break;
11097+
11098+ case GDLM_LKSTS_GRANTED:
11099+ lkb->lkb_range[GR_RANGE_START] = start;
11100+ lkb->lkb_range[GR_RANGE_END] = end;
11101+ break;
11102+ default:
10d56c87 11103+ DLM_ASSERT(0,);
4bf12011 11104+ }
11105+ }
11106+
11107+ /* Resolve local lock LKB address from parent ID */
11108+ if (parentid)
11109+ lkb->lkb_parent = find_by_remlkid(rootrsb, rem_nodeid,
11110+ parentid);
11111+
11112+ atomic_inc(&rsb->res_ref);
11113+ lkb->lkb_resource = rsb;
11114+
11115+ lkb->lkb_flags |= GDLM_LKFLG_MSTCPY;
11116+ lkb->lkb_nodeid = rem_nodeid;
11117+
11118+ /*
11119+ * Put the lkb on an RSB queue. An lkb that's in the midst of a
11120+ * conversion request (on the requesting node's lockqueue and has
11121+ * LQCONVERT set) should be put on the granted queue. The convert
11122+ * request will be resent by the requesting node.
11123+ */
11124+
11125+ if (lkb->lkb_flags & GDLM_LKFLG_LQCONVERT) {
11126+ lkb->lkb_flags &= ~GDLM_LKFLG_LQCONVERT;
10d56c87 11127+ DLM_ASSERT(status == GDLM_LKSTS_CONVERT,
4bf12011 11128+ printk("status=%d\n", status););
11129+ lkb->lkb_rqmode = DLM_LOCK_IV;
11130+ status = GDLM_LKSTS_GRANTED;
11131+ }
11132+
11133+ lkb_enqueue(rsb, lkb, status);
11134+
11135+ /*
11136+ * Update the rsb lvb if the lkb's lvb is up to date (grmode > NL).
11137+ */
11138+
11139+ if ((lkb->lkb_flags & GDLM_LKFLG_VALBLK)
11140+ && lkb->lkb_grmode > DLM_LOCK_NL) {
11141+ if (!rsb->res_lvbptr)
11142+ rsb->res_lvbptr = allocate_lvb(rsb->res_ls);
11143+ if (!rsb->res_lvbptr)
11144+ goto out;
11145+ memcpy(rsb->res_lvbptr, lkb->lkb_lvbptr, DLM_LVB_LEN);
11146+ }
11147+
11148+ /*
11149+ * Clear flags that may have been sent over that are only relevant in
11150+ * the context of the sender.
11151+ */
11152+
11153+ lkb->lkb_flags &= ~(GDLM_LKFLG_DELETED | GDLM_LKFLG_LQRESEND |
11154+ GDLM_LKFLG_NOREBUILD | GDLM_LKFLG_DEMOTED);
4bf12011 11155+
11156+ put_lkid:
11157+ /* Return the new LKID to the caller's buffer */
11158+ put_int(lkb->lkb_id, outbuf, outoffp);
11159+ put_int(lkb->lkb_remid, outbuf, outoffp);
11160+ error = 0;
11161+
11162+ out:
11163+ return error;
11164+}
11165+
11166+static struct dlm_rsb *deserialise_rsb(struct dlm_ls *ls, int nodeid,
11167+ struct dlm_rsb *rootrsb, char *buf,
11168+ int *ptr)
4bf12011 11169+{
11170+ int length;
11171+ int remasterid;
11172+ int parent_remasterid;
11173+ char name[DLM_RESNAME_MAXLEN];
11174+ int error;
11175+ struct dlm_rsb *parent = NULL;
11176+ struct dlm_rsb *rsb;
4bf12011 11177+
11178+ get_bytes(name, &length, buf, ptr);
11179+ remasterid = get_int(buf, ptr);
11180+ parent_remasterid = get_int(buf, ptr);
11181+
11182+ if (parent_remasterid)
11183+ parent = find_by_remasterid(ls, parent_remasterid, rootrsb);
11184+
11185+ /*
11186+ * The rsb reference from this find_or_create_rsb() will keep the rsb
11187+ * around while we add new lkb's to it from deserialise_lkb. Each of
11188+ * the lkb's will add an rsb reference. The reference added here is
11189+ * removed by release_rsb() after all lkb's are added.
11190+ */
11191+
11192+ error = find_or_create_rsb(ls, parent, name, length, 1, &rsb);
10d56c87 11193+ DLM_ASSERT(!error,);
4bf12011 11194+
11195+ /* There is a case where the above needs to create the RSB. */
11196+ if (rsb->res_nodeid == -1)
11197+ rsb->res_nodeid = our_nodeid();
11198+
11199+ rsb->res_remasterid = remasterid;
11200+
11201+ return rsb;
11202+}
11203+
11204+/*
11205+ * Processing at the receiving end of a NEWLOCKS message from a node in
11206+ * rebuild_rsbs_send(). Rebuild a remastered lock tree. Nodeid is the remote
11207+ * node whose locks we are now mastering. For a reply we need to send back the
11208+ * new lockids of the remastered locks so that remote ops can find them.
11209+ */
11210+
10d56c87 11211+int rebuild_rsbs_recv(struct dlm_ls *ls, int nodeid, char *buf, int len)
4bf12011 11212+{
11213+ struct dlm_rcom *rc;
11214+ struct dlm_rsb *rsb = NULL;
4bf12011 11215+ rebuild_node_t *rnode;
11216+ char *outbuf;
11217+ int outptr, ptr = 0, error = -ENOMEM;
11218+
11219+ rnode = find_rebuild_root(ls, nodeid);
11220+ if (!rnode)
11221+ goto out;
11222+
11223+ /*
11224+ * Allocate a buffer for the reply, a list of remote lock IDs and their
11225+ * (new) local lock ids. It will always be big enough to fit <n> ID pairs
11226+ * if it already fit <n> LKBs: each packed LKB is larger than its ID pair.
11227+ */
11228+
11229+ rc = allocate_rcom_buffer(ls);
11230+ if (!rc)
11231+ goto out;
11232+ outbuf = rc->rc_buf;
11233+ outptr = 0;
11234+
11235+ /*
11236+ * Unpack RSBs and LKBs, saving new LKB id's in outbuf as they're
11237+ * created. Each deserialise_rsb adds an rsb reference that must be
11238+ * removed with release_rsb once all new lkb's for an rsb have been
11239+ * added.
11240+ */
11241+
11242+ while (ptr < len) {
11243+ int type;
11244+
11245+ type = get_char(buf, &ptr);
11246+
11247+ switch (type) {
11248+ case REMASTER_ROOTRSB:
11249+ if (rsb)
11250+ release_rsb(rsb);
11251+ rsb = deserialise_rsb(ls, nodeid, rnode->rootrsb, buf,
11252+ &ptr);
11253+ rnode->rootrsb = rsb;
11254+ break;
11255+
11256+ case REMASTER_RSB:
11257+ if (rsb)
11258+ release_rsb(rsb);
11259+ rsb = deserialise_rsb(ls, nodeid, rnode->rootrsb, buf,
11260+ &ptr);
11261+ break;
11262+
11263+ case REMASTER_LKB:
11264+ deserialise_lkb(ls, nodeid, rnode->rootrsb, buf, &ptr,
11265+ outbuf, &outptr);
11266+ break;
11267+
11268+ default:
10d56c87 11269+ DLM_ASSERT(0, printk("type=%d nodeid=%u ptr=%d "
4bf12011 11270+ "len=%d\n", type, nodeid, ptr,
11271+ len););
11272+ }
11273+ }
11274+
11275+ if (rsb)
11276+ release_rsb(rsb);
11277+
11278+ /*
11279+ * Reply with the new lock IDs.
11280+ */
11281+
11282+ rc->rc_datalen = outptr;
11283+ error = rcom_send_message(ls, nodeid, RECCOMM_NEWLOCKIDS, rc, 0);
11284+
11285+ free_rcom_buffer(rc);
11286+
11287+ out:
11288+ return error;
11289+}
11290+
11291+/*
11292+ * Processing for a NEWLOCKIDS message. Called when we get the reply from the
11293+ * new master telling us what the new remote lock IDs are for the remastered
11294+ * locks
11295+ */
11296+
10d56c87 11297+int rebuild_rsbs_lkids_recv(struct dlm_ls *ls, int nodeid, char *buf, int len)
4bf12011 11298+{
11299+ int offset = 0;
11300+
11301+ /* rcom_send_message() pads an empty payload out to one byte */
11302+ if (len == 1)
11303+ len = 0;
11304+ while (offset < len) {
11305+ int remote_id;
11306+ int local_id;
10d56c87 11307+ struct dlm_lkb *lkb;
4bf12011 11308+
11309+ if (offset + 8 > len) {
11310+ log_error(ls, "rebuild_rsbs_lkids_recv: bad data "
11311+ "length nodeid=%d offset=%d len=%d",
11312+ nodeid, offset, len);
11313+ break;
11314+ }
11315+
11316+ remote_id = get_int(buf, &offset);
11317+ local_id = get_int(buf, &offset);
11318+
11319+ lkb = find_lock_by_id(ls, local_id);
11320+ if (lkb) {
11321+ lkb->lkb_remid = remote_id;
11322+ have_new_lkid(lkb);
11323+ } else {
11324+ log_error(ls, "rebuild_rsbs_lkids_recv: unknown lkid "
11325+ "nodeid=%d id=%x remid=%x offset=%d len=%d",
11326+ nodeid, local_id, remote_id, offset, len);
11327+ }
11328+ }
11329+
11330+ if (recover_list_empty(ls))
11331+ wake_up(&ls->ls_wait_general);
11332+
11333+ return 0;
11334+}
11335diff -urN linux-orig/cluster/dlm/rebuild.h linux-patched/cluster/dlm/rebuild.h
11336--- linux-orig/cluster/dlm/rebuild.h 1970-01-01 07:30:00.000000000 +0730
10d56c87 11337+++ linux-patched/cluster/dlm/rebuild.h 2004-07-13 18:57:22.000000000 +0800
4bf12011 11338@@ -0,0 +1,22 @@
11339+/******************************************************************************
11340+*******************************************************************************
11341+**
11342+** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
11343+** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
11344+**
11345+** This copyrighted material is made available to anyone wishing to use,
11346+** modify, copy, or redistribute it subject to the terms and conditions
11347+** of the GNU General Public License v.2.
11348+**
11349+*******************************************************************************
11350+******************************************************************************/
11351+
11352+#ifndef __REBUILD_DOT_H__
11353+#define __REBUILD_DOT_H__
11354+
11355+int rebuild_rsbs_send(struct dlm_ls *ls);
11356+int rebuild_rsbs_recv(struct dlm_ls *ls, int nodeid, char *buf, int len);
11357+int rebuild_rsbs_lkids_recv(struct dlm_ls *ls, int nodeid, char *buf, int len);
11358+int rebuild_freemem(struct dlm_ls *ls);
4bf12011 11359+
11360+#endif /* __REBUILD_DOT_H__ */
11361diff -urN linux-orig/cluster/dlm/reccomms.c linux-patched/cluster/dlm/reccomms.c
11362--- linux-orig/cluster/dlm/reccomms.c 1970-01-01 07:30:00.000000000 +0730
11363+++ linux-patched/cluster/dlm/reccomms.c 2004-07-13 18:57:22.000000000 +0800
11364@@ -0,0 +1,504 @@
4bf12011 11365+/******************************************************************************
11366+*******************************************************************************
11367+**
11368+** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
11369+** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
11370+**
11371+** This copyrighted material is made available to anyone wishing to use,
11372+** modify, copy, or redistribute it subject to the terms and conditions
11373+** of the GNU General Public License v.2.
11374+**
11375+*******************************************************************************
11376+******************************************************************************/
11377+
11378+#include "dlm_internal.h"
11379+#include "lowcomms.h"
11380+#include "midcomms.h"
11381+#include "reccomms.h"
11382+#include "nodes.h"
11383+#include "lockspace.h"
11384+#include "recover.h"
11385+#include "dir.h"
11386+#include "config.h"
11387+#include "rebuild.h"
11388+#include "memory.h"
11389+
11390+/* Running on the basis that only a single recovery communication will be done
11391+ * at a time per lockspace */
11392+
10d56c87 11393+static void rcom_process_message(struct dlm_ls * ls, uint32_t nodeid, struct dlm_rcom * rc);
4bf12011 11394+
11395+/*
11396+ * Track per-node progress/stats during recovery to help debugging.
11397+ */
11398+
10d56c87 11399+void rcom_log(struct dlm_ls *ls, int nodeid, struct dlm_rcom *rc, int send)
4bf12011 11400+{
10d56c87 11401+ struct dlm_csb *csb;
4bf12011 11402+ int found = 0;
11403+
11404+ list_for_each_entry(csb, &ls->ls_nodes, list) {
11405+ if (csb->node->nodeid == nodeid) {
4bf12011 11406+ found = TRUE;
11407+ break;
11408+ }
11409+ }
11410+
11411+ if (!found)
11412+ return;
11413+
11414+ if (rc->rc_subcmd == RECCOMM_RECOVERNAMES) {
11415+ if (send) {
11416+ csb->names_send_count++;
11417+ csb->names_send_msgid = rc->rc_msgid;
4bf12011 11418+ } else {
11419+ csb->names_recv_count++;
11420+ csb->names_recv_msgid = rc->rc_msgid;
4bf12011 11421+ }
11422+ } else if (rc->rc_subcmd == RECCOMM_NEWLOCKS) {
11423+ if (send) {
11424+ csb->locks_send_count++;
11425+ csb->locks_send_msgid = rc->rc_msgid;
4bf12011 11426+ } else {
11427+ csb->locks_recv_count++;
11428+ csb->locks_recv_msgid = rc->rc_msgid;
4bf12011 11429+ }
11430+ }
11431+}
11432+
10d56c87 11433+void rcom_log_clear(struct dlm_ls *ls)
4bf12011 11434+{
10d56c87 11435+ struct dlm_csb *csb;
4bf12011 11436+
11437+ list_for_each_entry(csb, &ls->ls_nodes, list) {
11438+ csb->names_send_count = 0;
11439+ csb->names_send_msgid = 0;
11440+ csb->names_recv_count = 0;
11441+ csb->names_recv_msgid = 0;
11442+ csb->locks_send_count = 0;
11443+ csb->locks_send_msgid = 0;
11444+ csb->locks_recv_count = 0;
11445+ csb->locks_recv_msgid = 0;
4bf12011 11446+ }
11447+}
11448+
10d56c87 11449+static int rcom_response(struct dlm_ls *ls)
4bf12011 11450+{
11451+ return test_bit(LSFL_RECCOMM_READY, &ls->ls_flags);
11452+}
11453+
11454+/**
11455+ * rcom_send_message - send or request recovery data
11456+ * @ls: the lockspace
11457+ * @nodeid: node to which the message is sent
11458+ * @type: type of recovery message
11459+ * @rc: the rc buffer to send
11460+ * @need_reply: wait for reply if this is set
11461+ *
11462+ * Using this interface
11463+ * i) Allocate an rc buffer:
11464+ * rc = allocate_rcom_buffer(ls);
11465+ * ii) Copy data to send beginning at rc->rc_buf:
11466+ * memcpy(rc->rc_buf, mybuf, mylen);
11467+ * iii) Set rc->rc_datalen to the number of bytes copied in (ii):
11468+ * rc->rc_datalen = mylen
11469+ * iv) Submit the rc to this function (see the sketch after the function
11470+ * body below): rcom_send_message(ls, nodeid, type, rc, need_reply);
11471+ *
11472+ * The max value of "mylen" is dlm_config.buffer_size - sizeof(struct
11473+ * dlm_rcom). If more data must be passed in one send, use
11474+ * rcom_expand_buffer() which incrementally increases the size of the rc buffer
11475+ * by dlm_config.buffer_size bytes.
4bf12011 11476+ *
11477+ * Any data returned for the message (when need_reply is set) will be saved in
11478+ * rc->rc_buf when this function returns and rc->rc_datalen will be set to the
11479+ * number of bytes copied into rc->rc_buf.
11480+ *
11481+ * Returns: 0 on success, -EXXX on failure
11482+ */
11483+
11484+int rcom_send_message(struct dlm_ls *ls, uint32_t nodeid, int type,
11485+ struct dlm_rcom *rc, int need_reply)
4bf12011 11486+{
11487+ int error = 0;
11488+
11489+ if (!rc->rc_datalen)
11490+ rc->rc_datalen = 1;
11491+
11492+ /*
11493+ * Fill in the header.
11494+ */
11495+
11496+ rc->rc_header.rh_cmd = GDLM_REMCMD_RECOVERMESSAGE;
11497+ rc->rc_header.rh_lockspace = ls->ls_global_id;
10d56c87 11498+ rc->rc_header.rh_length = sizeof(struct dlm_rcom) + rc->rc_datalen - 1;
4bf12011 11499+ rc->rc_subcmd = type;
11500+ rc->rc_msgid = ++ls->ls_rcom_msgid;
11501+
11502+ rcom_log(ls, nodeid, rc, 1);
11503+
11504+ /*
11505+ * When a reply is received, the reply data goes back into this buffer.
11506+ * Synchronous rcom requests (need_reply=1) are serialised because of
11507+ * the single ls_rcom.
11508+ */
11509+
11510+ if (need_reply) {
11511+ down(&ls->ls_rcom_lock);
11512+ ls->ls_rcom = rc;
11513+ }
11514+
11515+ /*
11516+ * After sending the message we'll wait at the end of this function to
11517+ * get a reply. The READY flag will be set when the reply has been
11518+ * received and requested data has been copied into
11519+ * ls->ls_rcom->rc_buf;
11520+ */
11521+
10d56c87 11522+ DLM_ASSERT(!test_bit(LSFL_RECCOMM_READY, &ls->ls_flags),);
4bf12011 11523+
11524+ /*
11525+ * The WAIT bit indicates that we're waiting for and willing to accept a
11526+ * reply. Any replies are ignored unless this bit is set.
11527+ */
11528+
11529+ set_bit(LSFL_RECCOMM_WAIT, &ls->ls_flags);
11530+
11531+ /*
11532+ * Process the message locally.
11533+ */
11534+
11535+ if (nodeid == our_nodeid()) {
11536+ rcom_process_message(ls, nodeid, rc);
11537+ goto out;
11538+ }
11539+
11540+ /*
11541+ * Send the message.
11542+ */
11543+
11544+ log_debug(ls, "rcom send %d to %u id %u", type, nodeid, rc->rc_msgid);
11545+
10d56c87 11546+ error = midcomms_send_message(nodeid, (struct dlm_header *) rc,
4bf12011 11547+ GFP_KERNEL);
10d56c87 11548+ DLM_ASSERT(error >= 0, printk("error = %d\n", error););
4bf12011 11549+ error = 0;
11550+
11551+ /*
11552+ * Wait for a reply. Once a reply is processed from midcomms, the
10d56c87 11553+ * READY bit will be set and we'll be awoken (dlm_wait_function will
4bf12011 11554+ * return 0).
11555+ */
11556+
11557+ if (need_reply) {
10d56c87 11558+ error = dlm_wait_function(ls, &rcom_response);
4bf12011 11559+ if (error)
11560+ log_debug(ls, "rcom wait error %d", error);
11561+ }
11562+
11563+ out:
11564+ clear_bit(LSFL_RECCOMM_WAIT, &ls->ls_flags);
11565+ clear_bit(LSFL_RECCOMM_READY, &ls->ls_flags);
11566+
11567+ if (need_reply)
11568+ up(&ls->ls_rcom_lock);
11569+
11570+ return error;
11571+}
11572+
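/*
 * A minimal usage sketch (not part of the original patch) of steps
 * (i)-(iv) above; "mybuf", "mylen" and "nodeid" stand for a hypothetical
 * caller's data.
 */

static int example_rcom_request(struct dlm_ls *ls, uint32_t nodeid,
				char *mybuf, int mylen)
{
	struct dlm_rcom *rc;
	int error;

	rc = allocate_rcom_buffer(ls);			/* (i) */
	if (!rc)
		return -ENOMEM;

	memcpy(rc->rc_buf, mybuf, mylen);		/* (ii) */
	rc->rc_datalen = mylen;				/* (iii) */

	/* (iv): with need_reply=1 the reply data is in rc->rc_buf on return */
	error = rcom_send_message(ls, nodeid, RECCOMM_STATUS, rc, 1);

	free_rcom_buffer(rc);
	return error;
}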
11573+/*
11574+ * Runs in same context as midcomms.
11575+ */
11576+
10d56c87 11577+static void rcom_process_message(struct dlm_ls *ls, uint32_t nodeid, struct dlm_rcom *rc)
4bf12011 11578+{
11579+ struct dlm_rcom rc_stack;
11580+ struct dlm_rcom *reply = NULL;
4bf12011 11581+ int status, datalen, maxlen;
10d56c87 11582+ uint32_t r_nodeid, be_nodeid;
4bf12011 11583+
11584+ if (!ls)
11585+ return;
11586+
11587+ rcom_log(ls, nodeid, rc, 0);
11588+
10d56c87 11589+ if (dlm_recovery_stopped(ls) && (rc->rc_subcmd != RECCOMM_STATUS)) {
4bf12011 11590+ log_error(ls, "ignoring recovery message %x from %u",
11591+ rc->rc_subcmd, nodeid);
11592+ return;
11593+ }
11594+
11595+ switch (rc->rc_subcmd) {
11596+
11597+ case RECCOMM_STATUS:
11598+
10d56c87 11599+ memset(&rc_stack, 0, sizeof(struct dlm_rcom));
4bf12011 11600+ reply = &rc_stack;
11601+
11602+ reply->rc_header.rh_cmd = GDLM_REMCMD_RECOVERREPLY;
11603+ reply->rc_header.rh_lockspace = rc->rc_header.rh_lockspace;
11604+ reply->rc_subcmd = rc->rc_subcmd;
11605+ reply->rc_msgid = rc->rc_msgid;
11606+ reply->rc_buf[0] = 0;
11607+
11608+ if (test_bit(LSFL_RESDIR_VALID, &ls->ls_flags))
11609+ reply->rc_buf[0] |= RESDIR_VALID;
11610+
11611+ if (test_bit(LSFL_ALL_RESDIR_VALID, &ls->ls_flags))
11612+ reply->rc_buf[0] |= RESDIR_ALL_VALID;
11613+
11614+ if (test_bit(LSFL_NODES_VALID, &ls->ls_flags))
11615+ reply->rc_buf[0] |= NODES_VALID;
11616+
11617+ if (test_bit(LSFL_ALL_NODES_VALID, &ls->ls_flags))
11618+ reply->rc_buf[0] |= NODES_ALL_VALID;
11619+
11620+ reply->rc_datalen = 1;
11621+ reply->rc_header.rh_length =
10d56c87 11622+ sizeof(struct dlm_rcom) + reply->rc_datalen - 1;
4bf12011 11623+
11624+ log_debug(ls, "rcom status %x to %u", reply->rc_buf[0], nodeid);
11625+ break;
11626+
11627+ case RECCOMM_RECOVERNAMES:
11628+
11629+ reply = allocate_rcom_buffer(ls);
11630+ DLM_ASSERT(reply,);
11631+ maxlen = dlm_config.buffer_size - sizeof(struct dlm_rcom);
4bf12011 11632+
11633+ reply->rc_header.rh_cmd = GDLM_REMCMD_RECOVERREPLY;
11634+ reply->rc_header.rh_lockspace = rc->rc_header.rh_lockspace;
11635+ reply->rc_subcmd = rc->rc_subcmd;
11636+ reply->rc_msgid = rc->rc_msgid;
11637+
11638+ /*
11639+ * The other node wants a bunch of resource names. The name of
11640+ * the resource to begin with is in rc->rc_buf.
11641+ */
11642+
11643+ datalen = dlm_dir_rebuild_send(ls, rc->rc_buf, rc->rc_datalen,
11644+ reply->rc_buf, maxlen, nodeid);
4bf12011 11645+
11646+ reply->rc_datalen = datalen;
11647+ reply->rc_header.rh_length =
10d56c87 11648+ sizeof(struct dlm_rcom) + reply->rc_datalen - 1;
4bf12011 11649+
11650+ log_debug(ls, "rcom names len %d to %u id %u", datalen, nodeid,
11651+ reply->rc_msgid);
11652+ break;
11653+
11654+ case RECCOMM_GETMASTER:
11655+
11656+ reply = allocate_rcom_buffer(ls);
10d56c87 11657+ DLM_ASSERT(reply,);
4bf12011 11658+
11659+ reply->rc_header.rh_cmd = GDLM_REMCMD_RECOVERREPLY;
11660+ reply->rc_header.rh_lockspace = rc->rc_header.rh_lockspace;
11661+ reply->rc_subcmd = rc->rc_subcmd;
11662+ reply->rc_msgid = rc->rc_msgid;
11663+
11664+ /*
11665+ * The other node wants to know the master of a named resource.
11666+ */
11667+
11668+ status = dlm_dir_lookup_recovery(ls, nodeid, rc->rc_buf,
11669+ rc->rc_datalen, &r_nodeid);
4bf12011 11670+ if (status != 0) {
11671+ free_rcom_buffer(reply);
11672+ reply = NULL;
11673+ return;
11674+ }
10d56c87 11675+ be_nodeid = cpu_to_be32(r_nodeid);
4bf12011 11676+ memcpy(reply->rc_buf, &be_nodeid, sizeof(uint32_t));
11677+ reply->rc_datalen = sizeof(uint32_t);
11678+ reply->rc_header.rh_length =
10d56c87 11679+ sizeof(struct dlm_rcom) + reply->rc_datalen - 1;
4bf12011 11680+ break;
11681+
11682+ case RECCOMM_BULKLOOKUP:
11683+
11684+ reply = allocate_rcom_buffer(ls);
10d56c87 11685+ DLM_ASSERT(reply,);
4bf12011 11686+
11687+ reply->rc_header.rh_cmd = GDLM_REMCMD_RECOVERREPLY;
11688+ reply->rc_header.rh_lockspace = rc->rc_header.rh_lockspace;
11689+ reply->rc_subcmd = rc->rc_subcmd;
11690+ reply->rc_msgid = rc->rc_msgid;
11691+
11692+ /*
11693+ * This is a bulk version of the above and just returns a
11694+ * buffer full of node ids to match the resources
11695+ */
11696+
11697+ datalen = bulk_master_lookup(ls, nodeid, rc->rc_buf,
11698+ rc->rc_datalen, reply->rc_buf);
11699+ if (datalen < 0) {
11700+ free_rcom_buffer(reply);
11701+ reply = NULL;
11702+ return;
11703+ }
11704+
11705+ reply->rc_datalen = datalen;
11706+ reply->rc_header.rh_length =
10d56c87 11707+ sizeof(struct dlm_rcom) + reply->rc_datalen - 1;
4bf12011 11708+ break;
11709+
11710+ /*
11711+ * These RECCOMM messages don't need replies.
11712+ */
11713+
11714+ case RECCOMM_NEWLOCKS:
11715+ rebuild_rsbs_recv(ls, nodeid, rc->rc_buf, rc->rc_datalen);
11716+ break;
11717+
11718+ case RECCOMM_NEWLOCKIDS:
11719+ rebuild_rsbs_lkids_recv(ls, nodeid, rc->rc_buf, rc->rc_datalen);
11720+ break;
11721+
11722+ case RECCOMM_REMRESDATA:
10d56c87 11723+ remove_resdata(ls, nodeid, rc->rc_buf, rc->rc_datalen);
4bf12011 11724+ break;
11725+
11726+ default:
10d56c87 11727+ DLM_ASSERT(0, printk("cmd=%x\n", rc->rc_subcmd););
4bf12011 11728+ }
11729+
11730+ if (reply) {
11731+ if (nodeid == our_nodeid()) {
10d56c87 11732+ DLM_ASSERT(rc == ls->ls_rcom,);
4bf12011 11733+ memcpy(rc->rc_buf, reply->rc_buf, reply->rc_datalen);
11734+ rc->rc_datalen = reply->rc_datalen;
11735+ } else {
11736+ midcomms_send_message(nodeid,
10d56c87 11737+ (struct dlm_header *) reply,
4bf12011 11738+ GFP_KERNEL);
11739+ }
11740+
11741+ if (reply != &rc_stack)
11742+ free_rcom_buffer(reply);
11743+ }
11744+}
11745+
11746+static void process_reply_sync(struct dlm_ls *ls, uint32_t nodeid,
11747+ struct dlm_rcom *reply)
4bf12011 11748+{
10d56c87 11749+ struct dlm_rcom *rc = ls->ls_rcom;
4bf12011 11750+
11751+ if (!test_bit(LSFL_RECCOMM_WAIT, &ls->ls_flags)) {
11752+ log_error(ls, "unexpected rcom reply nodeid=%u", nodeid);
11753+ return;
11754+ }
11755+
11756+ if (reply->rc_msgid != le32_to_cpu(rc->rc_msgid)) {
11757+ log_error(ls, "unexpected rcom msgid %x/%x nodeid=%u",
11758+ reply->rc_msgid, le32_to_cpu(rc->rc_msgid), nodeid);
11759+ return;
11760+ }
11761+
11762+ memcpy(rc->rc_buf, reply->rc_buf, reply->rc_datalen);
11763+ rc->rc_datalen = reply->rc_datalen;
11764+
11765+ /*
11766+ * Tell the thread waiting in rcom_send_message() that it can go ahead.
11767+ */
11768+
11769+ set_bit(LSFL_RECCOMM_READY, &ls->ls_flags);
11770+ wake_up(&ls->ls_wait_general);
11771+}
11772+
11773+static void process_reply_async(struct dlm_ls *ls, uint32_t nodeid,
11774+ struct dlm_rcom *reply)
4bf12011 11775+{
11776+ restbl_rsb_update_recv(ls, nodeid, reply->rc_buf, reply->rc_datalen,
11777+ reply->rc_msgid);
11778+}
11779+
11780+/*
11781+ * Runs in same context as midcomms.
11782+ */
11783+
11784+static void rcom_process_reply(struct dlm_ls *ls, uint32_t nodeid,
11785+ struct dlm_rcom *reply)
4bf12011 11786+{
10d56c87 11787+ if (dlm_recovery_stopped(ls)) {
4bf12011 11788+ log_error(ls, "ignoring recovery reply %x from %u",
11789+ reply->rc_subcmd, nodeid);
11790+ return;
11791+ }
11792+
11793+ switch (reply->rc_subcmd) {
11794+ case RECCOMM_GETMASTER:
11795+ process_reply_async(ls, nodeid, reply);
11796+ break;
11797+ case RECCOMM_STATUS:
11798+ case RECCOMM_NEWLOCKS:
11799+ case RECCOMM_NEWLOCKIDS:
11800+ case RECCOMM_RECOVERNAMES:
11801+ process_reply_sync(ls, nodeid, reply);
11802+ break;
11803+ default:
11804+ log_error(ls, "unknown rcom reply subcmd=%x nodeid=%u",
11805+ reply->rc_subcmd, nodeid);
11806+ }
11807+}
11808+
11809+
10d56c87 11810+static int send_ls_not_ready(uint32_t nodeid, struct dlm_header *header)
4bf12011 11811+{
11812+ struct writequeue_entry *wq;
11813+ struct dlm_rcom *rc = (struct dlm_rcom *) header;
11814+ struct dlm_rcom *reply;
4bf12011 11815+
10d56c87 11816+ wq = lowcomms_get_buffer(nodeid, sizeof(struct dlm_rcom), GFP_KERNEL,
4bf12011 11817+ (char **)&reply);
11818+ if (!wq)
11819+ return -ENOMEM;
11820+
11821+ reply->rc_header.rh_cmd = GDLM_REMCMD_RECOVERREPLY;
11822+ reply->rc_header.rh_lockspace = rc->rc_header.rh_lockspace;
11823+ reply->rc_subcmd = rc->rc_subcmd;
11824+ reply->rc_msgid = rc->rc_msgid;
11825+ reply->rc_buf[0] = 0;
11826+
11827+ reply->rc_datalen = 1;
10d56c87 11828+ reply->rc_header.rh_length = sizeof(struct dlm_rcom) + reply->rc_datalen - 1;
4bf12011 11829+
10d56c87 11830+ midcomms_send_buffer((struct dlm_header *)reply, wq);
4bf12011 11831+ return 0;
11832+}
11833+
11834+
11835+/*
11836+ * Runs in same context as midcomms. Both recovery requests and recovery
11837+ * replies come through this function.
11838+ */
11839+
10d56c87 11840+void process_recovery_comm(uint32_t nodeid, struct dlm_header *header)
4bf12011 11841+{
11842+ struct dlm_ls *ls = find_lockspace_by_global_id(header->rh_lockspace);
11843+ struct dlm_rcom *rc = (struct dlm_rcom *) header;
4bf12011 11844+
11845+ /* If the lockspace doesn't exist then still send a status message
11846+ back; it's possible that it just doesn't have its global_id
11847+ yet. */
11848+ if (!ls) {
11849+ send_ls_not_ready(nodeid, header);
11850+ return;
11851+ }
11852+
11853+ switch (header->rh_cmd) {
11854+ case GDLM_REMCMD_RECOVERMESSAGE:
11855+ down_read(&ls->ls_rec_rsblist);
11856+ rcom_process_message(ls, nodeid, rc);
11857+ up_read(&ls->ls_rec_rsblist);
11858+ break;
11859+
11860+ case GDLM_REMCMD_RECOVERREPLY:
11861+ rcom_process_reply(ls, nodeid, rc);
11862+ break;
11863+
11864+ default:
10d56c87 11865+ DLM_ASSERT(0, printk("cmd=%x\n", header->rh_cmd););
4bf12011 11866+ }
11867+}
11868+
11869diff -urN linux-orig/cluster/dlm/reccomms.h linux-patched/cluster/dlm/reccomms.h
11870--- linux-orig/cluster/dlm/reccomms.h 1970-01-01 07:30:00.000000000 +0730
10d56c87 11871+++ linux-patched/cluster/dlm/reccomms.h 2004-07-13 18:57:22.000000000 +0800
4bf12011 11872@@ -0,0 +1,37 @@
11873+/******************************************************************************
11874+*******************************************************************************
11875+**
11876+** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
11877+** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
11878+**
11879+** This copyrighted material is made available to anyone wishing to use,
11880+** modify, copy, or redistribute it subject to the terms and conditions
11881+** of the GNU General Public License v.2.
11882+**
11883+*******************************************************************************
11884+******************************************************************************/
11885+
11886+#ifndef __RECCOMMS_DOT_H__
11887+#define __RECCOMMS_DOT_H__
11888+
11889+/* Bit flags */
11890+
11891+#define RESDIR_VALID (1)
11892+#define RESDIR_ALL_VALID (2)
11893+#define NODES_VALID (4)
11894+#define NODES_ALL_VALID (8)
11895+
11896+#define RECCOMM_STATUS (1)
11897+#define RECCOMM_RECOVERNAMES (2)
11898+#define RECCOMM_GETMASTER (3)
11899+#define RECCOMM_BULKLOOKUP (4)
11900+#define RECCOMM_NEWLOCKS (5)
11901+#define RECCOMM_NEWLOCKIDS (6)
11902+#define RECCOMM_REMRESDATA (7)
11903+
11904+int rcom_send_message(struct dlm_ls *ls, uint32_t nodeid, int type,
11905+ struct dlm_rcom *rc, int need_reply);
11906+void process_recovery_comm(uint32_t nodeid, struct dlm_header *header);
11907+void rcom_log_clear(struct dlm_ls *ls);
4bf12011 11908+
11909+#endif
11910diff -urN linux-orig/cluster/dlm/recover.c linux-patched/cluster/dlm/recover.c
11911--- linux-orig/cluster/dlm/recover.c 1970-01-01 07:30:00.000000000 +0730
11912+++ linux-patched/cluster/dlm/recover.c 2004-07-13 18:57:22.000000000 +0800
11913@@ -0,0 +1,610 @@
4bf12011 11914+/******************************************************************************
11915+*******************************************************************************
11916+**
11917+** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
11918+** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
11919+**
11920+** This copyrighted material is made available to anyone wishing to use,
11921+** modify, copy, or redistribute it subject to the terms and conditions
11922+** of the GNU General Public License v.2.
11923+**
11924+*******************************************************************************
11925+******************************************************************************/
11926+
11927+#include "dlm_internal.h"
11928+#include "reccomms.h"
11929+#include "dir.h"
11930+#include "locking.h"
11931+#include "rsb.h"
11932+#include "lockspace.h"
11933+#include "lkb.h"
11934+#include "nodes.h"
11935+#include "config.h"
11936+#include "ast.h"
11937+#include "memory.h"
11938+
11939+/*
11940+ * Called in recovery routines to check whether the recovery process has been
11941+ * interrupted/stopped by another transition. A recovery in-process will abort
11942+ * if the lockspace is "stopped" so that a new recovery process can start from
11943+ * the beginning when the lockspace is "started" again.
11944+ */
11945+
10d56c87 11946+int dlm_recovery_stopped(struct dlm_ls *ls)
4bf12011 11947+{
11948+ return test_bit(LSFL_LS_STOP, &ls->ls_flags);
11949+}
11950+
10d56c87 11951+static void dlm_wait_timer_fn(unsigned long data)
4bf12011 11952+{
10d56c87 11953+ struct dlm_ls *ls = (struct dlm_ls *) data;
4bf12011 11954+
11955+ wake_up(&ls->ls_wait_general);
11956+}
11957+
11958+/*
11959+ * Wait until given function returns non-zero or lockspace is stopped (LS_STOP
11960+ * set due to failure of a node in ls_nodes). When code elsewhere thinks it
11961+ * may have completed the waited-on task, it should wake up ls_wait_general
11962+ * to get an immediate response rather than waiting for the timer to detect the
11963+ * result. A timer wakes us up periodically while waiting to see if we should
11964+ * abort due to a node failure.
11965+ */
11966+
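/*
 * For illustration (not part of the original patch): a caller supplies a
 * simple predicate and blocks until it holds or recovery is stopped, e.g.
 *
 *	error = dlm_wait_function(ls, &recover_list_empty);
 *	if (error)
 *		...lockspace was stopped; abandon this recovery pass...
 */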
10d56c87 11967+int dlm_wait_function(struct dlm_ls *ls, int (*testfn) (struct dlm_ls * ls))
4bf12011 11968+{
11969+ struct timer_list timer;
11970+ int error = 0;
11971+
11972+ init_timer(&timer);
10d56c87 11973+ timer.function = dlm_wait_timer_fn;
4bf12011 11974+ timer.data = (long) ls;
11975+
11976+ for (;;) {
11977+ mod_timer(&timer, jiffies + (5 * HZ));
11978+
11979+ wchan_cond_sleep_intr(ls->ls_wait_general,
11980+ !testfn(ls) &&
11981+ !test_bit(LSFL_LS_STOP, &ls->ls_flags));
11982+
11983+ if (timer_pending(&timer))
11984+ del_timer(&timer);
11985+
11986+ if (testfn(ls))
11987+ break;
11988+
11989+ if (test_bit(LSFL_LS_STOP, &ls->ls_flags)) {
11990+ error = -1;
11991+ break;
11992+ }
11993+ }
11994+
11995+ return error;
11996+}
11997+
10d56c87 11998+int dlm_wait_status_all(struct dlm_ls *ls, unsigned int wait_status)
4bf12011 11999+{
12000+ struct dlm_rcom rc_stack, *rc;
12001+ struct dlm_csb *csb;
4bf12011 12002+ int status;
12003+ int error = 0;
12004+
10d56c87 12005+ memset(&rc_stack, 0, sizeof(struct dlm_rcom));
4bf12011 12006+ rc = &rc_stack;
12007+ rc->rc_datalen = 0;
12008+
10d56c87 12009+ list_for_each_entry(csb, &ls->ls_nodes, list) {
4bf12011 12010+ for (;;) {
10d56c87 12011+ error = dlm_recovery_stopped(ls);
4bf12011 12012+ if (error)
12013+ goto out;
12014+
10d56c87 12015+ error = rcom_send_message(ls, csb->node->nodeid,
4bf12011 12016+ RECCOMM_STATUS, rc, 1);
12017+ if (error)
12018+ goto out;
12019+
12020+ status = rc->rc_buf[0];
12021+ if (status & wait_status)
12022+ break;
12023+ else {
12024+ set_current_state(TASK_INTERRUPTIBLE);
12025+ schedule_timeout(HZ >> 1);
12026+ }
12027+ }
12028+ }
12029+
12030+ out:
12031+ return error;
12032+}
12033+
10d56c87 12034+int dlm_wait_status_low(struct dlm_ls *ls, unsigned int wait_status)
4bf12011 12035+{
10d56c87 12036+ struct dlm_rcom rc_stack, *rc;
4bf12011 12037+ uint32_t nodeid = ls->ls_low_nodeid;
12038+ int status;
12039+ int error = 0;
12040+
10d56c87 12041+ memset(&rc_stack, 0, sizeof(struct dlm_rcom));
4bf12011 12042+ rc = &rc_stack;
12043+ rc->rc_datalen = 0;
12044+
12045+ for (;;) {
10d56c87 12046+ error = dlm_recovery_stopped(ls);
4bf12011 12047+ if (error)
12048+ goto out;
12049+
12050+ error = rcom_send_message(ls, nodeid, RECCOMM_STATUS, rc, 1);
12051+ if (error)
12052+ break;
12053+
12054+ status = rc->rc_buf[0];
12055+ if (status & wait_status)
12056+ break;
12057+ else {
12058+ set_current_state(TASK_INTERRUPTIBLE);
12059+ schedule_timeout(HZ >> 1);
12060+ }
12061+ }
12062+
12063+ out:
12064+ return error;
12065+}
12066+
10d56c87 12067+static int purge_queue(struct dlm_ls *ls, struct list_head *queue)
4bf12011 12068+{
12069+ struct dlm_lkb *lkb, *safe;
12070+ struct dlm_rsb *rsb;
4bf12011 12071+ int count = 0;
12072+
12073+ list_for_each_entry_safe(lkb, safe, queue, lkb_statequeue) {
12074+ if (!lkb->lkb_nodeid)
12075+ continue;
12076+
10d56c87 12077+ DLM_ASSERT(lkb->lkb_flags & GDLM_LKFLG_MSTCPY,);
4bf12011 12078+
12079+ if (in_nodes_gone(ls, lkb->lkb_nodeid)) {
12080+ list_del(&lkb->lkb_statequeue);
12081+
12082+ rsb = lkb->lkb_resource;
12083+
12084+ /* a converting lock is also on the deadlock queue */
12085+ if (lkb->lkb_status == GDLM_LKSTS_CONVERT)
12086+ remove_from_deadlockqueue(lkb);
12087+ lkb->lkb_status = 0;
12088+
12089+ release_lkb(ls, lkb);
12090+ release_rsb(rsb);
12091+ count++;
12092+ }
12093+ }
12094+
12095+ return count;
12096+}
12097+
12098+/*
12099+ * Go through local restbl and for each rsb we're master of, clear out any
12100+ * lkb's held by departed nodes.
12101+ */
12102+
10d56c87 12103+int restbl_lkb_purge(struct dlm_ls *ls)
4bf12011 12104+{
12105+ struct list_head *tmp2, *safe2;
12106+ int count = 0;
10d56c87 12107+ struct dlm_rsb *rootrsb, *safe, *rsb;
4bf12011 12108+
12109+ log_all(ls, "purge locks of departed nodes");
12110+
12111+ list_for_each_entry_safe(rootrsb, safe, &ls->ls_rootres, res_rootlist) {
12112+
4bf12011 12113+ if (rootrsb->res_nodeid)
12114+ continue;
12115+
12116+ hold_rsb(rootrsb);
12117+ down_write(&rootrsb->res_lock);
12118+
12119+ /* This traverses the subreslist in reverse order so we purge
12120+ * the children before their parents. */
12121+
12122+ for (tmp2 = rootrsb->res_subreslist.prev, safe2 = tmp2->prev;
12123+ tmp2 != &rootrsb->res_subreslist;
12124+ tmp2 = safe2, safe2 = safe2->prev) {
10d56c87 12125+ rsb = list_entry(tmp2, struct dlm_rsb, res_subreslist);
4bf12011 12126+
12127+ hold_rsb(rsb);
12128+ purge_queue(ls, &rsb->res_grantqueue);
12129+ purge_queue(ls, &rsb->res_convertqueue);
12130+ purge_queue(ls, &rsb->res_waitqueue);
12131+ release_rsb(rsb);
12132+ }
12133+ count += purge_queue(ls, &rootrsb->res_grantqueue);
12134+ count += purge_queue(ls, &rootrsb->res_convertqueue);
12135+ count += purge_queue(ls, &rootrsb->res_waitqueue);
12136+
12137+ up_write(&rootrsb->res_lock);
12138+ release_rsb(rootrsb);
12139+ }
12140+
12141+ log_all(ls, "purged %d locks", count);
12142+
12143+ return 0;
12144+}
12145+
12146+/*
12147+ * Grant any locks that have become grantable after a purge
12148+ */
12149+
10d56c87 12150+int restbl_grant_after_purge(struct dlm_ls *ls)
4bf12011 12151+{
10d56c87 12152+ struct dlm_rsb *root, *rsb, *safe;
4bf12011 12153+ int error = 0;
12154+
12155+ down_write(&ls->ls_gap_rsblist);
12156+
12157+ list_for_each_entry_safe(root, safe, &ls->ls_rootres, res_rootlist) {
12158+ /* only the rsb master grants locks */
12159+ if (root->res_nodeid)
12160+ continue;
12161+
12162+ if (!test_bit(LSFL_LS_RUN, &ls->ls_flags)) {
12163+ log_debug(ls, "restbl_grant_after_purge aborted");
12164+ error = -EINTR;
12165+ up_write(&ls->ls_gap_rsblist);
12166+ goto out;
12167+ }
12168+
12169+ down_write(&root->res_lock);
12170+ grant_pending_locks(root);
12171+ up_write(&root->res_lock);
12172+
12173+ list_for_each_entry(rsb, &root->res_subreslist, res_subreslist){
12174+ down_write(&rsb->res_lock);
12175+ grant_pending_locks(rsb);
12176+ up_write(&rsb->res_lock);
12177+ }
12178+ }
12179+ up_write(&ls->ls_gap_rsblist);
12180+ wake_astd();
12181+ out:
12182+ return error;
12183+}
12184+
12185+/*
12186+ * Set the lock master for all LKBs in a lock queue
12187+ */
12188+
12189+static void set_lock_master(struct list_head *queue, int nodeid)
12190+{
10d56c87 12191+ struct dlm_lkb *lkb;
4bf12011 12192+
12193+ list_for_each_entry(lkb, queue, lkb_statequeue) {
12194+ /* Don't muck around with pre-existing sublocks */
12195+ if (!(lkb->lkb_flags & GDLM_LKFLG_MSTCPY))
12196+ lkb->lkb_nodeid = nodeid;
12197+ }
12198+}
12199+
10d56c87 12200+static void set_master_lkbs(struct dlm_rsb *rsb)
4bf12011 12201+{
12202+ set_lock_master(&rsb->res_grantqueue, rsb->res_nodeid);
12203+ set_lock_master(&rsb->res_convertqueue, rsb->res_nodeid);
12204+ set_lock_master(&rsb->res_waitqueue, rsb->res_nodeid);
12205+}
12206+
12207+/*
12208+ * This rsb struct is now the master so it is responsible for keeping the
12209+ * latest lvb. Find if any current lkb's have an up-to-date copy of the lvb to
12210+ * be used as the rsb copy. An equivalent step occurs as new lkb's arrive for
12211+ * this rsb in deserialise_lkb.
12212+ */
12213+
10d56c87 12214+static void set_rsb_lvb(struct dlm_rsb *rsb)
4bf12011 12215+{
10d56c87 12216+ struct dlm_lkb *lkb;
4bf12011 12217+
12218+ list_for_each_entry(lkb, &rsb->res_grantqueue, lkb_statequeue) {
12219+
12220+ if (!(lkb->lkb_flags & GDLM_LKFLG_DELETED) &&
12221+ (lkb->lkb_flags & GDLM_LKFLG_VALBLK) &&
12222+ (lkb->lkb_grmode > DLM_LOCK_NL))
12223+ {
12224+ if (!rsb->res_lvbptr)
12225+ rsb->res_lvbptr = allocate_lvb(rsb->res_ls);
12226+
12227+ memcpy(rsb->res_lvbptr, lkb->lkb_lvbptr, DLM_LVB_LEN);
12228+ return;
12229+ }
12230+ }
12231+
12232+ list_for_each_entry(lkb, &rsb->res_convertqueue, lkb_statequeue) {
12233+
12234+ if (!(lkb->lkb_flags & GDLM_LKFLG_DELETED) &&
12235+ (lkb->lkb_flags & GDLM_LKFLG_VALBLK) &&
12236+ (lkb->lkb_grmode > DLM_LOCK_NL))
12237+ {
12238+ if (!rsb->res_lvbptr)
12239+ rsb->res_lvbptr = allocate_lvb(rsb->res_ls);
12240+
12241+ memcpy(rsb->res_lvbptr, lkb->lkb_lvbptr, DLM_LVB_LEN);
12242+ return;
12243+ }
12244+ }
12245+}
12246+
12247+/*
12248+ * Propagate the new master nodeid to locks, subrsbs, sublocks.
12249+ * The NEW_MASTER flag tells rebuild_rsbs_send() which rsb's to consider.
12250+ */
12251+
10d56c87 12252+static void set_new_master(struct dlm_rsb *rsb)
4bf12011 12253+{
10d56c87 12254+ struct dlm_rsb *subrsb;
4bf12011 12255+
12256+ down_write(&rsb->res_lock);
12257+
12258+ if (rsb->res_nodeid == our_nodeid()) {
12259+ rsb->res_nodeid = 0;
12260+ set_rsb_lvb(rsb);
12261+ }
12262+
12263+ set_master_lkbs(rsb);
12264+
12265+ list_for_each_entry(subrsb, &rsb->res_subreslist, res_subreslist) {
12266+ subrsb->res_nodeid = rsb->res_nodeid;
12267+ set_master_lkbs(subrsb);
12268+ }
12269+
12270+ up_write(&rsb->res_lock);
12271+
12272+ set_bit(RESFL_NEW_MASTER, &rsb->res_flags);
12273+}
12274+
12275+/*
12276+ * The recover_list contains all the rsb's for which we've requested the new
12277+ * master nodeid. As replies are returned from the resource directories the
12278+ * rsb's are removed from the list. When the list is empty we're done.
12279+ *
12280+ * The recover_list is later similarly used for all rsb's for which we've sent
12281+ * new lkb's and need to receive new corresponding lkid's.
12282+ */
12283+
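/*
 * For illustration (not part of the original patch), the lifecycle is:
 *
 *	rsb->res_recover_msgid = ls->ls_rcom_msgid + 1;
 *	recover_list_add(rsb);				(holds the rsb)
 *	rcom_send_message(ls, dir_nodeid, RECCOMM_GETMASTER, rc, 0);
 *	...reply handled by restbl_rsb_update_recv()...
 *	recover_list_del(rsb);				(releases the rsb)
 *	if (recover_list_empty(ls))
 *		wake_up(&ls->ls_wait_general);
 *
 * while the initiator waits in dlm_wait_function(ls, &recover_list_empty).
 */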
10d56c87 12284+int recover_list_empty(struct dlm_ls *ls)
4bf12011 12285+{
12286+ int empty;
12287+
12288+ spin_lock(&ls->ls_recover_list_lock);
12289+ empty = list_empty(&ls->ls_recover_list);
12290+ spin_unlock(&ls->ls_recover_list_lock);
12291+
12292+ return empty;
12293+}
12294+
10d56c87 12295+int recover_list_count(struct dlm_ls *ls)
4bf12011 12296+{
12297+ int count;
12298+
12299+ spin_lock(&ls->ls_recover_list_lock);
12300+ count = ls->ls_recover_list_count;
12301+ spin_unlock(&ls->ls_recover_list_lock);
12302+
12303+ return count;
12304+}
12305+
10d56c87 12306+void recover_list_add(struct dlm_rsb *rsb)
4bf12011 12307+{
10d56c87 12308+ struct dlm_ls *ls = rsb->res_ls;
4bf12011 12309+
12310+ spin_lock(&ls->ls_recover_list_lock);
12311+ if (!test_and_set_bit(RESFL_RECOVER_LIST, &rsb->res_flags)) {
12312+ list_add_tail(&rsb->res_recover_list, &ls->ls_recover_list);
12313+ ls->ls_recover_list_count++;
12314+ hold_rsb(rsb);
12315+ }
12316+ spin_unlock(&ls->ls_recover_list_lock);
12317+}
12318+
10d56c87 12319+void recover_list_del(struct dlm_rsb *rsb)
4bf12011 12320+{
10d56c87 12321+ struct dlm_ls *ls = rsb->res_ls;
4bf12011 12322+
12323+ spin_lock(&ls->ls_recover_list_lock);
12324+ clear_bit(RESFL_RECOVER_LIST, &rsb->res_flags);
12325+ list_del(&rsb->res_recover_list);
12326+ ls->ls_recover_list_count--;
12327+ spin_unlock(&ls->ls_recover_list_lock);
12328+
12329+ release_rsb(rsb);
12330+}
12331+
10d56c87 12332+static struct dlm_rsb *recover_list_find(struct dlm_ls *ls, int msgid)
4bf12011 12333+{
10d56c87 12334+ struct dlm_rsb *rsb = NULL;
4bf12011 12335+
12336+ spin_lock(&ls->ls_recover_list_lock);
12337+
12338+ list_for_each_entry(rsb, &ls->ls_recover_list, res_recover_list) {
12339+ if (rsb->res_recover_msgid == msgid)
12340+ goto rec_found;
12341+ }
12342+ rsb = NULL;
12343+
12344+ rec_found:
12345+ spin_unlock(&ls->ls_recover_list_lock);
12346+ return rsb;
12347+}
12348+
12349+#if 0
10d56c87 12350+static void recover_list_clear(struct dlm_ls *ls)
4bf12011 12351+{
10d56c87 12352+ struct dlm_rsb *rsb;
4bf12011 12353+
12354+
12355+ spin_lock(&ls->ls_recover_list_lock);
12356+
12357+ while (!list_empty(&ls->ls_recover_list)) {
10d56c87 12358+ rsb = list_entry(ls->ls_recover_list.next, struct dlm_rsb,
4bf12011 12359+ res_recover_list);
12360+ list_del(&rsb->res_recover_list);
12361+ ls->ls_recover_list_count--;
12362+ }
12363+ spin_unlock(&ls->ls_recover_list_lock);
12364+
12365+}
12366+#endif
12367+
10d56c87 12368+static int rsb_master_lookup(struct dlm_rsb *rsb, struct dlm_rcom *rc)
4bf12011 12369+{
12370+ struct dlm_ls *ls = rsb->res_ls;
12371+ uint32_t dir_nodeid, r_nodeid;
4bf12011 12372+ int error;
12373+
12374+ dir_nodeid = get_directory_nodeid(rsb);
12375+
12376+ if (dir_nodeid == our_nodeid()) {
12377+ error = dlm_dir_lookup_recovery(ls, dir_nodeid, rsb->res_name,
12378+ rsb->res_length, &r_nodeid);
4bf12011 12379+ if (error)
12380+ goto fail;
12381+
10d56c87 12382+ rsb->res_nodeid = r_nodeid;
4bf12011 12383+ set_new_master(rsb);
12384+ } else {
12385+ /* As we are the only thread doing recovery this
12386+ should be safe; if not, we need to use a different
12387+ ID somehow. We must set it in the RSB before rcom_send_message
12388+ completes because we may get a reply quite quickly.
12389+ */
12390+ rsb->res_recover_msgid = ls->ls_rcom_msgid + 1;
12391+
12392+ recover_list_add(rsb);
12393+
12394+ memcpy(rc->rc_buf, rsb->res_name, rsb->res_length);
12395+ rc->rc_datalen = rsb->res_length;
12396+
12397+ error = rcom_send_message(ls, dir_nodeid, RECCOMM_GETMASTER,
12398+ rc, 0);
12399+ if (error)
12400+ goto fail;
12401+ }
12402+
12403+ fail:
12404+ return error;
12405+}
12406+
12407+/*
12408+ * Go through local root resources and for each rsb which has a master which
12409+ * has departed, get the new master nodeid from the resdir. The resdir will
12410+ * assign mastery to the first node to look up the new master. That means
12411+ * we'll discover in this lookup if we're the new master of any rsb's.
12412+ *
12413+ * We fire off all the resdir requests individually and asynchronously to the
12414+ * correct resdir node. The replies are processed in restbl_rsb_update_recv().
12415+ */
12416+
10d56c87 12417+int restbl_rsb_update(struct dlm_ls *ls)
4bf12011 12418+{
12419+ struct dlm_rsb *rsb, *safe;
12420+ struct dlm_rcom *rc;
4bf12011 12421+ int error = -ENOMEM;
12422+ int count = 0;
12423+
12424+ log_all(ls, "update remastered resources");
12425+
12426+ rc = allocate_rcom_buffer(ls);
12427+ if (!rc)
12428+ goto out;
12429+
12430+ list_for_each_entry_safe(rsb, safe, &ls->ls_rootres, res_rootlist) {
12431+ if (!rsb->res_nodeid)
12432+ continue;
12433+
10d56c87 12434+ error = dlm_recovery_stopped(ls);
4bf12011 12435+ if (error)
12436+ goto out_free;
12437+
12438+ if (in_nodes_gone(ls, rsb->res_nodeid)) {
12439+ error = rsb_master_lookup(rsb, rc);
12440+ if (error)
12441+ goto out_free;
12442+ count++;
12443+ }
12444+ }
12445+
10d56c87 12446+ error = dlm_wait_function(ls, &recover_list_empty);
4bf12011 12447+
12448+ log_all(ls, "updated %d resources", count);
12449+
12450+ out_free:
12451+ free_rcom_buffer(rc);
12452+
12453+ out:
12454+ return error;
12455+}
12456+
12457+int restbl_rsb_update_recv(struct dlm_ls *ls, uint32_t nodeid, char *buf,
12458+ int length, int msgid)
4bf12011 12459+{
10d56c87 12460+ struct dlm_rsb *rsb;
4bf12011 12461+ uint32_t be_nodeid;
12462+
12463+ rsb = recover_list_find(ls, msgid);
12464+ if (!rsb) {
12465+ log_error(ls, "restbl_rsb_update_recv rsb not found %d", msgid);
12466+ goto out;
12467+ }
12468+
12469+ memcpy(&be_nodeid, buf, sizeof(uint32_t));
12470+ rsb->res_nodeid = be32_to_cpu(be_nodeid);
12471+ set_new_master(rsb);
12472+ recover_list_del(rsb);
12473+
12474+ if (recover_list_empty(ls))
12475+ wake_up(&ls->ls_wait_general);
12476+
12477+ out:
12478+ return 0;
12479+}
12480+
12481+/*
12482+ * This function is no longer used.
12483+ */
12484+
10d56c87 12485+int bulk_master_lookup(struct dlm_ls *ls, int nodeid, char *inbuf, int inlen,
4bf12011 12486+ char *outbuf)
12487+{
12488+ char *inbufptr, *outbufptr;
12489+
12490+ /*
12491+ * The other node wants nodeids matching the resource names in inbuf.
12492+ * The resource names are packed into inbuf as
12493+ * [len1][name1][len2][name2]... where lenX is 1 byte and nameX is
12494+ * lenX bytes. Matching nodeids are packed into outbuf in order
12495+ * [nodeid1][nodeid2]...
12496+ */
12497+
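	/*
	 * For illustration (not part of the original patch): two hypothetical
	 * names "ab" and "xyz" would arrive packed as
	 *
	 *	inbuf:  02 'a' 'b' 03 'x' 'y' 'z'	(inlen = 7)
	 *	outbuf: two big-endian uint32 nodeids	(8 bytes)
	 */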
12498+ inbufptr = inbuf;
12499+ outbufptr = outbuf;
12500+
12501+ while (inbufptr < inbuf + inlen) {
10d56c87 12502+ uint32_t r_nodeid, be_nodeid;
4bf12011 12503+ int status;
12504+
12505+ status = dlm_dir_lookup_recovery(ls, nodeid, inbufptr + 1,
12506+ *inbufptr, &r_nodeid);
4bf12011 12507+ if (status != 0)
12508+ goto fail;
12509+
12510+ inbufptr += *inbufptr + 1;
12511+
10d56c87 12512+ be_nodeid = cpu_to_be32(r_nodeid);
4bf12011 12513+ memcpy(outbufptr, &be_nodeid, sizeof(uint32_t));
12514+ outbufptr += sizeof(uint32_t);
12515+
12516+ /* add an assertion that outbufptr - outbuf does not exceed ... */
12517+ }
12518+
12519+ return (outbufptr - outbuf);
12520+
12521+ fail:
12522+ return -1;
12523+}
12524diff -urN linux-orig/cluster/dlm/recover.h linux-patched/cluster/dlm/recover.h
12525--- linux-orig/cluster/dlm/recover.h 1970-01-01 07:30:00.000000000 +0730
12526+++ linux-patched/cluster/dlm/recover.h 2004-07-13 18:57:22.000000000 +0800
12527@@ -0,0 +1,33 @@
4bf12011 12528+/******************************************************************************
12529+*******************************************************************************
12530+**
12531+** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
12532+** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
12533+**
12534+** This copyrighted material is made available to anyone wishing to use,
12535+** modify, copy, or redistribute it subject to the terms and conditions
12536+** of the GNU General Public License v.2.
12537+**
12538+*******************************************************************************
12539+******************************************************************************/
12540+
12541+#ifndef __RECOVER_DOT_H__
12542+#define __RECOVER_DOT_H__
12543+
12544+int dlm_wait_function(struct dlm_ls *ls, int (*testfn) (struct dlm_ls * ls));
12545+int dlm_wait_status_all(struct dlm_ls *ls, unsigned int wait_status);
12546+int dlm_wait_status_low(struct dlm_ls *ls, unsigned int wait_status);
12547+int dlm_recovery_stopped(struct dlm_ls *ls);
12548+int recover_list_empty(struct dlm_ls *ls);
12549+int recover_list_count(struct dlm_ls *ls);
12550+void recover_list_add(struct dlm_rsb *rsb);
12551+void recover_list_del(struct dlm_rsb *rsb);
12552+int restbl_lkb_purge(struct dlm_ls *ls);
12553+int restbl_grant_after_purge(struct dlm_ls *ls);
12554+int restbl_rsb_update(struct dlm_ls *ls);
12555+int restbl_rsb_update_recv(struct dlm_ls *ls, uint32_t nodeid, char *buf, int len,
4bf12011 12556+ int msgid);
10d56c87 12557+int bulk_master_lookup(struct dlm_ls *ls, int nodeid, char *inbuf, int inlen,
4bf12011 12558+ char *outbuf);
12559+
12560+#endif /* __RECOVER_DOT_H__ */
12561diff -urN linux-orig/cluster/dlm/recoverd.c linux-patched/cluster/dlm/recoverd.c
12562--- linux-orig/cluster/dlm/recoverd.c 1970-01-01 07:30:00.000000000 +0730
12563+++ linux-patched/cluster/dlm/recoverd.c 2004-07-13 18:57:22.000000000 +0800
12564@@ -0,0 +1,693 @@
4bf12011 12565+/******************************************************************************
12566+*******************************************************************************
12567+**
12568+** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
12569+** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
12570+**
12571+** This copyrighted material is made available to anyone wishing to use,
12572+** modify, copy, or redistribute it subject to the terms and conditions
12573+** of the GNU General Public License v.2.
12574+**
12575+*******************************************************************************
12576+******************************************************************************/
12577+
12578+#include "dlm_internal.h"
12579+#include "nodes.h"
12580+#include "dir.h"
12581+#include "ast.h"
12582+#include "recover.h"
12583+#include "lockspace.h"
12584+#include "lowcomms.h"
12585+#include "lockqueue.h"
12586+#include "lkb.h"
12587+#include "rebuild.h"
12588+
12589+/*
12590+ * next_move actions
12591+ */
12592+
12593+#define DO_STOP (1)
12594+#define DO_START (2)
12595+#define DO_FINISH (3)
12596+#define DO_FINISH_STOP (4)
12597+#define DO_FINISH_START (5)
12598+
12599+/*
12600+ * recoverd_flags for thread
12601+ */
12602+
12603+#define THREAD_STOP (0)
12604+
12605+/*
12606+ * local thread variables
12607+ */
12608+
12609+static unsigned long recoverd_flags;
12610+static struct completion recoverd_run;
12611+static wait_queue_head_t recoverd_wait;
12612+static struct task_struct *recoverd_task;
12613+
12614+/*
10d56c87 12615+ * Queue of lockspaces (dlm_recover structs) which need to be
4bf12011 12616+ * started/recovered
12617+ */
12618+
12619+static struct list_head recoverd_start_queue;
12620+static atomic_t recoverd_start_count;
12621+
12622+extern struct list_head lslist;
12623+extern spinlock_t lslist_lock;
12624+
12625+void dlm_recoverd_init(void)
12626+{
12627+ INIT_LIST_HEAD(&recoverd_start_queue);
12628+ atomic_set(&recoverd_start_count, 0);
12629+
12630+ init_completion(&recoverd_run);
12631+ init_waitqueue_head(&recoverd_wait);
12632+ memset(&recoverd_flags, 0, sizeof(unsigned long));
12633+}
12634+
10d56c87 12635+static int enable_locking(struct dlm_ls *ls, int event_id)
4bf12011 12636+{
12637+ int error = 0;
12638+
12639+ spin_lock(&ls->ls_recover_lock);
12640+ if (ls->ls_last_stop < event_id) {
12641+ set_bit(LSFL_LS_RUN, &ls->ls_flags);
12642+ up_write(&ls->ls_in_recovery);
12643+ } else {
12644+ error = -EINTR;
12645+ log_debug(ls, "enable_locking: abort %d", event_id);
12646+ }
12647+ spin_unlock(&ls->ls_recover_lock);
12648+ return error;
12649+}
12650+
10d56c87 12651+static int ls_first_start(struct dlm_ls *ls, struct dlm_recover *rv)
4bf12011 12652+{
12653+ int error;
12654+
10d56c87 12655+ log_all(ls, "recover event %u (first)", rv->event_id);
4bf12011 12656+
12657+ kcl_global_service_id(ls->ls_local_id, &ls->ls_global_id);
12658+
10d56c87 12659+ error = ls_nodes_init(ls, rv);
4bf12011 12660+ if (error) {
12661+ log_error(ls, "nodes_init failed %d", error);
12662+ goto out;
12663+ }
12664+
10d56c87 12665+ error = dlm_dir_rebuild_local(ls);
4bf12011 12666+ if (error) {
10d56c87 12667+ log_error(ls, "dlm_dir_rebuild_local failed %d", error);
4bf12011 12668+ goto out;
12669+ }
12670+
10d56c87 12671+ error = dlm_dir_rebuild_wait(ls);
4bf12011 12672+ if (error) {
10d56c87 12673+ log_error(ls, "dlm_dir_rebuild_wait failed %d", error);
4bf12011 12674+ goto out;
12675+ }
12676+
12677+ log_all(ls, "recover event %u done", rv->event_id);
12678+ kcl_start_done(ls->ls_local_id, rv->event_id);
4bf12011 12679+
12680+ out:
12681+ return error;
12682+}
12683+
12684+/*
12685+ * We are given here a new group of nodes which are in the lockspace. We first
12686+ * figure out the differences in ls membership from when we were last running.
12687+ * If nodes from before are gone, then there will be some lock recovery to do.
12688+ * If there are only nodes which have joined, then there's no lock recovery.
12689+ *
12690+ * note: cman requires an rc to finish starting on an revent (where nodes die)
12691+ * before it allows an sevent (where nodes join) to be processed. This means
12692+ * that we won't get start1 with nodeA gone, stop/cancel, start2 with nodeA
12693+ * joined.
12694+ */
12695+
10d56c87 12696+static int ls_reconfig(struct dlm_ls *ls, struct dlm_recover *rv)
4bf12011 12697+{
12698+ int error, neg = 0;
12699+
10d56c87 12700+ log_all(ls, "recover event %u", rv->event_id);
4bf12011 12701+
12702+ /*
12703+ * Add or remove nodes from the lockspace's ls_nodes list.
12704+ */
12705+
10d56c87 12706+ error = ls_nodes_reconfig(ls, rv, &neg);
4bf12011 12707+ if (error) {
12708+ log_error(ls, "nodes_reconfig failed %d", error);
12709+ goto fail;
12710+ }
12711+
12712+ /*
12713+ * Rebuild our own share of the resdir by collecting from all other
12714+ * nodes rsb name/master pairs for which the name hashes to us.
12715+ */
12716+
10d56c87 12717+ error = dlm_dir_rebuild_local(ls);
4bf12011 12718+ if (error) {
10d56c87 12719+ log_error(ls, "dlm_dir_rebuild_local failed %d", error);
4bf12011 12720+ goto fail;
12721+ }
12722+
12723+ /*
12724+ * Purge resdir-related requests that are being held in requestqueue.
12725+ * All resdir requests from before recovery started are invalid now due
12726+ * to the resdir rebuild and will be resent by the requesting nodes.
12727+ */
12728+
12729+ purge_requestqueue(ls);
12730+ set_bit(LSFL_REQUEST_WARN, &ls->ls_flags);
12731+
12732+ /*
12733+ * Wait for all nodes to complete resdir rebuild.
12734+ */
12735+
10d56c87 12736+ error = dlm_dir_rebuild_wait(ls);
4bf12011 12737+ if (error) {
10d56c87 12738+ log_error(ls, "dlm_dir_rebuild_wait failed %d", error);
4bf12011 12739+ goto fail;
12740+ }
12741+
12742+ /*
12743+ * Mark our own lkb's waiting in the lockqueue for remote replies from
12744+ * nodes that are now departed. These will be resent to the new
12745+ * masters in resend_cluster_requests. Also mark resdir lookup
12746+ * requests for resending.
12747+ */
12748+
12749+ lockqueue_lkb_mark(ls);
12750+
10d56c87 12751+ error = dlm_recovery_stopped(ls);
4bf12011 12752+ if (error)
12753+ goto fail;
12754+
12755+ if (neg) {
12756+ /*
12757+ * Clear lkb's for departed nodes. This can't fail since it
12758+ * doesn't involve communicating with other nodes.
12759+ */
12760+
12761+ down_write(&ls->ls_rec_rsblist);
12762+ restbl_lkb_purge(ls);
12763+ up_write(&ls->ls_rec_rsblist);
12764+
12765+ down_read(&ls->ls_rec_rsblist);
12766+
12767+ /*
12768+ * Get new master id's for rsb's of departed nodes. This fails
12769+ * if we can't communicate with other nodes.
12770+ */
12771+
12772+ error = restbl_rsb_update(ls);
12773+ if (error) {
12774+ log_error(ls, "restbl_rsb_update failed %d", error);
12775+ goto fail_up;
12776+ }
12777+
12778+ /*
12779+ * Send our lkb info to new masters. This fails if we can't
12780+ * communicate with a node.
12781+ */
12782+
12783+ error = rebuild_rsbs_send(ls);
12784+ if (error) {
12785+ log_error(ls, "rebuild_rsbs_send failed %d", error);
12786+ goto fail_up;
12787+ }
12788+ up_read(&ls->ls_rec_rsblist);
12789+ }
12790+
12791+ clear_bit(LSFL_REQUEST_WARN, &ls->ls_flags);
12792+
12793+ log_all(ls, "recover event %u done", rv->event_id);
12794+ kcl_start_done(ls->ls_local_id, rv->event_id);
4bf12011 12795+ return 0;
12796+
12797+ fail_up:
12798+ up_read(&ls->ls_rec_rsblist);
12799+ fail:
10d56c87 12800+ log_all(ls, "recover event %u error %d", rv->event_id, error);
4bf12011 12801+ return error;
12802+}
12803+
10d56c87 12804+static void clear_finished_nodes(struct dlm_ls *ls, int finish_event)
4bf12011 12805+{
10d56c87 12806+ struct dlm_csb *csb, *safe;
4bf12011 12807+
12808+ list_for_each_entry_safe(csb, safe, &ls->ls_nodes_gone, list) {
12809+ if (csb->gone_event <= finish_event) {
12810+ list_del(&csb->list);
4bf12011 12811+ release_csb(csb);
12812+ }
12813+ }
12814+}
12815+
12816+/*
12817+ * Between calls to this routine for a lockspace, there can be multiple stop/start
12818+ * events from cman where every start but the latest is cancelled by stops.
12819+ * There can only be a single finish from cman because every finish requires us
12820+ * to call start_done. A single finish event could be followed by multiple
12821+ * stop/start events. This routine takes any combination of events from cman
12822+ * and boils them down to one course of action.
12823+ */
12824+
12825+static int next_move(struct dlm_ls *ls, struct dlm_recover **rv_out,
12826+ int *finish_out)
4bf12011 12827+{
12828+ LIST_HEAD(events);
12829+ unsigned int cmd = 0, stop, start, finish;
12830+ unsigned int last_stop, last_start, last_finish;
10d56c87 12831+ struct dlm_recover *rv = NULL, *start_rv = NULL;
4bf12011 12832+
12833+ /*
12834+ * Grab the current state of cman/sm events.
12835+ */
12836+
12837+ spin_lock(&ls->ls_recover_lock);
12838+
12839+ stop = test_and_clear_bit(LSFL_LS_STOP, &ls->ls_flags) ? 1 : 0;
12840+ start = test_and_clear_bit(LSFL_LS_START, &ls->ls_flags) ? 1 : 0;
12841+ finish = test_and_clear_bit(LSFL_LS_FINISH, &ls->ls_flags) ? 1 : 0;
12842+
12843+ last_stop = ls->ls_last_stop;
12844+ last_start = ls->ls_last_start;
12845+ last_finish = ls->ls_last_finish;
12846+
12847+ while (!list_empty(&ls->ls_recover)) {
12848+ rv = list_entry(ls->ls_recover.next, struct dlm_recover, list);
12849+ list_del(&rv->list);
12850+ list_add_tail(&rv->list, &events);
4bf12011 12851+ }
12852+ spin_unlock(&ls->ls_recover_lock);
12853+
12854+ log_debug(ls, "move flags %u,%u,%u ids %u,%u,%u", stop, start, finish,
12855+ last_stop, last_start, last_finish);
12856+
12857+ /*
12858+ * Toss start events which have since been cancelled.
12859+ */
12860+
12861+ while (!list_empty(&events)) {
12862+ DLM_ASSERT(start,);
12863+ rv = list_entry(events.next, struct dlm_recover, list);
12864+ list_del(&rv->list);
12865+
12866+ if (rv->event_id <= last_stop) {
12867+ log_debug(ls, "move skip event %u", rv->event_id);
12868+ kfree(rv->nodeids);
12869+ kfree(rv);
12870+ rv = NULL;
4bf12011 12871+ } else {
12872+ log_debug(ls, "move use event %u", rv->event_id);
12873+ DLM_ASSERT(!start_rv,);
12874+ start_rv = rv;
4bf12011 12875+ }
12876+ }
12877+
12878+ /*
12879+ * Eight possible combinations of events.
12880+ */
12881+
12882+ /* 0 */
12883+ if (!stop && !start && !finish) {
10d56c87 12884+ DLM_ASSERT(!start_rv,);
4bf12011 12885+ cmd = 0;
12886+ goto out;
12887+ }
12888+
12889+ /* 1 */
12890+ if (!stop && !start && finish) {
12891+ DLM_ASSERT(!start_rv,);
12892+ DLM_ASSERT(last_start > last_stop,);
12893+ DLM_ASSERT(last_finish == last_start,);
4bf12011 12894+ cmd = DO_FINISH;
12895+ *finish_out = last_finish;
12896+ goto out;
12897+ }
12898+
12899+ /* 2 */
12900+ if (!stop && start && !finish) {
12901+ DLM_ASSERT(start_rv,);
12902+ DLM_ASSERT(last_start > last_stop,);
4bf12011 12903+ cmd = DO_START;
10d56c87 12904+ *rv_out = start_rv;
4bf12011 12905+ goto out;
12906+ }
12907+
12908+ /* 3 */
12909+ if (!stop && start && finish) {
10d56c87 12910+ DLM_ASSERT(0, printk("finish and start with no stop\n"););
4bf12011 12911+ }
12912+
12913+ /* 4 */
12914+ if (stop && !start && !finish) {
12915+ DLM_ASSERT(!start_rv,);
12916+ DLM_ASSERT(last_start == last_stop,);
4bf12011 12917+ cmd = DO_STOP;
12918+ goto out;
12919+ }
12920+
12921+ /* 5 */
12922+ if (stop && !start && finish) {
12923+ DLM_ASSERT(!start_rv,);
12924+ DLM_ASSERT(last_finish == last_start,);
12925+ DLM_ASSERT(last_stop == last_start,);
4bf12011 12926+ cmd = DO_FINISH_STOP;
12927+ *finish_out = last_finish;
12928+ goto out;
12929+ }
12930+
12931+ /* 6 */
12932+ if (stop && start && !finish) {
12933+ if (start_rv) {
12934+ DLM_ASSERT(last_start > last_stop,);
4bf12011 12935+ cmd = DO_START;
10d56c87 12936+ *rv_out = start_rv;
4bf12011 12937+ } else {
10d56c87 12938+ DLM_ASSERT(last_stop == last_start,);
4bf12011 12939+ cmd = DO_STOP;
12940+ }
12941+ goto out;
12942+ }
12943+
12944+ /* 7 */
12945+ if (stop && start && finish) {
12946+ if (start_rv) {
12947+ DLM_ASSERT(last_start > last_stop,);
12948+ DLM_ASSERT(last_start > last_finish,);
4bf12011 12949+ cmd = DO_FINISH_START;
12950+ *finish_out = last_finish;
10d56c87 12951+ *rv_out = start_rv;
4bf12011 12952+ } else {
12953+ DLM_ASSERT(last_start == last_stop,);
12954+ DLM_ASSERT(last_start > last_finish,);
4bf12011 12955+ cmd = DO_FINISH_STOP;
12956+ *finish_out = last_finish;
12957+ }
12958+ goto out;
12959+ }
12960+
12961+ out:
12962+ return cmd;
12963+}
12964+
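The cancellation rule in next_move() (a queued start event is dead once a stop with an equal or higher event id has been seen, and at most one start survives) can be shown in isolation. A standalone sketch with hypothetical event ids, mirroring the filtering loop above:

#include <stdio.h>

/* Standalone illustration of the start-event filtering in next_move():
 * any queued start whose id is <= the id of the most recent stop has
 * been cancelled and is discarded; only the newest surviving start is
 * acted on.  Event ids here are hypothetical. */
int main(void)
{
	int queued_starts[] = { 5, 7, 9 };	/* start events, oldest first */
	int last_stop = 7;			/* id of the latest stop */
	int use = -1;

	for (int i = 0; i < 3; i++) {
		if (queued_starts[i] <= last_stop)
			printf("skip event %d\n", queued_starts[i]);
		else
			use = queued_starts[i];	/* the one start to run */
	}
	printf("use event %d\n", use);		/* prints: use event 9 */
	return 0;
}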
12965+/*
12966+ * This function decides what to do given every combination of current
12967+ * lockspace state and next lockspace state.
12968+ */
12969+
10d56c87 12970+static void do_ls_recovery(struct dlm_ls *ls)
4bf12011 12971+{
10d56c87 12972+ struct dlm_recover *rv = NULL;
4bf12011 12973+ int error, cur_state, next_state = 0, do_now, finish_event = 0;
12974+
10d56c87 12975+ do_now = next_move(ls, &rv, &finish_event);
4bf12011 12976+ if (!do_now)
12977+ goto out;
12978+
12979+ cur_state = ls->ls_state;
12980+ next_state = 0;
12981+
10d56c87 12982+ DLM_ASSERT(!test_bit(LSFL_LS_RUN, &ls->ls_flags),
4bf12011 12983+ log_error(ls, "curstate=%d donow=%d", cur_state, do_now););
12984+
12985+ /*
12986+ * LSST_CLEAR - we're not in any recovery state. We can get a stop or
12987+ * a stop and start which equates with a START.
12988+ */
12989+
12990+ if (cur_state == LSST_CLEAR) {
12991+ switch (do_now) {
12992+ case DO_STOP:
12993+ next_state = LSST_WAIT_START;
12994+ break;
12995+
12996+ case DO_START:
10d56c87 12997+ error = ls_reconfig(ls, rv);
4bf12011 12998+ if (error)
12999+ next_state = LSST_WAIT_START;
13000+ else
13001+ next_state = LSST_RECONFIG_DONE;
13002+ break;
13003+
13004+ case DO_FINISH: /* invalid */
13005+ case DO_FINISH_STOP: /* invalid */
13006+ case DO_FINISH_START: /* invalid */
13007+ default:
10d56c87 13008+ DLM_ASSERT(0,);
4bf12011 13009+ }
13010+ goto out;
13011+ }
13012+
13013+ /*
13014+ * LSST_WAIT_START - we're not running because of getting a stop or
13015+ * failing a start. We wait in this state for another stop/start or
13016+ * just the next start to begin another reconfig attempt.
13017+ */
13018+
13019+ if (cur_state == LSST_WAIT_START) {
13020+ switch (do_now) {
13021+ case DO_STOP:
13022+ break;
13023+
13024+ case DO_START:
10d56c87 13025+ error = ls_reconfig(ls, rv);
4bf12011 13026+ if (error)
13027+ next_state = LSST_WAIT_START;
13028+ else
13029+ next_state = LSST_RECONFIG_DONE;
13030+ break;
13031+
13032+ case DO_FINISH: /* invalid */
13033+ case DO_FINISH_STOP: /* invalid */
13034+ case DO_FINISH_START: /* invalid */
13035+ default:
10d56c87 13036+ DLM_ASSERT(0,);
4bf12011 13037+ }
13038+ goto out;
13039+ }
13040+
13041+ /*
13042+ * LSST_RECONFIG_DONE - we entered this state after successfully
13043+ * completing ls_reconfig and calling kcl_start_done. We expect to get
13044+ * a finish if everything goes ok. A finish could be followed by stop
13045+ * or stop/start before we get here to check it. Or a finish may never
13046+ * happen, only stop or stop/start.
13047+ */
13048+
13049+ if (cur_state == LSST_RECONFIG_DONE) {
13050+ switch (do_now) {
13051+ case DO_FINISH:
13052+ clear_finished_nodes(ls, finish_event);
13053+ next_state = LSST_CLEAR;
13054+
13055+ error = enable_locking(ls, finish_event);
13056+ if (error)
13057+ break;
13058+
13059+ error = process_requestqueue(ls);
13060+ if (error)
13061+ break;
13062+
13063+ error = resend_cluster_requests(ls);
13064+ if (error)
13065+ break;
13066+
13067+ restbl_grant_after_purge(ls);
13068+
13069+ log_all(ls, "recover event %u finished", finish_event);
13070+ break;
13071+
13072+ case DO_STOP:
13073+ next_state = LSST_WAIT_START;
13074+ break;
13075+
13076+ case DO_FINISH_STOP:
13077+ clear_finished_nodes(ls, finish_event);
13078+ next_state = LSST_WAIT_START;
13079+ break;
13080+
13081+ case DO_FINISH_START:
13082+ clear_finished_nodes(ls, finish_event);
13083+ /* fall into DO_START */
13084+
13085+ case DO_START:
10d56c87 13086+ error = ls_reconfig(ls, rv);
4bf12011 13087+ if (error)
13088+ next_state = LSST_WAIT_START;
13089+ else
13090+ next_state = LSST_RECONFIG_DONE;
13091+ break;
13092+
13093+ default:
10d56c87 13094+ DLM_ASSERT(0,);
4bf12011 13095+ }
13096+ goto out;
13097+ }
13098+
13099+ /*
13100+ * LSST_INIT - state after ls is created and before it has been
13101+ * started. A start operation will cause the ls to be started for the
13102+ * first time. A failed start will cause it to just wait in INIT for
13103+ * another stop/start.
13104+ */
13105+
13106+ if (cur_state == LSST_INIT) {
13107+ switch (do_now) {
13108+ case DO_START:
10d56c87 13109+ error = ls_first_start(ls, rv);
4bf12011 13110+ if (!error)
13111+ next_state = LSST_INIT_DONE;
13112+ break;
13113+
13114+ case DO_STOP:
13115+ break;
13116+
13117+ case DO_FINISH: /* invalid */
13118+ case DO_FINISH_STOP: /* invalid */
13119+ case DO_FINISH_START: /* invalid */
13120+ default:
10d56c87 13121+ DLM_ASSERT(0,);
4bf12011 13122+ }
13123+ goto out;
13124+ }
13125+
13126+ /*
13127+ * LSST_INIT_DONE - after the first start operation is completed
13128+ * successfully and kcl_start_done() called. If there are no errors, a
13129+ * finish will arrive next and we'll move to LSST_CLEAR.
13130+ */
13131+
13132+ if (cur_state == LSST_INIT_DONE) {
13133+ switch (do_now) {
13134+ case DO_STOP:
13135+ case DO_FINISH_STOP:
13136+ next_state = LSST_WAIT_START;
13137+ break;
13138+
13139+ case DO_START:
13140+ case DO_FINISH_START:
10d56c87 13141+ error = ls_reconfig(ls, rv);
4bf12011 13142+ if (error)
13143+ next_state = LSST_WAIT_START;
13144+ else
13145+ next_state = LSST_RECONFIG_DONE;
13146+ break;
13147+
13148+ case DO_FINISH:
13149+ next_state = LSST_CLEAR;
13150+ enable_locking(ls, finish_event);
13151+ log_all(ls, "recover event %u finished", finish_event);
13152+ break;
13153+
13154+ default:
10d56c87 13155+ DLM_ASSERT(0,);
4bf12011 13156+ }
13157+ goto out;
13158+ }
13159+
13160+ out:
13161+ if (next_state)
13162+ ls->ls_state = next_state;
13163+
13164+ if (rv) {
13165+ kfree(rv->nodeids);
13166+ kfree(rv);
4bf12011 13167+ }
13168+}
13169+
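The four state blocks above reduce to a small transition table. The following standalone sketch condenses it into a pure function; the states and actions are hypothetical stand-ins for the LSST_ and DO_ constants, ok says whether ls_first_start()/ls_reconfig() succeeded, and combinations the real code asserts against (such as a finish arriving in LSST_CLEAR) are not modeled:

#include <stdio.h>

enum state { ST_INIT, ST_INIT_DONE, ST_CLEAR, ST_WAIT_START,
	     ST_RECONFIG_DONE };
enum action { ACT_STOP, ACT_START, ACT_FINISH, ACT_FINISH_STOP,
	      ACT_FINISH_START };

static enum state next_state(enum state cur, enum action a, int ok)
{
	switch (a) {
	case ACT_STOP:
		/* A stop before the first start leaves us in INIT. */
		return cur == ST_INIT ? ST_INIT : ST_WAIT_START;
	case ACT_FINISH_STOP:
		return ST_WAIT_START;
	case ACT_START:
	case ACT_FINISH_START:
		if (cur == ST_INIT)
			return ok ? ST_INIT_DONE : ST_INIT;
		return ok ? ST_RECONFIG_DONE : ST_WAIT_START;
	case ACT_FINISH:
		return ST_CLEAR;	/* only valid from the _DONE states */
	}
	return cur;
}

int main(void)
{
	/* A node joins, the first start succeeds, the finish arrives. */
	enum state s = ST_INIT;
	s = next_state(s, ACT_START, 1);	/* -> ST_INIT_DONE */
	s = next_state(s, ACT_FINISH, 1);	/* -> ST_CLEAR */
	printf("final state %d\n", s);		/* prints: final state 2 */
	return 0;
}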
10d56c87 13170+static __inline__ struct dlm_ls *get_work(int clear)
4bf12011 13171+{
10d56c87 13172+ struct dlm_ls *ls;
4bf12011 13173+
13174+ spin_lock(&lslist_lock);
13175+
13176+ list_for_each_entry(ls, &lslist, ls_list) {
13177+ if (clear) {
13178+ if (test_and_clear_bit(LSFL_WORK, &ls->ls_flags))
13179+ goto got_work;
13180+
13181+ } else {
13182+ if (test_bit(LSFL_WORK, &ls->ls_flags))
13183+ goto got_work;
13184+ }
13185+ }
13186+ ls = NULL;
13187+
13188+ got_work:
13189+ spin_unlock(&lslist_lock);
13190+
13191+ return ls;
13192+}
13193+
13194+/*
13195+ * Thread which does recovery for all lockspaces.
13196+ */
13197+
13198+static int dlm_recoverd(void *arg)
13199+{
10d56c87 13200+ struct dlm_ls *ls;
4bf12011 13201+
13202+ daemonize("dlm_recoverd");
13203+ recoverd_task = current;
13204+ complete(&recoverd_run);
13205+
13206+ while (!test_bit(THREAD_STOP, &recoverd_flags)) {
13207+ wchan_cond_sleep_intr(recoverd_wait, !get_work(0));
13208+ if ((ls = get_work(1)))
13209+ do_ls_recovery(ls);
13210+ }
13211+
13212+ complete(&recoverd_run);
13213+ return 0;
13214+}
13215+
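The split between get_work(0) and get_work(1) is a peek-then-claim idiom: the peek only decides whether to sleep, while the claim uses test_and_clear_bit so a lockspace's work is processed exactly once per flagging. A minimal userspace analogue with a single hypothetical flag in place of the per-lockspace LSFL_WORK bits:

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static atomic_bool work_pending;	/* stands in for LSFL_WORK */

/* Peek, as in get_work(0): only decides whether to sleep. */
static bool work_peek(void) { return atomic_load(&work_pending); }

/* Claim, as in get_work(1): atomically takes ownership, so a second
 * caller sees no work left. */
static bool work_claim(void) { return atomic_exchange(&work_pending, false); }

int main(void)
{
	atomic_store(&work_pending, true);
	if (work_peek() && work_claim())
		printf("processing work\n");	/* runs exactly once */
	if (!work_claim())
		printf("no work left\n");
	return 0;
}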
13216+/*
13217+ * Mark a specific lockspace as needing work and wake up the thread to do it.
13218+ */
13219+
10d56c87 13220+void dlm_recoverd_kick(struct dlm_ls *ls)
4bf12011 13221+{
13222+ set_bit(LSFL_WORK, &ls->ls_flags);
13223+ wake_up(&recoverd_wait);
13224+}
13225+
13226+/*
10d56c87 13227+ * Start the recoverd thread when dlm is started (before any lockspaces).
4bf12011 13228+ */
13229+
10d56c87 13230+int dlm_recoverd_start(void)
4bf12011 13231+{
13232+ int error;
13233+
13234+ clear_bit(THREAD_STOP, &recoverd_flags);
13235+ error = kernel_thread(dlm_recoverd, NULL, 0);
13236+ if (error < 0)
13237+ goto out;
13238+
13239+ error = 0;
13240+ wait_for_completion(&recoverd_run);
13241+
13242+ out:
13243+ return error;
13244+}
13245+
13246+/*
10d56c87 13247+ * Stop the recoverd thread when dlm is shut down (all lockspaces are gone).
4bf12011 13248+ */
13249+
10d56c87 13250+int dlm_recoverd_stop(void)
4bf12011 13251+{
13252+ set_bit(THREAD_STOP, &recoverd_flags);
13253+ wake_up(&recoverd_wait);
13254+ wait_for_completion(&recoverd_run);
13255+
13256+ return 0;
13257+}
13258diff -urN linux-orig/cluster/dlm/recoverd.h linux-patched/cluster/dlm/recoverd.h
13259--- linux-orig/cluster/dlm/recoverd.h 1970-01-01 07:30:00.000000000 +0730
10d56c87 13260+++ linux-patched/cluster/dlm/recoverd.h 2004-07-13 18:57:22.000000000 +0800
4bf12011 13261@@ -0,0 +1,22 @@
13262+/******************************************************************************
13263+*******************************************************************************
13264+**
13265+** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
13266+** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
13267+**
13268+** This copyrighted material is made available to anyone wishing to use,
13269+** modify, copy, or redistribute it subject to the terms and conditions
13270+** of the GNU General Public License v.2.
13271+**
13272+*******************************************************************************
13273+******************************************************************************/
13274+
13275+#ifndef __RECOVERD_DOT_H__
13276+#define __RECOVERD_DOT_H__
13277+
13278+void dlm_recoverd_init(void);
13279+void dlm_recoverd_kick(struct dlm_ls *ls);
13280+int dlm_recoverd_start(void);
13281+int dlm_recoverd_stop(void);
4bf12011 13282+
13283+#endif /* __RECOVERD_DOT_H__ */
13284diff -urN linux-orig/cluster/dlm/rsb.c linux-patched/cluster/dlm/rsb.c
13285--- linux-orig/cluster/dlm/rsb.c 1970-01-01 07:30:00.000000000 +0730
13286+++ linux-patched/cluster/dlm/rsb.c 2004-07-13 18:57:22.000000000 +0800
13287@@ -0,0 +1,319 @@
4bf12011 13288+/******************************************************************************
13289+*******************************************************************************
13290+**
13291+** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
13292+** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
13293+**
13294+** This copyrighted material is made available to anyone wishing to use,
13295+** modify, copy, or redistribute it subject to the terms and conditions
13296+** of the GNU General Public License v.2.
13297+**
13298+*******************************************************************************
13299+******************************************************************************/
13300+
13301+#include "dlm_internal.h"
13302+#include "locking.h"
13303+#include "memory.h"
13304+#include "lockqueue.h"
13305+#include "nodes.h"
13306+#include "dir.h"
13307+#include "util.h"
13308+
13309+static struct dlm_rsb *search_hashchain(struct list_head *head,
13310+ struct dlm_rsb *parent,
13311+ char *name, int namelen)
4bf12011 13312+{
10d56c87 13313+ struct dlm_rsb *r;
4bf12011 13314+
13315+ list_for_each_entry(r, head, res_hashchain) {
13316+ if ((parent == r->res_parent) && (namelen == r->res_length) &&
13317+ (memcmp(name, r->res_name, namelen) == 0)) {
13318+ atomic_inc(&r->res_ref);
13319+ return r;
13320+ }
13321+ }
13322+
13323+ return NULL;
13324+}
13325+
13326+/*
13327+ * A way to arbitrarily hold onto an rsb which we already have a reference
13328+ * to, making sure it doesn't go away. Opposite of release_rsb().
13329+ */
13330+
10d56c87 13331+void hold_rsb(struct dlm_rsb *r)
4bf12011 13332+{
13333+ atomic_inc(&r->res_ref);
13334+}
13335+
13336+/*
13337+ * release_rsb() - Decrement reference count on rsb struct. Free the rsb
13338+ * struct when there are zero references. Every lkb for the rsb adds a
13339+ * reference. When ref is zero there can be no more lkb's for the rsb, on the
13340+ * queues or anywhere else.
13341+ */
13342+
10d56c87 13343+void release_rsb(struct dlm_rsb *r)
4bf12011 13344+{
10d56c87 13345+ struct dlm_ls *ls = r->res_ls;
4bf12011 13346+ int removed = FALSE;
13347+
13348+ write_lock(&ls->ls_rsbtbl[r->res_bucket].lock);
13349+ if (atomic_dec_and_test(&r->res_ref)) {
13350+ DLM_ASSERT(list_empty(&r->res_grantqueue), print_rsb(r););
13351+ DLM_ASSERT(list_empty(&r->res_waitqueue), print_rsb(r););
13352+ DLM_ASSERT(list_empty(&r->res_convertqueue), print_rsb(r););
4bf12011 13353+ removed = TRUE;
13354+ list_del(&r->res_hashchain);
13355+ }
10d56c87 13356+ write_unlock(&ls->ls_rsbtbl[r->res_bucket].lock);
4bf12011 13357+
13358+ if (!removed)
13359+ return;
4bf12011 13360+
13361+ down_read(&ls->ls_gap_rsblist);
13362+ if (r->res_parent)
13363+ list_del(&r->res_subreslist);
13364+ else
13365+ list_del(&r->res_rootlist);
13366+ up_read(&ls->ls_gap_rsblist);
4bf12011 13367+
13368+ if (r->res_parent)
13369+ goto out;
13370+ if (r->res_nodeid && r->res_nodeid != -1)
13371+ goto out;
13372+ if (r->res_nodeid == -1 && !test_bit(RESFL_MASTER, &r->res_flags))
13373+ goto out;
4bf12011 13374+
13375+ if (get_directory_nodeid(r) != our_nodeid())
13376+ remote_remove_resdata(r->res_ls, get_directory_nodeid(r),
13377+ r->res_name, r->res_length);
13378+ else
13379+ remove_resdata(r->res_ls, our_nodeid(), r->res_name,
13380+ r->res_length);
13381+ out:
13382+ if (r->res_lvbptr)
13383+ free_lvb(r->res_lvbptr);
13384+
13385+ free_rsb(r);
13386+}
13387+
13388+struct dlm_rsb *find_rsb_to_unlock(struct dlm_ls *ls, struct dlm_lkb *lkb)
13389+{
13390+ struct dlm_rsb *r = lkb->lkb_resource;
13391+
13392+ write_lock(&ls->ls_rsbtbl[r->res_bucket].lock);
13393+ if (!r->res_parent && atomic_read(&r->res_ref) == 1)
13394+ r->res_nodeid = -1;
13395+ write_unlock(&ls->ls_rsbtbl[r->res_bucket].lock);
13396+
13397+ return r;
4bf12011 13398+}
13399+
13400+/*
13401+ * find_or_create_rsb() - Get an rsb struct, or create one if it doesn't exist.
13402+ * If the rsb exists, its ref count is incremented by this function. If it
13403+ * doesn't exist, it's created with a ref count of one.
13404+ */
13405+
13406+int find_or_create_rsb(struct dlm_ls *ls, struct dlm_rsb *parent, char *name,
13407+ int namelen, int create, struct dlm_rsb **rp)
4bf12011 13408+{
13409+ uint32_t bucket;
13410+ struct dlm_rsb *r, *tmp;
4bf12011 13411+ int error = -ENOMEM;
13412+
10d56c87 13413+ DLM_ASSERT(namelen <= DLM_RESNAME_MAXLEN,);
4bf12011 13414+
13415+ bucket = dlm_hash(name, namelen);
13416+ bucket &= (ls->ls_rsbtbl_size - 1);
4bf12011 13417+
13418+ read_lock(&ls->ls_rsbtbl[bucket].lock);
13419+ r = search_hashchain(&ls->ls_rsbtbl[bucket].list, parent, name, namelen);
13420+ read_unlock(&ls->ls_rsbtbl[bucket].lock);
4bf12011 13421+
13422+ if (r)
13423+ goto out_set;
13424+ if (!create) {
13425+ *rp = NULL;
13426+ goto out;
13427+ }
13428+
13429+ r = allocate_rsb(ls, namelen);
13430+ if (!r)
13431+ goto fail;
13432+
13433+ INIT_LIST_HEAD(&r->res_subreslist);
13434+ INIT_LIST_HEAD(&r->res_grantqueue);
13435+ INIT_LIST_HEAD(&r->res_convertqueue);
13436+ INIT_LIST_HEAD(&r->res_waitqueue);
13437+
13438+ memcpy(r->res_name, name, namelen);
13439+ r->res_length = namelen;
13440+ r->res_ls = ls;
13441+ init_rwsem(&r->res_lock);
13442+ atomic_set(&r->res_ref, 1);
10d56c87 13443+ r->res_bucket = bucket;
4bf12011 13444+
13445+ if (parent) {
13446+ r->res_parent = parent;
13447+ r->res_depth = parent->res_depth + 1;
13448+ r->res_root = parent->res_root;
13449+ r->res_nodeid = parent->res_nodeid;
13450+ } else {
13451+ r->res_parent = NULL;
13452+ r->res_depth = 1;
13453+ r->res_root = r;
13454+ r->res_nodeid = -1;
13455+ }
13456+
13457+ write_lock(&ls->ls_rsbtbl[bucket].lock);
13458+ tmp = search_hashchain(&ls->ls_rsbtbl[bucket].list, parent, name, namelen);
4bf12011 13459+ if (tmp) {
10d56c87 13460+ write_unlock(&ls->ls_rsbtbl[bucket].lock);
4bf12011 13461+ free_rsb(r);
13462+ r = tmp;
13463+ } else {
13464+ list_add(&r->res_hashchain, &ls->ls_rsbtbl[bucket].list);
13465+ write_unlock(&ls->ls_rsbtbl[bucket].lock);
4bf12011 13466+
13467+ down_read(&ls->ls_gap_rsblist);
13468+ if (parent)
13469+ list_add_tail(&r->res_subreslist,
13470+ &r->res_root->res_subreslist);
13471+ else
13472+ list_add(&r->res_rootlist, &ls->ls_rootres);
13473+ up_read(&ls->ls_gap_rsblist);
13474+ }
13475+
13476+ out_set:
13477+ *rp = r;
13478+
13479+ out:
13480+ error = 0;
13481+
13482+ fail:
13483+ return error;
13484+}
13485+
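A hedged sketch of the expected calling pattern, pairing the lookup with release_rsb(); the lockspace and the root-level resource name are illustrative and error handling is abbreviated:

/* Usage sketch, not part of the patch: 'ls' is an existing lockspace. */
static int example_touch_resource(struct dlm_ls *ls)
{
	struct dlm_rsb *r;
	int error;

	/* Creates the rsb with a ref of 1, or bumps the ref if it
	 * already existed in the hash table. */
	error = find_or_create_rsb(ls, NULL, "example-res", 11, TRUE, &r);
	if (error)
		return error;

	/* ... use r, taking res_lock as the surrounding code does ... */

	release_rsb(r);		/* drop the reference; may free the rsb */
	return 0;
}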
13486+/*
13487+ * Add an LKB to a resource's grant/convert/wait queue, in order.
13488+ */
13489+
13490+void lkb_add_ordered(struct list_head *new, struct list_head *head, int mode)
13491+{
10d56c87 13492+ struct dlm_lkb *lkb = NULL;
4bf12011 13493+
13494+ list_for_each_entry(lkb, head, lkb_statequeue) {
13495+ if (lkb->lkb_rqmode < mode)
13496+ break;
13497+ }
13498+
13499+ if (!lkb) {
13500+ /* No entries in the queue, we are alone */
13501+ list_add_tail(new, head);
13502+ } else {
13503+ __list_add(new, lkb->lkb_statequeue.prev, &lkb->lkb_statequeue);
13504+ }
13505+}
13506+
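The insertion rule above keeps each queue sorted with the highest lock mode first, while equal modes retain FIFO order. A standalone illustration with plain integers in place of lkb structs:

#include <stdio.h>

/* New entries go in front of the first entry with a strictly lower
 * mode, so the queue stays sorted highest-mode-first and equal modes
 * keep arrival order. */
int main(void)
{
	int queue[8], n = 0;
	int arrivals[] = { 3 /* PR */, 5 /* EX */, 3 /* PR */, 2 /* CW */ };

	for (int a = 0; a < 4; a++) {
		int i = 0;
		while (i < n && queue[i] >= arrivals[a])
			i++;			/* stop at first lower mode */
		for (int j = n; j > i; j--)
			queue[j] = queue[j - 1];
		queue[i] = arrivals[a];
		n++;
	}
	for (int i = 0; i < n; i++)
		printf("%d ", queue[i]);	/* prints: 5 3 3 2 */
	return 0;
}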
13507+/*
13508+ * The rsb res_lock must be held in write when this function is called.
13509+ */
13510+
10d56c87 13511+void lkb_enqueue(struct dlm_rsb *r, struct dlm_lkb *lkb, int type)
4bf12011 13512+{
13513+ DLM_ASSERT(!lkb->lkb_status,
13514+ print_lkb(lkb);
13515+ print_rsb(r););
4bf12011 13516+
13517+ lkb->lkb_status = type;
13518+
13519+ switch (type) {
13520+ case GDLM_LKSTS_WAITING:
13521+ list_add_tail(&lkb->lkb_statequeue, &r->res_waitqueue);
13522+ break;
13523+
13524+ case GDLM_LKSTS_GRANTED:
13525+ lkb_add_ordered(&lkb->lkb_statequeue, &r->res_grantqueue,
13526+ lkb->lkb_grmode);
13527+ break;
13528+
13529+ case GDLM_LKSTS_CONVERT:
13530+ if (lkb->lkb_lockqueue_flags & DLM_LKF_EXPEDITE)
13531+ list_add(&lkb->lkb_statequeue, &r->res_convertqueue);
13532+
13533+ else
13534+ if (lkb->lkb_lockqueue_flags & DLM_LKF_QUECVT)
13535+ list_add_tail(&lkb->lkb_statequeue,
13536+ &r->res_convertqueue);
13537+ else
13538+ lkb_add_ordered(&lkb->lkb_statequeue,
13539+ &r->res_convertqueue, lkb->lkb_rqmode);
13540+ break;
13541+
13542+ default:
10d56c87 13543+ DLM_ASSERT(0,);
4bf12011 13544+ }
13545+}
13546+
10d56c87 13547+void res_lkb_enqueue(struct dlm_rsb *r, struct dlm_lkb *lkb, int type)
4bf12011 13548+{
13549+ down_write(&r->res_lock);
13550+ lkb_enqueue(r, lkb, type);
13551+ up_write(&r->res_lock);
13552+}
13553+
13554+/*
13555+ * The rsb res_lock must be held in write when this function is called.
13556+ */
13557+
10d56c87 13558+int lkb_dequeue(struct dlm_lkb *lkb)
4bf12011 13559+{
13560+ int status = lkb->lkb_status;
13561+
13562+ if (!status)
13563+ goto out;
13564+
13565+ lkb->lkb_status = 0;
13566+ list_del(&lkb->lkb_statequeue);
13567+
13568+ out:
13569+ return status;
13570+}
13571+
10d56c87 13572+int res_lkb_dequeue(struct dlm_lkb *lkb)
4bf12011 13573+{
13574+ int status;
13575+
13576+ down_write(&lkb->lkb_resource->res_lock);
13577+ status = lkb_dequeue(lkb);
13578+ up_write(&lkb->lkb_resource->res_lock);
13579+
13580+ return status;
13581+}
13582+
13583+/*
13584+ * The rsb res_lock must be held in write when this function is called.
13585+ */
13586+
10d56c87 13587+int lkb_swqueue(struct dlm_rsb *r, struct dlm_lkb *lkb, int type)
4bf12011 13588+{
13589+ int status;
13590+
13591+ status = lkb_dequeue(lkb);
13592+ lkb_enqueue(r, lkb, type);
13593+
13594+ return status;
13595+}
13596+
10d56c87 13597+int res_lkb_swqueue(struct dlm_rsb *r, struct dlm_lkb *lkb, int type)
4bf12011 13598+{
13599+ int status;
13600+
13601+ down_write(&r->res_lock);
13602+ status = lkb_swqueue(r, lkb, type);
13603+ up_write(&r->res_lock);
13604+
13605+ return status;
13606+}
13607diff -urN linux-orig/cluster/dlm/rsb.h linux-patched/cluster/dlm/rsb.h
13608--- linux-orig/cluster/dlm/rsb.h 1970-01-01 07:30:00.000000000 +0730
10d56c87 13609+++ linux-patched/cluster/dlm/rsb.h 2004-07-13 18:57:22.000000000 +0800
4bf12011 13610@@ -0,0 +1,30 @@
13611+/******************************************************************************
13612+*******************************************************************************
13613+**
13614+** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
13615+** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
13616+**
13617+** This copyrighted material is made available to anyone wishing to use,
13618+** modify, copy, or redistribute it subject to the terms and conditions
13619+** of the GNU General Public License v.2.
13620+**
13621+*******************************************************************************
13622+******************************************************************************/
13623+
13624+#ifndef __RSB_DOT_H__
13625+#define __RSB_DOT_H__
13626+
13627+void lkb_add_ordered(struct list_head *new, struct list_head *head, int mode);
13628+void release_rsb(struct dlm_rsb *r);
13629+void hold_rsb(struct dlm_rsb *r);
13630+int find_or_create_rsb(struct dlm_ls *ls, struct dlm_rsb *parent, char *name,
13631+ int namelen, int create, struct dlm_rsb **rp);
13632+struct dlm_rsb *find_rsb_to_unlock(struct dlm_ls *ls, struct dlm_lkb *lkb);
13633+void lkb_enqueue(struct dlm_rsb *r, struct dlm_lkb *lkb, int type);
13634+void res_lkb_enqueue(struct dlm_rsb *r, struct dlm_lkb *lkb, int type);
13635+int lkb_dequeue(struct dlm_lkb *lkb);
13636+int res_lkb_dequeue(struct dlm_lkb *lkb);
13637+int lkb_swqueue(struct dlm_rsb *r, struct dlm_lkb *lkb, int type);
13638+int res_lkb_swqueue(struct dlm_rsb *r, struct dlm_lkb *lkb, int type);
4bf12011 13639+
13640+#endif /* __RSB_DOT_H__ */
13641diff -urN linux-orig/cluster/dlm/util.c linux-patched/cluster/dlm/util.c
13642--- linux-orig/cluster/dlm/util.c 1970-01-01 07:30:00.000000000 +0730
13643+++ linux-patched/cluster/dlm/util.c 2004-07-13 18:57:22.000000000 +0800
13644@@ -0,0 +1,190 @@
4bf12011 13645+/******************************************************************************
13646+*******************************************************************************
13647+**
13648+** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
13649+** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
13650+**
13651+** This copyrighted material is made available to anyone wishing to use,
13652+** modify, copy, or redistribute it subject to the terms and conditions
13653+** of the GNU General Public License v.2.
13654+**
13655+*******************************************************************************
13656+******************************************************************************/
13657+
13658+#include "dlm_internal.h"
13659+
13660+static const uint32_t crc_32_tab[] = {
13661+ 0x00000000, 0x77073096, 0xee0e612c, 0x990951ba, 0x076dc419, 0x706af48f,
13662+ 0xe963a535, 0x9e6495a3,
13663+ 0x0edb8832, 0x79dcb8a4, 0xe0d5e91e, 0x97d2d988, 0x09b64c2b, 0x7eb17cbd,
13664+ 0xe7b82d07, 0x90bf1d91,
13665+ 0x1db71064, 0x6ab020f2, 0xf3b97148, 0x84be41de, 0x1adad47d, 0x6ddde4eb,
13666+ 0xf4d4b551, 0x83d385c7,
13667+ 0x136c9856, 0x646ba8c0, 0xfd62f97a, 0x8a65c9ec, 0x14015c4f, 0x63066cd9,
13668+ 0xfa0f3d63, 0x8d080df5,
13669+ 0x3b6e20c8, 0x4c69105e, 0xd56041e4, 0xa2677172, 0x3c03e4d1, 0x4b04d447,
13670+ 0xd20d85fd, 0xa50ab56b,
13671+ 0x35b5a8fa, 0x42b2986c, 0xdbbbc9d6, 0xacbcf940, 0x32d86ce3, 0x45df5c75,
13672+ 0xdcd60dcf, 0xabd13d59,
13673+ 0x26d930ac, 0x51de003a, 0xc8d75180, 0xbfd06116, 0x21b4f4b5, 0x56b3c423,
13674+ 0xcfba9599, 0xb8bda50f,
13675+ 0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924, 0x2f6f7c87, 0x58684c11,
13676+ 0xc1611dab, 0xb6662d3d,
13677+ 0x76dc4190, 0x01db7106, 0x98d220bc, 0xefd5102a, 0x71b18589, 0x06b6b51f,
13678+ 0x9fbfe4a5, 0xe8b8d433,
13679+ 0x7807c9a2, 0x0f00f934, 0x9609a88e, 0xe10e9818, 0x7f6a0dbb, 0x086d3d2d,
13680+ 0x91646c97, 0xe6635c01,
13681+ 0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e, 0x6c0695ed, 0x1b01a57b,
13682+ 0x8208f4c1, 0xf50fc457,
13683+ 0x65b0d9c6, 0x12b7e950, 0x8bbeb8ea, 0xfcb9887c, 0x62dd1ddf, 0x15da2d49,
13684+ 0x8cd37cf3, 0xfbd44c65,
13685+ 0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2, 0x4adfa541, 0x3dd895d7,
13686+ 0xa4d1c46d, 0xd3d6f4fb,
13687+ 0x4369e96a, 0x346ed9fc, 0xad678846, 0xda60b8d0, 0x44042d73, 0x33031de5,
13688+ 0xaa0a4c5f, 0xdd0d7cc9,
13689+ 0x5005713c, 0x270241aa, 0xbe0b1010, 0xc90c2086, 0x5768b525, 0x206f85b3,
13690+ 0xb966d409, 0xce61e49f,
13691+ 0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4, 0x59b33d17, 0x2eb40d81,
13692+ 0xb7bd5c3b, 0xc0ba6cad,
13693+ 0xedb88320, 0x9abfb3b6, 0x03b6e20c, 0x74b1d29a, 0xead54739, 0x9dd277af,
13694+ 0x04db2615, 0x73dc1683,
13695+ 0xe3630b12, 0x94643b84, 0x0d6d6a3e, 0x7a6a5aa8, 0xe40ecf0b, 0x9309ff9d,
13696+ 0x0a00ae27, 0x7d079eb1,
13697+ 0xf00f9344, 0x8708a3d2, 0x1e01f268, 0x6906c2fe, 0xf762575d, 0x806567cb,
13698+ 0x196c3671, 0x6e6b06e7,
13699+ 0xfed41b76, 0x89d32be0, 0x10da7a5a, 0x67dd4acc, 0xf9b9df6f, 0x8ebeeff9,
13700+ 0x17b7be43, 0x60b08ed5,
13701+ 0xd6d6a3e8, 0xa1d1937e, 0x38d8c2c4, 0x4fdff252, 0xd1bb67f1, 0xa6bc5767,
13702+ 0x3fb506dd, 0x48b2364b,
13703+ 0xd80d2bda, 0xaf0a1b4c, 0x36034af6, 0x41047a60, 0xdf60efc3, 0xa867df55,
13704+ 0x316e8eef, 0x4669be79,
13705+ 0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236, 0xcc0c7795, 0xbb0b4703,
13706+ 0x220216b9, 0x5505262f,
13707+ 0xc5ba3bbe, 0xb2bd0b28, 0x2bb45a92, 0x5cb36a04, 0xc2d7ffa7, 0xb5d0cf31,
13708+ 0x2cd99e8b, 0x5bdeae1d,
13709+ 0x9b64c2b0, 0xec63f226, 0x756aa39c, 0x026d930a, 0x9c0906a9, 0xeb0e363f,
13710+ 0x72076785, 0x05005713,
13711+ 0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38, 0x92d28e9b, 0xe5d5be0d,
13712+ 0x7cdcefb7, 0x0bdbdf21,
13713+ 0x86d3d2d4, 0xf1d4e242, 0x68ddb3f8, 0x1fda836e, 0x81be16cd, 0xf6b9265b,
13714+ 0x6fb077e1, 0x18b74777,
13715+ 0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c, 0x8f659eff, 0xf862ae69,
13716+ 0x616bffd3, 0x166ccf45,
13717+ 0xa00ae278, 0xd70dd2ee, 0x4e048354, 0x3903b3c2, 0xa7672661, 0xd06016f7,
13718+ 0x4969474d, 0x3e6e77db,
13719+ 0xaed16a4a, 0xd9d65adc, 0x40df0b66, 0x37d83bf0, 0xa9bcae53, 0xdebb9ec5,
13720+ 0x47b2cf7f, 0x30b5ffe9,
13721+ 0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6, 0xbad03605, 0xcdd70693,
13722+ 0x54de5729, 0x23d967bf,
13723+ 0xb3667a2e, 0xc4614ab8, 0x5d681b02, 0x2a6f2b94, 0xb40bbe37, 0xc30c8ea1,
13724+ 0x5a05df1b, 0x2d02ef8d
13725+};
13726+
13727+/**
10d56c87 13728+ * dlm_hash - hash an array of data
4bf12011 13729+ * @data: the data to be hashed
13730+ * @len: the length of data to be hashed
13731+ *
13732+ * Copied from GFS.
13733+ *
13734+ * Take some data and convert it to a 32-bit hash.
13735+ *
13736+ * The hash function is a 32-bit CRC of the data. The algorithm uses
13737+ * the crc_32_tab table above.
13738+ *
13739+ * This may not be the fastest hash function, but it does a fair bit better
13740+ * at providing uniform results than the others I've looked at. That's
13741+ * really important for efficient directories.
13742+ *
13743+ * Returns: the hash
13744+ */
13745+
10d56c87 13746+uint32_t dlm_hash(const char *data, int len)
4bf12011 13747+{
13748+ uint32_t hash = 0xFFFFFFFF;
13749+
13750+ for (; len--; data++)
13751+ hash = crc_32_tab[(hash ^ *data) & 0xFF] ^ (hash >> 8);
13752+
13753+ hash = ~hash;
13754+
13755+ return hash;
13756+}
13757+
10d56c87 13758+uint32_t dlm_next_power2(uint32_t val)
4bf12011 13759+{
13760+ uint32_t x;
13761+
13762+ for (x = 1; x < val; x <<= 1) ;
13763+
13764+ return x;
13765+}
13766+
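These two helpers cooperate: dlm_next_power2() sizes the rsb hash table so that find_or_create_rsb() can pick a bucket by masking the CRC. A standalone sketch of that arithmetic, using a hypothetical hash value in place of a real dlm_hash() result:

#include <stdint.h>
#include <stdio.h>

/* Copy of dlm_next_power2() above. */
static uint32_t next_power2(uint32_t val)
{
	uint32_t x;
	for (x = 1; x < val; x <<= 1) ;
	return x;
}

int main(void)
{
	/* Pick a table size, then select a bucket exactly the way
	 * find_or_create_rsb() does: hash, then mask. */
	uint32_t size = next_power2(200);	/* -> 256 */
	uint32_t hash = 0xdeadbeef;		/* stand-in for dlm_hash() */
	uint32_t bucket = hash & (size - 1);

	printf("size %u bucket %u\n", size, bucket); /* size 256 bucket 239 */
	return 0;
}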
13767+void print_lkb(struct dlm_lkb *lkb)
13768+{
13769+ printk("dlm: lkb\n"
13770+ "id %x\n"
13771+ "remid %x\n"
13772+ "flags %x\n"
13773+ "status %x\n"
13774+ "rqmode %d\n"
13775+ "grmode %d\n"
13776+ "nodeid %u\n"
13777+ "lqstate %x\n"
13778+ "lqflags %x\n",
13779+ lkb->lkb_id,
13780+ lkb->lkb_remid,
13781+ lkb->lkb_flags,
13782+ lkb->lkb_status,
13783+ lkb->lkb_rqmode,
13784+ lkb->lkb_grmode,
13785+ lkb->lkb_nodeid,
13786+ lkb->lkb_lockqueue_state,
13787+ lkb->lkb_lockqueue_flags);
13788+}
13789+
13790+void print_rsb(struct dlm_rsb *r)
13791+{
13792+ printk("dlm: rsb\n"
13793+ "name \"%s\"\n"
13794+ "nodeid %u\n"
13795+ "ref %u\n",
13796+ r->res_name,
13797+ r->res_nodeid,
13798+ atomic_read(&r->res_ref));
13799+}
13800+
13801+void print_request(struct dlm_request *req)
13802+{
13803+ printk("dlm: request\n"
13804+ "rh_cmd %u\n"
13805+ "rh_lkid %x\n"
13806+ "remlkid %x\n"
13807+ "flags %x\n"
13808+ "status %u\n"
13809+ "rqmode %u\n",
13810+ req->rr_header.rh_cmd,
13811+ req->rr_header.rh_lkid,
13812+ req->rr_remlkid,
13813+ req->rr_flags,
13814+ req->rr_status,
13815+ req->rr_rqmode);
13816+}
13817+
13818+void print_reply(struct dlm_reply *rp)
13819+{
13820+ printk("dlm: reply\n"
13821+ "rh_cmd %u\n"
13822+ "rh_lkid %x\n"
13823+ "lockstate %u\n"
13824+ "nodeid %u\n"
13825+ "status %u\n"
13826+ "lkid %x\n",
13827+ rp->rl_header.rh_cmd,
13828+ rp->rl_header.rh_lkid,
13829+ rp->rl_lockstate,
13830+ rp->rl_nodeid,
13831+ rp->rl_status,
13832+ rp->rl_lkid);
4bf12011 13833+}
10d56c87 13834+
4bf12011 13835diff -urN linux-orig/cluster/dlm/util.h linux-patched/cluster/dlm/util.h
13836--- linux-orig/cluster/dlm/util.h 1970-01-01 07:30:00.000000000 +0730
13837+++ linux-patched/cluster/dlm/util.h 2004-07-13 18:57:22.000000000 +0800
13838@@ -0,0 +1,25 @@
4bf12011 13839+/******************************************************************************
13840+*******************************************************************************
13841+**
13842+** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
13843+** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
13844+**
13845+** This copyrighted material is made available to anyone wishing to use,
13846+** modify, copy, or redistribute it subject to the terms and conditions
13847+** of the GNU General Public License v.2.
13848+**
13849+*******************************************************************************
13850+******************************************************************************/
13851+
13852+#ifndef __UTIL_DOT_H__
13853+#define __UTIL_DOT_H__
13854+
13855+uint32_t dlm_hash(const char *data, int len);
13856+uint32_t dlm_next_power2(uint32_t val);
4bf12011 13857+
13858+void print_lkb(struct dlm_lkb *lkb);
13859+void print_rsb(struct dlm_rsb *r);
13860+void print_request(struct dlm_request *req);
13861+void print_reply(struct dlm_reply *rp);
4bf12011 13862+
13863+#endif
13864diff -urN linux-orig/include/cluster/dlm.h linux-patched/include/cluster/dlm.h
13865--- linux-orig/include/cluster/dlm.h 1970-01-01 07:30:00.000000000 +0730
13866+++ linux-patched/include/cluster/dlm.h 2004-07-13 18:57:22.000000000 +0800
13867@@ -0,0 +1,412 @@
4bf12011 13868+/******************************************************************************
13869+*******************************************************************************
13870+**
13871+** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
13872+** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
13873+**
13874+** This copyrighted material is made available to anyone wishing to use,
13875+** modify, copy, or redistribute it subject to the terms and conditions
13876+** of the GNU General Public License v.2.
13877+**
13878+*******************************************************************************
13879+******************************************************************************/
13880+
13881+#ifndef __DLM_DOT_H__
13882+#define __DLM_DOT_H__
13883+
13884+/*
13885+ * Interface to DLM - routines and structures to use DLM lockspaces.
13886+ */
13887+
13888+/*
13889+ * Lock Modes
13890+ */
13891+
13892+#define DLM_LOCK_IV (-1) /* invalid */
13893+#define DLM_LOCK_NL (0) /* null */
13894+#define DLM_LOCK_CR (1) /* concurrent read */
13895+#define DLM_LOCK_CW (2) /* concurrent write */
13896+#define DLM_LOCK_PR (3) /* protected read */
13897+#define DLM_LOCK_PW (4) /* protected write */
13898+#define DLM_LOCK_EX (5) /* exclusive */
13899+
13900+/*
13901+ * Maximum size in bytes of a dlm_lock name
13902+ */
13903+
13904+#define DLM_RESNAME_MAXLEN (64)
13905+
13906+/*
13907+ * Size in bytes of Lock Value Block
13908+ */
13909+
13910+#define DLM_LVB_LEN (32)
13911+
13912+/*
13913+ * Flags to dlm_new_lockspace
13914+ *
13915+ * DLM_LSF_NOTIMERS
13916+ *
13917+ * Do not subject locks in this lockspace to time-outs.
13918+ *
13919+ * DLM_LSF_NOCONVGRANT
13920+ *
13921+ * Do not grant new locks unless the conversion queue is empty.
13922+ *
4bf12011 13923+ */
13924+
13925+#define DLM_LSF_NOTIMERS (1)
10d56c87 13926+#define DLM_LSF_NOCONVGRANT (2)
4bf12011 13927+
13928+/*
13929+ * Flags to dlm_lock
13930+ *
13931+ * DLM_LKF_NOQUEUE
13932+ *
13933+ * Do not queue the lock request on the wait queue if it cannot be granted
13934+ * immediately. If the lock cannot be granted because of this flag, DLM will
13935+ * either return -EAGAIN from the dlm_lock call or will return 0 from
13936+ * dlm_lock and -EAGAIN in the lock status block when the AST is executed.
13937+ *
13938+ * DLM_LKF_CONVERT
13939+ *
13940+ * Indicates a lock conversion request. For conversions the name and namelen
13941+ * are ignored and the lock ID in the LKSB is used to identify the lock.
13942+ *
13943+ * DLM_LKF_VALBLK
13944+ *
13945+ * Requests DLM to return the current contents of the lock value block in the
13946+ * lock status block. When this flag is set in a lock conversion from PW or EX
13947+ * modes, DLM assigns the value specified in the lock status block to the lock
13948+ * value block of the lock resource. The LVB is a DLM_LVB_LEN size array
13949+ * containing application-specific information.
13950+ *
13951+ * DLM_LKF_QUECVT
13952+ *
13953+ * Force a conversion lock request to the back of the convert queue. All other
13954+ * conversion requests ahead of it must be granted before it can be granted.
13955+ * This enforces a FIFO ordering on the convert queue. When this flag is set,
13956+ * indefinite postponement is averted. This flag is allowed only when
13957+ * converting a lock to a more restrictive mode.
13958+ *
13959+ * DLM_LKF_CANCEL
13960+ *
13961+ * Used to cancel a pending conversion (with dlm_unlock). Lock is returned to
13962+ * previously granted mode.
13963+ *
13964+ * DLM_LKF_IVVALBLK
13965+ *
13966+ * Invalidate/clear the lock value block.
13967+ *
13968+ * DLM_LKF_CONVDEADLK
13969+ *
13970+ * The granted mode of a lock being converted (from a non-NL mode) can be
13971+ * changed to NL in the process of acquiring the requested mode to avoid
13972+ * conversion deadlock.
13973+ *
13974+ * DLM_LKF_PERSISTENT
13975+ *
13976+ * Only relevant to locks originating in userspace. Signals to the ioctl.c code
13977+ * that this lock should not be unlocked when the process exits.
13978+ *
13979+ * DLM_LKF_NODLCKWT
13980+ *
13981+ * This lock is not to be checked for conversion deadlocks.
13982+ *
13983+ * DLM_LKF_NODLCKBLK
13984+ *
13985+ * not yet implemented
13986+ *
13987+ * DLM_LKF_EXPEDITE
13988+ *
13989+ * If this lock conversion cannot be granted immediately it is to go to the
13990+ * head of the conversion queue regardless of its requested lock mode.
13991+ *
13992+ * DLM_LKF_NOQUEUEBAST
13993+ *
13994+ * Send blocking ASTs before returning -EAGAIN to the caller. It is only
13995+ * used along with the NOQUEUE flag. Blocking ASTs are not sent for failed
13996+ * NOQUEUE requests otherwise.
13997+ *
13998+ */
13999+
14000+#define DLM_LKF_NOQUEUE (0x00000001)
14001+#define DLM_LKF_CANCEL (0x00000002)
14002+#define DLM_LKF_CONVERT (0x00000004)
14003+#define DLM_LKF_VALBLK (0x00000008)
14004+#define DLM_LKF_QUECVT (0x00000010)
14005+#define DLM_LKF_IVVALBLK (0x00000020)
14006+#define DLM_LKF_CONVDEADLK (0x00000040)
14007+#define DLM_LKF_PERSISTENT (0x00000080)
14008+#define DLM_LKF_NODLCKWT (0x00000100)
14009+#define DLM_LKF_NODLCKBLK (0x00000200)
14010+#define DLM_LKF_EXPEDITE (0x00000400)
14011+#define DLM_LKF_NOQUEUEBAST (0x00000800)
14012+
14013+/*
14014+ * Some return codes that are not in errno.h
14015+ */
14016+
14017+#define DLM_ECANCEL (0x10001)
14018+#define DLM_EUNLOCK (0x10002)
14019+
14020+typedef void dlm_lockspace_t;
14021+
14022+/*
14023+ * Lock range structure
14024+ */
14025+
14026+struct dlm_range {
14027+ uint64_t ra_start;
14028+ uint64_t ra_end;
14029+};
14030+
14031+/*
14032+ * Lock status block
14033+ *
14034+ * Use this structure to specify the contents of the lock value block. For a
14035+ * conversion request, this structure is used to specify the lock ID of the
14036+ * lock. DLM writes the status of the lock request and the lock ID assigned
14037+ * to the request in the lock status block.
14038+ *
14039+ * sb_lkid: the returned lock ID. It is set on new (non-conversion) requests.
14040+ * It is available when dlm_lock returns.
14041+ *
14042+ * sb_lvbptr: saves or returns the contents of the lock's LVB according to rules
14043+ * shown for the DLM_LKF_VALBLK flag.
14044+ *
14045+ * sb_flags: DLM_SBF_DEMOTED is returned if in the process of promoting a lock,
14046+ * it was first demoted to NL to avoid conversion deadlock.
14047+ *
14048+ * sb_status: the returned status of the lock request set prior to AST
14049+ * execution. Possible return values:
14050+ *
14051+ * 0 if lock request was successful
14052+ * -EAGAIN if request would block and is flagged DLM_LKF_NOQUEUE
14053+ * -ENOMEM if there is no memory to process request
14054+ * -EINVAL if there are invalid parameters
14055+ * -DLM_EUNLOCK if unlock request was successful
14056+ * -DLM_ECANCEL ?
14057+ */
14058+
14059+#define DLM_SBF_DEMOTED (0x01)
14060+
14061+struct dlm_lksb {
14062+ int sb_status;
14063+ uint32_t sb_lkid;
14064+ char sb_flags;
14065+ char * sb_lvbptr;
14066+};
14067+
14068+/*
14069+ * These defines are the bits that make up the
14070+ * query code.
14071+ */
14072+
14073+/* Bits 0, 1, 2: the lock mode or DLM_LOCK_THIS; see DLM_LOCK_NL etc. in
14074+ * dlm.h. Ignored for DLM_QUERY_LOCKS_ALL */
14075+#define DLM_LOCK_THIS 0x0007
14076+#define DLM_QUERY_MODE_MASK 0x0007
14077+
14078+/* Bits 3, 4, 5 bitmap of queue(s) to query */
14079+#define DLM_QUERY_QUEUE_WAIT 0x0008
14080+#define DLM_QUERY_QUEUE_CONVERT 0x0010
14081+#define DLM_QUERY_QUEUE_GRANT 0x0020
14082+#define DLM_QUERY_QUEUE_GRANTED 0x0030 /* Shorthand */
14083+#define DLM_QUERY_QUEUE_ALL 0x0038 /* Shorthand */
14084+
14085+/* Bit 6, Return only the information that can be established without a network
14086+ * round-trip. The caller must be aware of the implications of this. Useful for
14087+ * just getting the master node id or resource name. */
14088+#define DLM_QUERY_LOCAL 0x0040
14089+
14090+/* Bits 8 up, query type */
14091+#define DLM_QUERY_LOCKS_HIGHER 0x0100
14092+#define DLM_QUERY_LOCKS_LOWER 0x0200
14093+#define DLM_QUERY_LOCKS_EQUAL 0x0300
14094+#define DLM_QUERY_LOCKS_BLOCKING 0x0400
14095+#define DLM_QUERY_LOCKS_NOTBLOCK 0x0500
14096+#define DLM_QUERY_LOCKS_ALL 0x0600
14097+#define DLM_QUERY_MASK 0x0F00
14098+
14099+/* GRMODE is the default for mode comparisons,
14100+ RQMODE might also be handy */
14101+#define DLM_QUERY_GRMODE 0x0000
14102+#define DLM_QUERY_RQMODE 0x1000
14103+
14104+/* Structures passed into and out of the query */
14105+
14106+struct dlm_lockinfo {
14107+ int lki_lkid; /* Lock ID on originating node */
14108+ int lki_mstlkid; /* Lock ID on master node */
14109+ int lki_parent;
14110+ int lki_node; /* Originating node (not master) */
14111+ uint8_t lki_state; /* Queue the lock is on */
14112+ uint8_t lki_grmode; /* Granted mode */
14113+ uint8_t lki_rqmode; /* Requested mode */
14114+ struct dlm_range lki_grrange; /* Granted range, if applicable */
14115+ struct dlm_range lki_rqrange; /* Requested range, if applicable */
14116+};
14117+
14118+struct dlm_resinfo {
14119+ int rsi_length;
14120+ int rsi_grantcount; /* No. of nodes on grant queue */
14121+ int rsi_convcount; /* No. of nodes on convert queue */
14122+ int rsi_waitcount; /* No. of nodes on wait queue */
14123+ int rsi_masternode; /* Master for this resource */
14124+ char rsi_name[DLM_RESNAME_MAXLEN]; /* Resource name */
14125+ char rsi_valblk[DLM_LVB_LEN]; /* Master's LVB contents, if applicable
14126+ */
14127+};
14128+
14129+struct dlm_queryinfo {
14130+ struct dlm_resinfo *gqi_resinfo;
14131+ struct dlm_lockinfo *gqi_lockinfo; /* This points to an array
14132+ * of structs */
14133+ int gqi_locksize; /* input */
14134+ int gqi_lockcount; /* output */
14135+};
14136+
14137+#ifdef __KERNEL__
14138+/*
14139+ * dlm_init
14140+ *
14141+ * Starts and initializes DLM threads and structures. Creation of the first
14142+ * lockspace will call this if it has not been called already.
14143+ *
14144+ * Returns: 0 if successful, -EXXX on error
14145+ */
14146+
14147+int dlm_init(void);
14148+
14149+/*
14150+ * dlm_release
14151+ *
14152+ * Stops DLM threads.
14153+ *
14154+ * Returns: 0 if successful, -EXXX on error
14155+ */
14156+
14157+int dlm_release(void);
14158+
14159+/*
14160+ * dlm_new_lockspace
14161+ *
14162+ * Starts a lockspace with the given name. If the named lockspace exists in
14163+ * the cluster, the calling node joins it.
14164+ */
14165+
14166+int dlm_new_lockspace(char *name, int namelen, dlm_lockspace_t **lockspace,
14167+ int flags);
14168+
14169+/*
14170+ * dlm_release_lockspace
14171+ *
14172+ * Stop a lockspace.
14173+ */
14174+
14175+int dlm_release_lockspace(dlm_lockspace_t *lockspace, int force);
14176+
14177+/*
14178+ * dlm_lock
14179+ *
14180+ * Make an asynchronous request to acquire or convert a lock on a named
14181+ * resource.
14182+ *
14183+ * lockspace: context for the request
14184+ * mode: the requested mode of the lock (DLM_LOCK_)
14185+ * lksb: lock status block for input and async return values
14186+ * flags: input flags (DLM_LKF_)
14187+ * name: name of the resource to lock, can be binary
14188+ * namelen: the length in bytes of the resource name (up to DLM_RESNAME_MAXLEN)
14189+ * parent: the lock ID of a parent lock or 0 if none
14190+ * lockast: function DLM executes when it completes processing the request
14191+ * astarg: argument passed to lockast and bast functions
14192+ * bast: function DLM executes when this lock later blocks another request
14193+ *
14194+ * Returns:
14195+ * 0 if request is successfully queued for processing
14196+ * -EINVAL if any input parameters are invalid
14197+ * -EAGAIN if request would block and is flagged DLM_LKF_NOQUEUE
14198+ * -ENOMEM if there is no memory to process request
14199+ * -ENOTCONN if there is a communication error
14200+ *
14201+ * If the call to dlm_lock returns an error then the operation has failed and
14202+ * the AST routine will not be called. If dlm_lock returns 0 it is still
14203+ * possible that the lock operation will fail. The AST routine will be called
14204+ * when the locking is complete and the status is returned in the lksb.
14205+ *
14206+ * If the AST routines or parameter are passed to a conversion operation then
14207+ * they will overwrite those values that were passed to a previous dlm_lock
14208+ * call.
14209+ *
14210+ * AST routines should not block (at least not for long), but may make
14211+ * any locking calls they please.
14212+ */
14213+
14214+int dlm_lock(dlm_lockspace_t *lockspace,
14215+ uint32_t mode,
14216+ struct dlm_lksb *lksb,
14217+ uint32_t flags,
14218+ void *name,
14219+ unsigned int namelen,
14220+ uint32_t parent,
14221+ void (*lockast) (void *astarg),
14222+ void *astarg,
14223+ void (*bast) (void *astarg, int mode),
14224+ struct dlm_range *range);
14225+
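Taken together with dlm_new_lockspace() above, a minimal in-kernel usage sketch looks roughly like the following. The completion-based wait, the names, and the assumption that a NULL bast and range are acceptable for a simple request are all illustrative, not mandated by this header; error handling is abbreviated. (Completions are the same mechanism the recoverd code in this patch uses.)

/* Sketch only: take an EX lock on "my-resource" and wait for the AST. */
static struct dlm_lksb my_lksb;
static DECLARE_COMPLETION(my_ast_done);

static void my_ast(void *astarg)
{
	complete(&my_ast_done);	/* result is now in my_lksb.sb_status */
}

static int example_lock(void)
{
	dlm_lockspace_t *ls;
	int error;

	error = dlm_new_lockspace("example", 7, &ls, 0);
	if (error)
		return error;

	error = dlm_lock(ls, DLM_LOCK_EX, &my_lksb, 0, "my-resource", 11,
			 0, my_ast, NULL, NULL, NULL);
	if (error)
		return error;	/* not queued, the AST will not run */

	wait_for_completion(&my_ast_done);
	return my_lksb.sb_status;	/* 0, or -EAGAIN etc. as listed above */
}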
14226+/*
14227+ * dlm_unlock
14228+ *
14229+ * Asynchronously release a lock on a resource. The AST routine is called
14230+ * when the resource is successfully unlocked.
14231+ *
14232+ * lockspace: context for the request
14233+ * lkid: the lock ID as returned in the lksb
14234+ * flags: input flags (DLM_LKF_)
14235+ * lksb: if NULL the lksb parameter passed to last lock request is used
14236+ * astarg: if NULL, astarg in last lock request is used
14237+ *
14238+ * Returns:
14239+ * 0 if request is successfully queued for processing
14240+ * -EINVAL if any input parameters are invalid
14241+ * -ENOTEMPTY if the lock still has sublocks
14242+ * -EBUSY if the lock is waiting for a remote lock operation
14243+ * -ENOTCONN if there is a communication error
14244+ */
14245+
14246+extern int dlm_unlock(dlm_lockspace_t *lockspace,
14247+ uint32_t lkid,
14248+ uint32_t flags,
14249+ struct dlm_lksb *lksb,
14250+ void *astarg);
14251+
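Releasing the lock is the mirror image; per the status codes documented above, success is reported as -DLM_EUNLOCK in sb_status. A sketch under the same assumptions as the locking example (it reuses my_lksb and my_ast, and assumes my_ast_done has been re-armed for reuse):

/* Sketch only: release the lock taken in example_lock() above.
 * The lock ID comes back in the lksb from the original request. */
static int example_unlock(dlm_lockspace_t *ls)
{
	int error;

	error = dlm_unlock(ls, my_lksb.sb_lkid, 0, &my_lksb, NULL);
	if (error)
		return error;

	wait_for_completion(&my_ast_done);	/* my_ast() fires again */
	return my_lksb.sb_status == -DLM_EUNLOCK ? 0 : my_lksb.sb_status;
}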
14252+/* Query interface
14253+ *
14254+ * Query the other holders of a resource, given a known lock ID
14255+ *
14256+ * lockspace: context for the request
14257+ * lksb: LKSB, sb_lkid contains the lock ID of a valid lock
14258+ * on the resource. sb_status will contain the status
14259+ * of the request on completion.
14260+ * query: query bitmap see DLM_QUERY_* above
14261+ * qinfo: pointer to dlm_queryinfo structure
14262+ * ast_routine: AST routine to call on completion
14263+ * astarg: argument to AST routine. It is "traditional"
14264+ * to put the qinfo pointer into lksb->sb_lvbptr
14265+ * and pass the lksb in here.
14266+ */
14267+extern int dlm_query(dlm_lockspace_t *lockspace,
14268+ struct dlm_lksb *lksb,
14269+ int query,
14270+ struct dlm_queryinfo *qinfo,
14271+ void (ast_routine(void *)),
14272+ void *astarg);
14273+
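A query code is assembled by OR-ing one value from each bit group defined earlier (mode bits, queue bits, query type, and optionally DLM_QUERY_LOCAL or DLM_QUERY_RQMODE). A hedged sketch following the "traditional" astarg convention described above; the array size and names are illustrative:

/* Sketch only: ask which locks on the grant queue hold a mode higher
 * than ours, for the resource behind a lock we already own (my_lksb
 * from the example above). */
static struct dlm_resinfo resinfo;
static struct dlm_lockinfo lockinfo[16];
static struct dlm_queryinfo qinfo = {
	.gqi_resinfo = &resinfo,
	.gqi_lockinfo = lockinfo,
	.gqi_locksize = 16,		/* input: capacity of lockinfo[] */
};

static void query_ast(void *astarg)
{
	struct dlm_lksb *lksb = astarg;
	struct dlm_queryinfo *qi = (struct dlm_queryinfo *) lksb->sb_lvbptr;

	printk("dlm: query found %d locks\n", qi->gqi_lockcount);
}

static int example_query(dlm_lockspace_t *ls)
{
	/* One value from each bit group: mode, queue, query type. */
	int query = DLM_LOCK_THIS | DLM_QUERY_QUEUE_GRANT |
		    DLM_QUERY_LOCKS_HIGHER;

	/* "Traditional" convention: qinfo pointer in sb_lvbptr, lksb
	 * as the AST argument. */
	my_lksb.sb_lvbptr = (char *) &qinfo;
	return dlm_query(ls, &my_lksb, query, &qinfo, query_ast, &my_lksb);
}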
14274+
14275+void dlm_debug_dump(void);
14276+
4bf12011 14277+#endif /* __KERNEL__ */
14278+
14279+#endif /* __DLM_DOT_H__ */
14280diff -urN linux-orig/include/cluster/dlm_device.h linux-patched/include/cluster/dlm_device.h
14281--- linux-orig/include/cluster/dlm_device.h 1970-01-01 07:30:00.000000000 +0730
10d56c87 14282+++ linux-patched/include/cluster/dlm_device.h 2004-07-13 18:57:22.000000000 +0800
4bf12011 14283@@ -0,0 +1,63 @@
14284+/******************************************************************************
14285+*******************************************************************************
14286+**
14287+** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
14288+** Copyright (C) 2004 Red Hat, Inc. All rights reserved.
14289+**
14290+** This copyrighted material is made available to anyone wishing to use,
14291+** modify, copy, or redistribute it subject to the terms and conditions
14292+** of the GNU General Public License v.2.
14293+**
14294+*******************************************************************************
14295+******************************************************************************/
14296+
14297+/* This is the device interface for dlm; most users will use a library
14298+ * interface.
14299+ */
14300+
14301+/* Version of the device interface */
14302+#define DLM_DEVICE_VERSION_MAJOR 2
14303+#define DLM_DEVICE_VERSION_MINOR 0
14304+#define DLM_DEVICE_VERSION_PATCH 0
14305+
14306+/* struct passed to the lock write */
14307+struct dlm_lock_params {
14308+ uint32_t version[3];
14309+ uint8_t cmd;
14310+ uint8_t mode;
14311+ uint16_t flags;
14312+ uint32_t lkid;
14313+ uint32_t parent;
14314+ struct dlm_range range;
14315+ uint8_t namelen;
14316+ void *astparam;
14317+ void *astaddr;
14318+ void *bastaddr;
14319+ struct dlm_lksb *lksb;
14320+ char name[1];
14321+};
14322+
14323+
14324+/* struct read from the "device" fd,
14325+ consists mainly of userspace pointers for the library to use */
14326+struct dlm_lock_result {
14327+ uint8_t cmd;
14328+ void *astparam;
14329+ void (*astaddr)(void *astparam);
14330+ struct dlm_lksb *user_lksb;
14331+ struct dlm_lksb lksb; /* But this has real data in it */
14332+ uint8_t bast_mode; /* Not yet used */
14333+};
14334+
14335+/* commands passed to the device */
14336+#define DLM_USER_LOCK 1
14337+#define DLM_USER_UNLOCK 2
14338+#define DLM_USER_QUERY 3
14339+
14340+/* Arbitrary length restriction */
14341+#define MAX_LS_NAME_LEN 64
14342+
14343+/* ioctls on the device */
14344+#define DLM_CREATE_LOCKSPACE _IOW('D', 0x01, char *)
14345+#define DLM_RELEASE_LOCKSPACE _IOW('D', 0x02, char *)
14346+#define DLM_FORCE_RELEASE_LOCKSPACE _IOW('D', 0x03, char *)
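For completeness, a heavily hedged sketch of how a userspace library might drive this interface. The device node path, the exact ioctl argument convention, and whether lock writes go to this fd or to a per-lockspace node are not specified in this header, so all of those details below are assumptions; only the comments above (params are written to the fd, results are read back) are taken from the source.

#include <fcntl.h>
#include <string.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <cluster/dlm.h>		/* struct dlm_lksb, dlm_range */
#include <cluster/dlm_device.h>

static int example_device_lock(void)
{
	char buf[sizeof(struct dlm_lock_params) + 16];
	struct dlm_lock_params *lp = (struct dlm_lock_params *) buf;
	int fd = open("/dev/dlm", O_RDWR);	/* hypothetical node name */

	if (fd < 0)
		return -1;
	/* Assumed convention: the ioctl argument is the lockspace name. */
	if (ioctl(fd, DLM_CREATE_LOCKSPACE, "example") < 0)
		return -1;

	memset(buf, 0, sizeof(buf));
	lp->version[0] = DLM_DEVICE_VERSION_MAJOR;
	lp->version[1] = DLM_DEVICE_VERSION_MINOR;
	lp->version[2] = DLM_DEVICE_VERSION_PATCH;
	lp->cmd = DLM_USER_LOCK;
	lp->mode = 5;			/* DLM_LOCK_EX from dlm.h */
	lp->namelen = 11;
	memcpy(lp->name, "my-resource", 11);

	/* Per the comments above: requests are written to the fd, and
	 * completions are read() back as struct dlm_lock_result. */
	if (write(fd, buf, sizeof(*lp) + lp->namelen) < 0)
		return -1;
	return fd;
}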