]>
Commit | Line | Data |
---|---|---|
4bf12011 | 1 | # Add DLM to the build system |
2 | diff -urN -p linux-2.6.7/cluster/Kconfig linux/cluster/Kconfig | |
3 | --- linux-2.6.7/cluster/Kconfig 2004-06-17 15:00:36.000000000 +0800 | |
4 | +++ linux/cluster/Kconfig 2004-06-17 15:00:57.000000000 +0800 | |
5 | @@ -10,4 +10,22 @@ config CLUSTER | |
6 | needed by all the other components. It provides membership services | |
7 | for those other subsystems. | |
8 | ||
9 | +config CLUSTER_DLM | |
10 | + tristate "Distributed Lock Manager" | |
11 | + depends on CLUSTER | |
12 | + ---help--- | |
13 | + A fully distributed lock manager, providing cluster-wide locking services | |
14 | + and protected lock namespaces for kernel and userland applications. | |
15 | + | |
16 | +config CLUSTER_DLM_PROCLOCKS | |
17 | + boolean "/proc/locks support for DLM" | |
18 | + depends on CLUSTER_DLM | |
19 | + depends on PROC_FS | |
20 | + ---help--- | |
21 | + If this option is enabled, a file will appear at /proc/cluster/dlm_locks. | |
22 | + Write into this "file" the name of a lockspace known to the DLM and then | |
23 | + read out a list of all the resources and locks in that lockspace that are | |
24 | + known to the local node. Note that because the DLM is distributed, this may not | |
25 | + be the full lock picture. | |
26 | + | |
27 | endmenu | |
28 | diff -urN -p linux-2.6.7/cluster/Makefile linux/cluster/Makefile | |
29 | --- linux-2.6.7/cluster/Makefile 2004-06-17 15:00:36.000000000 +0800 | |
30 | +++ linux/cluster/Makefile 2004-06-17 15:00:57.000000000 +0800 | |
31 | @@ -1,3 +1,4 @@ | |
32 | obj-y := nocluster.o | |
33 | ||
34 | obj-$(CONFIG_CLUSTER) += cman/ | |
35 | +obj-$(CONFIG_CLUSTER_DLM) += dlm/ | |
36 | diff -urN -p linux-2.6.7/cluster/dlm/Makefile linux/cluster/dlm/Makefile | |
37 | --- linux-2.6.7/cluster/dlm/Makefile 1970-01-01 07:30:00.000000000 +0730 | |
38 | +++ linux/cluster/dlm/Makefile 2004-06-17 15:00:57.000000000 +0800 | |
39 | @@ -0,0 +1,23 @@ | |
40 | +dlm-objs := ast.o \ | |
41 | + config.o \ | |
42 | + device.o \ | |
43 | + dir.o \ | |
44 | + lkb.o \ | |
45 | + locking.o \ | |
46 | + lockqueue.o \ | |
47 | + lockspace.o \ | |
48 | + lowcomms.o \ | |
49 | + main.o \ | |
50 | + memory.o \ | |
51 | + midcomms.o \ | |
52 | + nodes.o \ | |
53 | + proc.o \ | |
54 | + queries.o \ | |
55 | + rebuild.o \ | |
56 | + reccomms.o \ | |
57 | + recover.o \ | |
58 | + recoverd.o \ | |
59 | + rsb.o \ | |
60 | + util.o \ | |
61 | + | |
62 | +obj-$(CONFIG_CLUSTER_DLM) += dlm.o | |
63 | diff -urN linux-orig/cluster/dlm/ast.c linux-patched/cluster/dlm/ast.c | |
64 | --- linux-orig/cluster/dlm/ast.c 1970-01-01 07:30:00.000000000 +0730 | |
10d56c87 AM |
65 | +++ linux-patched/cluster/dlm/ast.c 2004-07-13 18:57:22.000000000 +0800 |
66 | @@ -0,0 +1,557 @@ | |
4bf12011 | 67 | +/****************************************************************************** |
68 | +******************************************************************************* | |
69 | +** | |
70 | +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | |
71 | +** Copyright (C) 2004 Red Hat, Inc. All rights reserved. | |
72 | +** | |
73 | +** This copyrighted material is made available to anyone wishing to use, | |
74 | +** modify, copy, or redistribute it subject to the terms and conditions | |
75 | +** of the GNU General Public License v.2. | |
76 | +** | |
77 | +******************************************************************************* | |
78 | +******************************************************************************/ | |
79 | + | |
80 | +/* | |
81 | + * This delivers ASTs and checks for dead remote requests and deadlocks. | |
82 | + */ | |
83 | + | |
84 | +#include <linux/timer.h> | |
85 | + | |
86 | +#include "dlm_internal.h" | |
87 | +#include "rsb.h" | |
88 | +#include "lockqueue.h" | |
89 | +#include "dir.h" | |
90 | +#include "locking.h" | |
91 | +#include "lkb.h" | |
92 | +#include "lowcomms.h" | |
93 | +#include "midcomms.h" | |
94 | +#include "ast.h" | |
95 | +#include "nodes.h" | |
96 | +#include "config.h" | |
10d56c87 | 97 | +#include "util.h" |
4bf12011 | 98 | + |
99 | +/* Wake up flags for astd */ | |
100 | +#define GDLMD_WAKE_ASTS 1 | |
101 | +#define GDLMD_WAKE_TIMER 2 | |
102 | + | |
103 | +static struct list_head _deadlockqueue; | |
104 | +static struct semaphore _deadlockqueue_lock; | |
105 | +static struct list_head _lockqueue; | |
106 | +static struct semaphore _lockqueue_lock; | |
107 | +static struct timer_list _lockqueue_timer; | |
108 | +static struct list_head _ast_queue; | |
109 | +static struct semaphore _ast_queue_lock; | |
110 | +static wait_queue_head_t _astd_waitchan; | |
111 | +static atomic_t _astd_running; | |
112 | +static long _astd_pid; | |
113 | +static unsigned long _astd_wakeflags; | |
114 | +static struct completion _astd_done; | |
115 | + | |
10d56c87 | 116 | +void add_to_lockqueue(struct dlm_lkb *lkb) |
4bf12011 | 117 | +{ |
118 | + /* Time stamp the entry so we know if it's been waiting too long */ | |
119 | + lkb->lkb_lockqueue_time = jiffies; | |
120 | + | |
121 | + down(&_lockqueue_lock); | |
122 | + list_add(&lkb->lkb_lockqueue, &_lockqueue); | |
123 | + up(&_lockqueue_lock); | |
124 | +} | |
125 | + | |
10d56c87 | 126 | +void remove_from_lockqueue(struct dlm_lkb *lkb) |
4bf12011 | 127 | +{ |
128 | + down(&_lockqueue_lock); | |
129 | + list_del(&lkb->lkb_lockqueue); | |
130 | + up(&_lockqueue_lock); | |
131 | +} | |
132 | + | |
10d56c87 | 133 | +void add_to_deadlockqueue(struct dlm_lkb *lkb) |
4bf12011 | 134 | +{ |
135 | + if (test_bit(LSFL_NOTIMERS, &lkb->lkb_resource->res_ls->ls_flags)) | |
136 | + return; | |
137 | + lkb->lkb_duetime = jiffies; | |
138 | + down(&_deadlockqueue_lock); | |
139 | + list_add(&lkb->lkb_deadlockq, &_deadlockqueue); | |
140 | + up(&_deadlockqueue_lock); | |
141 | +} | |
142 | + | |
10d56c87 | 143 | +void remove_from_deadlockqueue(struct dlm_lkb *lkb) |
4bf12011 | 144 | +{ |
145 | + if (test_bit(LSFL_NOTIMERS, &lkb->lkb_resource->res_ls->ls_flags)) | |
146 | + return; | |
147 | + | |
148 | + down(&_deadlockqueue_lock); | |
149 | + list_del(&lkb->lkb_deadlockq); | |
150 | + up(&_deadlockqueue_lock); | |
151 | + | |
152 | + /* Invalidate the due time */ | |
153 | + memset(&lkb->lkb_duetime, 0, sizeof(lkb->lkb_duetime)); | |
154 | +} | |
155 | + | |
4bf12011 | 156 | +/* |
5cdbd17b | 157 | + * deliver an AST to a user |
4bf12011 | 158 | + */ |
159 | + | |
10d56c87 | 160 | +static void deliver_ast(struct dlm_lkb *lkb, uint16_t ast_type) |
4bf12011 | 161 | +{ |
162 | + void (*cast) (long param) = lkb->lkb_astaddr; | |
163 | + void (*bast) (long param, int mode) = lkb->lkb_bastaddr; | |
164 | + | |
5cdbd17b AM |
165 | + if (ast_type == AST_BAST) { |
166 | + if (!bast) | |
167 | + return; | |
168 | + if (lkb->lkb_status != GDLM_LKSTS_GRANTED) | |
169 | + return; | |
4bf12011 | 170 | + bast(lkb->lkb_astparam, (int) lkb->lkb_bastmode); |
5cdbd17b AM |
171 | + } else { |
172 | + if (!cast) | |
173 | + return; | |
174 | + cast(lkb->lkb_astparam); | |
4bf12011 | 175 | + } |
4bf12011 | 176 | +} |
177 | + | |
178 | +/* | |
179 | + * Queue an AST for delivery. This only deals with | |
180 | + * kernel ASTs; the usermode API piggybacks on top of this. | |
181 | + * | |
182 | + * This can be called in either the user or DLM context. | |
10d56c87 | 183 | + * ASTs are queued EVEN IF we are already running in dlm_astd |
4bf12011 | 184 | + * context as we don't know what other locks are held (e.g. we could |
185 | + * be being called from a lock operation that was called from | |
186 | + * another AST!). | |
187 | + * If the AST is to be queued remotely then a message is sent to | |
188 | + * the target system via midcomms. | |
189 | + */ | |
190 | + | |
10d56c87 | 191 | +void queue_ast(struct dlm_lkb *lkb, uint16_t flags, uint8_t rqmode) |
4bf12011 | 192 | +{ |
10d56c87 | 193 | + struct dlm_request req; |
4bf12011 | 194 | + |
195 | + if (lkb->lkb_flags & GDLM_LKFLG_MSTCPY) { | |
196 | + /* | |
197 | + * Send a message to have an ast queued remotely. Note: we do | |
198 | + * not send remote completion asts, they are handled as part of | |
199 | + * remote lock granting. | |
200 | + */ | |
5cdbd17b | 201 | + if (flags & AST_BAST) { |
4bf12011 | 202 | + req.rr_header.rh_cmd = GDLM_REMCMD_SENDBAST; |
203 | + req.rr_header.rh_length = sizeof(req); | |
204 | + req.rr_header.rh_flags = 0; | |
205 | + req.rr_header.rh_lkid = lkb->lkb_id; | |
206 | + req.rr_header.rh_lockspace = | |
207 | + lkb->lkb_resource->res_ls->ls_global_id; | |
208 | + req.rr_status = lkb->lkb_retstatus; | |
209 | + req.rr_remlkid = lkb->lkb_remid; | |
210 | + req.rr_rqmode = rqmode; | |
211 | + | |
212 | + midcomms_send_message(lkb->lkb_nodeid, &req.rr_header, | |
5cdbd17b | 213 | + lkb->lkb_resource->res_ls->ls_allocation); |
4bf12011 | 214 | + } else if (lkb->lkb_retstatus == -EDEADLOCK) { |
215 | + /* | |
216 | + * We only queue remote Completion ASTs here for error | |
217 | + * completions that happen out of band. | |
218 | + * DEADLOCK is one such. | |
219 | + */ | |
4bf12011 | 220 | + req.rr_header.rh_cmd = GDLM_REMCMD_SENDCAST; |
221 | + req.rr_header.rh_length = sizeof(req); | |
222 | + req.rr_header.rh_flags = 0; | |
223 | + req.rr_header.rh_lkid = lkb->lkb_id; | |
224 | + req.rr_header.rh_lockspace = | |
225 | + lkb->lkb_resource->res_ls->ls_global_id; | |
226 | + req.rr_status = lkb->lkb_retstatus; | |
227 | + req.rr_remlkid = lkb->lkb_remid; | |
228 | + req.rr_rqmode = rqmode; | |
229 | + | |
230 | + midcomms_send_message(lkb->lkb_nodeid, &req.rr_header, | |
5cdbd17b | 231 | + lkb->lkb_resource->res_ls->ls_allocation); |
4bf12011 | 232 | + } |
233 | + } else { | |
234 | + /* | |
5cdbd17b | 235 | + * Prepare info that will be returned in ast/bast. |
4bf12011 | 236 | + */ |
237 | + | |
5cdbd17b | 238 | + if (flags & AST_BAST) { |
4bf12011 | 239 | + lkb->lkb_bastmode = rqmode; |
240 | + } else { | |
241 | + lkb->lkb_lksb->sb_status = lkb->lkb_retstatus; | |
242 | + | |
243 | + if (lkb->lkb_flags & GDLM_LKFLG_DEMOTED) | |
244 | + lkb->lkb_lksb->sb_flags = DLM_SBF_DEMOTED; | |
245 | + else | |
246 | + lkb->lkb_lksb->sb_flags = 0; | |
247 | + } | |
248 | + | |
4bf12011 | 249 | + down(&_ast_queue_lock); |
5cdbd17b AM |
250 | + if (lkb->lkb_astflags & AST_DEL) |
251 | + log_print("queue_ast on deleted lkb %x ast %x pid %u", | |
252 | + lkb->lkb_id, lkb->lkb_astflags, current->pid); | |
253 | + if (!(lkb->lkb_astflags & (AST_COMP | AST_BAST))) | |
4bf12011 | 254 | + list_add_tail(&lkb->lkb_astqueue, &_ast_queue); |
5cdbd17b | 255 | + lkb->lkb_astflags |= flags; |
4bf12011 | 256 | + up(&_ast_queue_lock); |
257 | + | |
258 | + /* It is the responsibility of the caller to call wake_astd() | |
259 | + * after it has finished other locking operations that request | |
260 | + * the ASTs to be delivered after */ | |
261 | + } | |
262 | +} | |
263 | + | |
264 | +/* | |
5cdbd17b | 265 | + * Process any LKBs on the AST queue. |
4bf12011 | 266 | + */ |
267 | + | |
268 | +static void process_asts(void) | |
269 | +{ | |
10d56c87 | 270 | + struct dlm_lkb *lkb; |
5cdbd17b | 271 | + uint16_t flags; |
4bf12011 | 272 | + |
5cdbd17b AM |
273 | + for (;;) { |
274 | + down(&_ast_queue_lock); | |
275 | + if (list_empty(&_ast_queue)) { | |
276 | + up(&_ast_queue_lock); | |
277 | + break; | |
278 | + } | |
279 | + | |
10d56c87 | 280 | + lkb = list_entry(_ast_queue.next, struct dlm_lkb, lkb_astqueue); |
5cdbd17b AM |
281 | + list_del(&lkb->lkb_astqueue); |
282 | + flags = lkb->lkb_astflags; | |
283 | + lkb->lkb_astflags = 0; | |
284 | + up(&_ast_queue_lock); | |
4bf12011 | 285 | + |
5cdbd17b AM |
286 | + if (flags & AST_COMP) |
287 | + deliver_ast(lkb, AST_COMP); | |
4bf12011 | 288 | + |
10d56c87 AM |
289 | + if (flags & AST_BAST) |
290 | + deliver_ast(lkb, AST_BAST); | |
4bf12011 | 291 | + |
5cdbd17b | 292 | + if (flags & AST_DEL) { |
10d56c87 AM |
293 | + struct dlm_rsb *rsb = lkb->lkb_resource; |
294 | + struct dlm_ls *ls = rsb->res_ls; | |
4bf12011 | 295 | + |
10d56c87 | 296 | + DLM_ASSERT(lkb->lkb_astflags == 0, |
5cdbd17b | 297 | + printk("%x %x\n", lkb->lkb_id, lkb->lkb_astflags);); |
4bf12011 | 298 | + |
5cdbd17b AM |
299 | + down_read(&ls->ls_in_recovery); |
300 | + release_lkb(ls, lkb); | |
301 | + release_rsb(rsb); | |
302 | + up_read(&ls->ls_in_recovery); | |
303 | + } | |
304 | + | |
305 | + schedule(); | |
4bf12011 | 306 | + } |
4bf12011 | 307 | +} |
308 | + | |
10d56c87 | 309 | +void lockqueue_lkb_mark(struct dlm_ls *ls) |
4bf12011 | 310 | +{ |
10d56c87 | 311 | + struct dlm_lkb *lkb, *safe; |
4bf12011 | 312 | + int count = 0; |
313 | + | |
314 | + log_all(ls, "mark waiting requests"); | |
315 | + | |
316 | + down(&_lockqueue_lock); | |
317 | + | |
318 | + list_for_each_entry_safe(lkb, safe, &_lockqueue, lkb_lockqueue) { | |
319 | + | |
320 | + if (lkb->lkb_resource->res_ls != ls) | |
321 | + continue; | |
322 | + | |
323 | + /* | |
324 | + * These lkb's are new and the master is being looked up. Mark | |
325 | + * the lkb request to be resent. Even if the destination node | |
326 | + * for the request is still living and has our request, it will | |
327 | + * purge all resdir requests in purge_requestqueue. If there's | |
328 | + * a reply to the LOOKUP request in our requestqueue (the reply | |
329 | + * arrived after ls_stop), it is invalid and will be discarded | |
330 | + * in purge_requestqueue, too. | |
331 | + */ | |
332 | + | |
333 | + if (lkb->lkb_lockqueue_state == GDLM_LQSTATE_WAIT_RSB) { | |
10d56c87 AM |
334 | + DLM_ASSERT(lkb->lkb_nodeid == -1, |
335 | + print_lkb(lkb); | |
336 | + print_rsb(lkb->lkb_resource);); | |
4bf12011 | 337 | + |
338 | + lkb->lkb_flags |= GDLM_LKFLG_LQRESEND; | |
339 | + count++; | |
340 | + continue; | |
341 | + } | |
342 | + | |
343 | + /* | |
344 | + * These lkb's have an outstanding request to a bygone node. | |
345 | + * The request will be redirected to the new master node in | |
346 | + * resend_cluster_requests(). Don't mark the request for | |
347 | + * resending if there's a reply for it saved in the | |
348 | + * requestqueue. | |
349 | + */ | |
350 | + | |
351 | + if (in_nodes_gone(ls, lkb->lkb_nodeid) && | |
352 | + !reply_in_requestqueue(ls, lkb->lkb_id)) { | |
353 | + | |
354 | + lkb->lkb_flags |= GDLM_LKFLG_LQRESEND; | |
355 | + | |
356 | + /* | |
357 | + * Don't rebuild this lkb on a new rsb in | |
358 | + * rebuild_rsbs_send(). | |
359 | + */ | |
360 | + | |
10d56c87 AM |
361 | + if (lkb->lkb_lockqueue_state == GDLM_LQSTATE_WAIT_CONDGRANT) { |
362 | + DLM_ASSERT(lkb->lkb_status == GDLM_LKSTS_WAITING, | |
363 | + print_lkb(lkb); | |
364 | + print_rsb(lkb->lkb_resource);); | |
4bf12011 | 365 | + lkb->lkb_flags |= GDLM_LKFLG_NOREBUILD; |
366 | + } | |
367 | + | |
368 | + /* | |
369 | + * This flag indicates to the new master that his lkb | |
370 | + * is in the midst of a convert request and should be | |
371 | + * placed on the granted queue rather than the convert | |
372 | + * queue. We will resend this convert request to the | |
373 | + * new master. | |
374 | + */ | |
375 | + | |
10d56c87 AM |
376 | + else if (lkb->lkb_lockqueue_state == GDLM_LQSTATE_WAIT_CONVERT) { |
377 | + DLM_ASSERT(lkb->lkb_status == GDLM_LKSTS_CONVERT, | |
378 | + print_lkb(lkb); | |
379 | + print_rsb(lkb->lkb_resource);); | |
4bf12011 | 380 | + lkb->lkb_flags |= GDLM_LKFLG_LQCONVERT; |
381 | + } | |
382 | + | |
383 | + count++; | |
384 | + } | |
385 | + } | |
386 | + up(&_lockqueue_lock); | |
387 | + | |
388 | + log_all(ls, "marked %d requests", count); | |
389 | +} | |
390 | + | |
10d56c87 | 391 | +int resend_cluster_requests(struct dlm_ls *ls) |
4bf12011 | 392 | +{ |
10d56c87 | 393 | + struct dlm_lkb *lkb, *safe; |
4bf12011 | 394 | + int error = 0, state, count = 0; |
395 | + | |
396 | + log_all(ls, "resend marked requests"); | |
397 | + | |
398 | + down(&_lockqueue_lock); | |
399 | + | |
400 | + list_for_each_entry_safe(lkb, safe, &_lockqueue, lkb_lockqueue) { | |
401 | + | |
402 | + if (!test_bit(LSFL_LS_RUN, &ls->ls_flags)) { | |
403 | + log_debug(ls, "resend_cluster_requests: aborted"); | |
404 | + error = -EINTR; | |
405 | + break; | |
406 | + } | |
407 | + | |
408 | + if (lkb->lkb_resource->res_ls != ls) | |
409 | + continue; | |
410 | + | |
411 | + log_debug(ls, "resend_cluster_requests id=%x nodeid=%d " | |
412 | + "lqstate=%u flags=%x", lkb->lkb_id, lkb->lkb_nodeid, | |
413 | + lkb->lkb_lockqueue_state, lkb->lkb_flags); | |
414 | + | |
415 | + /* | |
416 | + * Resend/process the lockqueue lkb's (in-progress requests) | |
417 | + * that were flagged at the start of recovery in | |
418 | + * lockqueue_lkb_mark(). | |
419 | + */ | |
420 | + | |
421 | + if (lkb->lkb_flags & GDLM_LKFLG_LQRESEND) { | |
422 | + lkb->lkb_flags &= ~GDLM_LKFLG_LQRESEND; | |
423 | + lkb->lkb_flags &= ~GDLM_LKFLG_NOREBUILD; | |
424 | + lkb->lkb_flags &= ~GDLM_LKFLG_LQCONVERT; | |
425 | + | |
426 | + if (lkb->lkb_nodeid == -1) { | |
427 | + /* | |
428 | + * Send lookup to new resdir node. | |
429 | + */ | |
430 | + lkb->lkb_lockqueue_time = jiffies; | |
431 | + send_cluster_request(lkb, | |
432 | + lkb->lkb_lockqueue_state); | |
433 | + } | |
434 | + | |
435 | + else if (lkb->lkb_nodeid != 0) { | |
436 | + /* | |
437 | + * There's a new RSB master (that's not us.) | |
438 | + */ | |
439 | + lkb->lkb_lockqueue_time = jiffies; | |
440 | + send_cluster_request(lkb, | |
441 | + lkb->lkb_lockqueue_state); | |
442 | + } | |
443 | + | |
444 | + else { | |
445 | + /* | |
446 | + * We are the new RSB master for this lkb | |
447 | + * request. | |
448 | + */ | |
449 | + state = lkb->lkb_lockqueue_state; | |
450 | + lkb->lkb_lockqueue_state = 0; | |
451 | + /* list_del equals remove_from_lockqueue() */ | |
452 | + list_del(&lkb->lkb_lockqueue); | |
10d56c87 | 453 | + process_remastered_lkb(ls, lkb, state); |
4bf12011 | 454 | + } |
455 | + | |
456 | + count++; | |
457 | + } | |
458 | + } | |
459 | + up(&_lockqueue_lock); | |
460 | + | |
461 | + log_all(ls, "resent %d requests", count); | |
462 | + return error; | |
463 | +} | |
464 | + | |
465 | +/* | |
466 | + * Process any LKBs on the lock queue. This | |
467 | + * just looks at the entries to see if they have been | |
468 | + * on the queue too long and fails the requests if so. | |
469 | + */ | |
470 | + | |
471 | +static void process_lockqueue(void) | |
472 | +{ | |
10d56c87 AM |
473 | + struct dlm_lkb *lkb, *safe; |
474 | + struct dlm_ls *ls; | |
4bf12011 | 475 | + int count = 0; |
476 | + | |
477 | + down(&_lockqueue_lock); | |
478 | + | |
479 | + list_for_each_entry_safe(lkb, safe, &_lockqueue, lkb_lockqueue) { | |
480 | + ls = lkb->lkb_resource->res_ls; | |
481 | + | |
482 | + if (test_bit(LSFL_NOTIMERS, &ls->ls_flags)) | |
483 | + continue; | |
484 | + | |
485 | + /* Don't time out locks that are in transition */ | |
486 | + if (!test_bit(LSFL_LS_RUN, &ls->ls_flags)) | |
487 | + continue; | |
488 | + | |
489 | + if (check_timeout(lkb->lkb_lockqueue_time, | |
490 | + dlm_config.lock_timeout)) { | |
491 | + count++; | |
492 | + list_del(&lkb->lkb_lockqueue); | |
493 | + up(&_lockqueue_lock); | |
494 | + cancel_lockop(lkb, -ETIMEDOUT); | |
495 | + down(&_lockqueue_lock); | |
496 | + } | |
497 | + } | |
498 | + up(&_lockqueue_lock); | |
499 | + | |
500 | + if (count) | |
501 | + wake_astd(); | |
502 | + | |
503 | + if (atomic_read(&_astd_running)) | |
504 | + mod_timer(&_lockqueue_timer, | |
505 | + jiffies + ((dlm_config.lock_timeout >> 1) * HZ)); | |
506 | +} | |
507 | + | |
508 | +/* Look for deadlocks */ | |
509 | +static void process_deadlockqueue(void) | |
510 | +{ | |
10d56c87 | 511 | + struct dlm_lkb *lkb, *safe; |
4bf12011 | 512 | + |
513 | + down(&_deadlockqueue_lock); | |
514 | + | |
515 | + list_for_each_entry_safe(lkb, safe, &_deadlockqueue, lkb_deadlockq) { | |
10d56c87 | 516 | + struct dlm_lkb *kill_lkb; |
4bf12011 | 517 | + |
518 | + /* Only look at "due" locks */ | |
519 | + if (!check_timeout(lkb->lkb_duetime, dlm_config.deadlocktime)) | |
520 | + break; | |
521 | + | |
522 | + /* Don't look at locks that are in transition */ | |
523 | + if (!test_bit(LSFL_LS_RUN, | |
524 | + &lkb->lkb_resource->res_ls->ls_flags)) | |
525 | + continue; | |
526 | + | |
527 | + up(&_deadlockqueue_lock); | |
528 | + | |
529 | + /* Lock has hit due time, check for conversion deadlock */ | |
530 | + kill_lkb = conversion_deadlock_check(lkb); | |
531 | + if (kill_lkb) | |
532 | + cancel_conversion(kill_lkb, -EDEADLOCK); | |
533 | + | |
534 | + down(&_deadlockqueue_lock); | |
535 | + } | |
536 | + up(&_deadlockqueue_lock); | |
537 | +} | |
538 | + | |
539 | +static __inline__ int no_asts(void) | |
540 | +{ | |
541 | + int ret; | |
542 | + | |
543 | + down(&_ast_queue_lock); | |
544 | + ret = list_empty(&_ast_queue); | |
545 | + up(&_ast_queue_lock); | |
546 | + return ret; | |
547 | +} | |
548 | + | |
549 | +static void lockqueue_timer_fn(unsigned long arg) | |
550 | +{ | |
551 | + set_bit(GDLMD_WAKE_TIMER, &_astd_wakeflags); | |
552 | + wake_up(&_astd_waitchan); | |
553 | +} | |
554 | + | |
555 | +/* | |
556 | + * DLM daemon which delivers asts. | |
557 | + */ | |
558 | + | |
559 | +static int dlm_astd(void *data) | |
560 | +{ | |
561 | + daemonize("dlm_astd"); | |
562 | + | |
563 | + INIT_LIST_HEAD(&_lockqueue); | |
564 | + init_MUTEX(&_lockqueue_lock); | |
565 | + INIT_LIST_HEAD(&_deadlockqueue); | |
566 | + init_MUTEX(&_deadlockqueue_lock); | |
567 | + INIT_LIST_HEAD(&_ast_queue); | |
568 | + init_MUTEX(&_ast_queue_lock); | |
569 | + init_waitqueue_head(&_astd_waitchan); | |
570 | + complete(&_astd_done); | |
571 | + | |
572 | + /* | |
573 | + * Set a timer to check the lockqueue for dead locks (and deadlocks). | |
574 | + */ | |
575 | + | |
576 | + init_timer(&_lockqueue_timer); | |
577 | + _lockqueue_timer.function = lockqueue_timer_fn; | |
578 | + _lockqueue_timer.data = 0; | |
579 | + mod_timer(&_lockqueue_timer, | |
580 | + jiffies + ((dlm_config.lock_timeout >> 1) * HZ)); | |
581 | + | |
582 | + while (atomic_read(&_astd_running)) { | |
583 | + wchan_cond_sleep_intr(_astd_waitchan, no_asts()); | |
584 | + | |
585 | + if (test_and_clear_bit(GDLMD_WAKE_ASTS, &_astd_wakeflags)) | |
586 | + process_asts(); | |
587 | + | |
588 | + if (test_and_clear_bit(GDLMD_WAKE_TIMER, &_astd_wakeflags)) { | |
589 | + process_lockqueue(); | |
590 | + if (dlm_config.deadlocktime) | |
591 | + process_deadlockqueue(); | |
592 | + } | |
593 | + } | |
594 | + | |
595 | + if (timer_pending(&_lockqueue_timer)) | |
596 | + del_timer(&_lockqueue_timer); | |
597 | + | |
598 | + complete(&_astd_done); | |
599 | + | |
600 | + return 0; | |
601 | +} | |
602 | + | |
603 | +void wake_astd(void) | |
604 | +{ | |
605 | + set_bit(GDLMD_WAKE_ASTS, &_astd_wakeflags); | |
606 | + wake_up(&_astd_waitchan); | |
607 | +} | |
608 | + | |
609 | +int astd_start() | |
610 | +{ | |
611 | + init_completion(&_astd_done); | |
612 | + atomic_set(&_astd_running, 1); | |
613 | + _astd_pid = kernel_thread(dlm_astd, NULL, 0); | |
614 | + wait_for_completion(&_astd_done); | |
615 | + return 0; | |
616 | +} | |
617 | + | |
618 | +void astd_stop() | |
619 | +{ | |
620 | + atomic_set(&_astd_running, 0); | |
621 | + wake_astd(); | |
622 | + wait_for_completion(&_astd_done); | |
623 | +} | |
624 | diff -urN linux-orig/cluster/dlm/ast.h linux-patched/cluster/dlm/ast.h | |
625 | --- linux-orig/cluster/dlm/ast.h 1970-01-01 07:30:00.000000000 +0730 | |
10d56c87 | 626 | +++ linux-patched/cluster/dlm/ast.h 2004-07-13 18:57:22.000000000 +0800 |
5cdbd17b | 627 | @@ -0,0 +1,28 @@ |
4bf12011 | 628 | +/****************************************************************************** |
629 | +******************************************************************************* | |
630 | +** | |
631 | +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | |
632 | +** Copyright (C) 2004 Red Hat, Inc. All rights reserved. | |
633 | +** | |
634 | +** This copyrighted material is made available to anyone wishing to use, | |
635 | +** modify, copy, or redistribute it subject to the terms and conditions | |
636 | +** of the GNU General Public License v.2. | |
637 | +** | |
638 | +******************************************************************************* | |
639 | +******************************************************************************/ | |
640 | + | |
641 | +#ifndef __AST_DOT_H__ | |
642 | +#define __AST_DOT_H__ | |
643 | + | |
10d56c87 AM |
644 | +void lockqueue_lkb_mark(struct dlm_ls *ls); |
645 | +int resend_cluster_requests(struct dlm_ls *ls); | |
646 | +void add_to_lockqueue(struct dlm_lkb *lkb); | |
647 | +void remove_from_lockqueue(struct dlm_lkb *lkb); | |
648 | +void add_to_deadlockqueue(struct dlm_lkb *lkb); | |
649 | +void remove_from_deadlockqueue(struct dlm_lkb *lkb); | |
650 | +void queue_ast(struct dlm_lkb *lkb, uint16_t astflags, uint8_t rqmode); | |
4bf12011 | 651 | +void wake_astd(void); |
652 | +int astd_start(void); | |
653 | +void astd_stop(void); | |
654 | + | |
655 | +#endif /* __AST_DOT_H__ */ | |
656 | diff -urN linux-orig/cluster/dlm/config.c linux-patched/cluster/dlm/config.c | |
657 | --- linux-orig/cluster/dlm/config.c 1970-01-01 07:30:00.000000000 +0730 | |
10d56c87 AM |
658 | +++ linux-patched/cluster/dlm/config.c 2004-07-13 18:57:22.000000000 +0800 |
659 | @@ -0,0 +1,131 @@ | |
4bf12011 | 660 | +/****************************************************************************** |
661 | +******************************************************************************* | |
662 | +** | |
663 | +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | |
664 | +** Copyright (C) 2004 Red Hat, Inc. All rights reserved. | |
665 | +** | |
666 | +** This copyrighted material is made available to anyone wishing to use, | |
667 | +** modify, copy, or redistribute it subject to the terms and conditions | |
668 | +** of the GNU General Public License v.2. | |
669 | +** | |
670 | +******************************************************************************* | |
671 | +******************************************************************************/ | |
672 | + | |
673 | +#include <linux/module.h> | |
674 | +#include <linux/proc_fs.h> | |
675 | + | |
676 | +#include "dlm_internal.h" | |
677 | +#include "lowcomms.h" | |
678 | +#include "config.h" | |
679 | + | |
680 | +/* Config file defaults */ | |
681 | +#define DEFAULT_TCP_PORT 21064 | |
682 | +#define DEFAULT_LOCK_TIMEOUT 30 | |
683 | +#define DEFAULT_BUFFER_SIZE 4096 | |
10d56c87 AM |
684 | +#define DEFAULT_RSBTBL_SIZE 256 |
685 | +#define DEFAULT_LKBTBL_SIZE 1024 | |
686 | +#define DEFAULT_DIRTBL_SIZE 512 | |
4bf12011 | 687 | +#define DEFAULT_MAX_CONNECTIONS 128 |
688 | +#define DEFAULT_DEADLOCKTIME 10 | |
689 | + | |
690 | +struct config_info dlm_config = { | |
691 | + .tcp_port = DEFAULT_TCP_PORT, | |
692 | + .lock_timeout = DEFAULT_LOCK_TIMEOUT, | |
693 | + .buffer_size = DEFAULT_BUFFER_SIZE, | |
10d56c87 AM |
694 | + .rsbtbl_size = DEFAULT_RSBTBL_SIZE, |
695 | + .lkbtbl_size = DEFAULT_LKBTBL_SIZE, | |
696 | + .dirtbl_size = DEFAULT_DIRTBL_SIZE, | |
4bf12011 | 697 | + .max_connections = DEFAULT_MAX_CONNECTIONS, |
698 | + .deadlocktime = DEFAULT_DEADLOCKTIME, | |
699 | +}; | |
700 | + | |
701 | + | |
702 | +static struct config_proc_info { | |
703 | + char *name; | |
704 | + int *value; | |
705 | +} config_proc[] = { | |
706 | + { | |
707 | + .name = "tcp_port", | |
708 | + .value = &dlm_config.tcp_port, | |
709 | + }, | |
710 | + { | |
711 | + .name = "lock_timeout", | |
712 | + .value = &dlm_config.lock_timeout, | |
713 | + }, | |
714 | + { | |
715 | + .name = "buffer_size", | |
716 | + .value = &dlm_config.buffer_size, | |
717 | + }, | |
718 | + { | |
10d56c87 AM |
719 | + .name = "rsbtbl_size", |
720 | + .value = &dlm_config.rsbtbl_size, | |
4bf12011 | 721 | + }, |
722 | + { | |
10d56c87 AM |
723 | + .name = "lkbtbl_size", |
724 | + .value = &dlm_config.lkbtbl_size, | |
725 | + }, | |
726 | + { | |
727 | + .name = "dirtbl_size", | |
728 | + .value = &dlm_config.dirtbl_size, | |
4bf12011 | 729 | + }, |
730 | + { | |
731 | + .name = "max_connections", | |
732 | + .value = &dlm_config.max_connections, | |
733 | + }, | |
734 | + { | |
735 | + .name = "deadlocktime", | |
736 | + .value = &dlm_config.deadlocktime, | |
10d56c87 | 737 | + } |
4bf12011 | 738 | +}; |
739 | +static struct proc_dir_entry *dlm_dir; | |
740 | + | |
741 | +static int dlm_config_read_proc(char *page, char **start, off_t off, int count, | |
742 | + int *eof, void *data) | |
743 | +{ | |
744 | + struct config_proc_info *cinfo = data; | |
745 | + return snprintf(page, count, "%d\n", *cinfo->value); | |
746 | +} | |
747 | + | |
748 | +static int dlm_config_write_proc(struct file *file, const char *buffer, | |
749 | + unsigned long count, void *data) | |
750 | +{ | |
751 | + struct config_proc_info *cinfo = data; | |
752 | + int value; | |
753 | + char *end; | |
754 | + | |
755 | + value = simple_strtoul(buffer, &end, 10); | |
756 | + if (*end) | |
757 | + *cinfo->value = value; | |
758 | + return count; | |
759 | +} | |
760 | + | |
761 | +int dlm_config_init(void) | |
762 | +{ | |
763 | + int i; | |
764 | + struct proc_dir_entry *pde; | |
765 | + | |
766 | + dlm_dir = proc_mkdir("cluster/config/dlm", 0); | |
767 | + if (!dlm_dir) | |
768 | + return -1; | |
769 | + | |
770 | + dlm_dir->owner = THIS_MODULE; | |
771 | + | |
772 | + for (i=0; i<sizeof(config_proc)/sizeof(struct config_proc_info); i++) { | |
773 | + pde = create_proc_entry(config_proc[i].name, 0660, dlm_dir); | |
774 | + if (pde) { | |
775 | + pde->data = &config_proc[i]; | |
776 | + pde->write_proc = dlm_config_write_proc; | |
777 | + pde->read_proc = dlm_config_read_proc; | |
778 | + } | |
779 | + } | |
780 | + return 0; | |
781 | +} | |
782 | + | |
783 | +void dlm_config_exit(void) | |
784 | +{ | |
785 | + int i; | |
786 | + | |
787 | + for (i=0; i<sizeof(config_proc)/sizeof(struct config_proc_info); i++) | |
788 | + remove_proc_entry(config_proc[i].name, dlm_dir); | |
789 | + remove_proc_entry("cluster/config/dlm", NULL); | |
790 | +} | |
791 | diff -urN linux-orig/cluster/dlm/config.h linux-patched/cluster/dlm/config.h | |
792 | --- linux-orig/cluster/dlm/config.h 1970-01-01 07:30:00.000000000 +0730 | |
10d56c87 AM |
793 | +++ linux-patched/cluster/dlm/config.h 2004-07-13 18:57:22.000000000 +0800 |
794 | @@ -0,0 +1,32 @@ | |
4bf12011 | 795 | +/****************************************************************************** |
796 | +******************************************************************************* | |
797 | +** | |
798 | +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | |
799 | +** Copyright (C) 2004 Red Hat, Inc. All rights reserved. | |
800 | +** | |
801 | +** This copyrighted material is made available to anyone wishing to use, | |
802 | +** modify, copy, or redistribute it subject to the terms and conditions | |
803 | +** of the GNU General Public License v.2. | |
804 | +** | |
805 | +******************************************************************************* | |
806 | +******************************************************************************/ | |
807 | + | |
808 | +#ifndef __CONFIG_DOT_H__ | |
809 | +#define __CONFIG_DOT_H__ | |
810 | + | |
811 | +struct config_info { | |
812 | + int tcp_port; | |
813 | + int lock_timeout; | |
814 | + int buffer_size; | |
10d56c87 AM |
815 | + int rsbtbl_size; |
816 | + int lkbtbl_size; | |
817 | + int dirtbl_size; | |
4bf12011 | 818 | + int max_connections; |
819 | + int deadlocktime; | |
820 | +}; | |
821 | + | |
822 | +extern struct config_info dlm_config; | |
823 | +extern int dlm_config_init(void); | |
824 | +extern void dlm_config_exit(void); | |
825 | + | |
826 | +#endif /* __CONFIG_DOT_H__ */ | |
827 | diff -urN linux-orig/cluster/dlm/device.c linux-patched/cluster/dlm/device.c | |
828 | --- linux-orig/cluster/dlm/device.c 1970-01-01 07:30:00.000000000 +0730 | |
10d56c87 | 829 | +++ linux-patched/cluster/dlm/device.c 2004-07-13 18:57:22.000000000 +0800 |
4bf12011 | 830 | @@ -0,0 +1,1020 @@ |
831 | +/****************************************************************************** | |
832 | +******************************************************************************* | |
833 | +** | |
834 | +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | |
835 | +** Copyright (C) 2004 Red Hat, Inc. All rights reserved. | |
836 | +** | |
837 | +** This copyrighted material is made available to anyone wishing to use, | |
838 | +** modify, copy, or redistribute it subject to the terms and conditions | |
839 | +** of the GNU General Public License v.2. | |
840 | +** | |
841 | +******************************************************************************* | |
842 | +******************************************************************************/ | |
843 | + | |
844 | +/* | |
845 | + * device.c | |
846 | + * | |
847 | + * This is the userland interface to the DLM. | |
848 | + * | |
849 | + * The locking is done via a misc char device (find the | |
850 | + * registered minor number in /proc/misc). | |
851 | + * | |
852 | + * User code should not use this interface directly but | |
853 | + * call the library routines in libdlm.a instead. | |
854 | + * | |
855 | + */ | |
856 | + | |
857 | +#include <linux/miscdevice.h> | |
858 | +#include <linux/init.h> | |
859 | +#include <linux/wait.h> | |
860 | +#include <linux/module.h> | |
861 | +#include <linux/file.h> | |
862 | +#include <linux/fs.h> | |
863 | +#include <linux/poll.h> | |
864 | +#include <linux/signal.h> | |
865 | +#include <linux/spinlock.h> | |
866 | +#include <asm/ioctls.h> | |
867 | + | |
868 | +#include "dlm_internal.h" | |
869 | +#include "device.h" | |
870 | + | |
10d56c87 | 871 | +extern struct dlm_lkb *dlm_get_lkb(struct dlm_ls *, int); |
4bf12011 | 872 | +static struct file_operations _dlm_fops; |
873 | +static const char *name_prefix="dlm"; | |
874 | +static struct list_head user_ls_list; | |
875 | + | |
876 | +/* Flags in li_flags */ | |
877 | +#define LI_FLAG_COMPLETE 1 | |
878 | +#define LI_FLAG_FIRSTLOCK 2 | |
879 | + | |
880 | +struct lock_info { | |
881 | + uint8_t li_cmd; | |
882 | + struct dlm_lksb li_lksb; | |
883 | + wait_queue_head_t li_waitq; | |
884 | + unsigned long li_flags; | |
885 | + void __user *li_astparam; | |
886 | + void __user *li_astaddr; | |
887 | + void __user *li_bastaddr; | |
888 | + struct file_info *li_file; | |
889 | + struct dlm_lksb __user *li_user_lksb; | |
890 | + struct semaphore li_firstlock; | |
891 | + struct dlm_queryinfo *li_queryinfo; | |
892 | + struct dlm_queryinfo __user *li_user_queryinfo; | |
893 | +}; | |
894 | + | |
895 | +/* A queued AST no less */ | |
896 | +struct ast_info { | |
897 | + struct dlm_lock_result result; | |
898 | + struct dlm_queryinfo *queryinfo; | |
899 | + struct dlm_queryinfo __user *user_queryinfo; | |
900 | + struct list_head list; | |
901 | +}; | |
902 | + | |
903 | +/* One of these per userland lockspace */ | |
904 | +struct user_ls { | |
905 | + void *ls_lockspace; | |
906 | + atomic_t ls_refcnt; | |
907 | + long ls_flags; /* bit 1 means LS has been deleted */ | |
908 | + | |
909 | + /* Passed into misc_register() */ | |
910 | + struct miscdevice ls_miscinfo; | |
911 | + struct list_head ls_list; | |
912 | +}; | |
913 | + | |
914 | +/* misc_device info for the control device */ | |
915 | +static struct miscdevice ctl_device; | |
916 | + | |
917 | +/* | |
918 | + * Stuff we hang off the file struct. | |
919 | + * The first two are to cope with unlocking all the | |
920 | + * locks help by a process when it dies. | |
921 | + */ | |
922 | +struct file_info { | |
923 | + struct list_head fi_lkb_list; /* List of active lkbs */ | |
924 | + spinlock_t fi_lkb_lock; | |
925 | + struct list_head fi_ast_list; /* Queue of ASTs to be delivered */ | |
926 | + spinlock_t fi_ast_lock; | |
927 | + wait_queue_head_t fi_wait; | |
928 | + struct user_ls *fi_ls; | |
929 | + atomic_t fi_refcnt; /* Number of users */ | |
930 | + unsigned long fi_flags; /* Bit 1 means the device is open */ | |
931 | +}; | |
932 | + | |
933 | + | |
934 | +/* get and put ops for file_info. | |
935 | + Actually I don't really like "get" and "put", but everyone | |
936 | + else seems to use them and I can't think of anything | |
937 | + nicer at the moment */ | |
938 | +static void get_file_info(struct file_info *f) | |
939 | +{ | |
940 | + atomic_inc(&f->fi_refcnt); | |
941 | +} | |
942 | + | |
943 | +static void put_file_info(struct file_info *f) | |
944 | +{ | |
945 | + if (atomic_dec_and_test(&f->fi_refcnt)) | |
946 | + kfree(f); | |
947 | +} | |
948 | + | |
949 | +/* Find a lockspace struct given the device minor number */ | |
950 | +static struct user_ls *find_lockspace(int minor) | |
951 | +{ | |
952 | + struct user_ls *lsinfo; | |
953 | + | |
954 | + list_for_each_entry(lsinfo, &user_ls_list, ls_list) { | |
955 | + | |
956 | + if (lsinfo->ls_miscinfo.minor == minor) | |
957 | + return lsinfo; | |
958 | + } | |
959 | + return NULL; | |
960 | +} | |
961 | + | |
962 | +static void add_lockspace_to_list(struct user_ls *lsinfo) | |
963 | +{ | |
964 | + list_add(&lsinfo->ls_list, &user_ls_list); | |
965 | +} | |
966 | + | |
967 | +/* Register a lockspace with the DLM and create a misc | |
968 | + device for userland to access it */ | |
969 | +static int register_lockspace(char *name, struct user_ls **ls) | |
970 | +{ | |
971 | + struct user_ls *newls; | |
972 | + int status; | |
973 | + int namelen; | |
974 | + | |
975 | + namelen = strlen(name)+strlen(name_prefix)+2; | |
976 | + | |
977 | + newls = kmalloc(sizeof(struct user_ls), GFP_KERNEL); | |
978 | + if (!newls) | |
979 | + return -ENOMEM; | |
980 | + memset(newls, 0, sizeof(struct user_ls)); | |
981 | + | |
982 | + newls->ls_miscinfo.name = kmalloc(namelen, GFP_KERNEL); | |
983 | + if (!newls->ls_miscinfo.name) { | |
984 | + kfree(newls); | |
985 | + return -ENOMEM; | |
986 | + } | |
987 | + snprintf((char*)newls->ls_miscinfo.name, namelen, "%s_%s", name_prefix, name); | |
988 | + | |
989 | + status = dlm_new_lockspace((char *)newls->ls_miscinfo.name+strlen(name_prefix)+1, | |
990 | + strlen(newls->ls_miscinfo.name) - strlen(name_prefix) - 1, | |
991 | + &newls->ls_lockspace, 0); | |
992 | + | |
993 | + if (status != 0) { | |
994 | + kfree(newls->ls_miscinfo.name); | |
995 | + kfree(newls); | |
996 | + return status; | |
997 | + } | |
998 | + | |
999 | + newls->ls_miscinfo.fops = &_dlm_fops; | |
1000 | + newls->ls_miscinfo.minor = MISC_DYNAMIC_MINOR; | |
1001 | + | |
1002 | + status = misc_register(&newls->ls_miscinfo); | |
1003 | + if (status) { | |
1004 | + log_print("failed to register misc device for %s", name); | |
1005 | + dlm_release_lockspace(newls->ls_lockspace, 0); | |
1006 | + kfree(newls->ls_miscinfo.name); | |
1007 | + kfree(newls); | |
1008 | + return status; | |
1009 | + } | |
1010 | + | |
1011 | + | |
1012 | + add_lockspace_to_list(newls); | |
1013 | + *ls = newls; | |
1014 | + return 0; | |
1015 | +} | |
1016 | + | |
1017 | +static int unregister_lockspace(struct user_ls *lsinfo, int force) | |
1018 | +{ | |
1019 | + int status; | |
1020 | + | |
1021 | + status = dlm_release_lockspace(lsinfo->ls_lockspace, force); | |
1022 | + if (status) | |
1023 | + return status; | |
1024 | + | |
1025 | + status = misc_deregister(&lsinfo->ls_miscinfo); | |
1026 | + if (status) | |
1027 | + return status; | |
1028 | + | |
1029 | + list_del(&lsinfo->ls_list); | |
1030 | + kfree(lsinfo->ls_miscinfo.name); | |
1031 | + kfree(lsinfo); | |
1032 | + | |
1033 | + return 0; | |
1034 | +} | |
1035 | + | |
1036 | +/* Add it to userland's AST queue */ | |
1037 | +static void add_to_astqueue(struct lock_info *li, void *astaddr) | |
1038 | +{ | |
1039 | + struct ast_info *ast = kmalloc(sizeof(struct ast_info), GFP_KERNEL); | |
1040 | + if (!ast) | |
1041 | + return; | |
1042 | + | |
1043 | + ast->result.astparam = li->li_astparam; | |
1044 | + ast->result.astaddr = astaddr; | |
1045 | + ast->result.user_lksb = li->li_user_lksb; | |
1046 | + ast->result.cmd = li->li_cmd; | |
1047 | + memcpy(&ast->result.lksb, &li->li_lksb, sizeof(struct dlm_lksb)); | |
1048 | + | |
1049 | + /* These two will both be NULL for anything other than queries */ | |
1050 | + ast->queryinfo = li->li_queryinfo; | |
1051 | + ast->user_queryinfo = li->li_user_queryinfo; | |
1052 | + | |
1053 | + spin_lock(&li->li_file->fi_ast_lock); | |
1054 | + list_add_tail(&ast->list, &li->li_file->fi_ast_list); | |
1055 | + spin_unlock(&li->li_file->fi_ast_lock); | |
1056 | + wake_up_interruptible(&li->li_file->fi_wait); | |
1057 | +} | |
1058 | + | |
1059 | +static void bast_routine(void *param, int mode) | |
1060 | +{ | |
1061 | + struct lock_info *li = param; | |
1062 | + | |
1063 | + if (param) { | |
1064 | + add_to_astqueue(li, li->li_bastaddr); | |
1065 | + } | |
1066 | +} | |
1067 | + | |
1068 | +/* | |
1069 | + * This is the kernel's AST routine. | |
1070 | + * All lock, unlock & query operations complete here. | |
1071 | + * The only synchronous ops are those done during device close. | |
1072 | + */ | |
1073 | +static void ast_routine(void *param) | |
1074 | +{ | |
1075 | + struct lock_info *li = param; | |
1076 | + | |
1077 | + /* Param may be NULL if a persistent lock is unlocked by someone else */ | |
1078 | + if (!param) | |
1079 | + return; | |
1080 | + | |
1081 | + /* If it's an async request then post data to the user's AST queue. */ | |
1082 | + if (li->li_astaddr) { | |
1083 | + | |
1084 | + /* Only queue AST if the device is still open */ | |
1085 | + if (test_bit(1, &li->li_file->fi_flags)) | |
1086 | + add_to_astqueue(li, li->li_astaddr); | |
1087 | + | |
1088 | + /* If it's a new lock operation that failed, then | |
1089 | + * remove it from the owner queue and free the | |
1090 | + * lock_info. The DLM will not free the LKB until this | |
1091 | + * AST has completed. | |
1092 | + */ | |
1093 | + if (test_and_clear_bit(LI_FLAG_FIRSTLOCK, &li->li_flags) && | |
1094 | + li->li_lksb.sb_status != 0) { | |
10d56c87 | 1095 | + struct dlm_lkb *lkb; |
4bf12011 | 1096 | + |
1097 | + /* Wait till dlm_lock() has finished */ | |
1098 | + down(&li->li_firstlock); | |
1099 | + lkb = dlm_get_lkb(li->li_file->fi_ls->ls_lockspace, li->li_lksb.sb_lkid); | |
1100 | + if (lkb) { | |
1101 | + spin_lock(&li->li_file->fi_lkb_lock); | |
1102 | + list_del(&lkb->lkb_ownerqueue); | |
1103 | + spin_unlock(&li->li_file->fi_lkb_lock); | |
1104 | + } | |
1105 | + up(&li->li_firstlock); | |
1106 | + put_file_info(li->li_file); | |
1107 | + kfree(li); | |
1108 | + return; | |
1109 | + } | |
1110 | + /* Free unlocks & queries */ | |
1111 | + if (li->li_lksb.sb_status == -DLM_EUNLOCK || | |
1112 | + li->li_cmd == DLM_USER_QUERY) { | |
1113 | + put_file_info(li->li_file); | |
1114 | + kfree(li); | |
1115 | + } | |
1116 | + } | |
1117 | + else { | |
1118 | + /* Synchronous request, just wake up the caller */ | |
1119 | + set_bit(LI_FLAG_COMPLETE, &li->li_flags); | |
1120 | + wake_up_interruptible(&li->li_waitq); | |
1121 | + } | |
1122 | +} | |
1123 | + | |
1124 | +/* | |
1125 | + * Wait for the lock op to complete and return the status. | |
1126 | + */ | |
1127 | +static int wait_for_ast(struct lock_info *li) | |
1128 | +{ | |
1129 | + /* Wait for the AST routine to complete */ | |
1130 | + set_task_state(current, TASK_INTERRUPTIBLE); | |
1131 | + while (!test_bit(LI_FLAG_COMPLETE, &li->li_flags)) | |
1132 | + schedule(); | |
1133 | + | |
1134 | + set_task_state(current, TASK_RUNNING); | |
1135 | + | |
1136 | + return li->li_lksb.sb_status; | |
1137 | +} | |
1138 | + | |
1139 | + | |
1140 | +/* Open on control device */ | |
1141 | +static int dlm_ctl_open(struct inode *inode, struct file *file) | |
1142 | +{ | |
1143 | + return 0; | |
1144 | +} | |
1145 | + | |
1146 | +/* Close on control device */ | |
1147 | +static int dlm_ctl_close(struct inode *inode, struct file *file) | |
1148 | +{ | |
1149 | + return 0; | |
1150 | +} | |
1151 | + | |
1152 | +/* Open on lockspace device */ | |
1153 | +static int dlm_open(struct inode *inode, struct file *file) | |
1154 | +{ | |
1155 | + struct file_info *f; | |
1156 | + struct user_ls *lsinfo; | |
1157 | + | |
1158 | + lsinfo = find_lockspace(iminor(inode)); | |
1159 | + if (!lsinfo) | |
1160 | + return -ENOENT; | |
1161 | + | |
1162 | + f = kmalloc(sizeof(struct file_info), GFP_KERNEL); | |
1163 | + if (!f) | |
1164 | + return -ENOMEM; | |
1165 | + | |
1166 | + atomic_inc(&lsinfo->ls_refcnt); | |
1167 | + INIT_LIST_HEAD(&f->fi_lkb_list); | |
1168 | + INIT_LIST_HEAD(&f->fi_ast_list); | |
1169 | + spin_lock_init(&f->fi_ast_lock); | |
1170 | + spin_lock_init(&f->fi_lkb_lock); | |
1171 | + init_waitqueue_head(&f->fi_wait); | |
1172 | + f->fi_ls = lsinfo; | |
1173 | + atomic_set(&f->fi_refcnt, 1); | |
1174 | + set_bit(1, &f->fi_flags); | |
1175 | + | |
1176 | + file->private_data = f; | |
1177 | + | |
1178 | + return 0; | |
1179 | +} | |
1180 | + | |
1181 | +/* Check the user's version matches ours */ | |
1182 | +static int check_version(struct dlm_lock_params *params) | |
1183 | +{ | |
1184 | + if (params->version[0] != DLM_DEVICE_VERSION_MAJOR || | |
1185 | + (params->version[0] == DLM_DEVICE_VERSION_MAJOR && | |
1186 | + params->version[1] > DLM_DEVICE_VERSION_MINOR)) { | |
1187 | + | |
1188 | + log_print("version mismatch user (%d.%d.%d) kernel (%d.%d.%d)", | |
1189 | + params->version[0], | |
1190 | + params->version[1], | |
1191 | + params->version[2], | |
1192 | + DLM_DEVICE_VERSION_MAJOR, | |
1193 | + DLM_DEVICE_VERSION_MINOR, | |
1194 | + DLM_DEVICE_VERSION_PATCH); | |
1195 | + return -EINVAL; | |
1196 | + } | |
1197 | + return 0; | |
1198 | +} | |
1199 | + | |
1200 | +/* Close on lockspace device */ | |
1201 | +static int dlm_close(struct inode *inode, struct file *file) | |
1202 | +{ | |
1203 | + struct file_info *f = file->private_data; | |
1204 | + struct lock_info li; | |
1205 | + sigset_t tmpsig; | |
1206 | + sigset_t allsigs; | |
10d56c87 | 1207 | + struct dlm_lkb *lkb, *safe; |
4bf12011 | 1208 | + struct user_ls *lsinfo; |
1209 | + DECLARE_WAITQUEUE(wq, current); | |
1210 | + | |
1211 | + lsinfo = find_lockspace(iminor(inode)); | |
1212 | + if (!lsinfo) | |
1213 | + return -ENOENT; | |
1214 | + | |
1215 | + /* Mark this closed so that ASTs will not be delivered any more */ | |
1216 | + clear_bit(1, &f->fi_flags); | |
1217 | + | |
1218 | + /* Block signals while we are doing this */ | |
1219 | + sigfillset(&allsigs); | |
1220 | + sigprocmask(SIG_BLOCK, &allsigs, &tmpsig); | |
1221 | + | |
1222 | + /* We use our own lock_info struct here, so that any | |
1223 | + * outstanding "real" ASTs will be delivered with the | |
1224 | + * corresponding "real" params, thus freeing the lock_info | |
1225 | + * that belongs to the lock. This catches the corner case where | |
1226 | + * a lock is BUSY when we try to unlock it here | |
1227 | + */ | |
1228 | + memset(&li, 0, sizeof(li)); | |
1229 | + clear_bit(LI_FLAG_COMPLETE, &li.li_flags); | |
1230 | + init_waitqueue_head(&li.li_waitq); | |
1231 | + add_wait_queue(&li.li_waitq, &wq); | |
1232 | + | |
1233 | + /* | |
1234 | + * Free any outstanding locks, they are on the | |
1235 | + * list in LIFO order so there should be no problems | |
1236 | + * about unlocking parents before children. | |
1237 | + * Although we don't remove the lkbs from the list here | |
1238 | + * (what would be the point?), foreach_safe is needed | |
1239 | + * because the lkbs are freed during dlm_unlock operations | |
1240 | + */ | |
1241 | + list_for_each_entry_safe(lkb, safe, &f->fi_lkb_list, lkb_ownerqueue) { | |
1242 | + int status; | |
1243 | + int lock_status; | |
1244 | + int flags = 0; | |
1245 | + struct lock_info *old_li; | |
1246 | + | |
1247 | + /* Make a copy of this pointer. If all goes well we will | |
1248 | + * free it later. If not, it will be left to the AST routine | |
1249 | + * to tidy up | |
1250 | + */ | |
1251 | + old_li = (struct lock_info *)lkb->lkb_astparam; | |
1252 | + | |
1253 | + /* Don't unlock persistent locks */ | |
1254 | + if (lkb->lkb_flags & GDLM_LKFLG_PERSISTENT) { | |
1255 | + list_del(&lkb->lkb_ownerqueue); | |
1256 | + | |
1257 | + /* But tidy our references in it */ | |
1258 | + kfree(old_li); | |
1259 | + lkb->lkb_astparam = (long)NULL; | |
1260 | + put_file_info(f); | |
1261 | + continue; | |
1262 | + } | |
1263 | + | |
1264 | + clear_bit(LI_FLAG_COMPLETE, &li.li_flags); | |
1265 | + | |
1266 | + /* If it's not granted then cancel the request. | |
1267 | + * If the lock was WAITING then it will be dropped, | |
1268 | + * if it was converting then it will be reverted to GRANTED, | |
1269 | + * then we will unlock it. | |
1270 | + */ | |
1271 | + lock_status = lkb->lkb_status; | |
1272 | + | |
1273 | + if (lock_status != GDLM_LKSTS_GRANTED) | |
1274 | + flags = DLM_LKF_CANCEL; | |
1275 | + | |
1276 | + status = dlm_unlock(f->fi_ls->ls_lockspace, lkb->lkb_id, flags, &li.li_lksb, &li); | |
1277 | + | |
1278 | + /* Must wait for it to complete as the next lock could be its | |
1279 | + * parent */ | |
1280 | + if (status == 0) | |
1281 | + wait_for_ast(&li); | |
1282 | + | |
1283 | + /* If it was waiting for a conversion, it will | |
1284 | + now be granted so we can unlock it properly */ | |
1285 | + if (lock_status == GDLM_LKSTS_CONVERT) { | |
1286 | + | |
1287 | + clear_bit(LI_FLAG_COMPLETE, &li.li_flags); | |
1288 | + status = dlm_unlock(f->fi_ls->ls_lockspace, lkb->lkb_id, 0, &li.li_lksb, &li); | |
1289 | + | |
1290 | + if (status == 0) | |
1291 | + wait_for_ast(&li); | |
1292 | + } | |
1293 | + /* Unlock succeeded, free the lock_info struct. */ | |
1294 | + if (status == 0) { | |
1295 | + kfree(old_li); | |
1296 | + put_file_info(f); | |
1297 | + } | |
1298 | + } | |
1299 | + | |
1300 | + remove_wait_queue(&li.li_waitq, &wq); | |
1301 | + | |
1302 | + /* If this is the last reference, and the lockspace has been deleted | |
1303 | + then free the struct */ | |
1304 | + if (atomic_dec_and_test(&lsinfo->ls_refcnt) && !lsinfo->ls_lockspace) { | |
1305 | + kfree(lsinfo); | |
1306 | + } | |
1307 | + | |
1308 | + /* Restore signals */ | |
1309 | + sigprocmask(SIG_SETMASK, &tmpsig, NULL); | |
1310 | + recalc_sigpending(); | |
1311 | + | |
1312 | + return 0; | |
1313 | +} | |
1314 | + | |
1315 | +/* | |
1316 | + * ioctl on a lockspace device: check how many | |
1317 | + * outstanding ASTs are queued on this open file. | |
1318 | + */ | |
1319 | +static int dlm_ioctl(struct inode *inode, struct file *file, | |
1320 | + uint command, ulong u) | |
1321 | +{ | |
1322 | + struct file_info *fi = file->private_data; | |
1323 | + int status = -EINVAL; | |
1324 | + int count; | |
1325 | + struct list_head *tmp_list; | |
1326 | + | |
1327 | + switch (command) { | |
1328 | + | |
1329 | + /* Are there any ASTs for us to read? | |
1330 | + * Warning, this returns the number of messages (ASTs) | |
1331 | + * in the queue, NOT the number of bytes to read | |
1332 | + */ | |
1333 | + case FIONREAD: | |
1334 | + count = 0; | |
1335 | + spin_lock(&fi->fi_ast_lock); | |
1336 | + list_for_each(tmp_list, &fi->fi_ast_list) | |
1337 | + count++; | |
1338 | + spin_unlock(&fi->fi_ast_lock); | |
1339 | + status = put_user(count, (int *)u); | |
1340 | + break; | |
1341 | + | |
1342 | + default: | |
1343 | + return -ENOTTY; | |
1344 | + } | |
1345 | + | |
1346 | + return status; | |
1347 | +} | |
1348 | + | |
1349 | +/* | |
1350 | + * ioctls to create/remove lockspaces. | |
1351 | + */ | |
1352 | +static int dlm_ctl_ioctl(struct inode *inode, struct file *file, | |
1353 | + uint command, ulong u) | |
1354 | +{ | |
1355 | + int status = -EINVAL; | |
1356 | + char ls_name[MAX_LS_NAME_LEN]; | |
1357 | + struct user_ls *lsinfo; | |
1358 | + int force = 0; | |
1359 | + | |
1360 | + switch (command) { | |
1361 | + case DLM_CREATE_LOCKSPACE: | |
1362 | + if (!capable(CAP_SYS_ADMIN)) | |
1363 | + return -EPERM; | |
1364 | + | |
1365 | + if (strncpy_from_user(ls_name, (char*)u, MAX_LS_NAME_LEN) < 0) | |
1366 | + return -EFAULT; | |
1367 | + status = register_lockspace(ls_name, &lsinfo); | |
1368 | + | |
1369 | + /* If it succeeded then return the minor number */ | |
1370 | + if (status == 0) | |
1371 | + status = lsinfo->ls_miscinfo.minor; | |
1372 | + break; | |
1373 | + | |
1374 | + case DLM_FORCE_RELEASE_LOCKSPACE: | |
1375 | + force = 2; | |
1376 | + | |
1377 | + case DLM_RELEASE_LOCKSPACE: | |
1378 | + if (!capable(CAP_SYS_ADMIN)) | |
1379 | + return -EPERM; | |
1380 | + | |
1381 | + lsinfo = find_lockspace(u); | |
1382 | + if (!lsinfo) | |
1383 | + return -EINVAL; | |
1384 | + status = unregister_lockspace(lsinfo, force); | |
1385 | + break; | |
1386 | + | |
1387 | + default: | |
1388 | + return -ENOTTY; | |
1389 | + } | |
1390 | + | |
1391 | + return status; | |
1392 | +} | |
1393 | + | |
1394 | +/* Deal with the messy stuff of copying a web of structs | |
1395 | + from kernel space to userspace */ | |
1396 | +static int copy_query_result(struct ast_info *ast) | |
1397 | +{ | |
1398 | + int status = -EFAULT; | |
1399 | + struct dlm_queryinfo qi; | |
1400 | + | |
1401 | + /* Get the pointers to userspace structs */ | |
1402 | + if (copy_from_user(&qi, ast->user_queryinfo, | |
1403 | + sizeof(struct dlm_queryinfo))) | |
1404 | + goto copy_out; | |
1405 | + | |
1406 | + /* TODO: does this deref a user pointer? */ | |
1407 | + if (put_user(ast->queryinfo->gqi_lockcount, | |
1408 | + &ast->user_queryinfo->gqi_lockcount)) | |
1409 | + goto copy_out; | |
1410 | + | |
1411 | + if (qi.gqi_resinfo) { | |
1412 | + if (copy_to_user(qi.gqi_resinfo, ast->queryinfo->gqi_resinfo, | |
1413 | + sizeof(struct dlm_resinfo))) | |
1414 | + goto copy_out; | |
1415 | + } | |
1416 | + | |
1417 | + if (qi.gqi_lockinfo) { | |
1418 | + if (copy_to_user(qi.gqi_lockinfo, ast->queryinfo->gqi_lockinfo, | |
1419 | + sizeof(struct dlm_lockinfo) * ast->queryinfo->gqi_lockcount)) | |
1420 | + goto copy_out; | |
1421 | + } | |
1422 | + | |
1423 | + status = 0; | |
1424 | + | |
1425 | + if (ast->queryinfo->gqi_lockinfo) | |
1426 | + kfree(ast->queryinfo->gqi_lockinfo); | |
1427 | + | |
1428 | + if (ast->queryinfo->gqi_resinfo) | |
1429 | + kfree(ast->queryinfo->gqi_resinfo); | |
1430 | + | |
1431 | + kfree(ast->queryinfo); | |
1432 | + | |
1433 | + copy_out: | |
1434 | + return status; | |
1435 | +} | |
1436 | + | |
1437 | +/* Read call, might block if no ASTs are waiting. | |
1438 | + * It will only ever return one message at a time, regardless | |
1439 | + * of how many are pending. | |
1440 | + */ | |
1441 | +static ssize_t dlm_read(struct file *file, char __user *buffer, size_t count, loff_t *ppos) | |
1442 | +{ | |
1443 | + struct file_info *fi = file->private_data; | |
1444 | + struct ast_info *ast; | |
1445 | + int ret; | |
1446 | + DECLARE_WAITQUEUE(wait, current); | |
1447 | + | |
1448 | + if (count < sizeof(struct dlm_lock_result)) | |
1449 | + return -EINVAL; | |
1450 | + | |
1451 | + spin_lock(&fi->fi_ast_lock); | |
1452 | + if (list_empty(&fi->fi_ast_list)) { | |
1453 | + | |
1454 | + /* No waiting ASTs. | |
1455 | + * Return EOF if the lockspace has been deleted. | |
1456 | + */ | |
1457 | + if (test_bit(1, &fi->fi_ls->ls_flags)) | |
1458 | + return 0; | |
1459 | + | |
1460 | + if (file->f_flags & O_NONBLOCK) { | |
1461 | + spin_unlock(&fi->fi_ast_lock); | |
1462 | + return -EAGAIN; | |
1463 | + } | |
1464 | + | |
1465 | + add_wait_queue(&fi->fi_wait, &wait); | |
1466 | + | |
1467 | + repeat: | |
1468 | + set_current_state(TASK_INTERRUPTIBLE); | |
1469 | + if (list_empty(&fi->fi_ast_list) && | |
1470 | + !signal_pending(current)) { | |
1471 | + | |
1472 | + spin_unlock(&fi->fi_ast_lock); | |
1473 | + schedule(); | |
1474 | + spin_lock(&fi->fi_ast_lock); | |
1475 | + goto repeat; | |
1476 | + } | |
1477 | + | |
1478 | + current->state = TASK_RUNNING; | |
1479 | + remove_wait_queue(&fi->fi_wait, &wait); | |
1480 | + | |
1481 | + if (signal_pending(current)) { | |
1482 | + spin_unlock(&fi->fi_ast_lock); | |
1483 | + return -ERESTARTSYS; | |
1484 | + } | |
1485 | + } | |
1486 | + | |
1487 | + ast = list_entry(fi->fi_ast_list.next, struct ast_info, list); | |
1488 | + list_del(&ast->list); | |
1489 | + spin_unlock(&fi->fi_ast_lock); | |
1490 | + | |
1491 | + ret = sizeof(struct dlm_lock_result); | |
1492 | + if (copy_to_user(buffer, &ast->result, sizeof(struct dlm_lock_result))) | |
1493 | + ret = -EFAULT; | |
1494 | + | |
1495 | + /* If it was a query then copy the result block back here */ | |
1496 | + if (ast->queryinfo) { | |
1497 | + int status = copy_query_result(ast); | |
1498 | + if (status) | |
1499 | + ret = status; | |
1500 | + } | |
1501 | + | |
1502 | + kfree(ast); | |
1503 | + return ret; | |
1504 | +} | |
1505 | + | |
1506 | +static unsigned int dlm_poll(struct file *file, poll_table *wait) | |
1507 | +{ | |
1508 | + struct file_info *fi = file->private_data; | |
1509 | + | |
1510 | + poll_wait(file, &fi->fi_wait, wait); | |
1511 | + | |
1512 | + spin_lock(&fi->fi_ast_lock); | |
1513 | + if (!list_empty(&fi->fi_ast_list)) { | |
1514 | + spin_unlock(&fi->fi_ast_lock); | |
1515 | + return POLLIN | POLLRDNORM; | |
1516 | + } | |
1517 | + | |
1518 | + spin_unlock(&fi->fi_ast_lock); | |
1519 | + return 0; | |
1520 | +} | |
1521 | + | |
1522 | +static int do_user_query(struct file_info *fi, struct dlm_lock_params *kparams) | |
1523 | +{ | |
1524 | + struct lock_info *li; | |
1525 | + int status; | |
1526 | + | |
1527 | + li = kmalloc(sizeof(struct lock_info), GFP_KERNEL); | |
1528 | + if (!li) | |
1529 | + return -ENOMEM; | |
1530 | + | |
1531 | + get_file_info(fi); | |
1532 | + li->li_user_lksb = kparams->lksb; | |
1533 | + li->li_astparam = kparams->astparam; | |
1534 | + li->li_bastaddr = kparams->bastaddr; | |
1535 | + li->li_astaddr = kparams->astaddr; | |
1536 | + li->li_file = fi; | |
1537 | + li->li_flags = 0; | |
1538 | + li->li_cmd = kparams->cmd; | |
1539 | + clear_bit(LI_FLAG_FIRSTLOCK, &li->li_flags); | |
1540 | + | |
1541 | + if (copy_from_user(&li->li_lksb, kparams->lksb, | |
1542 | + sizeof(struct dlm_lksb))) { | |
1543 | + kfree(li); | |
1544 | + return -EFAULT; | |
1545 | + } | |
1546 | + li->li_user_queryinfo = (struct dlm_queryinfo *)li->li_lksb.sb_lvbptr; | |
1547 | + | |
1548 | + /* Allocate query structs */ | |
1549 | + status = -ENOMEM; | |
1550 | + li->li_queryinfo = kmalloc(sizeof(struct dlm_queryinfo), GFP_KERNEL); | |
1551 | + if (!li->li_queryinfo) | |
1552 | + goto out1; | |
1553 | + | |
1554 | + /* Mainly to get the gqi_lockinfo buffer size (gqi_locksize) */ | |
1555 | + if (copy_from_user(li->li_queryinfo, li->li_lksb.sb_lvbptr, | |
1556 | + sizeof(struct dlm_queryinfo))) { | |
1557 | + status = -EFAULT; | |
1558 | + goto out1; | |
1559 | + } | |
1560 | + | |
1561 | + /* Overwrite userspace pointers we just copied with kernel space ones */ | |
1562 | + if (li->li_queryinfo->gqi_resinfo) { | |
1563 | + li->li_queryinfo->gqi_resinfo = kmalloc(sizeof(struct dlm_resinfo), GFP_KERNEL); | |
1564 | + if (!li->li_queryinfo->gqi_resinfo) | |
1565 | + goto out1; | |
1566 | + } | |
1567 | + if (li->li_queryinfo->gqi_lockinfo) { | |
1568 | + li->li_queryinfo->gqi_lockinfo = | |
1569 | + kmalloc(sizeof(struct dlm_lockinfo) * li->li_queryinfo->gqi_locksize, | |
1570 | + GFP_KERNEL); | |
1571 | + if (!li->li_queryinfo->gqi_lockinfo) | |
1572 | + goto out2; | |
1573 | + } | |
1574 | + | |
1575 | + li->li_lksb.sb_lvbptr = (char *)li->li_queryinfo; | |
1576 | + | |
1577 | + return dlm_query(fi->fi_ls->ls_lockspace, &li->li_lksb, | |
1578 | + kparams->flags, /* query */ | |
1579 | + li->li_queryinfo, | |
1580 | + ast_routine, li); | |
1581 | + | |
1582 | + out2: | |
1583 | + kfree(li->li_queryinfo); | |
1584 | + | |
1585 | + out1: | |
1586 | + kfree(li); | |
1587 | + return status; | |
1588 | +} | |
1589 | + | |
1590 | +static int do_user_lock(struct file_info *fi, struct dlm_lock_params *kparams, | |
1591 | + const char *buffer) | |
1592 | +{ | |
1593 | + struct lock_info *li; | |
1594 | + int status; | |
1595 | + char name[DLM_RESNAME_MAXLEN]; | |
1596 | + | |
1597 | + /* | |
1598 | + * Validate things that we need to have correct. | |
1599 | + */ | |
1600 | + if (kparams->namelen > DLM_RESNAME_MAXLEN) | |
1601 | + return -EINVAL; | |
1602 | + | |
1603 | + if (!kparams->astaddr) | |
1604 | + return -EINVAL; | |
1605 | + | |
1606 | + if (!kparams->lksb) | |
1607 | + return -EINVAL; | |
1608 | + | |
1609 | + /* Get the lock name */ | |
1610 | + if (copy_from_user(name, buffer + offsetof(struct dlm_lock_params, name), | |
1611 | + kparams->namelen)) { | |
1612 | + return -EFAULT; | |
1613 | + } | |
1614 | + | |
1615 | + /* For conversions, the lock will already have a lock_info | |
1616 | + block squirrelled away in astparam */ | |
1617 | + if (kparams->flags & DLM_LKF_CONVERT) { | |
10d56c87 | 1618 | + struct dlm_lkb *lkb = dlm_get_lkb(fi->fi_ls->ls_lockspace, kparams->lkid); |
4bf12011 | 1619 | + if (!lkb) { |
1620 | + return -EINVAL; | |
1621 | + } | |
1622 | + li = (struct lock_info *)lkb->lkb_astparam; | |
1623 | + | |
1624 | + /* Only override these if they are provided */ | |
1625 | + if (li->li_user_lksb) | |
1626 | + li->li_user_lksb = kparams->lksb; | |
1627 | + if (li->li_astparam) | |
1628 | + li->li_astparam = kparams->astparam; | |
1629 | + if (li->li_bastaddr) | |
1630 | + li->li_bastaddr = kparams->bastaddr; | |
1631 | + if (li->li_bastaddr) | |
1632 | + li->li_astaddr = kparams->astaddr; | |
1633 | + li->li_flags = 0; | |
1634 | + } | |
1635 | + else { | |
1636 | + li = kmalloc(sizeof(struct lock_info), GFP_KERNEL); | |
1637 | + if (!li) | |
1638 | + return -ENOMEM; | |
1639 | + | |
1640 | + li->li_user_lksb = kparams->lksb; | |
1641 | + li->li_astparam = kparams->astparam; | |
1642 | + li->li_bastaddr = kparams->bastaddr; | |
1643 | + li->li_astaddr = kparams->astaddr; | |
1644 | + li->li_file = fi; | |
1645 | + li->li_flags = 0; | |
1646 | + li->li_cmd = kparams->cmd; | |
1647 | + li->li_queryinfo = NULL; | |
1648 | + | |
1649 | + /* semaphore to allow us to complete our work before | |
1650 | + the AST routine runs. In fact we only need (and use) this | |
1651 | + when the initial lock fails */ | |
1652 | + init_MUTEX_LOCKED(&li->li_firstlock); | |
1653 | + set_bit(LI_FLAG_FIRSTLOCK, &li->li_flags); | |
1654 | + | |
1655 | + get_file_info(fi); | |
1656 | + } | |
1657 | + | |
1658 | + /* Copy the user's LKSB into kernel space, | |
1659 | + needed for conversions & value block operations */ | |
1660 | + if (kparams->lksb && copy_from_user(&li->li_lksb, kparams->lksb, | |
1661 | + sizeof(struct dlm_lksb))) | |
1662 | + return -EFAULT; | |
1663 | + | |
1664 | + /* Lock it ... */ | |
1665 | + status = dlm_lock(fi->fi_ls->ls_lockspace, kparams->mode, &li->li_lksb, | |
1666 | + kparams->flags, name, kparams->namelen, | |
1667 | + kparams->parent, | |
1668 | + ast_routine, | |
1669 | + li, | |
1670 | + li->li_bastaddr ? bast_routine : NULL, | |
1671 | + kparams->range.ra_end ? &kparams->range : NULL); | |
1672 | + | |
1673 | + /* If it succeeded (this far) with a new lock then keep track of | |
1674 | + it on the file's lkb list */ | |
1675 | + if (!status && !(kparams->flags & DLM_LKF_CONVERT)) { | |
10d56c87 | 1676 | + struct dlm_lkb *lkb; |
4bf12011 | 1677 | + lkb = dlm_get_lkb(fi->fi_ls->ls_lockspace, li->li_lksb.sb_lkid); |
1678 | + | |
1679 | + if (lkb) { | |
1680 | + spin_lock(&fi->fi_lkb_lock); | |
1681 | + list_add(&lkb->lkb_ownerqueue, | |
1682 | + &fi->fi_lkb_list); | |
1683 | + spin_unlock(&fi->fi_lkb_lock); | |
1684 | + } | |
1685 | + else { | |
1686 | + log_print("failed to get lkb for new lock"); | |
1687 | + } | |
1688 | + up(&li->li_firstlock); | |
1689 | + } | |
1690 | + | |
1691 | + return status; | |
1692 | +} | |
1693 | + | |
1694 | +static int do_user_unlock(struct file_info *fi, struct dlm_lock_params *kparams) | |
1695 | +{ | |
1696 | + struct lock_info *li; | |
10d56c87 | 1697 | + struct dlm_lkb *lkb; |
4bf12011 | 1698 | + int status; |
1699 | + | |
1700 | + lkb = dlm_get_lkb(fi->fi_ls->ls_lockspace, kparams->lkid); | |
1701 | + if (!lkb) { | |
1702 | + return -EINVAL; | |
1703 | + } | |
1704 | + | |
1705 | + li = (struct lock_info *)lkb->lkb_astparam; | |
1706 | + | |
1707 | + li->li_user_lksb = kparams->lksb; | |
1708 | + li->li_astparam = kparams->astparam; | |
1709 | + li->li_cmd = kparams->cmd; | |
1710 | + | |
1711 | + /* Take the lkb off the owner list now, because the lkb may not | |
1712 | + * exist after dlm_unlock() */ | |
1713 | + spin_lock(&fi->fi_lkb_lock); | |
1714 | + list_del(&lkb->lkb_ownerqueue); | |
1715 | + spin_unlock(&fi->fi_lkb_lock); | |
1716 | + | |
1717 | + /* Use existing lksb & astparams */ | |
1718 | + status = dlm_unlock(fi->fi_ls->ls_lockspace, | |
1719 | + kparams->lkid, | |
1720 | + kparams->flags, NULL, NULL); | |
1721 | + | |
1722 | + return status; | |
1723 | +} | |
1724 | + | |
1725 | +/* Write call, submit a locking request */ | |
1726 | +static ssize_t dlm_write(struct file *file, const char __user *buffer, | |
1727 | + size_t count, loff_t *ppos) | |
1728 | +{ | |
1729 | + struct file_info *fi = file->private_data; | |
1730 | + struct dlm_lock_params kparams; | |
1731 | + sigset_t tmpsig; | |
1732 | + sigset_t allsigs; | |
1733 | + int status; | |
1734 | + | |
1735 | + if (count < sizeof(kparams)) | |
1736 | + return -EINVAL; | |
1737 | + | |
1738 | + /* Has the lockspace been deleted */ | |
1739 | + if (test_bit(1, &fi->fi_ls->ls_flags)) | |
1740 | + return -ENOENT; | |
1741 | + | |
1742 | + /* Get the command info */ | |
1743 | + if (copy_from_user(&kparams, buffer, sizeof(kparams))) | |
1744 | + return -EFAULT; | |
1745 | + | |
1746 | + if (check_version(&kparams)) | |
1747 | + return -EINVAL; | |
1748 | + | |
1749 | + /* Block signals while we are doing this */ | |
1750 | + sigfillset(&allsigs); | |
1751 | + sigprocmask(SIG_BLOCK, &allsigs, &tmpsig); | |
1752 | + | |
1753 | + switch (kparams.cmd) | |
1754 | + { | |
1755 | + case DLM_USER_LOCK: | |
1756 | + status = do_user_lock(fi, &kparams, buffer); | |
1757 | + break; | |
1758 | + | |
1759 | + case DLM_USER_UNLOCK: | |
1760 | + status = do_user_unlock(fi, &kparams); | |
1761 | + break; | |
1762 | + | |
1763 | + case DLM_USER_QUERY: | |
1764 | + status = do_user_query(fi, &kparams); | |
1765 | + break; | |
1766 | + | |
1767 | + default: | |
1768 | + status = -EINVAL; | |
1769 | + break; | |
1770 | + } | |
1771 | + /* Restore signals */ | |
1772 | + sigprocmask(SIG_SETMASK, &tmpsig, NULL); | |
1773 | + recalc_sigpending(); | |
1774 | + | |
1775 | + if (status == 0) | |
1776 | + return count; | |
1777 | + else | |
1778 | + return status; | |
1779 | +} | |
1780 | + | |
1781 | +void dlm_device_free_devices(void) | |
1782 | +{ | |
1783 | + struct user_ls *tmp; | |
1784 | + struct user_ls *lsinfo; | |
1785 | + | |
1786 | + list_for_each_entry_safe(lsinfo, tmp, &user_ls_list, ls_list) { | |
1787 | + misc_deregister(&lsinfo->ls_miscinfo); | |
1788 | + | |
1789 | + /* Unlink and flag the entry, but don't delete the lsinfo struct | |
1790 | + until all the users have closed their devices */ | |
1791 | + list_del(&lsinfo->ls_list); | |
1792 | + kfree(lsinfo->ls_miscinfo.name); | |
1793 | + set_bit(1, &lsinfo->ls_flags); /* LS has been deleted */ | |
1794 | + } | |
1795 | +} | |
1796 | + | |
1797 | +static struct file_operations _dlm_fops = { | |
1798 | + .open = dlm_open, | |
1799 | + .release = dlm_close, | |
1800 | + .ioctl = dlm_ioctl, | |
1801 | + .read = dlm_read, | |
1802 | + .write = dlm_write, | |
1803 | + .poll = dlm_poll, | |
1804 | + .owner = THIS_MODULE, | |
1805 | +}; | |
1806 | + | |
1807 | +static struct file_operations _dlm_ctl_fops = { | |
1808 | + .open = dlm_ctl_open, | |
1809 | + .release = dlm_ctl_close, | |
1810 | + .ioctl = dlm_ctl_ioctl, | |
1811 | + .owner = THIS_MODULE, | |
1812 | +}; | |
1813 | + | |
1814 | +/* | |
1815 | + * Create control device | |
1816 | + */ | |
1817 | +int dlm_device_init(void) | |
1818 | +{ | |
1819 | + int r; | |
1820 | + | |
1821 | + INIT_LIST_HEAD(&user_ls_list); | |
1822 | + | |
1823 | + ctl_device.name = "dlm-control"; | |
1824 | + ctl_device.fops = &_dlm_ctl_fops; | |
1825 | + ctl_device.minor = MISC_DYNAMIC_MINOR; | |
1826 | + | |
1827 | + r = misc_register(&ctl_device); | |
1828 | + if (r) { | |
1829 | + log_print("misc_register failed for DLM control device"); | |
1830 | + return r; | |
1831 | + } | |
1832 | + | |
1833 | + return 0; | |
1834 | +} | |
1835 | + | |
1836 | +void dlm_device_exit(void) | |
1837 | +{ | |
1838 | + misc_deregister(&ctl_device); | |
1839 | +} | |
1840 | + | |
1841 | +/* | |
1842 | + * Overrides for Emacs so that we follow Linus's tabbing style. | |
1843 | + * Emacs will notice this stuff at the end of the file and automatically | |
1844 | + * adjust the settings for this buffer only. This must remain at the end | |
1845 | + * of the file. | |
1846 | + * --------------------------------------------------------------------------- | |
1847 | + * Local variables: | |
1848 | + * c-file-style: "linux" | |
1849 | + * End: | |
1850 | + */ | |
1851 | diff -urN linux-orig/cluster/dlm/device.h linux-patched/cluster/dlm/device.h | |
1852 | --- linux-orig/cluster/dlm/device.h 1970-01-01 07:30:00.000000000 +0730 | |
10d56c87 | 1853 | +++ linux-patched/cluster/dlm/device.h 2004-07-13 18:57:22.000000000 +0800 |
4bf12011 | 1854 | @@ -0,0 +1,19 @@ |
1855 | +/****************************************************************************** | |
1856 | +******************************************************************************* | |
1857 | +** | |
1858 | +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | |
1859 | +** Copyright (C) 2004 Red Hat, Inc. All rights reserved. | |
1860 | +** | |
1861 | +** This copyrighted material is made available to anyone wishing to use, | |
1862 | +** modify, copy, or redistribute it subject to the terms and conditions | |
1863 | +** of the GNU General Public License v.2. | |
1864 | +** | |
1865 | +******************************************************************************* | |
1866 | +******************************************************************************/ | |
1867 | + | |
1868 | +#ifndef __DEVICE_DOT_H__ | |
1869 | +#define __DEVICE_DOT_H__ | |
1870 | + | |
1871 | +extern void dlm_device_free_devices(void); | |
1872 | + | |
1873 | +#endif /* __DEVICE_DOT_H__ */ | |
1874 | diff -urN linux-orig/cluster/dlm/dir.c linux-patched/cluster/dlm/dir.c | |
1875 | --- linux-orig/cluster/dlm/dir.c 1970-01-01 07:30:00.000000000 +0730 | |
10d56c87 AM |
1876 | +++ linux-patched/cluster/dlm/dir.c 2004-07-13 18:57:22.000000000 +0800 |
1877 | @@ -0,0 +1,427 @@ | |
4bf12011 | 1878 | +/****************************************************************************** |
1879 | +******************************************************************************* | |
1880 | +** | |
1881 | +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | |
1882 | +** Copyright (C) 2004 Red Hat, Inc. All rights reserved. | |
1883 | +** | |
1884 | +** This copyrighted material is made available to anyone wishing to use, | |
1885 | +** modify, copy, or redistribute it subject to the terms and conditions | |
1886 | +** of the GNU General Public License v.2. | |
1887 | +** | |
1888 | +******************************************************************************* | |
1889 | +******************************************************************************/ | |
1890 | + | |
1891 | +#include "dlm_internal.h" | |
1892 | +#include "nodes.h" | |
1893 | +#include "lockspace.h" | |
1894 | +#include "lowcomms.h" | |
1895 | +#include "reccomms.h" | |
1896 | +#include "rsb.h" | |
1897 | +#include "config.h" | |
1898 | +#include "memory.h" | |
1899 | +#include "recover.h" | |
1900 | +#include "util.h" | |
1901 | + | |
10d56c87 AM |
1902 | +struct resmov { |
1903 | + uint32_t rm_nodeid; | |
1904 | + uint16_t rm_length; | |
1905 | + uint16_t rm_pad; | |
1906 | +}; | |
1907 | + | |
1908 | + | |
4bf12011 | 1909 | +/* |
1910 | + * We use the upper 16 bits of the hash value to select the directory node. | |
1911 | + * Low bits are used for distribution of rsb's among hash buckets on each node. | |
1912 | + * | |
1913 | + * From the hash value, we are interested in arriving at a final value between | |
1914 | + * zero and the number of nodes minus one (num_nodes - 1). | |
1915 | + * | |
1916 | + * To accomplish this scaling, we take the nearest power of two larger than | |
1917 | + * num_nodes and subtract one to create a bit mask. The mask is applied to the | |
1918 | + * hash, reducing the range to nearer the final range. | |
1919 | + * | |
1920 | + * To give the exact range wanted (0 to num_nodes-1), we apply a modulus of | |
1921 | + * num_nodes to the previously masked hash value. | |
1922 | + * | |
1923 | + * This value in the desired range is used as an offset into the sorted list of | |
1924 | + * nodeid's to give the particular nodeid of the directory node. | |
1925 | + */ | |
1926 | + | |
10d56c87 | 1927 | +uint32_t name_to_directory_nodeid(struct dlm_ls *ls, char *name, int length) |
4bf12011 | 1928 | +{ |
1929 | + struct list_head *tmp; | |
10d56c87 | 1930 | + struct dlm_csb *csb = NULL; |
4bf12011 | 1931 | + uint32_t hash, node, n = 0, nodeid; |
1932 | + | |
1933 | + if (ls->ls_num_nodes == 1) { | |
1934 | + nodeid = our_nodeid(); | |
1935 | + goto out; | |
1936 | + } | |
1937 | + | |
10d56c87 | 1938 | + hash = dlm_hash(name, length); |
4bf12011 | 1939 | + node = (hash >> 16) & ls->ls_nodes_mask; |
1940 | + node %= ls->ls_num_nodes; | |
1941 | + | |
1942 | + list_for_each(tmp, &ls->ls_nodes) { | |
1943 | + if (n++ != node) | |
1944 | + continue; | |
10d56c87 | 1945 | + csb = list_entry(tmp, struct dlm_csb, list); |
4bf12011 | 1946 | + break; |
1947 | + } | |
1948 | + | |
10d56c87 | 1949 | + DLM_ASSERT(csb, printk("num_nodes=%u n=%u node=%u mask=%x\n", |
4bf12011 | 1950 | + ls->ls_num_nodes, n, node, ls->ls_nodes_mask);); |
10d56c87 | 1951 | + nodeid = csb->node->nodeid; |
4bf12011 | 1952 | + |
1953 | + out: | |
1954 | + return nodeid; | |
1955 | +} | |
1956 | + | |
10d56c87 | 1957 | +uint32_t get_directory_nodeid(struct dlm_rsb *rsb) |
4bf12011 | 1958 | +{ |
1959 | + return name_to_directory_nodeid(rsb->res_ls, rsb->res_name, | |
1960 | + rsb->res_length); | |
1961 | +} | |
1962 | + | |
10d56c87 | 1963 | +static inline uint32_t dir_hash(struct dlm_ls *ls, char *name, int len) |
4bf12011 | 1964 | +{ |
1965 | + uint32_t val; | |
1966 | + | |
10d56c87 AM |
1967 | + val = dlm_hash(name, len); |
1968 | + val &= (ls->ls_dirtbl_size - 1); | |
4bf12011 | 1969 | + |
1970 | + return val; | |
1971 | +} | |
1972 | + | |
10d56c87 | 1973 | +static void add_resdata_to_hash(struct dlm_ls *ls, struct dlm_direntry *de) |
4bf12011 | 1974 | +{ |
10d56c87 | 1975 | + uint32_t bucket; |
4bf12011 | 1976 | + |
10d56c87 AM |
1977 | + bucket = dir_hash(ls, de->name, de->length); |
1978 | + list_add_tail(&de->list, &ls->ls_dirtbl[bucket].list); | |
4bf12011 | 1979 | +} |
1980 | + | |
10d56c87 AM |
1981 | +static struct dlm_direntry *search_bucket(struct dlm_ls *ls, char *name, |
1982 | + int namelen, uint32_t bucket) | |
4bf12011 | 1983 | +{ |
10d56c87 | 1984 | + struct dlm_direntry *de; |
4bf12011 | 1985 | + |
10d56c87 AM |
1986 | + list_for_each_entry(de, &ls->ls_dirtbl[bucket].list, list) { |
1987 | + if (de->length == namelen && !memcmp(name, de->name, namelen)) | |
4bf12011 | 1988 | + goto out; |
1989 | + } | |
10d56c87 | 1990 | + de = NULL; |
4bf12011 | 1991 | + out: |
10d56c87 | 1992 | + return de; |
4bf12011 | 1993 | +} |
1994 | + | |
10d56c87 | 1995 | +void remove_resdata(struct dlm_ls *ls, uint32_t nodeid, char *name, int namelen) |
4bf12011 | 1996 | +{ |
10d56c87 | 1997 | + struct dlm_direntry *de; |
4bf12011 | 1998 | + uint32_t bucket; |
1999 | + | |
10d56c87 | 2000 | + bucket = dir_hash(ls, name, namelen); |
4bf12011 | 2001 | + |
10d56c87 | 2002 | + write_lock(&ls->ls_dirtbl[bucket].lock); |
4bf12011 | 2003 | + |
10d56c87 | 2004 | + de = search_bucket(ls, name, namelen, bucket); |
4bf12011 | 2005 | + |
10d56c87 AM |
2006 | + if (!de) { |
2007 | + log_debug(ls, "remove from %u none", nodeid); | |
4bf12011 | 2008 | + goto out; |
2009 | + } | |
2010 | + | |
10d56c87 AM |
2011 | + if (de->master_nodeid != nodeid) { |
2012 | + log_debug(ls, "remove from %u ID %u", | |
2013 | + nodeid, de->master_nodeid); | |
4bf12011 | 2014 | + goto out; |
2015 | + } | |
2016 | + | |
10d56c87 AM |
2017 | + list_del(&de->list); |
2018 | + free_resdata(de); | |
2019 | + out: | |
2020 | + write_unlock(&ls->ls_dirtbl[bucket].lock); | |
4bf12011 | 2021 | +} |
2022 | + | |
10d56c87 | 2023 | +void dlm_dir_clear(struct dlm_ls *ls) |
4bf12011 | 2024 | +{ |
2025 | + struct list_head *head; | |
10d56c87 | 2026 | + struct dlm_direntry *de; |
4bf12011 | 2027 | + int i; |
2028 | + | |
10d56c87 AM |
2029 | + for (i = 0; i < ls->ls_dirtbl_size; i++) { |
2030 | + head = &ls->ls_dirtbl[i].list; | |
4bf12011 | 2031 | + while (!list_empty(head)) { |
10d56c87 AM |
2032 | + de = list_entry(head->next, struct dlm_direntry, list); |
2033 | + list_del(&de->list); | |
2034 | + free_resdata(de); | |
4bf12011 | 2035 | + } |
2036 | + } | |
2037 | +} | |
2038 | + | |
10d56c87 | 2039 | +static void resmov_in(struct resmov *rm, char *buf) |
4bf12011 | 2040 | +{ |
10d56c87 | 2041 | + struct resmov tmp; |
4bf12011 | 2042 | + |
10d56c87 | 2043 | + memcpy(&tmp, buf, sizeof(struct resmov)); |
4bf12011 | 2044 | + |
2045 | + rm->rm_nodeid = be32_to_cpu(tmp.rm_nodeid); | |
2046 | + rm->rm_length = be16_to_cpu(tmp.rm_length); | |
2047 | +} | |
2048 | + | |
10d56c87 | 2049 | +int dlm_dir_rebuild_local(struct dlm_ls *ls) |
4bf12011 | 2050 | +{ |
10d56c87 AM |
2051 | + struct dlm_csb *csb; |
2052 | + struct dlm_direntry *de; | |
2053 | + struct dlm_rcom *rc; | |
2054 | + struct resmov mov, last_mov; | |
4bf12011 | 2055 | + char *b, *last_name; |
2056 | + int error = -ENOMEM, count = 0; | |
2057 | + | |
2058 | + log_all(ls, "rebuild resource directory"); | |
2059 | + | |
10d56c87 | 2060 | + dlm_dir_clear(ls); |
4bf12011 | 2061 | + |
2062 | + rc = allocate_rcom_buffer(ls); | |
2063 | + if (!rc) | |
2064 | + goto out; | |
2065 | + | |
2066 | + last_name = (char *) kmalloc(DLM_RESNAME_MAXLEN, GFP_KERNEL); | |
2067 | + if (!last_name) | |
2068 | + goto free_rc; | |
2069 | + | |
10d56c87 | 2070 | + list_for_each_entry(csb, &ls->ls_nodes, list) { |
4bf12011 | 2071 | + last_mov.rm_length = 0; |
2072 | + for (;;) { | |
10d56c87 | 2073 | + error = dlm_recovery_stopped(ls); |
4bf12011 | 2074 | + if (error) |
2075 | + goto free_last; | |
2076 | + | |
2077 | + memcpy(rc->rc_buf, last_name, last_mov.rm_length); | |
2078 | + rc->rc_datalen = last_mov.rm_length; | |
2079 | + | |
10d56c87 | 2080 | + error = rcom_send_message(ls, csb->node->nodeid, |
4bf12011 | 2081 | + RECCOMM_RECOVERNAMES, rc, 1); |
2082 | + if (error) | |
2083 | + goto free_last; | |
2084 | + | |
2085 | + schedule(); | |
2086 | + | |
2087 | + /* | |
2088 | + * pick each res out of buffer | |
2089 | + */ | |
2090 | + | |
2091 | + b = rc->rc_buf; | |
2092 | + | |
2093 | + for (;;) { | |
10d56c87 AM |
2094 | + resmov_in(&mov, b); |
2095 | + b += sizeof(struct resmov); | |
4bf12011 | 2096 | + |
2097 | + /* Length of 0 with a non-zero nodeid marks the | |
2098 | + * end of the list */ | |
2099 | + if (!mov.rm_length && mov.rm_nodeid) | |
2100 | + goto done; | |
2101 | + | |
2102 | + /* This is just the end of the block */ | |
2103 | + if (!mov.rm_length) | |
2104 | + break; | |
2105 | + | |
2106 | + error = -ENOMEM; | |
10d56c87 AM |
2107 | + de = allocate_resdata(ls, mov.rm_length); |
2108 | + if (!de) | |
4bf12011 | 2109 | + goto free_last; |
2110 | + | |
10d56c87 AM |
2111 | + de->master_nodeid = mov.rm_nodeid; |
2112 | + de->length = mov.rm_length; | |
4bf12011 | 2113 | + |
10d56c87 | 2114 | + memcpy(de->name, b, mov.rm_length); |
4bf12011 | 2115 | + b += mov.rm_length; |
2116 | + | |
10d56c87 | 2117 | + add_resdata_to_hash(ls, de); |
4bf12011 | 2118 | + count++; |
2119 | + | |
2120 | + last_mov = mov; | |
2121 | + memset(last_name, 0, DLM_RESNAME_MAXLEN); | |
10d56c87 | 2122 | + memcpy(last_name, de->name, de->length); |
4bf12011 | 2123 | + } |
2124 | + } | |
2125 | + done: | |
2126 | + ; | |
2127 | + } | |
2128 | + | |
2129 | + set_bit(LSFL_RESDIR_VALID, &ls->ls_flags); | |
2130 | + error = 0; | |
2131 | + | |
2132 | + log_all(ls, "rebuilt %d resources", count); | |
2133 | + | |
2134 | + free_last: | |
2135 | + kfree(last_name); | |
2136 | + | |
2137 | + free_rc: | |
2138 | + free_rcom_buffer(rc); | |
2139 | + | |
2140 | + out: | |
2141 | + return error; | |
2142 | +} | |
2143 | + | |
2144 | +/* | |
10d56c87 | 2145 | + * The reply end of dlm_dir_rebuild_local/RECOVERNAMES. Collect and send as |
4bf12011 | 2146 | + * many resource names as can fit in the buffer. |
2147 | + */ | |
2148 | + | |
10d56c87 AM |
2149 | +int dlm_dir_rebuild_send(struct dlm_ls *ls, char *inbuf, int inlen, |
2150 | + char *outbuf, int outlen, uint32_t nodeid) | |
4bf12011 | 2151 | +{ |
2152 | + struct list_head *list; | |
10d56c87 | 2153 | + struct dlm_rsb *start_rsb = NULL, *rsb; |
4bf12011 | 2154 | + int offset = 0, start_namelen, error; |
2155 | + char *start_name; | |
10d56c87 | 2156 | + struct resmov tmp; |
4bf12011 | 2157 | + uint32_t dir_nodeid; |
2158 | + | |
2159 | + /* | |
2160 | + * Find the rsb where we left off (or start again) | |
2161 | + */ | |
2162 | + | |
2163 | + start_namelen = inlen; | |
2164 | + start_name = inbuf; | |
2165 | + | |
2166 | + if (start_namelen > 1) { | |
2167 | + error = find_or_create_rsb(ls, NULL, start_name, | |
2168 | + start_namelen, 0, &start_rsb); | |
10d56c87 | 2169 | + DLM_ASSERT(!error && start_rsb, printk("error %d\n", error);); |
4bf12011 | 2170 | + release_rsb(start_rsb); |
2171 | + } | |
2172 | + | |
2173 | + /* | |
2174 | + * Send rsb names for rsb's we're master of and whose directory node | |
2175 | + * matches the requesting node. | |
2176 | + */ | |
2177 | + | |
2178 | + down_read(&ls->ls_rec_rsblist); | |
2179 | + if (start_rsb) | |
2180 | + list = start_rsb->res_rootlist.next; | |
2181 | + else | |
2182 | + list = ls->ls_rootres.next; | |
2183 | + | |
2184 | + for (offset = 0; list != &ls->ls_rootres; list = list->next) { | |
10d56c87 | 2185 | + rsb = list_entry(list, struct dlm_rsb, res_rootlist); |
4bf12011 | 2186 | + if (rsb->res_nodeid) |
2187 | + continue; | |
2188 | + | |
2189 | + dir_nodeid = get_directory_nodeid(rsb); | |
2190 | + if (dir_nodeid != nodeid) | |
2191 | + continue; | |
2192 | + | |
10d56c87 | 2193 | + if (offset + sizeof(struct resmov)*2 + rsb->res_length > outlen) { |
4bf12011 | 2194 | + /* Write end-of-block record */ |
10d56c87 AM |
2195 | + memset(&tmp, 0, sizeof(struct resmov)); |
2196 | + memcpy(outbuf + offset, &tmp, sizeof(struct resmov)); | |
2197 | + offset += sizeof(struct resmov); | |
4bf12011 | 2198 | + goto out; |
2199 | + } | |
2200 | + | |
10d56c87 | 2201 | + memset(&tmp, 0, sizeof(struct resmov)); |
4bf12011 | 2202 | + tmp.rm_nodeid = cpu_to_be32(our_nodeid()); |
2203 | + tmp.rm_length = cpu_to_be16(rsb->res_length); | |
2204 | + | |
10d56c87 AM |
2205 | + memcpy(outbuf + offset, &tmp, sizeof(struct resmov)); |
2206 | + offset += sizeof(struct resmov); | |
4bf12011 | 2207 | + |
2208 | + memcpy(outbuf + offset, rsb->res_name, rsb->res_length); | |
2209 | + offset += rsb->res_length; | |
2210 | + } | |
2211 | + | |
2212 | + /* | |
2213 | + * If we've reached the end of the list (and there's room) write a | |
2214 | + * terminating record. | |
2215 | + */ | |
2216 | + | |
2217 | + if ((list == &ls->ls_rootres) && | |
10d56c87 | 2218 | + (offset + sizeof(struct resmov) <= outlen)) { |
4bf12011 | 2219 | + |
10d56c87 | 2220 | + memset(&tmp, 0, sizeof(struct resmov)); |
4bf12011 | 2221 | + /* This only needs to be non-zero */ |
2222 | + tmp.rm_nodeid = cpu_to_be32(1); | |
2223 | + /* and this must be zero */ | |
2224 | + tmp.rm_length = 0; | |
10d56c87 AM |
2225 | + memcpy(outbuf + offset, &tmp, sizeof(struct resmov)); |
2226 | + offset += sizeof(struct resmov); | |
4bf12011 | 2227 | + } |
2228 | + | |
2229 | + out: | |
2230 | + up_read(&ls->ls_rec_rsblist); | |
2231 | + return offset; | |
2232 | +} | |
2233 | + | |
10d56c87 AM |
2234 | +static int get_resdata(struct dlm_ls *ls, uint32_t nodeid, char *name, |
2235 | + int namelen, uint32_t *r_nodeid, int recovery) | |
4bf12011 | 2236 | +{ |
10d56c87 | 2237 | + struct dlm_direntry *de, *tmp; |
4bf12011 | 2238 | + uint32_t bucket; |
2239 | + | |
10d56c87 | 2240 | + bucket = dir_hash(ls, name, namelen); |
4bf12011 | 2241 | + |
10d56c87 AM |
2242 | + write_lock(&ls->ls_dirtbl[bucket].lock); |
2243 | + de = search_bucket(ls, name, namelen, bucket); | |
2244 | + if (de) { | |
2245 | + *r_nodeid = de->master_nodeid; | |
2246 | + write_unlock(&ls->ls_dirtbl[bucket].lock); | |
4bf12011 | 2247 | + goto out; |
10d56c87 | 2248 | + } |
4bf12011 | 2249 | + |
10d56c87 | 2250 | + write_unlock(&ls->ls_dirtbl[bucket].lock); |
4bf12011 | 2251 | + |
10d56c87 AM |
2252 | + de = allocate_resdata(ls, namelen); |
2253 | + if (!de) | |
2254 | + return -ENOMEM; | |
4bf12011 | 2255 | + |
10d56c87 AM |
2256 | + de->master_nodeid = nodeid; |
2257 | + de->length = namelen; | |
2258 | + memcpy(de->name, name, namelen); | |
4bf12011 | 2259 | + |
10d56c87 AM |
2260 | + write_lock(&ls->ls_dirtbl[bucket].lock); |
2261 | + tmp = search_bucket(ls, name, namelen, bucket); | |
4bf12011 | 2262 | + if (tmp) { |
10d56c87 AM |
2263 | + free_resdata(de); |
2264 | + de = tmp; | |
2265 | + } else { | |
2266 | + list_add_tail(&de->list, &ls->ls_dirtbl[bucket].list); | |
4bf12011 | 2267 | + } |
10d56c87 AM |
2268 | + *r_nodeid = de->master_nodeid; |
2269 | + write_unlock(&ls->ls_dirtbl[bucket].lock); | |
4bf12011 | 2270 | + |
10d56c87 AM |
2271 | + out: |
2272 | + return 0; | |
2273 | +} | |
4bf12011 | 2274 | + |
10d56c87 AM |
2275 | +int dlm_dir_lookup(struct dlm_ls *ls, uint32_t nodeid, char *name, int namelen, |
2276 | + uint32_t *r_nodeid) | |
2277 | +{ | |
2278 | + return get_resdata(ls, nodeid, name, namelen, r_nodeid, 0); | |
2279 | +} | |
4bf12011 | 2280 | + |
10d56c87 AM |
2281 | +int dlm_dir_lookup_recovery(struct dlm_ls *ls, uint32_t nodeid, char *name, |
2282 | + int namelen, uint32_t *r_nodeid) | |
2283 | +{ | |
2284 | + return get_resdata(ls, nodeid, name, namelen, r_nodeid, 1); | |
4bf12011 | 2285 | +} |
2286 | + | |
2287 | +/* | |
2288 | + * The node with lowest id queries all nodes to determine when all are done. | |
2289 | + * All other nodes query the low nodeid for this. | |
2290 | + */ | |
2291 | + | |
10d56c87 | 2292 | +int dlm_dir_rebuild_wait(struct dlm_ls *ls) |
4bf12011 | 2293 | +{ |
2294 | + int error; | |
2295 | + | |
2296 | + if (ls->ls_low_nodeid == our_nodeid()) { | |
10d56c87 | 2297 | + error = dlm_wait_status_all(ls, RESDIR_VALID); |
4bf12011 | 2298 | + if (!error) |
2299 | + set_bit(LSFL_ALL_RESDIR_VALID, &ls->ls_flags); | |
2300 | + } else | |
10d56c87 | 2301 | + error = dlm_wait_status_low(ls, RESDIR_ALL_VALID); |
4bf12011 | 2302 | + |
2303 | + return error; | |
2304 | +} | |
2305 | diff -urN linux-orig/cluster/dlm/dir.h linux-patched/cluster/dlm/dir.h | |
2306 | --- linux-orig/cluster/dlm/dir.h 1970-01-01 07:30:00.000000000 +0730 | |
10d56c87 AM |
2307 | +++ linux-patched/cluster/dlm/dir.h 2004-07-13 18:57:22.000000000 +0800 |
2308 | @@ -0,0 +1,31 @@ | |
4bf12011 | 2309 | +/****************************************************************************** |
2310 | +******************************************************************************* | |
2311 | +** | |
2312 | +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | |
2313 | +** Copyright (C) 2004 Red Hat, Inc. All rights reserved. | |
2314 | +** | |
2315 | +** This copyrighted material is made available to anyone wishing to use, | |
2316 | +** modify, copy, or redistribute it subject to the terms and conditions | |
2317 | +** of the GNU General Public License v.2. | |
2318 | +** | |
2319 | +******************************************************************************* | |
2320 | +******************************************************************************/ | |
2321 | + | |
2322 | +#ifndef __DIR_DOT_H__ | |
2323 | +#define __DIR_DOT_H__ | |
2324 | + | |
10d56c87 AM |
2325 | +int dlm_dir_lookup(struct dlm_ls *ls, uint32_t nodeid, char *name, int namelen, |
2326 | + uint32_t *r_nodeid); | |
2327 | +int dlm_dir_lookup_recovery(struct dlm_ls *ls, uint32_t nodeid, char *name, | |
2328 | + int namelen, uint32_t *r_nodeid); | |
2329 | +uint32_t name_to_directory_nodeid(struct dlm_ls *ls, char *name, int length); | |
2330 | +uint32_t get_directory_nodeid(struct dlm_rsb *rsb); | |
2331 | +void remove_resdata(struct dlm_ls *ls, uint32_t nodeid, char *name, int namelen); | |
2332 | +int dlm_dir_rebuild_local(struct dlm_ls *ls); | |
2333 | +int dlm_dir_rebuild_send(struct dlm_ls *ls, char *inbuf, int inlen, | |
2334 | + char *outbuf, int outlen, uint32_t nodeid); | |
2335 | +int dlm_dir_rebuild_wait(struct dlm_ls * ls); | |
2336 | +void dlm_dir_clear(struct dlm_ls *ls); | |
2337 | +void dlm_dir_dump(struct dlm_ls *ls); | |
4bf12011 | 2338 | + |
2339 | +#endif /* __DIR_DOT_H__ */ | |
2340 | diff -urN linux-orig/cluster/dlm/dlm_internal.h linux-patched/cluster/dlm/dlm_internal.h | |
2341 | --- linux-orig/cluster/dlm/dlm_internal.h 1970-01-01 07:30:00.000000000 +0730 | |
10d56c87 AM |
2342 | +++ linux-patched/cluster/dlm/dlm_internal.h 2004-07-13 18:57:22.000000000 +0800 |
2343 | @@ -0,0 +1,594 @@ | |
4bf12011 | 2344 | +/****************************************************************************** |
2345 | +******************************************************************************* | |
2346 | +** | |
2347 | +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | |
2348 | +** Copyright (C) 2004 Red Hat, Inc. All rights reserved. | |
2349 | +** | |
2350 | +** This copyrighted material is made available to anyone wishing to use, | |
2351 | +** modify, copy, or redistribute it subject to the terms and conditions | |
2352 | +** of the GNU General Public License v.2. | |
2353 | +** | |
2354 | +******************************************************************************* | |
2355 | +******************************************************************************/ | |
2356 | + | |
2357 | +#ifndef __DLM_INTERNAL_DOT_H__ | |
2358 | +#define __DLM_INTERNAL_DOT_H__ | |
2359 | + | |
2360 | +/* | |
2361 | + * This is the main header file to be included in each DLM source file. | |
2362 | + */ | |
2363 | + | |
2364 | +#define DLM_RELEASE_NAME "<CVS>" | |
2365 | + | |
2366 | +#include <linux/slab.h> | |
2367 | +#include <linux/sched.h> | |
2368 | +#include <asm/semaphore.h> | |
2369 | +#include <linux/types.h> | |
2370 | +#include <linux/spinlock.h> | |
2371 | +#include <linux/vmalloc.h> | |
2372 | +#include <asm/uaccess.h> | |
2373 | +#include <linux/list.h> | |
2374 | +#include <linux/errno.h> | |
2375 | +#include <linux/random.h> | |
2376 | + | |
2377 | +#include <cluster/dlm.h> | |
2378 | +#include <cluster/dlm_device.h> | |
2379 | +#include <cluster/service.h> | |
2380 | + | |
2381 | +#ifndef TRUE | |
2382 | +#define TRUE (1) | |
2383 | +#endif | |
2384 | + | |
2385 | +#ifndef FALSE | |
2386 | +#define FALSE (0) | |
2387 | +#endif | |
2388 | + | |
2389 | +#if (BITS_PER_LONG == 64) | |
2390 | +#define PRIu64 "lu" | |
2391 | +#define PRId64 "ld" | |
2392 | +#define PRIo64 "lo" | |
2393 | +#define PRIx64 "lx" | |
2394 | +#define PRIX64 "lX" | |
2395 | +#define SCNu64 "lu" | |
2396 | +#define SCNd64 "ld" | |
2397 | +#define SCNo64 "lo" | |
2398 | +#define SCNx64 "lx" | |
2399 | +#define SCNX64 "lX" | |
2400 | +#else | |
2401 | +#define PRIu64 "Lu" | |
2402 | +#define PRId64 "Ld" | |
2403 | +#define PRIo64 "Lo" | |
2404 | +#define PRIx64 "Lx" | |
2405 | +#define PRIX64 "LX" | |
2406 | +#define SCNu64 "Lu" | |
2407 | +#define SCNd64 "Ld" | |
2408 | +#define SCNo64 "Lo" | |
2409 | +#define SCNx64 "Lx" | |
2410 | +#define SCNX64 "LX" | |
2411 | +#endif | |
2412 | + | |
2413 | +#define wchan_cond_sleep_intr(chan, sleep_cond) \ | |
2414 | +do \ | |
2415 | +{ \ | |
2416 | + DECLARE_WAITQUEUE(__wait_chan, current); \ | |
2417 | + set_current_state(TASK_INTERRUPTIBLE); /* barrier before cond test */ \ | |
2418 | + add_wait_queue(&(chan), &__wait_chan); \ | |
2419 | + if ((sleep_cond)) \ | |
2420 | + schedule(); \ | |
2421 | + remove_wait_queue(&(chan), &__wait_chan); \ | |
2422 | + set_current_state(TASK_RUNNING); \ | |
2423 | +} \ | |
2424 | +while (0) | |
2425 | + | |
2426 | +static inline int check_timeout(unsigned long stamp, unsigned int seconds) | |
2427 | +{ | |
2428 | + return time_after(jiffies, stamp + seconds * HZ); | |
2429 | +} | |
2430 | + | |
2431 | + | |
2432 | +#define log_print(fmt, args...) printk("dlm: "fmt"\n", ##args) | |
2433 | + | |
2434 | +#define log_all(ls, fmt, args...) \ | |
2435 | + do { \ | |
2436 | + printk("dlm: %s: " fmt "\n", (ls)->ls_name, ##args); \ | |
2437 | + dlm_debug_log(ls, fmt, ##args); \ | |
2438 | + } while (0) | |
2439 | + | |
2440 | +#define log_error log_all | |
2441 | + | |
2442 | + | |
2443 | +#define DLM_DEBUG | |
2444 | +#if defined(DLM_DEBUG) | |
2445 | +#define log_debug(ls, fmt, args...) dlm_debug_log(ls, fmt, ##args) | |
2446 | +#else | |
2447 | +#define log_debug(ls, fmt, args...) | |
2448 | +#endif | |
2449 | + | |
2450 | +#if defined(DLM_DEBUG) && defined(DLM_DEBUG_ALL) | |
2451 | +#undef log_debug | |
2452 | +#define log_debug log_all | |
2453 | +#endif | |
2454 | + | |
2455 | + | |
10d56c87 | 2456 | +#define DLM_ASSERT(x, do) \ |
4bf12011 | 2457 | +{ \ |
2458 | + if (!(x)) \ | |
2459 | + { \ | |
10d56c87 | 2460 | + dlm_locks_dump(); \ |
4bf12011 | 2461 | + dlm_debug_dump(); \ |
2462 | + printk("\nDLM: Assertion failed on line %d of file %s\n" \ | |
2463 | + "DLM: assertion: \"%s\"\n" \ | |
2464 | + "DLM: time = %lu\n", \ | |
2465 | + __LINE__, __FILE__, #x, jiffies); \ | |
2466 | + {do} \ | |
2467 | + printk("\n"); \ | |
2468 | + BUG(); \ | |
2469 | + panic("DLM: Record message above and reboot.\n"); \ | |
2470 | + } \ | |
2471 | +} | |
2472 | + | |
2473 | + | |
10d56c87 AM |
2474 | +struct dlm_ls; |
2475 | +struct dlm_lkb; | |
2476 | +struct dlm_rsb; | |
2477 | +struct dlm_csb; | |
2478 | +struct dlm_node; | |
2479 | +struct dlm_lkbtable; | |
2480 | +struct dlm_rsbtable; | |
2481 | +struct dlm_dirtable; | |
2482 | +struct dlm_direntry; | |
2483 | +struct dlm_recover; | |
2484 | +struct dlm_header; | |
2485 | +struct dlm_request; | |
2486 | +struct dlm_reply; | |
2487 | +struct dlm_rcom; | |
2488 | +struct dlm_query_request; | |
2489 | +struct dlm_query_reply; | |
4bf12011 | 2490 | + |
4bf12011 | 2491 | + |
10d56c87 AM |
2492 | +struct dlm_direntry { |
2493 | + struct list_head list; | |
2494 | + uint32_t master_nodeid; | |
2495 | + uint16_t length; | |
2496 | + char name[1]; | |
4bf12011 | 2497 | +}; |
2498 | + | |
10d56c87 AM |
2499 | +struct dlm_dirtable { |
2500 | + struct list_head list; | |
2501 | + rwlock_t lock; | |
2502 | +}; | |
4bf12011 | 2503 | + |
10d56c87 AM |
2504 | +struct dlm_rsbtable { |
2505 | + struct list_head list; | |
2506 | + rwlock_t lock; | |
2507 | +}; | |
2508 | + | |
2509 | +struct dlm_lkbtable { | |
2510 | + struct list_head list; | |
2511 | + rwlock_t lock; | |
2512 | + uint16_t counter; | |
4bf12011 | 2513 | +}; |
2514 | + | |
2515 | +/* | |
10d56c87 | 2516 | + * Cluster node (per node in cluster) |
4bf12011 | 2517 | + */ |
2518 | + | |
10d56c87 AM |
2519 | +struct dlm_node { |
2520 | + struct list_head list; | |
2521 | + uint32_t nodeid; | |
2522 | + int refcount; /* num csb's referencing */ | |
4bf12011 | 2523 | +}; |
2524 | + | |
2525 | +/* | |
10d56c87 | 2526 | + * Cluster System Block (per node in a ls) |
4bf12011 | 2527 | + */ |
2528 | + | |
10d56c87 AM |
2529 | +struct dlm_csb { |
2530 | + struct list_head list; /* per-lockspace node list */ | |
2531 | + struct dlm_node * node; /* global node structure */ | |
2532 | + int gone_event; /* event id when node removed */ | |
4bf12011 | 2533 | + |
10d56c87 | 2534 | + /* recovery stats for debugging */ |
4bf12011 | 2535 | + |
10d56c87 AM |
2536 | + uint32_t names_send_count; |
2537 | + uint32_t names_send_msgid; | |
2538 | + uint32_t names_recv_count; | |
2539 | + uint32_t names_recv_msgid; | |
2540 | + uint32_t locks_send_count; | |
2541 | + uint32_t locks_send_msgid; | |
2542 | + uint32_t locks_recv_count; | |
2543 | + uint32_t locks_recv_msgid; | |
4bf12011 | 2544 | +}; |
2545 | + | |
2546 | +/* | |
10d56c87 | 2547 | + * Used to save and manage recovery state for a lockspace. |
4bf12011 | 2548 | + */ |
2549 | + | |
10d56c87 AM |
2550 | +struct dlm_recover { |
2551 | + struct list_head list; | |
2552 | + uint32_t * nodeids; | |
2553 | + int node_count; | |
2554 | + int event_id; | |
4bf12011 | 2555 | +}; |
2556 | + | |
2557 | +/* | |
10d56c87 | 2558 | + * Elements in the range array |
4bf12011 | 2559 | + */ |
2560 | + | |
10d56c87 AM |
2561 | +#define GR_RANGE_START (0) |
2562 | +#define GR_RANGE_END (1) | |
2563 | +#define RQ_RANGE_START (2) | |
2564 | +#define RQ_RANGE_END (3) | |
4bf12011 | 2565 | + |
10d56c87 AM |
2566 | +/* |
2567 | + * Lockspace structure | |
2568 | + */ | |
2569 | + | |
2570 | +#define LSFL_WORK (0) | |
2571 | +#define LSFL_LS_RUN (1) | |
2572 | +#define LSFL_LS_STOP (2) | |
2573 | +#define LSFL_LS_START (3) | |
2574 | +#define LSFL_LS_FINISH (4) | |
2575 | +#define LSFL_RECCOMM_WAIT (5) | |
2576 | +#define LSFL_RECCOMM_READY (6) | |
2577 | +#define LSFL_NOTIMERS (7) | |
2578 | +#define LSFL_FINISH_RECOVERY (8) | |
2579 | +#define LSFL_RESDIR_VALID (9) | |
2580 | +#define LSFL_ALL_RESDIR_VALID (10) | |
2581 | +#define LSFL_NODES_VALID (11) | |
2582 | +#define LSFL_ALL_NODES_VALID (12) | |
2583 | +#define LSFL_REQUEST_WARN (13) | |
2584 | +#define LSFL_NOCONVGRANT (14) | |
2585 | + | |
2586 | +#define LSST_NONE (0) | |
2587 | +#define LSST_INIT (1) | |
2588 | +#define LSST_INIT_DONE (2) | |
2589 | +#define LSST_CLEAR (3) | |
2590 | +#define LSST_WAIT_START (4) | |
2591 | +#define LSST_RECONFIG_DONE (5) | |
2592 | + | |
2593 | +struct dlm_ls { | |
2594 | + struct list_head ls_list; /* list of lockspaces */ | |
2595 | + uint32_t ls_local_id; /* local unique lockspace ID */ | |
2596 | + uint32_t ls_global_id; /* global unique lockspace ID */ | |
2597 | + int ls_allocation; /* Memory allocation policy */ | |
2598 | + unsigned long ls_flags; /* LSFL_ */ | |
2599 | + | |
2600 | + struct dlm_rsbtable * ls_rsbtbl; | |
2601 | + uint32_t ls_rsbtbl_size; | |
2602 | + | |
2603 | + struct dlm_lkbtable * ls_lkbtbl; | |
2604 | + uint32_t ls_lkbtbl_size; | |
2605 | + | |
2606 | + struct dlm_dirtable * ls_dirtbl; | |
2607 | + uint32_t ls_dirtbl_size; | |
2608 | + | |
2609 | + struct list_head ls_nodes; /* current nodes in RC */ | |
2610 | + struct list_head ls_nodes_gone; /* dead node list, recovery */ | |
2611 | + uint32_t ls_num_nodes; /* number of nodes in RC */ | |
2612 | + uint32_t ls_nodes_mask; | |
2613 | + uint32_t ls_low_nodeid; | |
2614 | + | |
2615 | + struct rw_semaphore ls_unlock_sem; /* To prevent unlock on a | |
2616 | + parent lock racing with a | |
2617 | + new child lock */ | |
2618 | + | |
2619 | + struct list_head ls_deadlockq; /* List of locks in conversion | |
2620 | + ordered by duetime, for | |
2621 | + deadlock detection */ | |
2622 | + | |
2623 | + /* recovery related */ | |
2624 | + | |
2625 | + struct list_head ls_recover; /* dlm_recover structs */ | |
2626 | + spinlock_t ls_recover_lock; | |
2627 | + int ls_last_stop; | |
2628 | + int ls_last_start; | |
2629 | + int ls_last_finish; | |
2630 | + int ls_state; /* recovery states */ | |
2631 | + | |
2632 | + struct rw_semaphore ls_in_recovery; /* block local requests */ | |
2633 | + struct list_head ls_requestqueue;/* queue remote requests */ | |
2634 | + | |
2635 | + struct dlm_rcom * ls_rcom; /* recovery comms */ | |
2636 | + uint32_t ls_rcom_msgid; | |
2637 | + struct semaphore ls_rcom_lock; | |
2638 | + | |
2639 | + struct list_head ls_recover_list; | |
2640 | + spinlock_t ls_recover_list_lock; | |
2641 | + int ls_recover_list_count; | |
2642 | + wait_queue_head_t ls_wait_general; | |
2643 | + | |
2644 | + struct list_head ls_rootres; /* List of root resources */ | |
2645 | + | |
2646 | + struct rw_semaphore ls_rec_rsblist; /* To prevent incoming recovery | |
2647 | + operations happening while | |
2648 | + we are purging */ | |
2649 | + | |
2650 | + struct rw_semaphore ls_gap_rsblist; /* To protect rootres list | |
2651 | + in grant_after_purge() which | |
2652 | + runs outside recovery */ | |
2653 | + | |
2654 | + struct list_head ls_rebuild_rootrsb_list; /* Root of lock trees | |
2655 | + we are | |
2656 | + deserialising */ | |
2657 | + int ls_namelen; | |
2658 | + char ls_name[1]; | |
4bf12011 | 2659 | +}; |
2660 | + | |
2661 | +/* | |
2662 | + * Resource block | |
2663 | + */ | |
2664 | + | |
10d56c87 AM |
2665 | +#define RESFL_NEW_MASTER (0) |
2666 | +#define RESFL_RECOVER_LIST (1) | |
2667 | +#define RESFL_MASTER (2) | |
4bf12011 | 2668 | + |
10d56c87 AM |
2669 | +struct dlm_rsb { |
2670 | + struct list_head res_hashchain; | |
2671 | + uint32_t res_bucket; | |
4bf12011 | 2672 | + |
10d56c87 | 2673 | + struct dlm_ls * res_ls; /* The owning lockspace */ |
4bf12011 | 2674 | + |
10d56c87 | 2675 | + struct list_head res_rootlist; /* List of root rsb's */ |
4bf12011 | 2676 | + |
10d56c87 AM |
2677 | + struct list_head res_subreslist; /* List of all sub-resources |
2678 | + for this root rsb */ | |
4bf12011 | 2679 | + |
10d56c87 AM |
2680 | + uint8_t res_depth; /* Depth in resource tree */ |
2681 | + unsigned long res_flags; /* Flags, RESFL_ */ | |
4bf12011 | 2682 | + |
10d56c87 AM |
2683 | + struct list_head res_grantqueue; |
2684 | + struct list_head res_convertqueue; | |
2685 | + struct list_head res_waitqueue; | |
4bf12011 | 2686 | + |
10d56c87 | 2687 | + uint32_t res_nodeid; /* nodeid of master node */ |
4bf12011 | 2688 | + |
10d56c87 AM |
2689 | + struct dlm_rsb * res_root; /* root rsb if a subresource */ |
2690 | + struct dlm_rsb * res_parent; /* parent rsb (if any) */ | |
4bf12011 | 2691 | + |
10d56c87 AM |
2692 | + atomic_t res_ref; /* Number of lkb's */ |
2693 | + uint16_t res_remasterid; /* ID used during remaster */ | |
4bf12011 | 2694 | + |
10d56c87 AM |
2695 | + struct list_head res_recover_list; /* General list for use |
2696 | + during recovery */ | |
2697 | + int res_recover_msgid; | |
2698 | + int res_newlkid_expect; | |
4bf12011 | 2699 | + |
10d56c87 | 2700 | + struct rw_semaphore res_lock; |
4bf12011 | 2701 | + |
10d56c87 | 2702 | + char * res_lvbptr; /* Lock value block */ |
4bf12011 | 2703 | + |
10d56c87 AM |
2704 | + uint8_t res_length; |
2705 | + char res_name[1]; /* <res_length> bytes */ | |
4bf12011 | 2706 | +}; |
2707 | + | |
2708 | +/* | |
2709 | + * Lock block. To avoid confusion, where flags mirror the | |
2710 | + * public flags, they should have the same value. | |
2711 | + */ | |
2712 | + | |
10d56c87 AM |
2713 | +#define GDLM_LKSTS_NEW (0) |
2714 | +#define GDLM_LKSTS_WAITING (1) | |
2715 | +#define GDLM_LKSTS_GRANTED (2) | |
2716 | +#define GDLM_LKSTS_CONVERT (3) | |
4bf12011 | 2717 | + |
10d56c87 AM |
2718 | +#define GDLM_LKFLG_VALBLK (0x00000008) |
2719 | +#define GDLM_LKFLG_PERSISTENT (0x00000080) /* Don't unlock when process exits */ | |
2720 | +#define GDLM_LKFLG_NODLCKWT (0x00000100) /* Don't do deadlock detection */ | |
2721 | +#define GDLM_LKFLG_EXPEDITE (0x00000400) /* Move to head of convert queue */ | |
4bf12011 | 2722 | + |
2723 | +/* Internal flags */ | |
10d56c87 | 2724 | +#define GDLM_LKFLG_RANGE (0x00001000) /* Range field is present |
5cdbd17b | 2725 | + (remote protocol only) */ |
10d56c87 AM |
2726 | +#define GDLM_LKFLG_MSTCPY (0x00002000) |
2727 | +#define GDLM_LKFLG_DELETED (0x00004000) /* LKB is being deleted */ | |
2728 | +#define GDLM_LKFLG_LQCONVERT (0x00008000) | |
2729 | +#define GDLM_LKFLG_LQRESEND (0x00010000) /* LKB on lockqueue must be resent */ | |
2730 | +#define GDLM_LKFLG_DEMOTED (0x00020000) | |
2731 | +#define GDLM_LKFLG_RESENT (0x00040000) | |
2732 | +#define GDLM_LKFLG_NOREBUILD (0x00080000) | |
4bf12011 | 2733 | + |
5cdbd17b AM |
2734 | +#define AST_COMP (1) |
2735 | +#define AST_BAST (2) | |
2736 | +#define AST_DEL (4) | |
4bf12011 | 2737 | + |
10d56c87 AM |
2738 | +struct dlm_lkb { |
2739 | + uint32_t lkb_flags; | |
2740 | + uint16_t lkb_status; /* grant, wait, convert */ | |
2741 | + int8_t lkb_rqmode; /* requested lock mode */ | |
2742 | + int8_t lkb_grmode; /* granted lock mode */ | |
2743 | + uint32_t lkb_retstatus; /* status to return in lksb */ | |
2744 | + uint32_t lkb_id; /* our lock ID */ | |
2745 | + struct dlm_lksb * lkb_lksb; /* status block of caller */ | |
5cdbd17b AM |
2746 | + struct list_head lkb_idtbl_list; /* lockidtbl */ |
2747 | + struct list_head lkb_statequeue; /* rsb's g/c/w queue */ | |
10d56c87 | 2748 | + struct dlm_rsb * lkb_resource; |
5cdbd17b AM |
2749 | + struct list_head lkb_ownerqueue; /* list of locks owned by a |
2750 | + process */ | |
10d56c87 AM |
2751 | + struct dlm_lkb * lkb_parent; /* parent lock if any */ |
2752 | + atomic_t lkb_childcnt; /* number of children */ | |
5cdbd17b AM |
2753 | + |
2754 | + struct list_head lkb_lockqueue; /* queue of locks waiting | |
2755 | + for remote reply */ | |
2756 | + int lkb_lockqueue_state; /* reason on lockqueue */ | |
2757 | + int lkb_lockqueue_flags; /* as passed into | |
2758 | + lock/unlock */ | |
2759 | + unsigned long lkb_lockqueue_time; /* time lkb went on the | |
2760 | + lockqueue */ | |
10d56c87 | 2761 | + unsigned long lkb_duetime; /* for deadlock detection */ |
5cdbd17b AM |
2762 | + |
2763 | + uint32_t lkb_remid; /* id on remote partner */ | |
2764 | + uint32_t lkb_nodeid; /* id of remote partner */ | |
2765 | + | |
2766 | + void * lkb_astaddr; | |
2767 | + void * lkb_bastaddr; | |
2768 | + long lkb_astparam; | |
2769 | + struct list_head lkb_astqueue; /* locks with asts to deliver */ | |
2770 | + uint16_t lkb_astflags; /* COMP, BAST, DEL */ | |
2771 | + uint8_t lkb_bastmode; /* requested mode */ | |
2772 | + uint8_t lkb_highbast; /* highest mode bast sent for */ | |
4bf12011 | 2773 | + |
10d56c87 | 2774 | + struct dlm_request * lkb_request; |
4bf12011 | 2775 | + |
5cdbd17b | 2776 | + struct list_head lkb_deadlockq; /* ls_deadlockq list */ |
4bf12011 | 2777 | + |
5cdbd17b AM |
2778 | + char * lkb_lvbptr; /* points to lksb lvb on local |
2779 | + lock, allocated lvb on | |
2780 | + remote lock */ | |
2781 | + uint64_t * lkb_range; /* Points to an array of 64 bit | |
2782 | + numbers that represent the | |
2783 | + requested and granted ranges | |
10d56c87 | 2784 | + of the lock. NULL implies |
5cdbd17b | 2785 | + 0-ffffffffffffffff */ |
4bf12011 | 2786 | +}; |
2787 | + | |
2788 | +/* | |
4bf12011 | 2789 | + * Header part of the mid-level comms system. All packets start with |
2790 | + * this header so we can identify them. The comms packet can | |
2791 | + * contain many of these structs but they are split into individual | |
2792 | + * work units before being passed to the lockqueue routines. | |
2793 | + * Below this are the structs that this is a header for. | |
2794 | + */ | |
2795 | + | |
10d56c87 AM |
2796 | +struct dlm_header { |
2797 | + uint8_t rh_cmd; /* What we are */ | |
2798 | + uint8_t rh_flags; /* maybe just a pad */ | |
2799 | + uint16_t rh_length; /* Length of struct (so we can | |
2800 | + send many in 1 message) */ | |
2801 | + uint32_t rh_lkid; /* Lock ID tag: ie the local | |
2802 | + (requesting) lock ID */ | |
2803 | + uint32_t rh_lockspace; /* Lockspace ID */ | |
4bf12011 | 2804 | +}; |
2805 | + | |
2806 | +/* | |
2807 | + * This is the struct used in a remote lock/unlock/convert request | |
2808 | + * The mid-level comms API should turn this into native byte order. | |
2809 | + * Most "normal" lock operations will use these two structs for | |
2810 | + * communications. Recovery operations use their own structs | |
2811 | + * but still with the struct dlm_header on the front. | |
2812 | + */ | |
2813 | + | |
10d56c87 AM |
2814 | +struct dlm_request { |
2815 | + struct dlm_header rr_header; | |
2816 | + uint32_t rr_remlkid; /* Remote lock ID */ | |
2817 | + uint32_t rr_remparid; /* Parent's remote lock ID */ | |
2818 | + uint32_t rr_flags; /* Flags from lock/convert req*/ | |
2819 | + uint64_t rr_range_start; /* Yes, these are in the right | |
2820 | + place... */ | |
2821 | + uint64_t rr_range_end; | |
2822 | + uint32_t rr_status; /* Status to return if this is | |
2823 | + an AST request */ | |
2824 | + uint8_t rr_rqmode; /* Requested lock mode */ | |
2825 | + uint8_t rr_asts; /* Whether the LKB has ASTs */ | |
2826 | + char rr_lvb[DLM_LVB_LEN]; | |
2827 | + char rr_name[1]; /* As long as needs be. Only | |
2828 | + used for directory lookups. | |
2829 | + The length of this can be | |
2830 | + worked out from the packet | |
2831 | + length */ | |
4bf12011 | 2832 | +}; |
2833 | + | |
2834 | +/* | |
2835 | + * This is the struct returned by a remote lock/unlock/convert request | |
2836 | + * The mid-level comms API should turn this into native byte order. | |
2837 | + */ | |
2838 | + | |
10d56c87 AM |
2839 | +struct dlm_reply { |
2840 | + struct dlm_header rl_header; | |
2841 | + uint32_t rl_lockstate; /* Whether request was | |
2842 | + queued/granted/waiting */ | |
2843 | + uint32_t rl_nodeid; /* nodeid of lock master */ | |
2844 | + uint32_t rl_status; /* Status to return to caller */ | |
2845 | + uint32_t rl_lkid; /* Remote lkid */ | |
2846 | + char rl_lvb[DLM_LVB_LEN]; | |
4bf12011 | 2847 | +}; |
2848 | + | |
2849 | +/* | |
2850 | + * Recovery comms message | |
2851 | + */ | |
2852 | + | |
10d56c87 AM |
2853 | +struct dlm_rcom { |
2854 | + struct dlm_header rc_header; /* 32 byte aligned */ | |
2855 | + uint32_t rc_msgid; | |
2856 | + uint16_t rc_datalen; | |
2857 | + uint8_t rc_expanded; | |
2858 | + uint8_t rc_subcmd; /* secondary command */ | |
2859 | + char rc_buf[1]; /* first byte of data goes here | |
2860 | + and extends beyond here for | |
2861 | + another datalen - 1 bytes. | |
2862 | + rh_length is set to sizeof | |
2863 | + dlm_rcom + datalen - 1 */ | |
4bf12011 | 2864 | +}; |
2865 | + | |
2866 | + | |
2867 | +/* A remote query: GDLM_REMCMD_QUERY */ | |
4bf12011 | 2868 | + |
10d56c87 AM |
2869 | +struct dlm_query_request { |
2870 | + struct dlm_header rq_header; | |
2871 | + uint32_t rq_mstlkid; /* LockID on master node */ | |
2872 | + uint32_t rq_query; /* query from the user */ | |
2873 | + uint32_t rq_maxlocks; /* max number of locks we can | |
2874 | + cope with */ | |
4bf12011 | 2875 | +}; |
2876 | + | |
2877 | +/* First block of a reply query. cmd = GDLM_REMCMD_QUERY */ | |
2878 | +/* There may be subsequent blocks of | |
2879 | + lock info in GDLM_REMCMD_QUERYCONT messages which just have | |
2880 | + a normal header. The last of these will have rh_flags set to | |
2881 | + GDLM_REMFLAG_ENDQUERY | |
2882 | + */ | |
4bf12011 | 2883 | + |
10d56c87 AM |
2884 | +struct dlm_query_reply { |
2885 | + struct dlm_header rq_header; | |
2886 | + uint32_t rq_numlocks; /* Number of locks in reply */ | |
2887 | + uint32_t rq_startlock; /* Which lock this block starts | |
2888 | + at (for multi-block replies) */ | |
2889 | + uint32_t rq_status; | |
2890 | + | |
2891 | + /* Resource information */ | |
2892 | + uint32_t rq_grantcount; /* No. of nodes on grantqueue */ | |
2893 | + uint32_t rq_convcount; /* No. of nodes on convertq */ | |
2894 | + uint32_t rq_waitcount; /* No. of nodes on waitqueue */ | |
2895 | + char rq_valblk[DLM_LVB_LEN]; /* Master's LVB | |
2896 | + contents, if | |
2897 | + applicable */ | |
4bf12011 | 2898 | +}; |
2899 | + | |
2900 | +/* | |
2901 | + * Lockqueue wait lock states | |
2902 | + */ | |
2903 | + | |
10d56c87 AM |
2904 | +#define GDLM_LQSTATE_WAIT_RSB 1 |
2905 | +#define GDLM_LQSTATE_WAIT_CONVERT 2 | |
2906 | +#define GDLM_LQSTATE_WAIT_CONDGRANT 3 | |
2907 | +#define GDLM_LQSTATE_WAIT_UNLOCK 4 | |
4bf12011 | 2908 | + |
2909 | +/* Commands sent across the comms link */ | |
10d56c87 AM |
2910 | +#define GDLM_REMCMD_LOOKUP 1 |
2911 | +#define GDLM_REMCMD_LOCKREQUEST 2 | |
2912 | +#define GDLM_REMCMD_UNLOCKREQUEST 3 | |
2913 | +#define GDLM_REMCMD_CONVREQUEST 4 | |
2914 | +#define GDLM_REMCMD_LOCKREPLY 5 | |
2915 | +#define GDLM_REMCMD_LOCKGRANT 6 | |
2916 | +#define GDLM_REMCMD_SENDBAST 7 | |
2917 | +#define GDLM_REMCMD_SENDCAST 8 | |
2918 | +#define GDLM_REMCMD_REM_RESDATA 9 | |
2919 | +#define GDLM_REMCMD_RECOVERMESSAGE 20 | |
2920 | +#define GDLM_REMCMD_RECOVERREPLY 21 | |
2921 | +#define GDLM_REMCMD_QUERY 30 | |
2922 | +#define GDLM_REMCMD_QUERYREPLY 31 | |
4bf12011 | 2923 | + |
2924 | +/* Set in rh_flags when this is the last block of | |
2925 | + query information. Note this could also be the first | |
2926 | + block */ | |
2927 | +#define GDLM_REMFLAG_ENDQUERY 1 | |
2928 | + | |
4bf12011 | 2929 | +#ifndef BUG_ON |
2930 | +#define BUG_ON(x) | |
2931 | +#endif | |
2932 | + | |
10d56c87 | 2933 | +void dlm_debug_log(struct dlm_ls *ls, const char *fmt, ...); |
4bf12011 | 2934 | +void dlm_debug_dump(void); |
10d56c87 | 2935 | +void dlm_locks_dump(void); |
4bf12011 | 2936 | + |
2937 | +#endif /* __DLM_INTERNAL_DOT_H__ */ | |
2938 | diff -urN linux-orig/cluster/dlm/lkb.c linux-patched/cluster/dlm/lkb.c | |
2939 | --- linux-orig/cluster/dlm/lkb.c 1970-01-01 07:30:00.000000000 +0730 | |
10d56c87 AM |
2940 | +++ linux-patched/cluster/dlm/lkb.c 2004-07-13 18:57:22.000000000 +0800 |
2941 | @@ -0,0 +1,181 @@ | |
4bf12011 | 2942 | +/****************************************************************************** |
2943 | +******************************************************************************* | |
2944 | +** | |
2945 | +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | |
2946 | +** Copyright (C) 2004 Red Hat, Inc. All rights reserved. | |
2947 | +** | |
2948 | +** This copyrighted material is made available to anyone wishing to use, | |
2949 | +** modify, copy, or redistribute it subject to the terms and conditions | |
2950 | +** of the GNU General Public License v.2. | |
2951 | +** | |
2952 | +******************************************************************************* | |
2953 | +******************************************************************************/ | |
2954 | + | |
2955 | +/* | |
2956 | + * lkb.c | |
2957 | + * | |
2958 | + * Allocate and free locks on the lock ID table. | |
2959 | + * | |
2960 | + * This is slightly naff but I don't really like the | |
2961 | + * VMS lockidtbl stuff as it uses a realloced array | |
2962 | + * to hold the locks in. I think this is slightly better | |
2963 | + * in some ways. | |
2964 | + * | |
2965 | + * Any better suggestions gratefully received. Patrick | |
2966 | + * | |
2967 | + */ | |
2968 | + | |
2969 | +#include "dlm_internal.h" | |
2970 | +#include "lockqueue.h" | |
2971 | +#include "lkb.h" | |
2972 | +#include "config.h" | |
2973 | +#include "rsb.h" | |
2974 | +#include "memory.h" | |
2975 | +#include "lockspace.h" | |
2976 | +#include "util.h" | |
2977 | + | |
2978 | +/* | |
2979 | + * Internal find lock by ID. Must be called with the ls_lkbtbl bucket rwlock held. | |
2980 | + */ | |
2981 | + | |
10d56c87 | 2982 | +static struct dlm_lkb *__find_lock_by_id(struct dlm_ls *ls, uint32_t lkid) |
4bf12011 | 2983 | +{ |
10d56c87 AM |
2984 | + uint16_t bucket = lkid & 0xFFFF; |
2985 | + struct dlm_lkb *lkb; | |
4bf12011 | 2986 | + |
10d56c87 | 2987 | + if (bucket >= ls->ls_lkbtbl_size) |
4bf12011 | 2988 | + goto out; |
2989 | + | |
10d56c87 | 2990 | + list_for_each_entry(lkb, &ls->ls_lkbtbl[bucket].list, lkb_idtbl_list){ |
4bf12011 | 2991 | + if (lkb->lkb_id == lkid) |
2992 | + return lkb; | |
2993 | + } | |
10d56c87 | 2994 | + out: |
4bf12011 | 2995 | + return NULL; |
2996 | +} | |
2997 | + | |
2998 | +/* | |
4bf12011 | 2999 | + * LKB lkid's are 32 bits and have two 16 bit parts. The bottom 16 bits are a |
3000 | + * random number between 0 and lockidtbl_size-1. This random number specifies | |
3001 | + * the "bucket" for the lkb in lockidtbl. The upper 16 bits are a sequentially | |
3002 | + * assigned per-bucket id. | |
3003 | + * | |
3004 | + * Because the 16 bit id's per bucket can roll over, a new lkid must be checked | |
3005 | + * against the lkid of all lkb's in the bucket to avoid duplication. | |
3006 | + * | |
3007 | + */ | |
3008 | + | |
10d56c87 | 3009 | +struct dlm_lkb *create_lkb(struct dlm_ls *ls) |
4bf12011 | 3010 | +{ |
10d56c87 | 3011 | + struct dlm_lkb *lkb; |
4bf12011 | 3012 | + uint32_t lkid; |
3013 | + uint16_t bucket; | |
3014 | + | |
3015 | + lkb = allocate_lkb(ls); | |
3016 | + if (!lkb) | |
3017 | + goto out; | |
3018 | + | |
10d56c87 AM |
3019 | + retry: |
3020 | + get_random_bytes(&bucket, sizeof(bucket)); | |
3021 | + bucket &= (ls->ls_lkbtbl_size - 1); | |
4bf12011 | 3022 | + |
10d56c87 | 3023 | + write_lock(&ls->ls_lkbtbl[bucket].lock); |
4bf12011 | 3024 | + |
10d56c87 AM |
3025 | + lkid = bucket | (ls->ls_lkbtbl[bucket].counter++ << 16); |
3026 | + | |
3027 | + if (__find_lock_by_id(ls, lkid)) { | |
3028 | + write_unlock(&ls->ls_lkbtbl[bucket].lock); | |
3029 | + goto retry; | |
3030 | + } | |
3031 | + | |
3032 | + lkb->lkb_id = lkid; | |
3033 | + list_add(&lkb->lkb_idtbl_list, &ls->ls_lkbtbl[bucket].list); | |
3034 | + write_unlock(&ls->ls_lkbtbl[bucket].lock); | |
3035 | + out: | |
4bf12011 | 3036 | + return lkb; |
3037 | +} | |
3038 | + | |
3039 | +/* | |
3040 | + * Free LKB and remove it from the lockidtbl. | |
3041 | + * NB - this always frees the lkb whereas release_rsb doesn't free an | |
3042 | + * rsb unless its reference count is zero. | |
3043 | + */ | |
3044 | + | |
10d56c87 | 3045 | +void release_lkb(struct dlm_ls *ls, struct dlm_lkb *lkb) |
4bf12011 | 3046 | +{ |
10d56c87 AM |
3047 | + uint16_t bucket = lkb->lkb_id & 0xFFFF; |
3048 | + | |
4bf12011 | 3049 | + if (lkb->lkb_status) { |
3050 | + log_error(ls, "release lkb with status %u", lkb->lkb_status); | |
3051 | + print_lkb(lkb); | |
3052 | + return; | |
3053 | + } | |
3054 | + | |
3055 | + if (lkb->lkb_parent) | |
3056 | + atomic_dec(&lkb->lkb_parent->lkb_childcnt); | |
3057 | + | |
10d56c87 | 3058 | + write_lock(&ls->ls_lkbtbl[bucket].lock); |
4bf12011 | 3059 | + list_del(&lkb->lkb_idtbl_list); |
10d56c87 | 3060 | + write_unlock(&ls->ls_lkbtbl[bucket].lock); |
4bf12011 | 3061 | + |
3062 | + /* if this is not a master copy then lvbptr points into the user's | |
3063 | + * lksb, so don't free it */ | |
3064 | + if (lkb->lkb_lvbptr && lkb->lkb_flags & GDLM_LKFLG_MSTCPY) | |
3065 | + free_lvb(lkb->lkb_lvbptr); | |
3066 | + | |
3067 | + if (lkb->lkb_range) | |
3068 | + free_range(lkb->lkb_range); | |
3069 | + | |
3070 | + free_lkb(lkb); | |
3071 | +} | |
3072 | + | |
10d56c87 | 3073 | +struct dlm_lkb *find_lock_by_id(struct dlm_ls *ls, uint32_t lkid) |
4bf12011 | 3074 | +{ |
10d56c87 AM |
3075 | + struct dlm_lkb *lkb; |
3076 | + uint16_t bucket = lkid & 0xFFFF; | |
4bf12011 | 3077 | + |
10d56c87 | 3078 | + read_lock(&ls->ls_lkbtbl[bucket].lock); |
4bf12011 | 3079 | + lkb = __find_lock_by_id(ls, lkid); |
10d56c87 | 3080 | + read_unlock(&ls->ls_lkbtbl[bucket].lock); |
4bf12011 | 3081 | + |
3082 | + return lkb; | |
3083 | +} | |
3084 | + | |
10d56c87 | 3085 | +struct dlm_lkb *dlm_get_lkb(void *ls, uint32_t lkid) |
4bf12011 | 3086 | +{ |
10d56c87 | 3087 | + struct dlm_ls *lspace = find_lockspace_by_local_id(ls); |
4bf12011 | 3088 | + return find_lock_by_id(lspace, lkid); |
3089 | +} | |
3090 | + | |
3091 | +/* | |
3092 | + * Initialise the range parts of an LKB. | |
3093 | + */ | |
3094 | + | |
10d56c87 | 3095 | +int lkb_set_range(struct dlm_ls *lspace, struct dlm_lkb *lkb, uint64_t start, uint64_t end) |
4bf12011 | 3096 | +{ |
3097 | + int ret = -ENOMEM; | |
3098 | + | |
3099 | + /* | |
3100 | + * if this wasn't already a range lock, make it one | |
3101 | + */ | |
3102 | + if (!lkb->lkb_range) { | |
3103 | + lkb->lkb_range = allocate_range(lspace); | |
3104 | + if (!lkb->lkb_range) | |
3105 | + goto out; | |
3106 | + | |
3107 | + /* | |
3108 | + * This is needed for conversions that contain ranges where the | |
3109 | + * original lock didn't but it's harmless for new locks too. | |
3110 | + */ | |
3111 | + lkb->lkb_range[GR_RANGE_START] = 0LL; | |
3112 | + lkb->lkb_range[GR_RANGE_END] = 0xffffffffffffffffULL; | |
3113 | + } | |
3114 | + | |
3115 | + lkb->lkb_range[RQ_RANGE_START] = start; | |
3116 | + lkb->lkb_range[RQ_RANGE_END] = end; | |
3117 | + | |
3118 | + ret = 0; | |
3119 | + | |
3120 | + out: | |
3121 | + return ret; | |
3122 | +} | |
3123 | diff -urN linux-orig/cluster/dlm/lkb.h linux-patched/cluster/dlm/lkb.h | |
3124 | --- linux-orig/cluster/dlm/lkb.h 1970-01-01 07:30:00.000000000 +0730 | |
10d56c87 AM |
3125 | +++ linux-patched/cluster/dlm/lkb.h 2004-07-13 18:57:22.000000000 +0800 |
3126 | @@ -0,0 +1,23 @@ | |
4bf12011 | 3127 | +/****************************************************************************** |
3128 | +******************************************************************************* | |
3129 | +** | |
3130 | +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | |
3131 | +** Copyright (C) 2004 Red Hat, Inc. All rights reserved. | |
3132 | +** | |
3133 | +** This copyrighted material is made available to anyone wishing to use, | |
3134 | +** modify, copy, or redistribute it subject to the terms and conditions | |
3135 | +** of the GNU General Public License v.2. | |
3136 | +** | |
3137 | +******************************************************************************* | |
3138 | +******************************************************************************/ | |
3139 | + | |
3140 | +#ifndef __LKB_DOT_H__ | |
3141 | +#define __LKB_DOT_H__ | |
3142 | + | |
10d56c87 AM |
3143 | +struct dlm_lkb *find_lock_by_id(struct dlm_ls *ls, uint32_t lkid); |
3144 | +struct dlm_lkb *create_lkb(struct dlm_ls *ls); | |
3145 | +void release_lkb(struct dlm_ls *ls, struct dlm_lkb *lkb); | |
3146 | +struct dlm_lkb *dlm_get_lkb(void *ls, uint32_t lkid); | |
3147 | +int lkb_set_range(struct dlm_ls *lspace, struct dlm_lkb *lkb, uint64_t start, uint64_t end); | |
4bf12011 | 3148 | + |
3149 | +#endif /* __LKB_DOT_H__ */ | |
3150 | diff -urN linux-orig/cluster/dlm/locking.c linux-patched/cluster/dlm/locking.c | |
3151 | --- linux-orig/cluster/dlm/locking.c 1970-01-01 07:30:00.000000000 +0730 | |
10d56c87 AM |
3152 | +++ linux-patched/cluster/dlm/locking.c 2004-07-13 18:57:22.000000000 +0800 |
3153 | @@ -0,0 +1,1307 @@ | |
4bf12011 | 3154 | +/****************************************************************************** |
3155 | +******************************************************************************* | |
3156 | +** | |
3157 | +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | |
3158 | +** Copyright (C) 2004 Red Hat, Inc. All rights reserved. | |
10d56c87 | 3159 | +** |
4bf12011 | 3160 | +** This copyrighted material is made available to anyone wishing to use, |
3161 | +** modify, copy, or redistribute it subject to the terms and conditions | |
3162 | +** of the GNU General Public License v.2. | |
3163 | +** | |
3164 | +******************************************************************************* | |
3165 | +******************************************************************************/ | |
3166 | + | |
10d56c87 | 3167 | +/* |
4bf12011 | 3168 | + * locking.c |
3169 | + * | |
3170 | + * This is where the main work of the DLM goes on | |
3171 | + * | |
3172 | + */ | |
3173 | + | |
3174 | +#include "dlm_internal.h" | |
3175 | +#include "lockqueue.h" | |
3176 | +#include "locking.h" | |
3177 | +#include "lockspace.h" | |
3178 | +#include "lkb.h" | |
3179 | +#include "nodes.h" | |
3180 | +#include "dir.h" | |
3181 | +#include "ast.h" | |
3182 | +#include "memory.h" | |
3183 | +#include "rsb.h" | |
10d56c87 AM |
3184 | +#include "util.h" |
3185 | + | |
3186 | +extern struct list_head lslist; | |
4bf12011 | 3187 | + |
3188 | +#define MAX(a, b) (((a) > (b)) ? (a) : (b)) | |
3189 | + | |
10d56c87 | 3190 | +/* |
4bf12011 | 3191 | + * Lock compatibility matrix - thanks Steve |
3192 | + * UN = Unlocked state. Not really a state, used as a flag | |
3193 | + * PD = Padding. Used to make the matrix a nice power of two in size | |
3194 | + * Other states are the same as the VMS DLM. | |
3195 | + * Usage: matrix[grmode+1][rqmode+1] (although m[rq+1][gr+1] is the same) | |
3196 | + */ | |
3197 | + | |
3198 | +#define modes_compat(gr, rq) \ | |
3199 | + __dlm_compat_matrix[(gr)->lkb_grmode + 1][(rq)->lkb_rqmode + 1] | |
3200 | + | |
3201 | +const int __dlm_compat_matrix[8][8] = { | |
3202 | + /* UN NL CR CW PR PW EX PD */ | |
3203 | + {1, 1, 1, 1, 1, 1, 1, 0}, /* UN */ | |
3204 | + {1, 1, 1, 1, 1, 1, 1, 0}, /* NL */ | |
3205 | + {1, 1, 1, 1, 1, 1, 0, 0}, /* CR */ | |
3206 | + {1, 1, 1, 1, 0, 0, 0, 0}, /* CW */ | |
3207 | + {1, 1, 1, 0, 1, 0, 0, 0}, /* PR */ | |
3208 | + {1, 1, 1, 0, 0, 0, 0, 0}, /* PW */ | |
3209 | + {1, 1, 0, 0, 0, 0, 0, 0}, /* EX */ | |
3210 | + {0, 0, 0, 0, 0, 0, 0, 0} /* PD */ | |
3211 | +}; | |
3212 | + | |
10d56c87 | 3213 | +/* |
4bf12011 | 3214 | + * Compatibility matrix for conversions with QUECVT set. |
3215 | + * Granted mode is the row; requested mode is the column. | |
3216 | + * Usage: matrix[grmode+1][rqmode+1] | |
3217 | + */ | |
3218 | + | |
3219 | +const int __quecvt_compat_matrix[8][8] = { | |
3220 | + /* UN NL CR CW PR PW EX PD */ | |
3221 | + {0, 0, 0, 0, 0, 0, 0, 0}, /* UN */ | |
3222 | + {0, 0, 1, 1, 1, 1, 1, 0}, /* NL */ | |
3223 | + {0, 0, 0, 1, 1, 1, 1, 0}, /* CR */ | |
3224 | + {0, 0, 0, 0, 1, 1, 1, 0}, /* CW */ | |
3225 | + {0, 0, 0, 1, 0, 1, 1, 0}, /* PR */ | |
3226 | + {0, 0, 0, 0, 0, 0, 1, 0}, /* PW */ | |
3227 | + {0, 0, 0, 0, 0, 0, 0, 0}, /* EX */ | |
3228 | + {0, 0, 0, 0, 0, 0, 0, 0} /* PD */ | |
3229 | +}; | |
3230 | + | |
10d56c87 | 3231 | +/* |
4bf12011 | 3232 | + * This defines the direction of transfer of LVB data. |
3233 | + * Granted mode is the row; requested mode is the column. | |
3234 | + * Usage: matrix[grmode+1][rqmode+1] | |
3235 | + * 1 = LVB is returned to the caller | |
3236 | + * 0 = LVB is written to the resource | |
3237 | + * -1 = nothing happens to the LVB | |
3238 | + */ | |
3239 | + | |
3240 | +const int __lvb_operations[8][8] = { | |
3241 | + /* UN NL CR CW PR PW EX PD*/ | |
3242 | + { -1, 1, 1, 1, 1, 1, 1, -1 }, /* UN */ | |
3243 | + { -1, 1, 1, 1, 1, 1, 1, 0 }, /* NL */ | |
3244 | + { -1, -1, 1, 1, 1, 1, 1, 0 }, /* CR */ | |
3245 | + { -1, -1, -1, 1, 1, 1, 1, 0 }, /* CW */ | |
3246 | + { -1, -1, -1, -1, 1, 1, 1, 0 }, /* PR */ | |
3247 | + { -1, 0, 0, 0, 0, 0, 1, 0 }, /* PW */ | |
3248 | + { -1, 0, 0, 0, 0, 0, 0, 0 }, /* EX */ | |
3249 | + { -1, 0, 0, 0, 0, 0, 0, 0 } /* PD */ | |
3250 | +}; | |
3251 | + | |
10d56c87 AM |
3252 | +static void grant_lock(struct dlm_lkb * lkb, int send_remote); |
3253 | +static void send_blocking_asts(struct dlm_rsb * rsb, struct dlm_lkb * lkb); | |
3254 | +static void send_blocking_asts_all(struct dlm_rsb *rsb, struct dlm_lkb *lkb); | |
3255 | +static int convert_lock(struct dlm_ls * ls, int mode, struct dlm_lksb *lksb, | |
4bf12011 | 3256 | + int flags, void *ast, void *astarg, void *bast, |
3257 | + struct dlm_range *range); | |
10d56c87 | 3258 | +static int dlm_lock_stage1(struct dlm_ls * lspace, struct dlm_lkb * lkb, int flags, |
4bf12011 | 3259 | + char *name, int namelen); |
3260 | + | |
3261 | + | |
10d56c87 | 3262 | +static inline int first_in_list(struct dlm_lkb *lkb, struct list_head *head) |
4bf12011 | 3263 | +{ |
10d56c87 | 3264 | + struct dlm_lkb *first = list_entry(head->next, struct dlm_lkb, lkb_statequeue); |
4bf12011 | 3265 | + |
3266 | + if (lkb->lkb_id == first->lkb_id) | |
3267 | + return 1; | |
3268 | + | |
3269 | + return 0; | |
3270 | +} | |
3271 | + | |
10d56c87 | 3272 | +/* |
4bf12011 | 3273 | + * Return 1 if the locks' ranges overlap |
3274 | + * If the lkb has no range then it is assumed to cover 0-ffffffff.ffffffff | |
3275 | + */ | |
3276 | + | |
10d56c87 | 3277 | +static inline int ranges_overlap(struct dlm_lkb *lkb1, struct dlm_lkb *lkb2) |
4bf12011 | 3278 | +{ |
3279 | + if (!lkb1->lkb_range || !lkb2->lkb_range) | |
3280 | + return 1; | |
3281 | + | |
3282 | + if (lkb1->lkb_range[RQ_RANGE_END] < lkb2->lkb_range[GR_RANGE_START] || | |
3283 | + lkb1->lkb_range[RQ_RANGE_START] > lkb2->lkb_range[GR_RANGE_END]) | |
3284 | + return 0; | |
3285 | + | |
3286 | + return 1; | |
3287 | +} | |
3288 | + | |
3289 | +/* | |
3290 | + * Resolve conversion deadlock by changing to NL the granted mode of deadlocked | |
3291 | + * locks on the convert queue. One of the deadlocked locks is allowed to | |
3292 | + * retain its original granted state (we choose the lkb provided although it | |
3293 | + * shouldn't matter which.) We do not change the granted mode on locks without | |
3294 | + * the CONVDEADLK flag. If any of these exist (there shouldn't if the app uses | |
3295 | + * the flag consistently) FALSE is returned, otherwise TRUE. | |
 | + * | |
 | + * A queued lock counts as deadlocked with lkb when their ranges overlap and | |
 | + * modes_compat() fails in both directions. Every lock that is demoted also | |
 | + * gets GDLM_LKFLG_DEMOTED set in its lkb_flags. | |
3296 | + */ | |
3297 | + | |
10d56c87 | 3298 | +static int conversion_deadlock_resolve(struct dlm_rsb *rsb, struct dlm_lkb *lkb) |
4bf12011 | 3299 | +{ |
10d56c87 | 3300 | + struct dlm_lkb *this; |
4bf12011 | 3301 | + int rv = TRUE; |
3302 | + | |
3303 | + list_for_each_entry(this, &rsb->res_convertqueue, lkb_statequeue) { | |
3304 | + if (this == lkb) | |
3305 | + continue; | |
3306 | + | |
3307 | + if (!ranges_overlap(lkb, this)) | |
3308 | + continue; | |
3309 | + | |
3310 | + if (!modes_compat(this, lkb) && !modes_compat(lkb, this)) { | |
3311 | + | |
3312 | + if (!(this->lkb_lockqueue_flags & DLM_LKF_CONVDEADLK)){ | |
3313 | + rv = FALSE; | |
3314 | + continue; | |
3315 | + } | |
3316 | + this->lkb_grmode = DLM_LOCK_NL; | |
3317 | + this->lkb_flags |= GDLM_LKFLG_DEMOTED; | |
3318 | + } | |
3319 | + } | |
3320 | + return rv; | |
3321 | +} | |
3322 | + | |
3323 | +/* | |
3324 | + * "A conversion deadlock arises with a pair of lock requests in the converting | |
3325 | + * queue for one resource. The granted mode of each lock blocks the requested | |
3326 | + * mode of the other lock." | |
 | + * | |
 | + * Returns TRUE if some other lkb on the rsb's convert queue overlaps lkb's | |
 | + * range and modes_compat() fails in both directions, FALSE otherwise. | |
3327 | + */ | |
3328 | + | |
10d56c87 | 3329 | +static int conversion_deadlock_detect(struct dlm_rsb *rsb, struct dlm_lkb *lkb) |
4bf12011 | 3330 | +{ |
10d56c87 | 3331 | + struct dlm_lkb *this; |
4bf12011 | 3332 | + |
3333 | + list_for_each_entry(this, &rsb->res_convertqueue, lkb_statequeue) { | |
3334 | + if (this == lkb) | |
3335 | + continue; | |
3336 | + | |
3337 | + if (!ranges_overlap(lkb, this)) | |
3338 | + continue; | |
3339 | + | |
3340 | + if (!modes_compat(this, lkb) && !modes_compat(lkb, this)) | |
3341 | + return TRUE; | |
3342 | + } | |
3343 | + return FALSE; | |
3344 | +} | |
3345 | + | |
3346 | +/* | |
3347 | + * Check if the given lkb conflicts with another lkb on the queue. | |
 | + * | |
 | + * Returns TRUE at the first queue entry (other than lkb itself) whose range | |
 | + * overlaps lkb's and for which modes_compat(entry, lkb) fails; FALSE if no | |
 | + * such entry exists. | |
3348 | + */ | |
3349 | + | |
10d56c87 | 3350 | +static int queue_conflict(struct list_head *head, struct dlm_lkb *lkb) |
4bf12011 | 3351 | +{ |
10d56c87 | 3352 | + struct dlm_lkb *this; |
4bf12011 | 3353 | + |
3354 | + list_for_each_entry(this, head, lkb_statequeue) { | |
3355 | + if (this == lkb) | |
3356 | + continue; | |
3357 | + if (ranges_overlap(lkb, this) && !modes_compat(this, lkb)) | |
3358 | + return TRUE; | |
3359 | + } | |
3360 | + return FALSE; | |
3361 | +} | |
3362 | + | |
3363 | +/* | |
3364 | + * Deadlock can arise when using the QUECVT flag if the requested mode of the | |
3365 | + * first converting lock is incompatible with the granted mode of another | |
3366 | + * converting lock further down the queue. To prevent this deadlock, a | |
3367 | + * requested QUECVT lock is granted immediately if adding it to the end of | |
3368 | + * the queue would prevent a lock ahead of it from being granted. | |
 | + * | |
 | + * Only entries ahead of lkb on the convert queue are examined: the walk stops | |
 | + * as soon as lkb itself is reached. Returns TRUE if one of those entries | |
 | + * overlaps lkb's range and modes_compat(lkb, entry) fails, FALSE otherwise. | |
3369 | + */ | |
3370 | + | |
10d56c87 | 3371 | +static int queuecvt_deadlock_detect(struct dlm_rsb *rsb, struct dlm_lkb *lkb) |
4bf12011 | 3372 | +{ |
10d56c87 | 3373 | + struct dlm_lkb *this; |
4bf12011 | 3374 | + |
3375 | + list_for_each_entry(this, &rsb->res_convertqueue, lkb_statequeue) { | |
3376 | + if (this == lkb) | |
3377 | + break; | |
3378 | + | |
3379 | + if (ranges_overlap(lkb, this) && !modes_compat(lkb, this)) | |
3380 | + return TRUE; | |
3381 | + } | |
3382 | + return FALSE; | |
3383 | +} | |
3384 | + | |
10d56c87 | 3385 | +/* |
4bf12011 | 3386 | + * Return 1 if the lock can be granted, 0 otherwise. |
3387 | + * Also detect and resolve conversion deadlocks. | |
 | + * | |
 | + * The checks are applied in this order: | |
 | + * - FALSE if the lockspace has LSFL_NOCONVGRANT set, lkb_grmode is | |
 | + * DLM_LOCK_IV and the convert queue is not empty; | |
 | + * - TRUE if the requested mode is NL or equals the granted mode; | |
 | + * - FALSE if lkb conflicts with an entry on the grant queue; | |
 | + * - with no conflict on the convert queue: TRUE, except that a QUECVT | |
 | + * request is only granted when the convert queue is empty, lkb is first | |
 | + * on it, or queuecvt_deadlock_detect() reports a deadlock; | |
 | + * - with a conflict on the convert queue: TRUE only if lkb has CONVDEADLK, | |
 | + * a conversion deadlock is detected and it is resolved successfully. | |
3388 | + */ | |
3389 | + | |
10d56c87 | 3390 | +static int can_be_granted(struct dlm_rsb *rsb, struct dlm_lkb *lkb) |
4bf12011 | 3391 | +{ |
10d56c87 AM |
3392 | + if (test_bit(LSFL_NOCONVGRANT, &rsb->res_ls->ls_flags) && |
3393 | + lkb->lkb_grmode == DLM_LOCK_IV && | |
3394 | + !list_empty(&rsb->res_convertqueue)) | |
3395 | + return FALSE; | |
3396 | + | |
3397 | + if (lkb->lkb_rqmode == DLM_LOCK_NL) | |
4bf12011 | 3398 | + return TRUE; |
3399 | + | |
3400 | + if (lkb->lkb_rqmode == lkb->lkb_grmode) | |
3401 | + return TRUE; | |
3402 | + | |
3403 | + if (queue_conflict(&rsb->res_grantqueue, lkb)) | |
3404 | + return FALSE; | |
3405 | + | |
3406 | + if (!queue_conflict(&rsb->res_convertqueue, lkb)) { | |
3407 | + if (!(lkb->lkb_lockqueue_flags & DLM_LKF_QUECVT)) | |
3408 | + return TRUE; | |
3409 | + | |
3410 | + if (list_empty(&rsb->res_convertqueue) || | |
3411 | + first_in_list(lkb, &rsb->res_convertqueue) || | |
3412 | + queuecvt_deadlock_detect(rsb, lkb)) | |
3413 | + return TRUE; | |
3414 | + else | |
3415 | + return FALSE; | |
3416 | + } | |
3417 | + | |
3418 | + /* there *is* a conflict between this lkb and a converting lock so | |
3419 | + we return false unless conversion deadlock resolution is permitted | |
3420 | + (only conversion requests will have the CONVDEADLK flag set) */ | |
3421 | + | |
3422 | + if (!(lkb->lkb_lockqueue_flags & DLM_LKF_CONVDEADLK)) | |
3423 | + return FALSE; | |
3424 | + | |
3425 | + if (!conversion_deadlock_detect(rsb, lkb)) | |
3426 | + return FALSE; | |
3427 | + | |
3428 | + if (conversion_deadlock_resolve(rsb, lkb)) | |
3429 | + return TRUE; | |
3430 | + | |
3431 | + return FALSE; | |
3432 | +} | |
3433 | + | |
3434 | +int dlm_lock(void *lockspace, | |
3435 | + uint32_t mode, | |
3436 | + struct dlm_lksb *lksb, | |
3437 | + uint32_t flags, | |
3438 | + void *name, | |
3439 | + unsigned int namelen, | |
3440 | + uint32_t parent, | |
3441 | + void (*ast) (void *astarg), | |
3442 | + void *astarg, | |
3443 | + void (*bast) (void *astarg, int mode), | |
3444 | + struct dlm_range *range) | |
3445 | +{ | |
10d56c87 AM |
3446 | + struct dlm_ls *lspace; |
3447 | + struct dlm_lkb *lkb = NULL, *parent_lkb = NULL; | |
4bf12011 | 3448 | + int ret = -EINVAL; |
3449 | + | |
3450 | + lspace = find_lockspace_by_local_id(lockspace); | |
3451 | + if (!lspace) | |
3452 | + goto out; | |
3453 | + | |
3454 | + if (mode < 0 || mode > DLM_LOCK_EX) | |
3455 | + goto out; | |
3456 | + | |
3457 | + if (namelen > DLM_RESNAME_MAXLEN) | |
3458 | + goto out; | |
3459 | + | |
3460 | + if (flags & DLM_LKF_CANCEL) | |
3461 | + goto out; | |
3462 | + | |
3463 | + if (flags & DLM_LKF_QUECVT && !(flags & DLM_LKF_CONVERT)) | |
3464 | + goto out; | |
3465 | + | |
3466 | + if (flags & DLM_LKF_EXPEDITE && !(flags & DLM_LKF_CONVERT)) | |
3467 | + goto out; | |
3468 | + | |
3469 | + if (flags & DLM_LKF_EXPEDITE && flags & DLM_LKF_QUECVT) | |
3470 | + goto out; | |
3471 | + | |
3472 | + if (flags & DLM_LKF_EXPEDITE && flags & DLM_LKF_NOQUEUE) | |
3473 | + goto out; | |
3474 | + | |
3475 | + if (!ast || !lksb) | |
3476 | + goto out; | |
3477 | + | |
3478 | + if (!lksb->sb_lvbptr && (flags & DLM_LKF_VALBLK)) | |
3479 | + goto out; | |
3480 | + | |
3481 | + if ((flags & DLM_LKF_VALBLK) && !lksb->sb_lvbptr) | |
3482 | + goto out; | |
3483 | + | |
10d56c87 | 3484 | + /* |
4bf12011 | 3485 | + * Take conversion path. |
3486 | + */ | |
3487 | + | |
3488 | + if (flags & DLM_LKF_CONVERT) { | |
3489 | + ret = convert_lock(lspace, mode, lksb, flags, ast, astarg, | |
3490 | + bast, range); | |
3491 | + goto out; | |
3492 | + } | |
3493 | + | |
10d56c87 | 3494 | + /* |
4bf12011 | 3495 | + * Take new lock path. |
3496 | + */ | |
3497 | + | |
3498 | + if (parent) { | |
3499 | + down_read(&lspace->ls_unlock_sem); | |
3500 | + | |
3501 | + parent_lkb = find_lock_by_id(lspace, parent); | |
3502 | + | |
3503 | + if (!parent_lkb || | |
3504 | + parent_lkb->lkb_flags & GDLM_LKFLG_DELETED || | |
3505 | + parent_lkb->lkb_flags & GDLM_LKFLG_MSTCPY || | |
3506 | + parent_lkb->lkb_status != GDLM_LKSTS_GRANTED) { | |
3507 | + up_read(&lspace->ls_unlock_sem); | |
3508 | + goto out; | |
3509 | + } | |
3510 | + | |
3511 | + atomic_inc(&parent_lkb->lkb_childcnt); | |
3512 | + up_read(&lspace->ls_unlock_sem); | |
3513 | + } | |
3514 | + | |
3515 | + down_read(&lspace->ls_in_recovery); | |
3516 | + | |
3517 | + ret = -ENOMEM; | |
3518 | + | |
3519 | + lkb = create_lkb(lspace); | |
3520 | + if (!lkb) | |
3521 | + goto fail_dec; | |
3522 | + lkb->lkb_astaddr = ast; | |
3523 | + lkb->lkb_astparam = (long) astarg; | |
3524 | + lkb->lkb_bastaddr = bast; | |
3525 | + lkb->lkb_rqmode = mode; | |
3526 | + lkb->lkb_grmode = DLM_LOCK_IV; | |
10d56c87 | 3527 | + lkb->lkb_nodeid = -1; |
4bf12011 | 3528 | + lkb->lkb_lksb = lksb; |
3529 | + lkb->lkb_parent = parent_lkb; | |
3530 | + lkb->lkb_lockqueue_flags = flags; | |
3531 | + lkb->lkb_lvbptr = lksb->sb_lvbptr; | |
3532 | + | |
3533 | + /* Copy the range if appropriate */ | |
3534 | + if (range) { | |
3535 | + if (range->ra_start > range->ra_end) { | |
3536 | + ret = -EINVAL; | |
3537 | + goto fail_free; | |
3538 | + } | |
3539 | + | |
3540 | + if (lkb_set_range(lspace, lkb, range->ra_start, range->ra_end)) | |
3541 | + goto fail_free; | |
3542 | + } | |
3543 | + | |
3544 | + /* Convert relevant flags to internal numbers */ | |
3545 | + if (flags & DLM_LKF_VALBLK) | |
3546 | + lkb->lkb_flags |= GDLM_LKFLG_VALBLK; | |
3547 | + if (flags & DLM_LKF_PERSISTENT) | |
3548 | + lkb->lkb_flags |= GDLM_LKFLG_PERSISTENT; | |
3549 | + if (flags & DLM_LKF_NODLCKWT) | |
3550 | + lkb->lkb_flags |= GDLM_LKFLG_NODLCKWT; | |
3551 | + | |
3552 | + lksb->sb_lkid = lkb->lkb_id; | |
3553 | + | |
3554 | + ret = dlm_lock_stage1(lspace, lkb, flags, name, namelen); | |
3555 | + if (ret) | |
3556 | + goto fail_free; | |
3557 | + | |
3558 | + up_read(&lspace->ls_in_recovery); | |
3559 | + | |
3560 | + wake_astd(); | |
3561 | + | |
3562 | + return 0; | |
3563 | + | |
3564 | + fail_free: | |
3565 | + release_lkb(lspace, lkb); | |
3566 | + goto fail_unlock; | |
3567 | + | |
3568 | + fail_dec: | |
3569 | + if (parent_lkb) | |
3570 | + atomic_dec(&parent_lkb->lkb_childcnt); | |
3571 | + | |
3572 | + fail_unlock: | |
3573 | + up_read(&lspace->ls_in_recovery); | |
3574 | + | |
3575 | + out: | |
3576 | + return ret; | |
3577 | +} | |
3578 | + | |
10d56c87 | 3579 | +int dlm_lock_stage1(struct dlm_ls *ls, struct dlm_lkb *lkb, int flags, char *name, |
4bf12011 | 3580 | + int namelen) |
3581 | +{ | |
10d56c87 AM |
3582 | + struct dlm_rsb *rsb, *parent_rsb = NULL; |
3583 | + struct dlm_lkb *parent_lkb = lkb->lkb_parent; | |
4bf12011 | 3584 | + uint32_t nodeid; |
3585 | + int error; | |
3586 | + | |
3587 | + if (parent_lkb) | |
3588 | + parent_rsb = parent_lkb->lkb_resource; | |
3589 | + | |
3590 | + error = find_or_create_rsb(ls, parent_rsb, name, namelen, 1, &rsb); | |
3591 | + if (error) | |
3592 | + goto out; | |
4bf12011 | 3593 | + lkb->lkb_resource = rsb; |
4bf12011 | 3594 | + |
10d56c87 AM |
3595 | + log_debug(ls, "rq %u %x \"%s\"", lkb->lkb_rqmode, lkb->lkb_id, |
3596 | + rsb->res_name); | |
3597 | + /* | |
4bf12011 | 3598 | + * Next stage, do we need to find the master or can |
3599 | + * we get on with the real locking work ? | |
3600 | + */ | |
3601 | + | |
3602 | + if (rsb->res_nodeid == -1) { | |
3603 | + if (get_directory_nodeid(rsb) != our_nodeid()) { | |
3604 | + error = remote_stage(lkb, GDLM_LQSTATE_WAIT_RSB); | |
3605 | + goto out; | |
3606 | + } | |
3607 | + | |
10d56c87 AM |
3608 | + error = dlm_dir_lookup(ls, our_nodeid(), rsb->res_name, |
3609 | + rsb->res_length, &nodeid); | |
4bf12011 | 3610 | + if (error) |
3611 | + goto out; | |
3612 | + | |
10d56c87 AM |
3613 | + if (nodeid == our_nodeid()) { |
3614 | + set_bit(RESFL_MASTER, &rsb->res_flags); | |
4bf12011 | 3615 | + nodeid = 0; |
10d56c87 AM |
3616 | + } else |
3617 | + clear_bit(RESFL_MASTER, &rsb->res_flags); | |
4bf12011 | 3618 | + rsb->res_nodeid = nodeid; |
4bf12011 | 3619 | + } |
3620 | + | |
10d56c87 AM |
3621 | + lkb->lkb_nodeid = rsb->res_nodeid; |
3622 | + | |
4bf12011 | 3623 | + error = dlm_lock_stage2(ls, lkb, rsb, flags); |
3624 | + | |
3625 | + out: | |
3626 | + if (error) | |
3627 | + release_rsb(rsb); | |
3628 | + | |
3629 | + return error; | |
3630 | +} | |
3631 | + | |
10d56c87 | 3632 | +/* |
4bf12011 | 3633 | + * Locking routine called after we have an RSB, either a copy of a remote one |
3634 | + * or a local one, or perhaps a shiny new one all of our very own | |
 | + * | |
 | + * The rsb's master must already be known (res_nodeid != -1 is asserted). | |
 | + * If res_nodeid is non-zero the lkb is put on the rsb's waiting queue and the | |
 | + * request is passed to remote_stage(), whose result is returned; otherwise | |
 | + * dlm_lock_stage3() is run and 0 is returned. | |
3635 | + */ | |
3636 | + | |
10d56c87 | 3637 | +int dlm_lock_stage2(struct dlm_ls *ls, struct dlm_lkb *lkb, struct dlm_rsb *rsb, int flags) |
4bf12011 | 3638 | +{ |
3639 | + int error = 0; | |
3640 | + | |
10d56c87 AM |
3641 | + DLM_ASSERT(rsb->res_nodeid != -1, print_lkb(lkb); print_rsb(rsb);); |
3642 | + | |
4bf12011 | 3643 | + if (rsb->res_nodeid) { |
3644 | + res_lkb_enqueue(rsb, lkb, GDLM_LKSTS_WAITING); | |
3645 | + error = remote_stage(lkb, GDLM_LQSTATE_WAIT_CONDGRANT); | |
3646 | + } else { | |
3647 | + dlm_lock_stage3(lkb); | |
3648 | + } | |
3649 | + | |
3650 | + return error; | |
3651 | +} | |
3652 | + | |
10d56c87 | 3653 | +/* |
4bf12011 | 3654 | + * Called on an RSB's master node to do stage2 locking for a remote lock |
3655 | + * request. Returns a proper lkb with rsb ready for lock processing. | |
3656 | + * This is analogous to sections of dlm_lock() and dlm_lock_stage1(). | |
 | + * | |
 | + * Returns NULL if the parent lock cannot be found, the lkb, its lvb or its | |
 | + * range cannot be set up, or find_or_create_rsb() fails. If no usable rsb | |
 | + * is found (none, or one whose res_nodeid is -1) the lkb is still returned, | |
 | + * but with lkb_retstatus set to -EINVAL and lkb_resource left unset. | |
3657 | + */ | |
3658 | + | |
10d56c87 AM |
3659 | +struct dlm_lkb *remote_stage2(int remote_nodeid, struct dlm_ls *ls, |
3660 | + struct dlm_request *freq) | |
4bf12011 | 3661 | +{ |
10d56c87 AM |
3662 | + struct dlm_rsb *rsb = NULL, *parent_rsb = NULL; |
3663 | + struct dlm_lkb *lkb = NULL, *parent_lkb = NULL; | |
4bf12011 | 3664 | + int error, namelen; |
3665 | + | |
3666 | + if (freq->rr_remparid) { | |
3667 | + parent_lkb = find_lock_by_id(ls, freq->rr_remparid); | |
3668 | + if (!parent_lkb) | |
3669 | + goto fail; | |
3670 | + | |
3671 | + atomic_inc(&parent_lkb->lkb_childcnt); | |
3672 | + parent_rsb = parent_lkb->lkb_resource; | |
3673 | + } | |
3674 | + | |
10d56c87 | 3675 | + /* |
4bf12011 | 3676 | + * A new MSTCPY lkb. Initialize lkb fields including the real lkid and |
3677 | + * node actually holding the (non-MSTCPY) lkb. AST address are just | |
3678 | + * flags in the master copy. | |
3679 | + */ | |
3680 | + | |
3681 | + lkb = create_lkb(ls); | |
3682 | + if (!lkb) | |
3683 | + goto fail_dec; | |
3684 | + lkb->lkb_grmode = DLM_LOCK_IV; | |
3685 | + lkb->lkb_rqmode = freq->rr_rqmode; | |
3686 | + lkb->lkb_parent = parent_lkb; | |
5cdbd17b AM |
3687 | + lkb->lkb_astaddr = (void *) (long) (freq->rr_asts & AST_COMP); |
3688 | + lkb->lkb_bastaddr = (void *) (long) (freq->rr_asts & AST_BAST); | |
4bf12011 | 3689 | + lkb->lkb_nodeid = remote_nodeid; |
3690 | + lkb->lkb_remid = freq->rr_header.rh_lkid; | |
3691 | + lkb->lkb_flags = GDLM_LKFLG_MSTCPY; | |
3692 | + lkb->lkb_lockqueue_flags = freq->rr_flags; | |
3693 | + | |
3694 | + if (lkb->lkb_lockqueue_flags & DLM_LKF_VALBLK) { | |
3695 | + lkb->lkb_flags |= GDLM_LKFLG_VALBLK; | |
3696 | + allocate_and_copy_lvb(ls, &lkb->lkb_lvbptr, freq->rr_lvb); | |
3697 | + if (!lkb->lkb_lvbptr) | |
3698 | + goto fail_free; | |
3699 | + } | |
3700 | + | |
3701 | + if (lkb->lkb_lockqueue_flags & GDLM_LKFLG_RANGE) { | |
3702 | + error = lkb_set_range(ls, lkb, freq->rr_range_start, | |
3703 | + freq->rr_range_end); | |
3704 | + if (error) | |
3705 | + goto fail_free; | |
3706 | + } | |
3707 | + | |
10d56c87 | 3708 | + /* |
4bf12011 | 3709 | + * Get the RSB which this lock is for. Create a new RSB if this is a |
3710 | + * new lock on a new resource. We must be the master of any new rsb. | |
3711 | + */ | |
3712 | + | |
3713 | + namelen = freq->rr_header.rh_length - sizeof(*freq) + 1; | |
3714 | + | |
10d56c87 | 3715 | + error = find_or_create_rsb(ls, parent_rsb, freq->rr_name, namelen, 0, |
4bf12011 | 3716 | + &rsb); |
3717 | + if (error) | |
3718 | + goto fail_free; | |
3719 | + | |
10d56c87 AM |
3720 | + if (!rsb || rsb->res_nodeid == -1) { |
3721 | + log_debug(ls, "inval rsb to %u", remote_nodeid); | |
3722 | + lkb->lkb_retstatus = -EINVAL; | |
3723 | + goto out; | |
3724 | + } | |
3725 | + | |
4bf12011 | 3726 | + lkb->lkb_resource = rsb; |
4bf12011 | 3727 | + |
10d56c87 AM |
3728 | + log_debug(ls, "rq %u from %u %x \"%s\"", lkb->lkb_rqmode, remote_nodeid, |
3729 | + lkb->lkb_id, rsb->res_name); | |
3730 | + | |
3731 | + DLM_ASSERT(rsb->res_nodeid == 0, | |
3732 | + print_lkb(lkb); | |
3733 | + print_request(freq); | |
3734 | + printk("nodeid %u\n", remote_nodeid);); | |
4bf12011 | 3735 | + |
10d56c87 AM |
3736 | + out: |
3737 | + return lkb; | |
4bf12011 | 3738 | + |
3739 | + fail_free: | |
3740 | + /* release_lkb handles parent */ | |
3741 | + release_lkb(ls, lkb); | |
3742 | + parent_lkb = NULL; | |
3743 | + | |
3744 | + fail_dec: | |
3745 | + if (parent_lkb) | |
3746 | + atomic_dec(&parent_lkb->lkb_childcnt); | |
3747 | + fail: | |
3748 | + return NULL; | |
3749 | +} | |
3750 | + | |
10d56c87 | 3751 | +/* |
4bf12011 | 3752 | + * The final bit of lock request processing on the master node. Here the lock |
3753 | + * is granted and the completion ast is queued, or the lock is put on the | |
3754 | + * waitqueue and blocking asts are sent. | |
 | + * | |
 | + * A NOQUEUE request that cannot be granted is not queued: its lkb_retstatus | |
 | + * is set to -EAGAIN and a completion ast with AST_DEL is queued instead. | |
 | + * The rsb's res_lock is held for write for the whole operation. | |
3755 | + */ | |
3756 | + | |
10d56c87 | 3757 | +void dlm_lock_stage3(struct dlm_lkb *lkb) |
4bf12011 | 3758 | +{ |
10d56c87 | 3759 | + struct dlm_rsb *rsb = lkb->lkb_resource; |
4bf12011 | 3760 | + |
10d56c87 | 3761 | + /* |
4bf12011 | 3762 | + * This is a locally mastered lock on a resource that already exists, |
3763 | + * see if it can be granted or if it must wait. When this function is | |
3764 | + * called for a remote lock request (process_cluster_request, | |
3765 | + * REMCMD_LOCKREQUEST), the result from grant_lock is returned to the | |
3766 | + * requesting node at the end of process_cluster_request, not at the | |
3767 | + * end of grant_lock. | |
3768 | + */ | |
3769 | + | |
3770 | + down_write(&rsb->res_lock); | |
3771 | + | |
3772 | + if (can_be_granted(rsb, lkb)) { | |
3773 | + grant_lock(lkb, 0); | |
3774 | + goto out; | |
3775 | + } | |
3776 | + | |
10d56c87 | 3777 | + /* |
4bf12011 | 3778 | + * This request is not a conversion, so the lkb didn't exist other than |
3779 | + * for this request and should be freed after EAGAIN is returned in the | |
3780 | + * ast. | |
3781 | + */ | |
3782 | + | |
3783 | + if (lkb->lkb_lockqueue_flags & DLM_LKF_NOQUEUE) { | |
4bf12011 | 3784 | + lkb->lkb_retstatus = -EAGAIN; |
4bf12011 | 3785 | + if (lkb->lkb_lockqueue_flags & DLM_LKF_NOQUEUEBAST) |
3786 | + send_blocking_asts_all(rsb, lkb); | |
5cdbd17b | 3787 | + queue_ast(lkb, AST_COMP | AST_DEL, 0); |
4bf12011 | 3788 | + goto out; |
3789 | + } | |
3790 | + | |
10d56c87 | 3791 | + /* |
4bf12011 | 3792 | + * The requested lkb must wait. Because the rsb of the requested lkb |
3793 | + * is mastered here, send blocking asts for the lkb's blocking the | |
3794 | + * request. | |
3795 | + */ | |
3796 | + | |
3797 | + lkb->lkb_retstatus = 0; | |
3798 | + lkb_enqueue(rsb, lkb, GDLM_LKSTS_WAITING); | |
3799 | + | |
3800 | + send_blocking_asts(rsb, lkb); | |
3801 | + | |
3802 | + out: | |
3803 | + up_write(&rsb->res_lock); | |
3804 | +} | |
3805 | + | |
 | +/* | |
 | + * Unlock the lock identified by lkid or, with DLM_LKF_CANCEL in flags, | |
 | + * cancel a request that has not been granted yet. A non-NULL lksb or | |
 | + * astarg replaces the value stored in the lkb. | |
 | + * | |
 | + * Returns -EINVAL if the lockspace or lock is not found, the lock is a | |
 | + * master copy, or its status does not match the operation (cancel on a | |
 | + * granted lock, plain unlock on a lock that is not granted); -EBUSY if | |
 | + * lkb_lockqueue_state is set; -ENOTEMPTY if the lock still has children; | |
 | + * otherwise the result of remote_stage() or dlm_unlock_stage2(). | |
 | + */ | |
 | + | |
3806 | +int dlm_unlock(void *lockspace, | |
3807 | + uint32_t lkid, | |
3808 | + uint32_t flags, | |
3809 | + struct dlm_lksb *lksb, | |
3810 | + void *astarg) | |
3811 | +{ | |
10d56c87 AM |
3812 | + struct dlm_ls *ls = find_lockspace_by_local_id(lockspace); |
3813 | + struct dlm_lkb *lkb; | |
3814 | + struct dlm_rsb *rsb; | |
4bf12011 | 3815 | + int ret = -EINVAL; |
3816 | + | |
3817 | + if (!ls) | |
3818 | + goto out; | |
3819 | + | |
3820 | + lkb = find_lock_by_id(ls, lkid); | |
3821 | + if (!lkb) | |
3822 | + goto out; | |
3823 | + | |
3824 | + /* Can't dequeue a master copy (a remote node's mastered lock) */ | |
3825 | + if (lkb->lkb_flags & GDLM_LKFLG_MSTCPY) | |
3826 | + goto out; | |
3827 | + | |
3828 | + /* Already waiting for a remote lock operation */ | |
3829 | + if (lkb->lkb_lockqueue_state) { | |
3830 | + ret = -EBUSY; | |
3831 | + goto out; | |
3832 | + } | |
3833 | + | |
3834 | + /* Can only cancel WAITING or CONVERTing locks. | |
3835 | + * This is just a quick check - it is also checked in unlock_stage2() | |
3836 | + * (which may be on the master) under the semaphore. | |
3837 | + */ | |
3838 | + if ((flags & DLM_LKF_CANCEL) && | |
3839 | + (lkb->lkb_status == GDLM_LKSTS_GRANTED)) | |
3840 | + goto out; | |
3841 | + | |
3842 | + /* "Normal" unlocks must operate on a granted lock */ | |
3843 | + if (!(flags & DLM_LKF_CANCEL) && | |
3844 | + (lkb->lkb_status != GDLM_LKSTS_GRANTED)) | |
3845 | + goto out; | |
3846 | + | |
3847 | + down_write(&ls->ls_unlock_sem); | |
4bf12011 | 3848 | + /* Can't dequeue a lock with sublocks */ |
3849 | + if (atomic_read(&lkb->lkb_childcnt)) { | |
3850 | + up_write(&ls->ls_unlock_sem); | |
3851 | + ret = -ENOTEMPTY; | |
3852 | + goto out; | |
3853 | + } | |
4bf12011 | 3854 | + /* Mark it as deleted so we can't use it as a parent in dlm_lock() */ |
3855 | + if (!(flags & DLM_LKF_CANCEL)) | |
3856 | + lkb->lkb_flags |= GDLM_LKFLG_DELETED; | |
3857 | + up_write(&ls->ls_unlock_sem); | |
3858 | + | |
10d56c87 AM |
3859 | + down_read(&ls->ls_in_recovery); |
3860 | + rsb = find_rsb_to_unlock(ls, lkb); | |
3861 | + | |
3862 | + log_debug(ls, "un %x ref %u flg %x nodeid %d/%d \"%s\"", lkb->lkb_id, | |
3863 | + atomic_read(&rsb->res_ref), rsb->res_flags, | |
3864 | + lkb->lkb_nodeid, rsb->res_nodeid, rsb->res_name); | |
3865 | + | |
4bf12011 | 3866 | + /* Save any new params */ |
3867 | + if (lksb) | |
3868 | + lkb->lkb_lksb = lksb; | |
3869 | + if (astarg) | |
3870 | + lkb->lkb_astparam = (long) astarg; | |
4bf12011 | 3871 | + lkb->lkb_lockqueue_flags = flags; |
3872 | + | |
10d56c87 | 3873 | + if (lkb->lkb_nodeid) |
4bf12011 | 3874 | + ret = remote_stage(lkb, GDLM_LQSTATE_WAIT_UNLOCK); |
3875 | + else | |
10d56c87 | 3876 | + ret = dlm_unlock_stage2(lkb, rsb, flags); |
4bf12011 | 3877 | + up_read(&ls->ls_in_recovery); |
3878 | + | |
3879 | + wake_astd(); | |
3880 | + | |
3881 | + out: | |
3882 | + return ret; | |
3883 | +} | |
3884 | + | |
 | +/* | |
 | + * Second stage of an unlock or cancel, done with the rsb's res_lock held for | |
 | + * write. The outcome is reported through lkb_retstatus and a completion | |
 | + * ast (-EINVAL for a cancel of a granted lock, -DLM_ECANCEL for a cancel, | |
 | + * -DLM_EUNLOCK for an unlock); the function itself always returns 0. | |
 | + * | |
 | + * For a master copy lkb no ast is queued here; the lkb and the rsb are | |
 | + * released directly instead. | |
 | + */ | |
 | + | |
10d56c87 | 3885 | +int dlm_unlock_stage2(struct dlm_lkb *lkb, struct dlm_rsb *rsb, uint32_t flags) |
4bf12011 | 3886 | +{ |
4bf12011 | 3887 | + int remote = lkb->lkb_flags & GDLM_LKFLG_MSTCPY; |
10d56c87 | 3888 | + int old_status; |
4bf12011 | 3889 | + |
3890 | + down_write(&rsb->res_lock); | |
3891 | + | |
3892 | + /* Can only cancel WAITING or CONVERTing locks */ | |
3893 | + if ((flags & DLM_LKF_CANCEL) && | |
3894 | + (lkb->lkb_status == GDLM_LKSTS_GRANTED)) { | |
3895 | + lkb->lkb_retstatus = -EINVAL; | |
5cdbd17b | 3896 | + queue_ast(lkb, AST_COMP, 0); |
4bf12011 | 3897 | + goto out; |
3898 | + } | |
3899 | + | |
3900 | + old_status = lkb_dequeue(lkb); | |
3901 | + | |
10d56c87 | 3902 | + /* |
4bf12011 | 3903 | + * If was granted grant any converting or waiting locks. |
3904 | + */ | |
3905 | + | |
3906 | + if (old_status == GDLM_LKSTS_GRANTED) | |
3907 | + grant_pending_locks(rsb); | |
3908 | + | |
10d56c87 | 3909 | + /* |
4bf12011 | 3910 | + * Cancelling a conversion |
3911 | + */ | |
3912 | + | |
3913 | + if ((old_status == GDLM_LKSTS_CONVERT) && (flags & DLM_LKF_CANCEL)) { | |
3914 | + /* VMS semantics say we should send blocking ASTs again here */ | |
3915 | + send_blocking_asts(rsb, lkb); | |
3916 | + | |
3917 | + /* Remove from deadlock detection */ | |
3918 | + if (lkb->lkb_duetime) | |
3919 | + remove_from_deadlockqueue(lkb); | |
3920 | + | |
3921 | + /* Stick it back on the granted queue */ | |
3922 | + lkb_enqueue(rsb, lkb, GDLM_LKSTS_GRANTED); | |
3923 | + lkb->lkb_rqmode = lkb->lkb_grmode; | |
3924 | + | |
3925 | + /* Was it blocking any other locks? */ | |
 | + /* NOTE(review): lkb has already been moved to the granted queue | |
 | + here, and the convert queue may be empty - confirm that | |
 | + this test does what was intended. */ | |
3926 | + if (first_in_list(lkb, &rsb->res_convertqueue)) | |
3927 | + grant_pending_locks(rsb); | |
3928 | + | |
3929 | + lkb->lkb_retstatus = -DLM_ECANCEL; | |
5cdbd17b | 3930 | + queue_ast(lkb, AST_COMP, 0); |
4bf12011 | 3931 | + goto out; |
3932 | + } | |
3933 | + | |
10d56c87 | 3934 | + /* |
4bf12011 | 3935 | + * The lvb can be saved or cleared on unlock. |
3936 | + */ | |
3937 | + | |
3938 | + if (rsb->res_lvbptr && (lkb->lkb_grmode >= DLM_LOCK_PW)) { | |
3939 | + if ((flags & DLM_LKF_VALBLK) && lkb->lkb_lvbptr) | |
3940 | + memcpy(rsb->res_lvbptr, lkb->lkb_lvbptr, DLM_LVB_LEN); | |
3941 | + if (flags & DLM_LKF_IVVALBLK) | |
3942 | + memset(rsb->res_lvbptr, 0, DLM_LVB_LEN); | |
3943 | + } | |
3944 | + | |
5cdbd17b | 3945 | + lkb->lkb_retstatus = flags & DLM_LKF_CANCEL ? -DLM_ECANCEL:-DLM_EUNLOCK; |
4bf12011 | 3946 | + |
10d56c87 AM |
3947 | + if (!remote) |
3948 | + queue_ast(lkb, AST_COMP | AST_DEL, 0); | |
3949 | + | |
3950 | + /* | |
4bf12011 | 3951 | + * Only free the LKB if we are the master copy. Otherwise the AST |
10d56c87 | 3952 | + * delivery routine will free it after delivery. |
4bf12011 | 3953 | + */ |
3954 | + | |
3955 | + if (remote) { | |
3956 | + up_write(&rsb->res_lock); | |
3957 | + release_lkb(rsb->res_ls, lkb); | |
3958 | + release_rsb(rsb); | |
3959 | + goto out2; | |
3960 | + } | |
3961 | + | |
3962 | + out: | |
3963 | + up_write(&rsb->res_lock); | |
3964 | + out2: | |
3965 | + wake_astd(); | |
3966 | + return 0; | |
3967 | +} | |
3968 | + | |
10d56c87 | 3969 | +/* |
4bf12011 | 3970 | + * Lock conversion |
 | + * | |
 | + * Convert the granted lock named by lksb->sb_lkid to "mode". Non-NULL ast, | |
 | + * astarg and bast replace the values stored in the lkb. If the rsb is | |
 | + * mastered remotely (res_nodeid non-zero) the lkb is moved to the convert | |
 | + * queue and the request is sent with remote_stage(); otherwise | |
 | + * dlm_convert_stage2() does the conversion locally. | |
 | + * | |
 | + * Returns -EINVAL if the lock is not found, is a master copy, the QUECVT | |
 | + * mode change is not allowed by __quecvt_compat_matrix, VALBLK is given | |
 | + * without lksb->sb_lvbptr, or the range is inverted; -EBUSY if the lock is | |
 | + * not currently granted; -ENOMEM if the range cannot be set; otherwise the | |
 | + * result of remote_stage() or dlm_convert_stage2(). | |
3971 | + */ | |
3972 | + | |
10d56c87 | 3973 | +static int convert_lock(struct dlm_ls *ls, int mode, struct dlm_lksb *lksb, |
4bf12011 | 3974 | + int flags, void *ast, void *astarg, void *bast, |
3975 | + struct dlm_range *range) | |
3976 | +{ | |
10d56c87 AM |
3977 | + struct dlm_lkb *lkb; |
3978 | + struct dlm_rsb *rsb; | |
4bf12011 | 3979 | + int ret = -EINVAL; |
3980 | + | |
3981 | + lkb = find_lock_by_id(ls, lksb->sb_lkid); | |
3982 | + if (!lkb) { | |
3983 | + goto out; | |
3984 | + } | |
3985 | + | |
3986 | + if (lkb->lkb_status != GDLM_LKSTS_GRANTED) { | |
3987 | + ret = -EBUSY; | |
3988 | + goto out; | |
3989 | + } | |
3990 | + | |
3991 | + if (lkb->lkb_flags & GDLM_LKFLG_MSTCPY) { | |
3992 | + goto out; | |
3993 | + } | |
3994 | + | |
3995 | + if ((flags & DLM_LKF_QUECVT) && | |
3996 | + !__quecvt_compat_matrix[lkb->lkb_grmode + 1][mode + 1]) { | |
3997 | + goto out; | |
3998 | + } | |
3999 | + | |
4000 | + if (!lksb->sb_lvbptr && (flags & DLM_LKF_VALBLK)) { | |
4001 | + goto out; | |
4002 | + } | |
4007 | + | |
4008 | + /* Set up the ranges as appropriate */ | |
4009 | + if (range) { | |
4010 | + if (range->ra_start > range->ra_end) | |
4011 | + goto out; | |
4012 | + | |
4013 | + if (lkb_set_range(ls, lkb, range->ra_start, range->ra_end)) { | |
4014 | + ret = -ENOMEM; | |
4015 | + goto out; | |
4016 | + } | |
4017 | + } | |
4018 | + | |
4019 | + rsb = lkb->lkb_resource; | |
10d56c87 AM |
4020 | + down_read(&ls->ls_in_recovery); |
4021 | + | |
4022 | + log_debug(ls, "cv %u %x \"%s\"", mode, lkb->lkb_id, rsb->res_name); | |
4bf12011 | 4023 | + |
4024 | + lkb->lkb_flags &= ~GDLM_LKFLG_VALBLK; | |
4025 | + lkb->lkb_flags &= ~GDLM_LKFLG_DEMOTED; | |
4026 | + | |
4027 | + if (flags & DLM_LKF_NODLCKWT) | |
4028 | + lkb->lkb_flags |= GDLM_LKFLG_NODLCKWT; | |
4029 | + if (ast) | |
4030 | + lkb->lkb_astaddr = ast; | |
4031 | + if (astarg) | |
4032 | + lkb->lkb_astparam = (long) astarg; | |
4033 | + if (bast) | |
4034 | + lkb->lkb_bastaddr = bast; | |
4035 | + lkb->lkb_rqmode = mode; | |
4036 | + lkb->lkb_lockqueue_flags = flags; | |
4037 | + lkb->lkb_flags |= (flags & DLM_LKF_VALBLK) ? GDLM_LKFLG_VALBLK : 0; | |
4038 | + lkb->lkb_lvbptr = lksb->sb_lvbptr; | |
4039 | + | |
4040 | + if (rsb->res_nodeid) { | |
4041 | + res_lkb_swqueue(rsb, lkb, GDLM_LKSTS_CONVERT); | |
4042 | + ret = remote_stage(lkb, GDLM_LQSTATE_WAIT_CONVERT); | |
4043 | + } else { | |
4044 | + ret = dlm_convert_stage2(lkb, FALSE); | |
4045 | + } | |
4046 | + | |
10d56c87 | 4047 | + up_read(&ls->ls_in_recovery); |
4bf12011 | 4048 | + |
4049 | + wake_astd(); | |
4050 | + | |
4051 | + out: | |
4052 | + return ret; | |
4053 | +} | |
4054 | + | |
10d56c87 | 4055 | +/* |
4bf12011 | 4056 | + * For local conversion requests on locally mastered locks this is called |
4057 | + * directly from dlm_lock/convert_lock. This function is also called for | |
4058 | + * remote conversion requests of MSTCPY locks (from process_cluster_request). | |
 | + * | |
 | + * Returns 0 if the conversion was granted or queued, -EAGAIN if it could | |
 | + * not be granted and the request has NOQUEUE set. In the -EAGAIN case a | |
 | + * completion ast is queued only when do_ast is non-zero. | |
4059 | + */ | |
4060 | + | |
10d56c87 | 4061 | +int dlm_convert_stage2(struct dlm_lkb *lkb, int do_ast) |
4bf12011 | 4062 | +{ |
10d56c87 | 4063 | + struct dlm_rsb *rsb = lkb->lkb_resource; |
4bf12011 | 4064 | + int ret = 0; |
4065 | + | |
4066 | + down_write(&rsb->res_lock); | |
4067 | + | |
4068 | + if (can_be_granted(rsb, lkb)) { | |
4069 | + grant_lock(lkb, 0); | |
4070 | + grant_pending_locks(rsb); | |
4071 | + goto out; | |
4072 | + } | |
4073 | + | |
10d56c87 | 4074 | + /* |
4bf12011 | 4075 | + * Remove lkb from granted queue. |
4076 | + */ | |
4077 | + | |
4078 | + lkb_dequeue(lkb); | |
4079 | + | |
10d56c87 | 4080 | + /* |
4bf12011 | 4081 | + * The user won't wait so stick it back on the grant queue |
4082 | + */ | |
4083 | + | |
4084 | + if (lkb->lkb_lockqueue_flags & DLM_LKF_NOQUEUE) { | |
4085 | + lkb_enqueue(rsb, lkb, GDLM_LKSTS_GRANTED); | |
4086 | + ret = lkb->lkb_retstatus = -EAGAIN; | |
4087 | + if (do_ast) | |
5cdbd17b | 4088 | + queue_ast(lkb, AST_COMP, 0); |
4bf12011 | 4089 | + if (lkb->lkb_lockqueue_flags & DLM_LKF_NOQUEUEBAST) |
4090 | + send_blocking_asts_all(rsb, lkb); | |
4091 | + goto out; | |
4092 | + } | |
4093 | + | |
10d56c87 | 4094 | + /* |
4bf12011 | 4095 | + * The lkb's status tells which queue it's on. Put back on convert |
4096 | + * queue. (QUECVT requests added at end of the queue, all others in | |
4097 | + * order.) | |
4098 | + */ | |
4099 | + | |
4100 | + lkb->lkb_retstatus = 0; | |
4101 | + lkb_enqueue(rsb, lkb, GDLM_LKSTS_CONVERT); | |
4102 | + | |
10d56c87 | 4103 | + /* |
4bf12011 | 4104 | + * The request can't be granted yet: send blocking asts to the granted |
4105 | + * locks and, unless NODLCKWT is set, add it to the deadlock queue. | |
4106 | + */ | |
4107 | + send_blocking_asts(rsb, lkb); | |
4108 | + | |
4109 | + if (!(lkb->lkb_flags & GDLM_LKFLG_NODLCKWT)) | |
4110 | + add_to_deadlockqueue(lkb); | |
4111 | + | |
4112 | + out: | |
4113 | + up_write(&rsb->res_lock); | |
4114 | + return ret; | |
4115 | +} | |
4116 | + | |
10d56c87 | 4117 | +/* |
4bf12011 | 4118 | + * Remove lkb from any queue it's on, add it to the granted queue, and queue a |
4119 | + * completion ast. rsb res_lock must be held in write when this is called. | |
4120 | + */ | |
4121 | + | |
10d56c87 | 4122 | +static void grant_lock(struct dlm_lkb *lkb, int send_remote) |
4bf12011 | 4123 | +{ |
10d56c87 | 4124 | + struct dlm_rsb *rsb = lkb->lkb_resource; |
4bf12011 | 4125 | + |
4126 | + if (lkb->lkb_duetime) | |
4127 | + remove_from_deadlockqueue(lkb); | |
4128 | + | |
4129 | + if (lkb->lkb_flags & GDLM_LKFLG_VALBLK) { | |
4130 | + int b; | |
10d56c87 | 4131 | + DLM_ASSERT(lkb->lkb_lvbptr,); |
4bf12011 | 4132 | + |
4133 | + if (!rsb->res_lvbptr) | |
4134 | + rsb->res_lvbptr = allocate_lvb(rsb->res_ls); | |
4135 | + | |
4136 | + b = __lvb_operations[lkb->lkb_grmode + 1][lkb->lkb_rqmode + 1]; | |
4137 | + if (b) | |
4138 | + memcpy(lkb->lkb_lvbptr, rsb->res_lvbptr, DLM_LVB_LEN); | |
4139 | + else | |
4140 | + memcpy(rsb->res_lvbptr, lkb->lkb_lvbptr, DLM_LVB_LEN); | |
4141 | + } | |
4142 | + | |
4143 | + if (lkb->lkb_range) { | |
4144 | + lkb->lkb_range[GR_RANGE_START] = lkb->lkb_range[RQ_RANGE_START]; | |
4145 | + lkb->lkb_range[GR_RANGE_END] = lkb->lkb_range[RQ_RANGE_END]; | |
4146 | + } | |
4147 | + | |
4148 | + lkb->lkb_grmode = lkb->lkb_rqmode; | |
4149 | + lkb->lkb_rqmode = DLM_LOCK_IV; | |
4150 | + lkb_swqueue(rsb, lkb, GDLM_LKSTS_GRANTED); | |
4151 | + | |
4152 | + lkb->lkb_highbast = 0; | |
4153 | + lkb->lkb_retstatus = 0; | |
5cdbd17b | 4154 | + queue_ast(lkb, AST_COMP, 0); |
4bf12011 | 4155 | + |
10d56c87 | 4156 | + /* |
4bf12011 | 4157 | + * A remote conversion request has been granted, either immediately |
4158 | + * upon being requested or after waiting a bit. In the former case, | |
4159 | + * reply_and_grant() is called. In the later case send_remote is 1 and | |
4160 | + * remote_grant() is called. | |
4161 | + * | |
4162 | + * The "send_remote" flag is set only for locks which are granted "out | |
4163 | + * of band" - ie by another lock being converted or unlocked. | |
4164 | + * | |
4165 | + * The other case occurs when this lkb is granted right away as part | |
4166 | + * of processing the initial request. In that case, we send a single | |
4167 | + * message in reply_and_grant which combines the request reply with the | |
4168 | + * grant message. | |
4169 | + */ | |
4170 | + | |
4171 | + if ((lkb->lkb_flags & GDLM_LKFLG_MSTCPY) && lkb->lkb_nodeid) { | |
4172 | + if (send_remote) | |
4173 | + remote_grant(lkb); | |
4174 | + else if (lkb->lkb_request) | |
4175 | + reply_and_grant(lkb); | |
4176 | + } | |
4177 | + | |
4178 | +} | |
4179 | + | |
10d56c87 | 4180 | +static void send_bast_queue(struct list_head *head, struct dlm_lkb *lkb) |
4bf12011 | 4181 | +{ |
10d56c87 | 4182 | + struct dlm_lkb *gr; |
4bf12011 | 4183 | + |
4184 | + list_for_each_entry(gr, head, lkb_statequeue) { | |
4185 | + if (gr->lkb_bastaddr && | |
4186 | + gr->lkb_highbast < lkb->lkb_rqmode && | |
4187 | + ranges_overlap(lkb, gr) && !modes_compat(gr, lkb)) { | |
5cdbd17b | 4188 | + queue_ast(gr, AST_BAST, lkb->lkb_rqmode); |
4bf12011 | 4189 | + gr->lkb_highbast = lkb->lkb_rqmode; |
4190 | + } | |
4191 | + } | |
4192 | +} | |
4193 | + | |
10d56c87 | 4194 | +/* |
4bf12011 | 4195 | + * Notify granted locks if they are blocking a newly forced-to-wait lock. |
4196 | + */ | |
4197 | + | |
10d56c87 | 4198 | +static void send_blocking_asts(struct dlm_rsb *rsb, struct dlm_lkb *lkb) |
4bf12011 | 4199 | +{ |
4200 | + send_bast_queue(&rsb->res_grantqueue, lkb); | |
4201 | + /* check if the following improves performance */ | |
4202 | + /* send_bast_queue(&rsb->res_convertqueue, lkb); */ | |
4203 | +} | |
4204 | + | |
10d56c87 | 4205 | +static void send_blocking_asts_all(struct dlm_rsb *rsb, struct dlm_lkb *lkb) |
4bf12011 | 4206 | +{ |
4207 | + send_bast_queue(&rsb->res_grantqueue, lkb); | |
4208 | + send_bast_queue(&rsb->res_convertqueue, lkb); | |
4209 | +} | |
4210 | + | |
10d56c87 | 4211 | +/* |
4bf12011 | 4212 | + * Called when a lock has been dequeued. Look for any locks to grant that are |
4213 | + * waiting for conversion or waiting to be granted. | |
4214 | + * The rsb res_lock must be held in write when this function is called. | |
4215 | + */ | |
4216 | + | |
10d56c87 | 4217 | +int grant_pending_locks(struct dlm_rsb *rsb) |
4bf12011 | 4218 | +{ |
10d56c87 | 4219 | + struct dlm_lkb *lkb; |
4bf12011 | 4220 | + struct list_head *list; |
4221 | + struct list_head *temp; | |
4222 | + int8_t high = DLM_LOCK_IV; | |
4223 | + | |
4224 | + list_for_each_safe(list, temp, &rsb->res_convertqueue) { | |
10d56c87 | 4225 | + lkb = list_entry(list, struct dlm_lkb, lkb_statequeue); |
4bf12011 | 4226 | + |
4227 | + if (can_be_granted(rsb, lkb)) | |
4228 | + grant_lock(lkb, 1); | |
4229 | + else | |
4230 | + high = MAX(lkb->lkb_rqmode, high); | |
4231 | + } | |
4232 | + | |
4233 | + list_for_each_safe(list, temp, &rsb->res_waitqueue) { | |
10d56c87 | 4234 | + lkb = list_entry(list, struct dlm_lkb, lkb_statequeue); |
4bf12011 | 4235 | + |
4236 | + if (can_be_granted(rsb, lkb)) | |
4237 | + grant_lock(lkb, 1); | |
4238 | + else | |
4239 | + high = MAX(lkb->lkb_rqmode, high); | |
4240 | + } | |
4241 | + | |
10d56c87 | 4242 | + /* |
4bf12011 | 4243 | + * If there are locks left on the wait/convert queue then send blocking |
4244 | + * ASTs to granted locks that are blocking | |
4245 | + * | |
4246 | + * FIXME: This might generate some spurious blocking ASTs for range | |
4247 | + * locks. | |
4248 | + */ | |
4249 | + | |
4250 | + if (high > DLM_LOCK_IV) { | |
4251 | + list_for_each_safe(list, temp, &rsb->res_grantqueue) { | |
10d56c87 | 4252 | + lkb = list_entry(list, struct dlm_lkb, lkb_statequeue); |
4bf12011 | 4253 | + |
4254 | + if (lkb->lkb_bastaddr && | |
4255 | + (lkb->lkb_highbast < high) && | |
4256 | + !__dlm_compat_matrix[lkb->lkb_grmode+1][high+1]) { | |
4257 | + | |
5cdbd17b | 4258 | + queue_ast(lkb, AST_BAST, high); |
4bf12011 | 4259 | + lkb->lkb_highbast = high; |
4260 | + } | |
4261 | + } | |
4262 | + } | |
4263 | + | |
4264 | + return 0; | |
4265 | +} | |
4266 | + | |
10d56c87 | 4267 | +/* |
4bf12011 | 4268 | + * Called to cancel a locking operation that failed due to some internal |
4269 | + * reason. | |
4270 | + * | |
4271 | + * Waiting locks will be removed, converting locks will be reverted to their | |
4272 | + * granted status, unlocks will be left where they are. | |
4273 | + * | |
4274 | + * A completion AST will be delivered to the caller. | |
4275 | + */ | |
4276 | + | |
10d56c87 | 4277 | +int cancel_lockop(struct dlm_lkb *lkb, int status) |
4bf12011 | 4278 | +{ |
4279 | + int state = lkb->lkb_lockqueue_state; | |
5cdbd17b | 4280 | + uint16_t astflags = AST_COMP; |
4bf12011 | 4281 | + |
4282 | + lkb->lkb_lockqueue_state = 0; | |
4283 | + | |
4284 | + switch (state) { | |
4285 | + case GDLM_LQSTATE_WAIT_RSB: | |
5cdbd17b | 4286 | + astflags |= AST_DEL; |
4bf12011 | 4287 | + break; |
4288 | + | |
4289 | + case GDLM_LQSTATE_WAIT_CONDGRANT: | |
4290 | + res_lkb_dequeue(lkb); | |
5cdbd17b | 4291 | + astflags |= AST_DEL; |
4bf12011 | 4292 | + break; |
4293 | + | |
4294 | + case GDLM_LQSTATE_WAIT_CONVERT: | |
4295 | + res_lkb_swqueue(lkb->lkb_resource, lkb, GDLM_LKSTS_GRANTED); | |
4296 | + | |
4297 | + /* Remove from deadlock detection */ | |
4298 | + if (lkb->lkb_duetime) { | |
4299 | + remove_from_deadlockqueue(lkb); | |
4300 | + } | |
4301 | + break; | |
4302 | + | |
4303 | + case GDLM_LQSTATE_WAIT_UNLOCK: | |
4304 | + /* We can leave this. I think.... */ | |
4305 | + break; | |
4306 | + } | |
4307 | + | |
4308 | + lkb->lkb_retstatus = status; | |
5cdbd17b | 4309 | + queue_ast(lkb, astflags, 0); |
4bf12011 | 4310 | + |
4311 | + return 0; | |
4312 | +} | |
4313 | + | |
10d56c87 | 4314 | +/* |
4bf12011 | 4315 | + * Check for conversion deadlock. If a deadlock was found |
4316 | + * return lkb to kill, else return NULL | |
4317 | + */ | |
4318 | + | |
10d56c87 | 4319 | +struct dlm_lkb *conversion_deadlock_check(struct dlm_lkb *lkb) |
4bf12011 | 4320 | +{ |
10d56c87 | 4321 | + struct dlm_rsb *rsb = lkb->lkb_resource; |
4bf12011 | 4322 | + struct list_head *entry; |
4323 | + | |
10d56c87 | 4324 | + DLM_ASSERT(lkb->lkb_status == GDLM_LKSTS_CONVERT,); |
4bf12011 | 4325 | + |
4326 | + /* Work our way up to the head of the queue looking for locks that | |
4327 | + * conflict with us */ | |
4328 | + | |
4329 | + down_read(&rsb->res_lock); | |
4330 | + | |
4331 | + entry = lkb->lkb_statequeue.prev; | |
4332 | + while (entry != &rsb->res_convertqueue) { | |
10d56c87 | 4333 | + struct dlm_lkb *lkb2 = list_entry(entry, struct dlm_lkb, lkb_statequeue); |
4bf12011 | 4334 | + |
4335 | + if (ranges_overlap(lkb, lkb2) && !modes_compat(lkb2, lkb)) { | |
4336 | + up_read(&rsb->res_lock); | |
4337 | + return lkb; | |
4338 | + } | |
4339 | + entry = entry->prev; | |
4340 | + } | |
4341 | + up_read(&rsb->res_lock); | |
4342 | + | |
4343 | + return 0; | |
4344 | +} | |
4345 | + | |
10d56c87 | 4346 | +/* |
4bf12011 | 4347 | + * Conversion operation was cancelled by us (not the user). |
4348 | + * ret contains the return code to pass onto the user | |
4349 | + */ | |
4350 | + | |
10d56c87 | 4351 | +void cancel_conversion(struct dlm_lkb *lkb, int ret) |
4bf12011 | 4352 | +{ |
10d56c87 | 4353 | + struct dlm_rsb *rsb = lkb->lkb_resource; |
4bf12011 | 4354 | + |
4355 | + /* Stick it back on the granted queue */ | |
4356 | + res_lkb_swqueue(rsb, lkb, GDLM_LKSTS_GRANTED); | |
4357 | + lkb->lkb_rqmode = lkb->lkb_grmode; | |
4358 | + | |
4359 | + remove_from_deadlockqueue(lkb); | |
4360 | + | |
4361 | + lkb->lkb_retstatus = ret; | |
5cdbd17b | 4362 | + queue_ast(lkb, AST_COMP, 0); |
4bf12011 | 4363 | + wake_astd(); |
4364 | +} | |
4365 | + | |
10d56c87 | 4366 | +/* |
4bf12011 | 4367 | + * As new master of the rsb for this lkb, we need to handle these requests |
4368 | + * removed from the lockqueue and originating from local processes: | |
4369 | + * GDLM_LQSTATE_WAIT_RSB, GDLM_LQSTATE_WAIT_CONDGRANT, | |
4370 | + * GDLM_LQSTATE_WAIT_UNLOCK, GDLM_LQSTATE_WAIT_CONVERT. | |
4371 | + */ | |
4372 | + | |
10d56c87 | 4373 | +void process_remastered_lkb(struct dlm_ls *ls, struct dlm_lkb *lkb, int state) |
4bf12011 | 4374 | +{ |
10d56c87 AM |
4375 | + struct dlm_rsb *rsb; |
4376 | + | |
4bf12011 | 4377 | + switch (state) { |
4378 | + case GDLM_LQSTATE_WAIT_RSB: | |
4379 | + dlm_lock_stage1(lkb->lkb_resource->res_ls, lkb, | |
4380 | + lkb->lkb_lockqueue_flags, | |
4381 | + lkb->lkb_resource->res_name, | |
4382 | + lkb->lkb_resource->res_length); | |
4383 | + break; | |
4384 | + | |
4385 | + case GDLM_LQSTATE_WAIT_CONDGRANT: | |
4386 | + res_lkb_dequeue(lkb); | |
4387 | + dlm_lock_stage3(lkb); | |
4388 | + break; | |
4389 | + | |
4390 | + case GDLM_LQSTATE_WAIT_UNLOCK: | |
10d56c87 AM |
4391 | + rsb = find_rsb_to_unlock(ls, lkb); |
4392 | + dlm_unlock_stage2(lkb, rsb, lkb->lkb_lockqueue_flags); | |
4bf12011 | 4393 | + break; |
4394 | + | |
4395 | + case GDLM_LQSTATE_WAIT_CONVERT: | |
4396 | + dlm_convert_stage2(lkb, TRUE); | |
4397 | + break; | |
4398 | + | |
4399 | + default: | |
10d56c87 | 4400 | + DLM_ASSERT(0,); |
4bf12011 | 4401 | + } |
4402 | +} | |
10d56c87 AM |
4403 | + |
4404 | +static void dump_queue(struct list_head *head) | |
4405 | +{ | |
4406 | + struct dlm_lkb *lkb; | |
4407 | + | |
4408 | + list_for_each_entry(lkb, head, lkb_statequeue) { | |
4409 | + printk("%08x gr %d rq %d flg %x sts %u node %u remid %x " | |
4410 | + "lq %d,%x\n", | |
4411 | + lkb->lkb_id, | |
4412 | + lkb->lkb_grmode, | |
4413 | + lkb->lkb_rqmode, | |
4414 | + lkb->lkb_flags, | |
4415 | + lkb->lkb_status, | |
4416 | + lkb->lkb_nodeid, | |
4417 | + lkb->lkb_remid, | |
4418 | + lkb->lkb_lockqueue_state, | |
4419 | + lkb->lkb_lockqueue_flags); | |
4420 | + } | |
4421 | +} | |
4422 | + | |
4423 | +static void dump_rsb(struct dlm_rsb *rsb) | |
4424 | +{ | |
4425 | + printk("name \"%s\" flags %lx nodeid %u ref %u\n", | |
4426 | + rsb->res_name, rsb->res_flags, rsb->res_nodeid, | |
4427 | + atomic_read(&rsb->res_ref)); | |
4428 | + | |
4429 | + if (!list_empty(&rsb->res_grantqueue)) { | |
4430 | + printk("grant queue\n"); | |
4431 | + dump_queue(&rsb->res_grantqueue); | |
4432 | + } | |
4433 | + | |
4434 | + if (!list_empty(&rsb->res_convertqueue)) { | |
4435 | + printk("convert queue\n"); | |
4436 | + dump_queue(&rsb->res_convertqueue); | |
4437 | + } | |
4438 | + | |
4439 | + if (!list_empty(&rsb->res_waitqueue)) { | |
4440 | + printk("wait queue\n"); | |
4441 | + dump_queue(&rsb->res_waitqueue); | |
4442 | + } | |
4443 | +} | |
4444 | + | |
4445 | +void dlm_locks_dump(void) | |
4446 | +{ | |
4447 | + struct dlm_ls *ls; | |
4448 | + struct dlm_rsb *rsb; | |
4449 | + struct list_head *head; | |
4450 | + int i; | |
4451 | + | |
4452 | + list_for_each_entry(ls, &lslist, ls_list) { | |
4453 | + for (i = 0; i < ls->ls_rsbtbl_size; i++) { | |
4454 | + head = &ls->ls_rsbtbl[i].list; | |
4455 | + list_for_each_entry(rsb, head, res_hashchain) | |
4456 | + dump_rsb(rsb); | |
4457 | + } | |
4458 | + } | |
4459 | +} | |
4460 | + | |
4bf12011 | 4461 | diff -urN linux-orig/cluster/dlm/locking.h linux-patched/cluster/dlm/locking.h |
4462 | --- linux-orig/cluster/dlm/locking.h 1970-01-01 07:30:00.000000000 +0730 | |
10d56c87 AM |
4463 | +++ linux-patched/cluster/dlm/locking.h 2004-07-13 18:57:22.000000000 +0800 |
4464 | @@ -0,0 +1,32 @@ | |
4bf12011 | 4465 | +/****************************************************************************** |
4466 | +******************************************************************************* | |
4467 | +** | |
4468 | +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | |
4469 | +** Copyright (C) 2004 Red Hat, Inc. All rights reserved. | |
4470 | +** | |
4471 | +** This copyrighted material is made available to anyone wishing to use, | |
4472 | +** modify, copy, or redistribute it subject to the terms and conditions | |
4473 | +** of the GNU General Public License v.2. | |
4474 | +** | |
4475 | +******************************************************************************* | |
4476 | +******************************************************************************/ | |
4477 | + | |
4478 | +#ifndef __LOCKING_DOT_H__ | |
4479 | +#define __LOCKING_DOT_H__ | |
4480 | + | |
10d56c87 AM |
4481 | +void process_remastered_lkb(struct dlm_ls *ls, struct dlm_lkb *lkb, int state); |
4482 | +void dlm_lock_stage3(struct dlm_lkb *lkb); | |
4483 | +int dlm_convert_stage2(struct dlm_lkb *lkb, int do_ast); | |
4484 | +int dlm_unlock_stage2(struct dlm_lkb *lkb, struct dlm_rsb *rsb, uint32_t flags); | |
4485 | +int dlm_lock_stage2(struct dlm_ls *lspace, struct dlm_lkb *lkb, struct dlm_rsb *rsb, int flags); | |
4486 | +struct dlm_rsb *create_rsb(struct dlm_ls *lspace, struct dlm_lkb *lkb, char *name, int namelen); | |
4487 | +int free_rsb_if_unused(struct dlm_rsb *rsb); | |
4488 | +struct dlm_lkb *remote_stage2(int remote_csid, struct dlm_ls *lspace, | |
4489 | + struct dlm_request *freq); | |
4490 | +int cancel_lockop(struct dlm_lkb *lkb, int status); | |
4491 | +int dlm_remove_lock(struct dlm_lkb *lkb, uint32_t flags); | |
4492 | +int grant_pending_locks(struct dlm_rsb *rsb); | |
4493 | +void cancel_conversion(struct dlm_lkb *lkb, int ret); | |
4494 | +struct dlm_lkb *conversion_deadlock_check(struct dlm_lkb *lkb); | |
4bf12011 | 4495 | + |
4496 | +#endif /* __LOCKING_DOT_H__ */ | |
4497 | diff -urN linux-orig/cluster/dlm/lockqueue.c linux-patched/cluster/dlm/lockqueue.c | |
4498 | --- linux-orig/cluster/dlm/lockqueue.c 1970-01-01 07:30:00.000000000 +0730 | |
10d56c87 AM |
4499 | +++ linux-patched/cluster/dlm/lockqueue.c 2004-07-13 18:57:22.000000000 +0800 |
4500 | @@ -0,0 +1,1092 @@ | |
4bf12011 | 4501 | +/****************************************************************************** |
4502 | +******************************************************************************* | |
4503 | +** | |
4504 | +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | |
4505 | +** Copyright (C) 2004 Red Hat, Inc. All rights reserved. | |
4506 | +** | |
4507 | +** This copyrighted material is made available to anyone wishing to use, | |
4508 | +** modify, copy, or redistribute it subject to the terms and conditions | |
4509 | +** of the GNU General Public License v.2. | |
4510 | +** | |
4511 | +******************************************************************************* | |
4512 | +******************************************************************************/ | |
4513 | + | |
4514 | +/* | |
4515 | + * lockqueue.c | |
4516 | + * | |
4517 | + * This controls the lock queue, which is where locks | |
4518 | + * come when they need to wait for a remote operation | |
4519 | + * to complete. | |
4520 | + * | |
4521 | + * This could also be thought of as the "high-level" comms | |
4522 | + * layer. | |
4523 | + * | |
4524 | + */ | |
4525 | + | |
4526 | +#include "dlm_internal.h" | |
4527 | +#include "lockqueue.h" | |
4528 | +#include "dir.h" | |
4529 | +#include "locking.h" | |
4530 | +#include "lkb.h" | |
4531 | +#include "lowcomms.h" | |
4532 | +#include "midcomms.h" | |
4533 | +#include "reccomms.h" | |
4534 | +#include "nodes.h" | |
4535 | +#include "lockspace.h" | |
4536 | +#include "ast.h" | |
4537 | +#include "memory.h" | |
4538 | +#include "rsb.h" | |
4539 | +#include "queries.h" | |
10d56c87 | 4540 | +#include "util.h" |
4bf12011 | 4541 | + |
10d56c87 AM |
4542 | +static void add_reply_lvb(struct dlm_lkb * lkb, struct dlm_reply *reply); |
4543 | +static void add_request_lvb(struct dlm_lkb * lkb, struct dlm_request *req); | |
4bf12011 | 4544 | + |
4545 | +/* | |
4546 | + * format of an entry on the request queue | |
4547 | + */ | |
4548 | +struct rq_entry { | |
4549 | + struct list_head rqe_list; | |
4550 | + uint32_t rqe_nodeid; | |
4551 | + char rqe_request[1]; | |
4552 | +}; | |
4553 | + | |
4554 | +/* | |
4555 | + * Add a new request (if appropriate) to the request queue and send the remote | |
4556 | + * request out. - runs in the context of the locking caller | |
4557 | + * | |
4558 | + * Recovery of a remote_stage request if the remote end fails while the lkb | |
4559 | + * is still on the lockqueue: | |
4560 | + * | |
4561 | + * o lkbs on the lockqueue are flagged with GDLM_LKFLG_LQRESEND in | |
4562 | + * lockqueue_lkb_mark() at the start of recovery. | |
4563 | + * | |
4564 | + * o Some lkb's will be rebuilt on new master rsb's during recovery. | |
4565 | + * (depends on the type of request, see below). | |
4566 | + * | |
4567 | + * o At the end of recovery, resend_cluster_requests() looks at these | |
4568 | + * LQRESEND lkb's and either: | |
4569 | + * | |
4570 | + * i) resends the request to the new master for the rsb where the | |
4571 | + * request is processed as usual. The lkb remains on the lockqueue until | |
4572 | + * the new master replies and we run process_lockqueue_reply(). | |
4573 | + * | |
4574 | + * ii) if we've become the rsb master, removes the lkb from the lockqueue | |
4575 | + * and processes the request locally via process_remastered_lkb(). | |
4576 | + * | |
4577 | + * GDLM_LQSTATE_WAIT_RSB (1) - these lockqueue lkb's are not on any rsb queue | |
4578 | + * and the request should be resent if the destination node has failed. | |
4579 | + * | |
4580 | + * GDLM_LQSTATE_WAIT_CONDGRANT (3) - this lockqueue lkb is on a local rsb's | |
4581 | + * wait queue. Don't rebuild this lkb on a new master rsb (the NOREBUILD flag | |
4582 | + * makes send_lkb_queue() skip it). Resend this request to the new master. | |
4583 | + * | |
4584 | + * GDLM_LQSTATE_WAIT_UNLOCK (4) - this lkb is on a local rsb's queue. It will | |
4585 | + * be rebuilt on the rsb on the new master (restbl_lkb_send/send_lkb_queue). | |
4586 | + * Resend this request to the new master. | |
4587 | + * | |
4588 | + * GDLM_LQSTATE_WAIT_CONVERT (2) - this lkb is on a local rsb convert queue. | |
4589 | + * It will be rebuilt on the new master rsb's granted queue. Resend this | |
4590 | + * request to the new master. | |
4591 | + */ | |
4592 | + | |
10d56c87 | 4593 | +int remote_stage(struct dlm_lkb *lkb, int state) |
4bf12011 | 4594 | +{ |
4595 | + int error; | |
4596 | + | |
4597 | + lkb->lkb_lockqueue_state = state; | |
4598 | + add_to_lockqueue(lkb); | |
4599 | + | |
4600 | + error = send_cluster_request(lkb, state); | |
4601 | + if (error < 0) { | |
4602 | + log_print("remote_stage error sending request %d", error); | |
4603 | + | |
4604 | + /* Leave on lockqueue, it will be resent to correct node during | |
4605 | + * recovery. */ | |
4606 | + | |
4607 | + /* | |
4608 | + lkb->lkb_lockqueue_state = 0; | |
4609 | + remove_from_lockqueue(lkb); | |
4610 | + return -ENOTCONN; | |
4611 | + */ | |
4612 | + } | |
4613 | + return 0; | |
4614 | +} | |
4615 | + | |
4616 | +/* | |
4617 | + * Requests received while the lockspace is in recovery get added to the | |
4618 | + * request queue and processed when recovery is complete. | |
4619 | + */ | |
4620 | + | |
10d56c87 | 4621 | +void add_to_requestqueue(struct dlm_ls *ls, int nodeid, char *request, int length) |
4bf12011 | 4622 | +{ |
4623 | + struct rq_entry *entry; | |
4624 | + | |
4625 | + if (in_nodes_gone(ls, nodeid)) | |
4626 | + return; | |
4627 | + | |
4628 | + entry = kmalloc(sizeof(struct rq_entry) + length, GFP_KERNEL); | |
4629 | + if (!entry) { | |
4630 | + // TODO something better | |
4631 | + printk("dlm: add_to_requestqueue: out of memory\n"); | |
4632 | + return; | |
4633 | + } | |
4634 | + | |
4635 | + log_debug(ls, "add_to_requestqueue %d", nodeid); | |
4636 | + entry->rqe_nodeid = nodeid; | |
4637 | + memcpy(entry->rqe_request, request, length); | |
4638 | + list_add_tail(&entry->rqe_list, &ls->ls_requestqueue); | |
4639 | +} | |
4640 | + | |
10d56c87 | 4641 | +int process_requestqueue(struct dlm_ls *ls) |
4bf12011 | 4642 | +{ |
4643 | + int error = 0, count = 0; | |
4644 | + struct rq_entry *entry, *safe; | |
10d56c87 | 4645 | + struct dlm_header *req; |
4bf12011 | 4646 | + |
4647 | + log_all(ls, "process held requests"); | |
4648 | + | |
4649 | + list_for_each_entry_safe(entry, safe, &ls->ls_requestqueue, rqe_list) { | |
10d56c87 | 4650 | + req = (struct dlm_header *) entry->rqe_request; |
4bf12011 | 4651 | + log_debug(ls, "process_requestqueue %u", entry->rqe_nodeid); |
4652 | + | |
4653 | + if (!test_bit(LSFL_LS_RUN, &ls->ls_flags)) { | |
4654 | + log_debug(ls, "process_requestqueue aborted"); | |
4655 | + error = -EINTR; | |
4656 | + break; | |
4657 | + } | |
4658 | + | |
4659 | + error = process_cluster_request(entry->rqe_nodeid, req, TRUE); | |
4660 | + if (error == -EINTR) { | |
4661 | + log_debug(ls, "process_requestqueue interrupted"); | |
4662 | + break; | |
4663 | + } | |
4664 | + | |
4665 | + list_del(&entry->rqe_list); | |
4666 | + kfree(entry); | |
4667 | + count++; | |
4668 | + error = 0; | |
4669 | + } | |
4670 | + | |
4671 | + log_all(ls, "processed %d requests", count); | |
4672 | + return error; | |
4673 | +} | |
4674 | + | |
10d56c87 | 4675 | +void wait_requestqueue(struct dlm_ls *ls) |
4bf12011 | 4676 | +{ |
4677 | + while (!list_empty(&ls->ls_requestqueue) && | |
4678 | + test_bit(LSFL_LS_RUN, &ls->ls_flags)) | |
4679 | + schedule(); | |
4680 | +} | |
4681 | + | |
4682 | +/* | |
4683 | + * Resdir requests (lookup or remove) and replies from before recovery are | |
4684 | + * invalid since the resdir was rebuilt. Clear them. Requests from nodes now | |
4685 | + * gone are also invalid. | |
4686 | + */ | |
4687 | + | |
10d56c87 | 4688 | +void purge_requestqueue(struct dlm_ls *ls) |
4bf12011 | 4689 | +{ |
4690 | + int count = 0; | |
4691 | + struct rq_entry *entry, *safe; | |
10d56c87 AM |
4692 | + struct dlm_header *req; |
4693 | + struct dlm_request *freq; | |
4694 | + struct dlm_lkb *lkb; | |
4bf12011 | 4695 | + |
4696 | + log_all(ls, "purge requests"); | |
4697 | + | |
4698 | + list_for_each_entry_safe(entry, safe, &ls->ls_requestqueue, rqe_list) { | |
10d56c87 AM |
4699 | + req = (struct dlm_header *) entry->rqe_request; |
4700 | + freq = (struct dlm_request *) req; | |
4bf12011 | 4701 | + |
4702 | + if (req->rh_cmd == GDLM_REMCMD_REM_RESDATA || | |
4703 | + req->rh_cmd == GDLM_REMCMD_LOOKUP || | |
4704 | + in_nodes_gone(ls, entry->rqe_nodeid)) { | |
4705 | + | |
4706 | + list_del(&entry->rqe_list); | |
4707 | + kfree(entry); | |
4708 | + count++; | |
4709 | + | |
4710 | + } else if (req->rh_cmd == GDLM_REMCMD_LOCKREPLY) { | |
4711 | + | |
4712 | + /* | |
4713 | + * Replies to resdir lookups are invalid and must be | |
4714 | + * purged. The lookup requests are marked in | |
4715 | + * lockqueue_lkb_mark and will be resent in | |
4716 | + * resend_cluster_requests. The only way to check if | |
4717 | + * this is a lookup reply is to look at the | |
4718 | + * lockqueue_state of the lkb. | |
4719 | + */ | |
4720 | + | |
4721 | + lkb = find_lock_by_id(ls, freq->rr_header.rh_lkid); | |
10d56c87 | 4722 | + DLM_ASSERT(lkb,); |
4bf12011 | 4723 | + if (lkb->lkb_lockqueue_state == GDLM_LQSTATE_WAIT_RSB) { |
4724 | + list_del(&entry->rqe_list); | |
4725 | + kfree(entry); | |
4726 | + count++; | |
4727 | + } | |
4728 | + } | |
4729 | + } | |
4730 | + | |
4731 | + log_all(ls, "purged %d requests", count); | |
4732 | +} | |
4733 | + | |
4734 | +/* | |
4735 | + * Check if there's a reply for the given lkid in the requestqueue. | |
4736 | + */ | |
4737 | + | |
10d56c87 | 4738 | +int reply_in_requestqueue(struct dlm_ls *ls, int lkid) |
4bf12011 | 4739 | +{ |
4740 | + int rv = FALSE; | |
4741 | + struct rq_entry *entry, *safe; | |
10d56c87 AM |
4742 | + struct dlm_header *req; |
4743 | + struct dlm_request *freq; | |
4bf12011 | 4744 | + |
4745 | + list_for_each_entry_safe(entry, safe, &ls->ls_requestqueue, rqe_list) { | |
10d56c87 AM |
4746 | + req = (struct dlm_header *) entry->rqe_request; |
4747 | + freq = (struct dlm_request *) req; | |
4bf12011 | 4748 | + |
4749 | + if (req->rh_cmd == GDLM_REMCMD_LOCKREPLY && | |
4750 | + freq->rr_header.rh_lkid == lkid) { | |
4751 | + rv = TRUE; | |
4752 | + break; | |
4753 | + } | |
4754 | + } | |
4755 | + | |
4756 | + return rv; | |
4757 | +} | |
4758 | + | |
10d56c87 | 4759 | +void allocate_and_copy_lvb(struct dlm_ls *ls, char **lvbptr, char *src) |
4bf12011 | 4760 | +{ |
4761 | + if (!*lvbptr) | |
4762 | + *lvbptr = allocate_lvb(ls); | |
4763 | + if (*lvbptr) | |
4764 | + memcpy(*lvbptr, src, DLM_LVB_LEN); | |
4765 | +} | |
4766 | + | |
4767 | +/* | |
4768 | + * Process a lockqueue LKB after it has had its remote processing complete and | |
10d56c87 AM |
4769 | + * been pulled from the lockqueue. Runs in the context of the DLM recvd thread |
4770 | + * on the machine that requested the lock. | |
4bf12011 | 4771 | + */ |
4772 | + | |
10d56c87 AM |
4773 | +static void process_lockqueue_reply(struct dlm_lkb *lkb, |
4774 | + struct dlm_reply *reply, | |
4775 | + uint32_t nodeid) | |
4bf12011 | 4776 | +{ |
10d56c87 AM |
4777 | + struct dlm_rsb *rsb = lkb->lkb_resource; |
4778 | + struct dlm_ls *ls = rsb->res_ls; | |
4779 | + int oldstate, state = lkb->lkb_lockqueue_state; | |
4bf12011 | 4780 | + |
4781 | + lkb->lkb_lockqueue_state = 0; | |
4782 | + if (state) | |
4783 | + remove_from_lockqueue(lkb); | |
4784 | + | |
4785 | + switch (state) { | |
4786 | + case GDLM_LQSTATE_WAIT_RSB: | |
4787 | + | |
10d56c87 AM |
4788 | + DLM_ASSERT(reply->rl_status == 0, |
4789 | + print_lkb(lkb); | |
4790 | + print_rsb(rsb); | |
4791 | + print_reply(reply);); | |
4792 | + | |
4793 | + DLM_ASSERT(rsb->res_nodeid == -1 || | |
4794 | + rsb->res_nodeid == 0, | |
4795 | + print_lkb(lkb); | |
4796 | + print_rsb(rsb); | |
4797 | + print_reply(reply);); | |
4798 | + | |
4799 | + if (reply->rl_nodeid == our_nodeid()) { | |
4800 | + if (rsb->res_nodeid == -1) { | |
4801 | + set_bit(RESFL_MASTER, &rsb->res_flags); | |
4802 | + rsb->res_nodeid = 0; | |
4803 | + } else { | |
4804 | + log_all(ls, "ignore master reply %x %u", | |
4805 | + lkb->lkb_id, nodeid); | |
4806 | + } | |
4807 | + } else { | |
4808 | + DLM_ASSERT(rsb->res_nodeid == -1, | |
4809 | + print_lkb(lkb); | |
4810 | + print_rsb(rsb); | |
4811 | + print_reply(reply);); | |
4bf12011 | 4812 | + |
10d56c87 | 4813 | + clear_bit(RESFL_MASTER, &rsb->res_flags); |
4bf12011 | 4814 | + rsb->res_nodeid = reply->rl_nodeid; |
10d56c87 | 4815 | + } |
4bf12011 | 4816 | + |
10d56c87 AM |
4817 | + log_debug(ls, "lookup reply %x %u", lkb->lkb_id, |
4818 | + rsb->res_nodeid); | |
4bf12011 | 4819 | + |
10d56c87 AM |
4820 | + lkb->lkb_nodeid = rsb->res_nodeid; |
4821 | + dlm_lock_stage2(ls, lkb, rsb, lkb->lkb_lockqueue_flags); | |
4bf12011 | 4822 | + break; |
4823 | + | |
4824 | + case GDLM_LQSTATE_WAIT_CONVERT: | |
4825 | + case GDLM_LQSTATE_WAIT_CONDGRANT: | |
4826 | + | |
4827 | + /* | |
4828 | + * After a remote lock/conversion/grant request we put the lock | |
4829 | + * on the right queue and send an AST if appropriate. Any lock | |
4830 | + * shuffling (eg newly granted locks because this one was | |
4831 | + * converted downwards) will be dealt with in separate messages | |
4832 | + * (which may be in the same network message) | |
4833 | + */ | |
4834 | + | |
10d56c87 AM |
4835 | + |
4836 | + /* the destination wasn't the master */ | |
4837 | + if (reply->rl_status == -EINVAL) { | |
4838 | + int master_nodeid; | |
4839 | + | |
4840 | + log_debug(ls, "resend lookup"); | |
4841 | + lkb_dequeue(lkb); | |
4842 | + rsb->res_nodeid = -1; | |
4843 | + lkb->lkb_nodeid = -1; | |
4844 | + if (get_directory_nodeid(rsb) != our_nodeid()) | |
4845 | + remote_stage(lkb, GDLM_LQSTATE_WAIT_RSB); | |
4846 | + else { | |
4847 | + dlm_dir_lookup(ls, our_nodeid(), rsb->res_name, | |
4848 | + rsb->res_length, &master_nodeid); | |
4849 | + | |
4850 | + if (master_nodeid == our_nodeid()) { | |
4851 | + set_bit(RESFL_MASTER, &rsb->res_flags); | |
4852 | + master_nodeid = 0; | |
4853 | + } | |
4854 | + else | |
4855 | + clear_bit(RESFL_MASTER,&rsb->res_flags); | |
4856 | + rsb->res_nodeid = master_nodeid; | |
4857 | + lkb->lkb_nodeid = master_nodeid; | |
4858 | + dlm_lock_stage2(ls, lkb, rsb, | |
4859 | + lkb->lkb_lockqueue_flags); | |
4860 | + } | |
4861 | + break; | |
4862 | + } | |
4863 | + | |
4bf12011 | 4864 | + if (!lkb->lkb_remid) |
4865 | + lkb->lkb_remid = reply->rl_lkid; | |
4866 | + | |
4867 | + /* | |
4868 | + * The remote request failed (we assume because of NOQUEUE). | |
4869 | + * If this is a new request (non-conv) the lkb was created just | |
4870 | + * for it so the lkb should be freed. If this was a | |
4871 | + * conversion, the lkb already existed so we should put it back | |
4872 | + * on the grant queue. | |
4873 | + */ | |
4874 | + | |
4875 | + if (reply->rl_status != 0) { | |
10d56c87 | 4876 | + DLM_ASSERT(reply->rl_status == -EAGAIN,); |
4bf12011 | 4877 | + |
4878 | + if (state == GDLM_LQSTATE_WAIT_CONDGRANT) { | |
4879 | + res_lkb_dequeue(lkb); | |
5cdbd17b AM |
4880 | + lkb->lkb_retstatus = reply->rl_status; |
4881 | + queue_ast(lkb, AST_COMP | AST_DEL, 0); | |
4882 | + } else { | |
4bf12011 | 4883 | + res_lkb_swqueue(rsb, lkb, GDLM_LKSTS_GRANTED); |
5cdbd17b AM |
4884 | + lkb->lkb_retstatus = reply->rl_status; |
4885 | + queue_ast(lkb, AST_COMP, 0); | |
4886 | + } | |
4bf12011 | 4887 | + break; |
4888 | + } | |
4889 | + | |
4890 | + /* | |
4891 | + * The remote request was successful in granting the request or | |
4892 | + * queuing it to be granted later. Add the lkb to the | |
4893 | + * appropriate rsb queue. | |
4894 | + */ | |
4895 | + | |
4896 | + switch (reply->rl_lockstate) { | |
4897 | + case GDLM_LKSTS_GRANTED: | |
4898 | + | |
4899 | + /* Compact version of grant_lock(). */ | |
4900 | + | |
4901 | + down_write(&rsb->res_lock); | |
4902 | + if (lkb->lkb_flags & GDLM_LKFLG_VALBLK) | |
4903 | + memcpy(lkb->lkb_lvbptr, reply->rl_lvb, | |
4904 | + DLM_LVB_LEN); | |
4905 | + | |
4906 | + lkb->lkb_grmode = lkb->lkb_rqmode; | |
4907 | + lkb->lkb_rqmode = DLM_LOCK_IV; | |
4908 | + lkb_swqueue(rsb, lkb, GDLM_LKSTS_GRANTED); | |
4909 | + | |
4910 | + if (lkb->lkb_range) { | |
4911 | + lkb->lkb_range[GR_RANGE_START] = | |
4912 | + lkb->lkb_range[RQ_RANGE_START]; | |
4913 | + lkb->lkb_range[GR_RANGE_END] = | |
4914 | + lkb->lkb_range[RQ_RANGE_END]; | |
4915 | + } | |
4916 | + up_write(&rsb->res_lock); | |
4917 | + | |
4918 | + lkb->lkb_retstatus = 0; | |
5cdbd17b | 4919 | + queue_ast(lkb, AST_COMP, 0); |
4bf12011 | 4920 | + break; |
4921 | + | |
4922 | + case GDLM_LKSTS_WAITING: | |
4923 | + | |
4924 | + if (lkb->lkb_status != GDLM_LKSTS_GRANTED) | |
4925 | + res_lkb_swqueue(rsb, lkb, GDLM_LKSTS_WAITING); | |
4926 | + else | |
4927 | + log_error(ls, "wait reply for granted %x %u", | |
4928 | + lkb->lkb_id, lkb->lkb_nodeid); | |
4929 | + break; | |
4930 | + | |
4931 | + case GDLM_LKSTS_CONVERT: | |
4932 | + | |
4933 | + if (lkb->lkb_status != GDLM_LKSTS_GRANTED) | |
4934 | + res_lkb_swqueue(rsb, lkb, GDLM_LKSTS_CONVERT); | |
4935 | + else | |
4936 | + log_error(ls, "convert reply for granted %x %u", | |
4937 | + lkb->lkb_id, lkb->lkb_nodeid); | |
4938 | + break; | |
4939 | + | |
4940 | + default: | |
4941 | + log_error(ls, "process_lockqueue_reply state %d", | |
4942 | + reply->rl_lockstate); | |
4943 | + } | |
4944 | + | |
4945 | + break; | |
4946 | + | |
4947 | + case GDLM_LQSTATE_WAIT_UNLOCK: | |
4948 | + | |
4949 | + /* | |
4950 | + * Unlocks should never fail. Update local lock info. This | |
4951 | + * always sends completion AST with status in lksb | |
4952 | + */ | |
4953 | + | |
10d56c87 | 4954 | + DLM_ASSERT(reply->rl_status == 0,); |
4bf12011 | 4955 | + oldstate = res_lkb_dequeue(lkb); |
4956 | + | |
4957 | + /* Differentiate between unlocks and conversion cancellations */ | |
4958 | + if (lkb->lkb_lockqueue_flags & DLM_LKF_CANCEL && | |
4959 | + oldstate == GDLM_LKSTS_CONVERT) { | |
4960 | + res_lkb_enqueue(lkb->lkb_resource, lkb, | |
4961 | + GDLM_LKSTS_GRANTED); | |
4962 | + lkb->lkb_retstatus = -DLM_ECANCEL; | |
5cdbd17b | 4963 | + queue_ast(lkb, AST_COMP, 0); |
4bf12011 | 4964 | + } else { |
4bf12011 | 4965 | + lkb->lkb_retstatus = -DLM_EUNLOCK; |
5cdbd17b | 4966 | + queue_ast(lkb, AST_COMP | AST_DEL, 0); |
4bf12011 | 4967 | + } |
4bf12011 | 4968 | + break; |
4969 | + | |
4970 | + default: | |
4971 | + log_error(ls, "process_lockqueue_reply id %x state %d", | |
4972 | + lkb->lkb_id, state); | |
4973 | + } | |
4974 | +} | |
4975 | + | |
4976 | +/* | |
4977 | + * Tell a remote node to grant a lock. This happens when we are the master | |
4978 | + * copy for a lock that is actually held on a remote node. The remote end is | |
4979 | + * also responsible for sending the completion AST. | |
4980 | + */ | |
4981 | + | |
10d56c87 | 4982 | +void remote_grant(struct dlm_lkb *lkb) |
4bf12011 | 4983 | +{ |
4984 | + struct writequeue_entry *e; | |
10d56c87 | 4985 | + struct dlm_request *req; |
4bf12011 | 4986 | + |
4987 | + // TODO Error handling | |
4988 | + e = lowcomms_get_buffer(lkb->lkb_nodeid, | |
10d56c87 | 4989 | + sizeof(struct dlm_request), |
4bf12011 | 4990 | + lkb->lkb_resource->res_ls->ls_allocation, |
4991 | + (char **) &req); | |
4992 | + if (!e) | |
4993 | + return; | |
4994 | + | |
4995 | + req->rr_header.rh_cmd = GDLM_REMCMD_LOCKGRANT; | |
10d56c87 | 4996 | + req->rr_header.rh_length = sizeof(struct dlm_request); |
4bf12011 | 4997 | + req->rr_header.rh_flags = 0; |
4998 | + req->rr_header.rh_lkid = lkb->lkb_id; | |
4999 | + req->rr_header.rh_lockspace = lkb->lkb_resource->res_ls->ls_global_id; | |
5000 | + req->rr_remlkid = lkb->lkb_remid; | |
5001 | + req->rr_flags = 0; | |
5002 | + | |
5003 | + if (lkb->lkb_flags & GDLM_LKFLG_DEMOTED) { | |
5004 | + /* This is a confusing non-standard use of rr_flags which is | |
5005 | + * usually used to pass lockqueue_flags. */ | |
5006 | + req->rr_flags |= GDLM_LKFLG_DEMOTED; | |
5007 | + } | |
5008 | + | |
5009 | + add_request_lvb(lkb, req); | |
5010 | + midcomms_send_buffer(&req->rr_header, e); | |
5011 | +} | |
5012 | + | |
10d56c87 | 5013 | +void reply_and_grant(struct dlm_lkb *lkb) |
4bf12011 | 5014 | +{ |
10d56c87 AM |
5015 | + struct dlm_request *req = lkb->lkb_request; |
5016 | + struct dlm_reply *reply; | |
4bf12011 | 5017 | + struct writequeue_entry *e; |
5018 | + | |
5019 | + // TODO Error handling | |
5020 | + e = lowcomms_get_buffer(lkb->lkb_nodeid, | |
10d56c87 | 5021 | + sizeof(struct dlm_reply), |
4bf12011 | 5022 | + lkb->lkb_resource->res_ls->ls_allocation, |
5023 | + (char **) &reply); | |
5024 | + if (!e) | |
5025 | + return; | |
5026 | + | |
5027 | + reply->rl_header.rh_cmd = GDLM_REMCMD_LOCKREPLY; | |
5028 | + reply->rl_header.rh_flags = 0; | |
10d56c87 | 5029 | + reply->rl_header.rh_length = sizeof(struct dlm_reply); |
4bf12011 | 5030 | + reply->rl_header.rh_lkid = req->rr_header.rh_lkid; |
5031 | + reply->rl_header.rh_lockspace = req->rr_header.rh_lockspace; | |
5032 | + | |
5033 | + reply->rl_status = lkb->lkb_retstatus; | |
5034 | + reply->rl_lockstate = lkb->lkb_status; | |
5035 | + reply->rl_lkid = lkb->lkb_id; | |
5036 | + | |
10d56c87 | 5037 | + DLM_ASSERT(!(lkb->lkb_flags & GDLM_LKFLG_DEMOTED),); |
4bf12011 | 5038 | + |
5039 | + lkb->lkb_request = NULL; | |
5040 | + | |
5041 | + add_reply_lvb(lkb, reply); | |
5042 | + midcomms_send_buffer(&reply->rl_header, e); | |
5043 | +} | |
5044 | + | |
5045 | +/* | |
5046 | + * Request removal of a dead entry in the resource directory | |
5047 | + */ | |
5048 | + | |
10d56c87 AM |
5049 | +void remote_remove_resdata(struct dlm_ls *ls, int nodeid, char *name, |
5050 | + int namelen) | |
4bf12011 | 5051 | +{ |
5052 | + struct writequeue_entry *e; | |
10d56c87 | 5053 | + struct dlm_request *req; |
4bf12011 | 5054 | + |
5055 | + if (!test_bit(LSFL_LS_RUN, &ls->ls_flags)) { | |
10d56c87 | 5056 | + struct dlm_rcom *rc = allocate_rcom_buffer(ls); |
4bf12011 | 5057 | + |
5058 | + memcpy(rc->rc_buf, name, namelen); | |
5059 | + rc->rc_datalen = namelen; | |
5060 | + | |
5061 | + rcom_send_message(ls, nodeid, RECCOMM_REMRESDATA, rc, 0); | |
5062 | + | |
5063 | + free_rcom_buffer(rc); | |
5064 | + return; | |
5065 | + } | |
5066 | + // TODO Error handling | |
5067 | + e = lowcomms_get_buffer(nodeid, | |
10d56c87 | 5068 | + sizeof(struct dlm_request) + namelen - 1, |
4bf12011 | 5069 | + ls->ls_allocation, (char **) &req); |
5070 | + if (!e) | |
5071 | + return; | |
5072 | + | |
10d56c87 | 5073 | + memset(req, 0, sizeof(struct dlm_request) + namelen - 1); |
4bf12011 | 5074 | + req->rr_header.rh_cmd = GDLM_REMCMD_REM_RESDATA; |
5075 | + req->rr_header.rh_length = | |
10d56c87 | 5076 | + sizeof(struct dlm_request) + namelen - 1; |
4bf12011 | 5077 | + req->rr_header.rh_flags = 0; |
5078 | + req->rr_header.rh_lkid = 0; | |
5079 | + req->rr_header.rh_lockspace = ls->ls_global_id; | |
5080 | + req->rr_remlkid = 0; | |
4bf12011 | 5081 | + memcpy(req->rr_name, name, namelen); |
5082 | + | |
5083 | + midcomms_send_buffer(&req->rr_header, e); | |
5084 | +} | |
5085 | + | |
5086 | +/* | |
5087 | + * Send remote cluster request to directory or master node before the request | |
5088 | + * is put on the lock queue. Runs in the context of the locking caller. | |
5089 | + */ | |
5090 | + | |
10d56c87 | 5091 | +int send_cluster_request(struct dlm_lkb *lkb, int state) |
4bf12011 | 5092 | +{ |
5093 | + uint32_t target_nodeid; | |
10d56c87 AM |
5094 | + struct dlm_rsb *rsb = lkb->lkb_resource; |
5095 | + struct dlm_ls *ls = rsb->res_ls; | |
5096 | + struct dlm_request *req; | |
4bf12011 | 5097 | + struct writequeue_entry *e; |
5098 | + | |
4bf12011 | 5099 | + if (state == GDLM_LQSTATE_WAIT_RSB) |
5100 | + target_nodeid = get_directory_nodeid(rsb); | |
10d56c87 AM |
5101 | + else |
5102 | + target_nodeid = lkb->lkb_nodeid; | |
5103 | + | |
5104 | + /* during recovery it's valid for target_nodeid to equal our own; | |
5105 | + resend_cluster_requests does this to get requests back on track */ | |
4bf12011 | 5106 | + |
10d56c87 AM |
5107 | + DLM_ASSERT(target_nodeid && target_nodeid != -1, |
5108 | + print_lkb(lkb); | |
5109 | + print_rsb(rsb); | |
5110 | + printk("target_nodeid %u\n", target_nodeid);); | |
4bf12011 | 5111 | + |
5112 | + if (!test_bit(LSFL_LS_RUN, &ls->ls_flags)) { | |
5113 | + /* this may happen when called by resend_cluster_request */ | |
5114 | + log_error(ls, "send_cluster_request to %u state %d recovery", | |
5115 | + target_nodeid, state); | |
5116 | + } | |
5117 | + | |
5118 | + e = lowcomms_get_buffer(target_nodeid, | |
10d56c87 | 5119 | + sizeof(struct dlm_request) + |
4bf12011 | 5120 | + rsb->res_length - 1, ls->ls_allocation, |
5121 | + (char **) &req); | |
5122 | + if (!e) | |
5123 | + return -ENOBUFS; | |
10d56c87 | 5124 | + memset(req, 0, sizeof(struct dlm_request) + rsb->res_length - 1); |
4bf12011 | 5125 | + |
5126 | + /* Common stuff, some are just defaults */ | |
5127 | + | |
5128 | + if (lkb->lkb_bastaddr) | |
5cdbd17b | 5129 | + req->rr_asts = AST_BAST; |
4bf12011 | 5130 | + if (lkb->lkb_astaddr) |
5cdbd17b | 5131 | + req->rr_asts |= AST_COMP; |
4bf12011 | 5132 | + if (lkb->lkb_parent) |
5133 | + req->rr_remparid = lkb->lkb_parent->lkb_remid; | |
5134 | + | |
5135 | + req->rr_flags = lkb->lkb_lockqueue_flags; | |
5136 | + req->rr_rqmode = lkb->lkb_rqmode; | |
5137 | + req->rr_remlkid = lkb->lkb_remid; | |
5138 | + req->rr_header.rh_length = | |
10d56c87 | 5139 | + sizeof(struct dlm_request) + rsb->res_length - 1; |
4bf12011 | 5140 | + req->rr_header.rh_flags = 0; |
5141 | + req->rr_header.rh_lkid = lkb->lkb_id; | |
5142 | + req->rr_header.rh_lockspace = ls->ls_global_id; | |
5143 | + | |
5144 | + switch (state) { | |
5145 | + | |
5146 | + case GDLM_LQSTATE_WAIT_RSB: | |
5147 | + | |
10d56c87 AM |
5148 | + DLM_ASSERT(!lkb->lkb_parent, |
5149 | + print_lkb(lkb); | |
5150 | + print_rsb(rsb);); | |
5151 | + | |
5152 | + DLM_ASSERT(rsb->res_nodeid == -1, | |
5153 | + print_lkb(lkb); | |
5154 | + print_rsb(rsb);); | |
5155 | + | |
5156 | + log_debug(ls, "send lu %x to %u", lkb->lkb_id, target_nodeid); | |
4bf12011 | 5157 | + |
5158 | + req->rr_header.rh_cmd = GDLM_REMCMD_LOOKUP; | |
5159 | + memcpy(req->rr_name, rsb->res_name, rsb->res_length); | |
5160 | + break; | |
5161 | + | |
5162 | + case GDLM_LQSTATE_WAIT_CONVERT: | |
5163 | + | |
10d56c87 AM |
5164 | + DLM_ASSERT(lkb->lkb_nodeid == rsb->res_nodeid, |
5165 | + print_lkb(lkb); | |
5166 | + print_rsb(rsb);); | |
5167 | + | |
5168 | + log_debug(ls, "send cv %x to %u", lkb->lkb_id, target_nodeid); | |
5169 | + | |
4bf12011 | 5170 | + req->rr_header.rh_cmd = GDLM_REMCMD_CONVREQUEST; |
5171 | + if (lkb->lkb_range) { | |
5172 | + req->rr_flags |= GDLM_LKFLG_RANGE; | |
5173 | + req->rr_range_start = lkb->lkb_range[RQ_RANGE_START]; | |
5174 | + req->rr_range_end = lkb->lkb_range[RQ_RANGE_END]; | |
5175 | + } | |
5176 | + break; | |
5177 | + | |
5178 | + case GDLM_LQSTATE_WAIT_CONDGRANT: | |
5179 | + | |
10d56c87 AM |
5180 | + DLM_ASSERT(lkb->lkb_nodeid == rsb->res_nodeid, |
5181 | + print_lkb(lkb); | |
5182 | + print_rsb(rsb);); | |
5183 | + | |
5184 | + log_debug(ls, "send rq %x to %u", lkb->lkb_id, target_nodeid); | |
5185 | + | |
4bf12011 | 5186 | + req->rr_header.rh_cmd = GDLM_REMCMD_LOCKREQUEST; |
4bf12011 | 5187 | + memcpy(req->rr_name, rsb->res_name, rsb->res_length); |
5188 | + if (lkb->lkb_range) { | |
5189 | + req->rr_flags |= GDLM_LKFLG_RANGE; | |
5190 | + req->rr_range_start = lkb->lkb_range[RQ_RANGE_START]; | |
5191 | + req->rr_range_end = lkb->lkb_range[RQ_RANGE_END]; | |
5192 | + } | |
5193 | + break; | |
5194 | + | |
5195 | + case GDLM_LQSTATE_WAIT_UNLOCK: | |
5196 | + | |
10d56c87 AM |
5197 | + log_debug(ls, "send un %x to %u", lkb->lkb_id, target_nodeid); |
5198 | + | |
5199 | + if (rsb->res_nodeid != -1) | |
5200 | + log_all(ls, "un %x to %u rsb nodeid %u", lkb->lkb_id, | |
5201 | + target_nodeid, rsb->res_nodeid); | |
5202 | + | |
4bf12011 | 5203 | + req->rr_header.rh_cmd = GDLM_REMCMD_UNLOCKREQUEST; |
5204 | + break; | |
5205 | + | |
5206 | + default: | |
10d56c87 | 5207 | + DLM_ASSERT(0, printk("Unknown cluster request\n");); |
4bf12011 | 5208 | + } |
5209 | + | |
5210 | + add_request_lvb(lkb, req); | |
5211 | + midcomms_send_buffer(&req->rr_header, e); | |
5212 | + | |
5213 | + return 0; | |
5214 | +} | |
5215 | + | |
5216 | +/* | |
5217 | + * We got a request from another cluster node, process it and return an info | |
5218 | + * structure with the lock state/LVB etc as required. Executes in the DLM's | |
5219 | + * recvd thread. | |
5220 | + */ | |
5221 | + | |
10d56c87 | 5222 | +int process_cluster_request(int nodeid, struct dlm_header *req, int recovery) |
4bf12011 | 5223 | +{ |
10d56c87 AM |
5224 | + struct dlm_ls *lspace; |
5225 | + struct dlm_lkb *lkb = NULL; | |
5226 | + struct dlm_rsb *rsb; | |
4bf12011 | 5227 | + int send_reply = 0, status = 0, namelen; |
10d56c87 AM |
5228 | + struct dlm_request *freq = (struct dlm_request *) req; |
5229 | + struct dlm_reply *rp = (struct dlm_reply *) req; | |
5230 | + struct dlm_reply reply; | |
4bf12011 | 5231 | + |
5232 | + lspace = find_lockspace_by_global_id(req->rh_lockspace); | |
5233 | + | |
5234 | + if (!lspace) { | |
5235 | + log_print("process_cluster_request invalid lockspace %x " | |
5236 | + "from %d req %u", req->rh_lockspace, nodeid, | |
5237 | + req->rh_cmd); | |
5238 | + status = -EINVAL; | |
5239 | + goto out; | |
5240 | + } | |
5241 | + | |
5242 | + /* wait for recoverd to drain requestqueue */ | |
5243 | + if (!recovery) | |
5244 | + wait_requestqueue(lspace); | |
5245 | + | |
5246 | + /* | |
5247 | + * If we're in recovery then queue the request for later. Otherwise, | |
5248 | + * we still need to get the "in_recovery" lock to make sure the | |
5249 | + * recovery itself doesn't start until we are done. | |
5250 | + */ | |
5251 | + retry: | |
5252 | + if (!test_bit(LSFL_LS_RUN, &lspace->ls_flags)) { | |
5253 | + if (test_bit(LSFL_REQUEST_WARN, &lspace->ls_flags)) | |
5254 | + log_error(lspace, "process_cluster_request warning %u", | |
5255 | + nodeid); | |
5256 | + add_to_requestqueue(lspace, nodeid, (char *) req, | |
5257 | + req->rh_length); | |
10d56c87 AM |
5258 | + log_debug(lspace, "process_cluster_request queue %d from %u", |
5259 | + req->rh_cmd, nodeid); | |
4bf12011 | 5260 | + status = -EINTR; |
5261 | + goto out; | |
5262 | + } | |
5263 | + if (!down_read_trylock(&lspace->ls_in_recovery)) { | |
5264 | + schedule(); | |
5265 | + goto retry; | |
5266 | + } | |
5267 | + | |
5268 | + | |
5269 | + /* | |
5270 | + * Process the request. | |
5271 | + */ | |
5272 | + | |
5273 | + switch (req->rh_cmd) { | |
5274 | + | |
5275 | + case GDLM_REMCMD_LOOKUP: | |
5276 | + { | |
10d56c87 | 5277 | + uint32_t dir_nodeid, r_nodeid; |
4bf12011 | 5278 | + int status; |
4bf12011 | 5279 | + |
5280 | + namelen = freq->rr_header.rh_length - sizeof(*freq) + 1; | |
5281 | + | |
5282 | + dir_nodeid = name_to_directory_nodeid(lspace, | |
5283 | + freq->rr_name, | |
5284 | + namelen); | |
5285 | + if (dir_nodeid != our_nodeid()) | |
5286 | + log_debug(lspace, "ignoring directory lookup"); | |
5287 | + | |
10d56c87 AM |
5288 | + status = dlm_dir_lookup(lspace, nodeid, freq->rr_name, |
5289 | + namelen, &r_nodeid); | |
4bf12011 | 5290 | + if (status) |
5291 | + status = -ENOMEM; | |
5292 | + | |
5293 | + reply.rl_status = status; | |
5294 | + reply.rl_lockstate = 0; | |
10d56c87 | 5295 | + reply.rl_nodeid = r_nodeid; |
4bf12011 | 5296 | + } |
5297 | + send_reply = 1; | |
5298 | + break; | |
5299 | + | |
5300 | + case GDLM_REMCMD_REM_RESDATA: | |
5301 | + | |
5302 | + namelen = freq->rr_header.rh_length - sizeof(*freq) + 1; | |
10d56c87 | 5303 | + remove_resdata(lspace, nodeid, freq->rr_name, namelen); |
4bf12011 | 5304 | + break; |
5305 | + | |
5306 | + case GDLM_REMCMD_LOCKREQUEST: | |
5307 | + | |
5308 | + lkb = remote_stage2(nodeid, lspace, freq); | |
5309 | + if (lkb) { | |
5310 | + lkb->lkb_request = freq; | |
10d56c87 AM |
5311 | + if (lkb->lkb_retstatus != -EINVAL) |
5312 | + dlm_lock_stage3(lkb); | |
4bf12011 | 5313 | + |
5314 | + /* | |
5315 | + * If the request was granted in lock_stage3, then a | |
5316 | + * reply message was already sent in combination with | |
5317 | + * the grant message and lkb_request is NULL. | |
5318 | + */ | |
5319 | + | |
5320 | + if (lkb->lkb_request) { | |
5321 | + lkb->lkb_request = NULL; | |
5322 | + send_reply = 1; | |
5323 | + reply.rl_status = lkb->lkb_retstatus; | |
5324 | + reply.rl_lockstate = lkb->lkb_status; | |
5325 | + reply.rl_lkid = lkb->lkb_id; | |
5326 | + | |
5327 | + /* | |
5328 | + * If the request could not be granted and the | |
5329 | + * user won't wait, then free up the LKB | |
5330 | + */ | |
5331 | + | |
5cdbd17b | 5332 | + if (lkb->lkb_retstatus == -EAGAIN) { |
4bf12011 | 5333 | + rsb = lkb->lkb_resource; |
5334 | + release_lkb(lspace, lkb); | |
5335 | + release_rsb(rsb); | |
5336 | + lkb = NULL; | |
5337 | + } | |
10d56c87 AM |
5338 | + else if (lkb->lkb_retstatus == -EINVAL) { |
5339 | + release_lkb(lspace, lkb); | |
5340 | + lkb = NULL; | |
5341 | + } | |
4bf12011 | 5342 | + } |
5343 | + } else { | |
5344 | + reply.rl_status = -ENOMEM; | |
5345 | + send_reply = 1; | |
5346 | + } | |
5347 | + break; | |
5348 | + | |
5349 | + case GDLM_REMCMD_CONVREQUEST: | |
5350 | + | |
5351 | + lkb = find_lock_by_id(lspace, freq->rr_remlkid); | |
5352 | + | |
10d56c87 AM |
5353 | + DLM_ASSERT(lkb, |
5354 | + print_request(freq); | |
5355 | + printk("nodeid %u\n", nodeid);); | |
4bf12011 | 5356 | + |
10d56c87 AM |
5357 | + rsb = lkb->lkb_resource; |
5358 | + | |
5359 | + DLM_ASSERT(rsb, | |
5360 | + print_lkb(lkb); | |
5361 | + print_request(freq); | |
5362 | + printk("nodeid %u\n", nodeid);); | |
5363 | + | |
5364 | + DLM_ASSERT(!rsb->res_nodeid, | |
5365 | + print_lkb(lkb); | |
5366 | + print_rsb(rsb); | |
5367 | + print_request(freq); | |
5368 | + printk("nodeid %u\n", nodeid);); | |
5369 | + | |
5370 | + DLM_ASSERT(lkb->lkb_flags & GDLM_LKFLG_MSTCPY, | |
5371 | + print_lkb(lkb); | |
5372 | + print_rsb(rsb); | |
5373 | + print_request(freq); | |
5374 | + printk("nodeid %u\n", nodeid);); | |
5375 | + | |
5376 | + DLM_ASSERT(lkb->lkb_status == GDLM_LKSTS_GRANTED, | |
5377 | + print_lkb(lkb); | |
5378 | + print_rsb(rsb); | |
5379 | + print_request(freq); | |
5380 | + printk("nodeid %u\n", nodeid);); | |
4bf12011 | 5381 | + |
5382 | + lkb->lkb_rqmode = freq->rr_rqmode; | |
5383 | + lkb->lkb_lockqueue_flags = freq->rr_flags; | |
5384 | + lkb->lkb_request = freq; | |
5385 | + lkb->lkb_flags &= ~GDLM_LKFLG_DEMOTED; | |
5386 | + | |
10d56c87 AM |
5387 | + if (lkb->lkb_flags & GDLM_LKFLG_VALBLK || |
5388 | + freq->rr_flags & DLM_LKF_VALBLK) { | |
4bf12011 | 5389 | + lkb->lkb_flags |= GDLM_LKFLG_VALBLK; |
5390 | + allocate_and_copy_lvb(lspace, &lkb->lkb_lvbptr, | |
5391 | + freq->rr_lvb); | |
5392 | + } | |
5393 | + | |
5394 | + if (freq->rr_flags & GDLM_LKFLG_RANGE) { | |
5395 | + if (lkb_set_range(lspace, lkb, freq->rr_range_start, | |
5396 | + freq->rr_range_end)) { | |
5397 | + reply.rl_status = -ENOMEM; | |
5398 | + send_reply = 1; | |
5399 | + goto out; | |
5400 | + } | |
5401 | + } | |
5402 | + | |
10d56c87 AM |
5403 | + log_debug(lspace, "cv %u from %u %x \"%s\"", lkb->lkb_rqmode, |
5404 | + nodeid, lkb->lkb_id, rsb->res_name); | |
5405 | + | |
4bf12011 | 5406 | + dlm_convert_stage2(lkb, FALSE); |
5407 | + | |
5408 | + /* | |
5409 | + * If the conv request was granted in stage2, then a reply | |
5410 | + * message was already sent in combination with the grant | |
5411 | + * message. | |
5412 | + */ | |
5413 | + | |
5414 | + if (lkb->lkb_request) { | |
5415 | + lkb->lkb_request = NULL; | |
5416 | + send_reply = 1; | |
5417 | + reply.rl_status = lkb->lkb_retstatus; | |
5418 | + reply.rl_lockstate = lkb->lkb_status; | |
5419 | + reply.rl_lkid = lkb->lkb_id; | |
5420 | + } | |
5421 | + break; | |
5422 | + | |
5423 | + case GDLM_REMCMD_LOCKREPLY: | |
5424 | + | |
10d56c87 AM |
5425 | + lkb = find_lock_by_id(lspace, req->rh_lkid); |
5426 | + | |
5427 | + DLM_ASSERT(lkb, | |
5428 | + print_reply(rp); | |
5429 | + printk("nodeid %u\n", nodeid);); | |
4bf12011 | 5430 | + |
10d56c87 AM |
5431 | + DLM_ASSERT(!(lkb->lkb_flags & GDLM_LKFLG_MSTCPY), |
5432 | + print_lkb(lkb); | |
5433 | + print_reply(rp); | |
5434 | + printk("nodeid %u\n", nodeid);); | |
4bf12011 | 5435 | + |
10d56c87 | 5436 | + process_lockqueue_reply(lkb, rp, nodeid); |
4bf12011 | 5437 | + break; |
5438 | + | |
5439 | + case GDLM_REMCMD_LOCKGRANT: | |
5440 | + | |
5441 | + /* | |
5442 | + * Remote lock has been granted asynchronously. Do a compact | |
5443 | + * version of what grant_lock() does. | |
5444 | + */ | |
5445 | + | |
5446 | + lkb = find_lock_by_id(lspace, freq->rr_remlkid); | |
5447 | + | |
10d56c87 AM |
5448 | + DLM_ASSERT(lkb, |
5449 | + print_request(freq); | |
5450 | + printk("nodeid %u\n", nodeid);); | |
4bf12011 | 5451 | + |
5452 | + rsb = lkb->lkb_resource; | |
5453 | + | |
10d56c87 AM |
5454 | + DLM_ASSERT(rsb, |
5455 | + print_lkb(lkb); | |
5456 | + print_request(freq); | |
5457 | + printk("nodeid %u\n", nodeid);); | |
5458 | + | |
5459 | + DLM_ASSERT(rsb->res_nodeid, | |
5460 | + print_lkb(lkb); | |
5461 | + print_rsb(rsb); | |
5462 | + print_request(freq); | |
5463 | + printk("nodeid %u\n", nodeid);); | |
5464 | + | |
5465 | + DLM_ASSERT(!(lkb->lkb_flags & GDLM_LKFLG_MSTCPY), | |
5466 | + print_lkb(lkb); | |
5467 | + print_rsb(rsb); | |
5468 | + print_request(freq); | |
5469 | + printk("nodeid %u\n", nodeid);); | |
5470 | + | |
5471 | + if (lkb->lkb_lockqueue_state) { | |
5472 | + log_error(rsb->res_ls, "granting lock on lockqueue"); | |
5473 | + print_lkb(lkb); | |
5474 | + } | |
4bf12011 | 5475 | + |
5476 | + down_write(&rsb->res_lock); | |
5477 | + | |
5478 | + if (lkb->lkb_flags & GDLM_LKFLG_VALBLK) | |
5479 | + memcpy(lkb->lkb_lvbptr, freq->rr_lvb, DLM_LVB_LEN); | |
5480 | + | |
5481 | + lkb->lkb_grmode = lkb->lkb_rqmode; | |
5482 | + lkb->lkb_rqmode = DLM_LOCK_IV; | |
5483 | + | |
5484 | + if (lkb->lkb_range) { | |
5485 | + lkb->lkb_range[GR_RANGE_START] = | |
5486 | + lkb->lkb_range[RQ_RANGE_START]; | |
5487 | + lkb->lkb_range[GR_RANGE_END] = | |
5488 | + lkb->lkb_range[RQ_RANGE_END]; | |
5489 | + } | |
5490 | + | |
5491 | + lkb_swqueue(rsb, lkb, GDLM_LKSTS_GRANTED); | |
5492 | + up_write(&rsb->res_lock); | |
5493 | + | |
5494 | + if (freq->rr_flags & GDLM_LKFLG_DEMOTED) | |
5495 | + lkb->lkb_flags |= GDLM_LKFLG_DEMOTED; | |
5496 | + | |
5497 | + lkb->lkb_retstatus = 0; | |
5cdbd17b | 5498 | + queue_ast(lkb, AST_COMP, 0); |
4bf12011 | 5499 | + break; |
5500 | + | |
5501 | + case GDLM_REMCMD_SENDBAST: | |
5502 | + | |
5503 | + lkb = find_lock_by_id(lspace, freq->rr_remlkid); | |
5504 | + | |
10d56c87 AM |
5505 | + DLM_ASSERT(lkb, |
5506 | + print_request(freq); | |
5507 | + printk("nodeid %u\n", nodeid);); | |
4bf12011 | 5508 | + |
5509 | + if (lkb->lkb_status == GDLM_LKSTS_GRANTED) | |
5cdbd17b | 5510 | + queue_ast(lkb, AST_BAST, freq->rr_rqmode); |
4bf12011 | 5511 | + break; |
5512 | + | |
5513 | + case GDLM_REMCMD_SENDCAST: | |
5514 | + | |
5515 | + /* This is only used for some error completion ASTs */ | |
5516 | + | |
5517 | + lkb = find_lock_by_id(lspace, freq->rr_remlkid); | |
5518 | + | |
10d56c87 AM |
5519 | + DLM_ASSERT(lkb, |
5520 | + print_request(freq); | |
5521 | + printk("nodeid %u\n", nodeid);); | |
4bf12011 | 5522 | + |
5523 | + /* Return the lock to granted status */ | |
5524 | + res_lkb_swqueue(lkb->lkb_resource, lkb, GDLM_LKSTS_GRANTED); | |
4bf12011 | 5525 | + lkb->lkb_retstatus = freq->rr_status; |
5cdbd17b | 5526 | + queue_ast(lkb, AST_COMP, 0); |
4bf12011 | 5527 | + break; |
5528 | + | |
5529 | + case GDLM_REMCMD_UNLOCKREQUEST: | |
5530 | + | |
5531 | + lkb = find_lock_by_id(lspace, freq->rr_remlkid); | |
5532 | + | |
10d56c87 AM |
5533 | + DLM_ASSERT(lkb, |
5534 | + print_request(freq); | |
5535 | + printk("nodeid %u\n", nodeid);); | |
5536 | + | |
5537 | + DLM_ASSERT(lkb->lkb_flags & GDLM_LKFLG_MSTCPY, | |
5538 | + print_lkb(lkb); | |
5539 | + print_request(freq); | |
5540 | + printk("nodeid %u\n", nodeid);); | |
5541 | + | |
5542 | + rsb = find_rsb_to_unlock(lspace, lkb); | |
5543 | + | |
5544 | + log_debug(lspace, "un from %u %x \"%s\"", nodeid, lkb->lkb_id, | |
5545 | + rsb->res_name); | |
4bf12011 | 5546 | + |
10d56c87 | 5547 | + reply.rl_status = dlm_unlock_stage2(lkb, rsb, freq->rr_flags); |
4bf12011 | 5548 | + send_reply = 1; |
5549 | + break; | |
5550 | + | |
5551 | + case GDLM_REMCMD_QUERY: | |
5552 | + remote_query(nodeid, lspace, req); | |
5553 | + break; | |
5554 | + | |
5555 | + case GDLM_REMCMD_QUERYREPLY: | |
5556 | + remote_query_reply(nodeid, lspace, req); | |
5557 | + break; | |
5558 | + | |
5559 | + default: | |
5560 | + log_error(lspace, "process_cluster_request cmd %d",req->rh_cmd); | |
5561 | + } | |
5562 | + | |
5563 | + up_read(&lspace->ls_in_recovery); | |
5564 | + | |
5565 | + out: | |
5566 | + if (send_reply) { | |
5567 | + reply.rl_header.rh_cmd = GDLM_REMCMD_LOCKREPLY; | |
5568 | + reply.rl_header.rh_flags = 0; | |
5569 | + reply.rl_header.rh_length = sizeof(reply); | |
5570 | + reply.rl_header.rh_lkid = freq->rr_header.rh_lkid; | |
5571 | + reply.rl_header.rh_lockspace = freq->rr_header.rh_lockspace; | |
5572 | + | |
5573 | + status = midcomms_send_message(nodeid, &reply.rl_header, | |
5574 | + GFP_KERNEL); | |
5575 | + } | |
5576 | + | |
5577 | + wake_astd(); | |
5578 | + | |
5579 | + return status; | |
5580 | +} | |
5581 | + | |
10d56c87 | 5582 | +static void add_reply_lvb(struct dlm_lkb *lkb, struct dlm_reply *reply) |
4bf12011 | 5583 | +{ |
5584 | + if (lkb->lkb_flags & GDLM_LKFLG_VALBLK) | |
5585 | + memcpy(reply->rl_lvb, lkb->lkb_lvbptr, DLM_LVB_LEN); | |
5586 | +} | |
5587 | + | |
10d56c87 | 5588 | +static void add_request_lvb(struct dlm_lkb *lkb, struct dlm_request *req) |
4bf12011 | 5589 | +{ |
5590 | + if (lkb->lkb_flags & GDLM_LKFLG_VALBLK) | |
5591 | + memcpy(req->rr_lvb, lkb->lkb_lvbptr, DLM_LVB_LEN); | |
5592 | +} | |
5593 | diff -urN linux-orig/cluster/dlm/lockqueue.h linux-patched/cluster/dlm/lockqueue.h | |
5594 | --- linux-orig/cluster/dlm/lockqueue.h 1970-01-01 07:30:00.000000000 +0730 | |
10d56c87 AM |
5595 | +++ linux-patched/cluster/dlm/lockqueue.h 2004-07-13 18:57:22.000000000 +0800 |
5596 | @@ -0,0 +1,28 @@ | |
4bf12011 | 5597 | +/****************************************************************************** |
5598 | +******************************************************************************* | |
5599 | +** | |
5600 | +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | |
5601 | +** Copyright (C) 2004 Red Hat, Inc. All rights reserved. | |
5602 | +** | |
5603 | +** This copyrighted material is made available to anyone wishing to use, | |
5604 | +** modify, copy, or redistribute it subject to the terms and conditions | |
5605 | +** of the GNU General Public License v.2. | |
5606 | +** | |
5607 | +******************************************************************************* | |
5608 | +******************************************************************************/ | |
5609 | + | |
5610 | +#ifndef __LOCKQUEUE_DOT_H__ | |
5611 | +#define __LOCKQUEUE_DOT_H__ | |
5612 | + | |
10d56c87 AM |
5613 | +void remote_grant(struct dlm_lkb * lkb); |
5614 | +void reply_and_grant(struct dlm_lkb * lkb); | |
5615 | +int remote_stage(struct dlm_lkb * lkb, int state); | |
5616 | +int process_cluster_request(int csid, struct dlm_header *req, int recovery); | |
5617 | +int send_cluster_request(struct dlm_lkb * lkb, int state); | |
5618 | +void purge_requestqueue(struct dlm_ls * ls); | |
5619 | +int process_requestqueue(struct dlm_ls * ls); | |
5620 | +int reply_in_requestqueue(struct dlm_ls * ls, int lkid); | |
5621 | +void remote_remove_resdata(struct dlm_ls * ls, int nodeid, char *name, int namelen); | |
5622 | +void allocate_and_copy_lvb(struct dlm_ls * ls, char **lvbptr, char *src); | |
4bf12011 | 5623 | + |
5624 | +#endif /* __LOCKQUEUE_DOT_H__ */ | |
5625 | diff -urN linux-orig/cluster/dlm/lockspace.c linux-patched/cluster/dlm/lockspace.c | |
5626 | --- linux-orig/cluster/dlm/lockspace.c 1970-01-01 07:30:00.000000000 +0730 | |
10d56c87 AM |
5627 | +++ linux-patched/cluster/dlm/lockspace.c 2004-07-13 18:57:22.000000000 +0800 |
5628 | @@ -0,0 +1,699 @@ | |
4bf12011 | 5629 | +/****************************************************************************** |
5630 | +******************************************************************************* | |
5631 | +** | |
5632 | +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | |
5633 | +** Copyright (C) 2004 Red Hat, Inc. All rights reserved. | |
5634 | +** | |
5635 | +** This copyrighted material is made available to anyone wishing to use, | |
5636 | +** modify, copy, or redistribute it subject to the terms and conditions | |
5637 | +** of the GNU General Public License v.2. | |
5638 | +** | |
5639 | +******************************************************************************* | |
5640 | +******************************************************************************/ | |
5641 | + | |
5642 | +#include <linux/module.h> | |
5643 | + | |
5644 | +#include "dlm_internal.h" | |
5645 | +#include "recoverd.h" | |
5646 | +#include "ast.h" | |
5647 | +#include "lkb.h" | |
5648 | +#include "nodes.h" | |
5649 | +#include "dir.h" | |
5650 | +#include "lowcomms.h" | |
5651 | +#include "config.h" | |
5652 | +#include "memory.h" | |
5653 | +#include "lockspace.h" | |
5654 | +#include "device.h" | |
5655 | + | |
5656 | +#define GDST_NONE (0) | |
5657 | +#define GDST_RUNNING (1) | |
5658 | + | |
10d56c87 AM |
5659 | +static int dlmstate; |
5660 | +static int dlmcount; | |
5661 | +static struct semaphore dlmstate_lock; | |
4bf12011 | 5662 | +struct list_head lslist; |
5663 | +spinlock_t lslist_lock; | |
5664 | +struct kcl_service_ops ls_ops; | |
5665 | + | |
5666 | +static int new_lockspace(char *name, int namelen, void **lockspace, int flags); | |
5667 | + | |
5668 | + | |
5669 | +void dlm_lockspace_init(void) | |
5670 | +{ | |
10d56c87 AM |
5671 | + dlmstate = GDST_NONE; |
5672 | + dlmcount = 0; | |
5673 | + init_MUTEX(&dlmstate_lock); | |
4bf12011 | 5674 | + INIT_LIST_HEAD(&lslist); |
5675 | + spin_lock_init(&lslist_lock); | |
5676 | +} | |
5677 | + | |
10d56c87 | 5678 | +struct dlm_ls *find_lockspace_by_global_id(uint32_t id) |
4bf12011 | 5679 | +{ |
10d56c87 | 5680 | + struct dlm_ls *ls; |
4bf12011 | 5681 | + |
5682 | + spin_lock(&lslist_lock); | |
5683 | + | |
5684 | + list_for_each_entry(ls, &lslist, ls_list) { | |
5685 | + if (ls->ls_global_id == id) | |
5686 | + goto out; | |
5687 | + } | |
5688 | + ls = NULL; | |
5689 | + out: | |
5690 | + spin_unlock(&lslist_lock); | |
5691 | + return ls; | |
5692 | +} | |
5693 | + | |
5694 | +/* TODO: make this more efficient */ | |
10d56c87 | 5695 | +struct dlm_ls *find_lockspace_by_local_id(void *id) |
4bf12011 | 5696 | +{ |
10d56c87 | 5697 | + struct dlm_ls *ls; |
4bf12011 | 5698 | + |
5699 | + spin_lock(&lslist_lock); | |
5700 | + | |
5701 | + list_for_each_entry(ls, &lslist, ls_list) { | |
5702 | + if (ls->ls_local_id == (uint32_t)(long)id) | |
5703 | + goto out; | |
5704 | + } | |
5705 | + ls = NULL; | |
5706 | + out: | |
5707 | + spin_unlock(&lslist_lock); | |
5708 | + return ls; | |
5709 | +} | |
5710 | + | |
10d56c87 | 5711 | +struct dlm_ls *find_lockspace_by_name(char *name, int namelen) |
4bf12011 | 5712 | +{ |
10d56c87 | 5713 | + struct dlm_ls *ls; |
4bf12011 | 5714 | + |
5715 | + spin_lock(&lslist_lock); | |
5716 | + | |
5717 | + list_for_each_entry(ls, &lslist, ls_list) { | |
5718 | + if (ls->ls_namelen == namelen && | |
5719 | + memcmp(ls->ls_name, name, namelen) == 0) | |
5720 | + goto out; | |
5721 | + } | |
5722 | + ls = NULL; | |
5723 | + out: | |
5724 | + spin_unlock(&lslist_lock); | |
5725 | + return ls; | |
5726 | +} | |
5727 | + | |
5728 | +/* | |
5729 | + * Called from dlm_init. These are the general threads which are not | |
10d56c87 | 5730 | + * lockspace-specific and work for all dlm lockspaces. |
4bf12011 | 5731 | + */ |
5732 | + | |
5733 | +static int threads_start(void) | |
5734 | +{ | |
5735 | + int error; | |
5736 | + | |
5737 | + /* Thread which interacts with cman for all ls's */ | |
10d56c87 | 5738 | + error = dlm_recoverd_start(); |
4bf12011 | 5739 | + if (error) { |
5740 | + log_print("cannot start recovery thread %d", error); | |
5741 | + goto fail; | |
5742 | + } | |
5743 | + | |
5744 | + /* Thread which process lock requests for all ls's */ | |
5745 | + error = astd_start(); | |
5746 | + if (error) { | |
5747 | + log_print("cannot start ast thread %d", error); | |
5748 | + goto recoverd_fail; | |
5749 | + } | |
5750 | + | |
5751 | + /* Thread for sending/receiving messages for all ls's */ | |
5752 | + error = lowcomms_start(); | |
5753 | + if (error) { | |
5754 | + log_print("cannot start lowcomms %d", error); | |
5755 | + goto astd_fail; | |
5756 | + } | |
5757 | + | |
5758 | + return 0; | |
5759 | + | |
5760 | + astd_fail: | |
5761 | + astd_stop(); | |
5762 | + | |
5763 | + recoverd_fail: | |
10d56c87 | 5764 | + dlm_recoverd_stop(); |
4bf12011 | 5765 | + |
5766 | + fail: | |
5767 | + return error; | |
5768 | +} | |
5769 | + | |
5770 | +static void threads_stop(void) | |
5771 | +{ | |
5772 | + lowcomms_stop(); | |
5773 | + astd_stop(); | |
10d56c87 | 5774 | + dlm_recoverd_stop(); |
4bf12011 | 5775 | +} |
5776 | + | |
5777 | +static int init_internal(void) | |
5778 | +{ | |
5779 | + int error = 0; | |
5780 | + | |
10d56c87 AM |
5781 | + if (dlmstate == GDST_RUNNING) |
5782 | + dlmcount++; | |
4bf12011 | 5783 | + else { |
5784 | + error = threads_start(); | |
5785 | + if (error) | |
5786 | + goto out; | |
5787 | + | |
10d56c87 AM |
5788 | + dlmstate = GDST_RUNNING; |
5789 | + dlmcount = 1; | |
4bf12011 | 5790 | + } |
5791 | + | |
5792 | + out: | |
5793 | + return error; | |
5794 | +} | |
5795 | + | |
5796 | + | |
5797 | +/* | |
10d56c87 | 5798 | + * Called after dlm module is loaded and before any lockspaces are created. |
4bf12011 | 5799 | + * Starts and initializes global threads and structures. These global entities |
5800 | + * are shared by and independent of all lockspaces. | |
5801 | + * | |
10d56c87 | 5802 | + * There should be a dlm-specific user command which a person can run which |
4bf12011 | 5803 | + * calls this function. If a user hasn't run that command and something |
5804 | + * creates a new lockspace, this is called first. | |
5805 | + * | |
5806 | + * This also starts the default lockspace. | |
5807 | + */ | |
5808 | + | |
5809 | +int dlm_init(void) | |
5810 | +{ | |
5811 | + int error; | |
5812 | + | |
10d56c87 | 5813 | + down(&dlmstate_lock); |
4bf12011 | 5814 | + error = init_internal(); |
10d56c87 | 5815 | + up(&dlmstate_lock); |
4bf12011 | 5816 | + |
5817 | + return error; | |
5818 | +} | |
5819 | + | |
5820 | +int dlm_release(void) | |
5821 | +{ | |
5822 | + int error = 0; | |
5823 | + | |
10d56c87 | 5824 | + down(&dlmstate_lock); |
4bf12011 | 5825 | + |
10d56c87 | 5826 | + if (dlmstate == GDST_NONE) |
4bf12011 | 5827 | + goto out; |
5828 | + | |
10d56c87 AM |
5829 | + if (dlmcount) |
5830 | + dlmcount--; | |
4bf12011 | 5831 | + |
10d56c87 | 5832 | + if (dlmcount) |
4bf12011 | 5833 | + goto out; |
5834 | + | |
5835 | + spin_lock(&lslist_lock); | |
5836 | + if (!list_empty(&lslist)) { | |
5837 | + spin_unlock(&lslist_lock); | |
5838 | + log_print("cannot stop threads, lockspaces still exist"); | |
5839 | + goto out; | |
5840 | + } | |
5841 | + spin_unlock(&lslist_lock); | |
5842 | + | |
5843 | + threads_stop(); | |
10d56c87 | 5844 | + dlmstate = GDST_NONE; |
4bf12011 | 5845 | + |
5846 | + out: | |
10d56c87 | 5847 | + up(&dlmstate_lock); |
4bf12011 | 5848 | + |
5849 | + return error; | |
5850 | +} | |
5851 | + | |
10d56c87 | 5852 | +struct dlm_ls *allocate_ls(int namelen) |
4bf12011 | 5853 | +{ |
10d56c87 | 5854 | + struct dlm_ls *ls; |
4bf12011 | 5855 | + |
5856 | + /* FIXME: use appropriate malloc type */ | |
5857 | + | |
10d56c87 | 5858 | + ls = kmalloc(sizeof(struct dlm_ls) + namelen, GFP_KERNEL); |
4bf12011 | 5859 | + if (ls) |
10d56c87 | 5860 | + memset(ls, 0, sizeof(struct dlm_ls) + namelen); |
4bf12011 | 5861 | + |
5862 | + return ls; | |
5863 | +} | |
5864 | + | |
4bf12011 | 5865 | +static int new_lockspace(char *name, int namelen, void **lockspace, int flags) |
5866 | +{ | |
10d56c87 AM |
5867 | + struct dlm_ls *ls; |
5868 | + int i, size, error = -ENOMEM; | |
4bf12011 | 5869 | + uint32_t local_id = 0; |
5870 | + | |
5871 | + if (!try_module_get(THIS_MODULE)) | |
5872 | + return -EINVAL; | |
5873 | + | |
5874 | + if (namelen > MAX_SERVICE_NAME_LEN) | |
5875 | + return -EINVAL; | |
5876 | + | |
5877 | + if ((ls = find_lockspace_by_name(name, namelen))) { | |
10d56c87 | 5878 | + *lockspace = (void *)(long)ls->ls_local_id; |
4bf12011 | 5879 | + return -EEXIST; |
5880 | + } | |
5881 | + | |
5882 | + /* | |
5883 | + * Initialize ls fields | |
5884 | + */ | |
5885 | + | |
5886 | + ls = allocate_ls(namelen); | |
5887 | + if (!ls) | |
5888 | + goto out; | |
5889 | + | |
5890 | + memcpy(ls->ls_name, name, namelen); | |
5891 | + ls->ls_namelen = namelen; | |
5892 | + | |
5893 | + ls->ls_allocation = GFP_KERNEL; | |
10d56c87 | 5894 | + ls->ls_flags = 0; |
4bf12011 | 5895 | + |
10d56c87 AM |
5896 | + size = dlm_config.rsbtbl_size; |
5897 | + ls->ls_rsbtbl_size = size; | |
5898 | + | |
5899 | + ls->ls_rsbtbl = kmalloc(sizeof(struct dlm_rsbtable) * size, GFP_KERNEL); | |
5900 | + if (!ls->ls_rsbtbl) | |
4bf12011 | 5901 | + goto out_lsfree; |
10d56c87 AM |
5902 | + for (i = 0; i < size; i++) { |
5903 | + INIT_LIST_HEAD(&ls->ls_rsbtbl[i].list); | |
5904 | + rwlock_init(&ls->ls_rsbtbl[i].lock); | |
5905 | + } | |
4bf12011 | 5906 | + |
10d56c87 AM |
5907 | + size = dlm_config.lkbtbl_size; |
5908 | + ls->ls_lkbtbl_size = size; | |
4bf12011 | 5909 | + |
10d56c87 AM |
5910 | + ls->ls_lkbtbl = kmalloc(sizeof(struct dlm_lkbtable) * size, GFP_KERNEL); |
5911 | + if (!ls->ls_lkbtbl) | |
5912 | + goto out_rsbfree; | |
5913 | + for (i = 0; i < size; i++) { | |
5914 | + INIT_LIST_HEAD(&ls->ls_lkbtbl[i].list); | |
5915 | + rwlock_init(&ls->ls_lkbtbl[i].lock); | |
5916 | + ls->ls_lkbtbl[i].counter = 1; | |
5917 | + } | |
4bf12011 | 5918 | + |
10d56c87 AM |
5919 | + size = dlm_config.dirtbl_size; |
5920 | + ls->ls_dirtbl_size = size; | |
5921 | + | |
5922 | + ls->ls_dirtbl = kmalloc(sizeof(struct dlm_dirtable) * size, GFP_KERNEL); | |
5923 | + if (!ls->ls_dirtbl) | |
5924 | + goto out_lkbfree; | |
5925 | + for (i = 0; i < size; i++) { | |
5926 | + INIT_LIST_HEAD(&ls->ls_dirtbl[i].list); | |
5927 | + rwlock_init(&ls->ls_dirtbl[i].lock); | |
5928 | + } | |
4bf12011 | 5929 | + |
5930 | + INIT_LIST_HEAD(&ls->ls_nodes); | |
4bf12011 | 5931 | + INIT_LIST_HEAD(&ls->ls_nodes_gone); |
10d56c87 | 5932 | + ls->ls_num_nodes = 0; |
4bf12011 | 5933 | + INIT_LIST_HEAD(&ls->ls_recover); |
5934 | + spin_lock_init(&ls->ls_recover_lock); | |
5935 | + INIT_LIST_HEAD(&ls->ls_recover_list); | |
5936 | + ls->ls_recover_list_count = 0; | |
5937 | + spin_lock_init(&ls->ls_recover_list_lock); | |
5938 | + init_waitqueue_head(&ls->ls_wait_general); | |
10d56c87 | 5939 | + INIT_LIST_HEAD(&ls->ls_rootres); |
4bf12011 | 5940 | + INIT_LIST_HEAD(&ls->ls_requestqueue); |
5941 | + INIT_LIST_HEAD(&ls->ls_rebuild_rootrsb_list); | |
5942 | + ls->ls_last_stop = 0; | |
5943 | + ls->ls_last_start = 0; | |
5944 | + ls->ls_last_finish = 0; | |
5945 | + ls->ls_rcom_msgid = 0; | |
5946 | + init_MUTEX(&ls->ls_rcom_lock); | |
5947 | + init_rwsem(&ls->ls_in_recovery); | |
5948 | + init_rwsem(&ls->ls_unlock_sem); | |
5949 | + init_rwsem(&ls->ls_rec_rsblist); | |
5950 | + init_rwsem(&ls->ls_gap_rsblist); | |
5951 | + down_write(&ls->ls_in_recovery); | |
5952 | + | |
4bf12011 | 5953 | + if (flags & DLM_LSF_NOTIMERS) |
5954 | + set_bit(LSFL_NOTIMERS, &ls->ls_flags); | |
10d56c87 AM |
5955 | + if (flags & DLM_LSF_NOCONVGRANT) |
5956 | + set_bit(LSFL_NOCONVGRANT, &ls->ls_flags); | |
4bf12011 | 5957 | + |
5958 | + /* | |
5959 | + * Connect this lockspace with the cluster manager | |
5960 | + */ | |
5961 | + | |
5962 | + error = kcl_register_service(name, namelen, SERVICE_LEVEL_GDLM, | |
5963 | + &ls_ops, TRUE, (void *) ls, &local_id); | |
5964 | + if (error) | |
10d56c87 | 5965 | + goto out_dirfree; |
4bf12011 | 5966 | + |
5967 | + ls->ls_state = LSST_INIT; | |
5968 | + ls->ls_local_id = local_id; | |
5969 | + | |
5970 | + spin_lock(&lslist_lock); | |
5971 | + list_add(&ls->ls_list, &lslist); | |
5972 | + spin_unlock(&lslist_lock); | |
5973 | + | |
5974 | + error = kcl_join_service(local_id); | |
5975 | + if (error) { | |
5976 | + log_error(ls, "service manager join error %d", error); | |
5977 | + goto out_reg; | |
5978 | + } | |
5979 | + | |
5980 | + /* The ls isn't actually running until it receives a start() from CMAN. | |
10d56c87 | 5981 | + Neither does it have a global ls id until started. */ |
4bf12011 | 5982 | + |
5983 | + /* Return the local ID as the lockspace handle. I've left this | |
5984 | + cast to a void* as it allows us to replace it with pretty much | |
5985 | + anything at a future date without breaking clients. But returning | |
5986 | + the address of the lockspace is a bad idea as it could get | |
5987 | + forcibly removed, leaving client with a dangling pointer */ | |
10d56c87 | 5988 | + *lockspace = (void *)(long)local_id; |
4bf12011 | 5989 | + |
5990 | + return 0; | |
5991 | + | |
10d56c87 | 5992 | + out_reg: |
4bf12011 | 5993 | + kcl_unregister_service(ls->ls_local_id); |
10d56c87 AM |
5994 | + out_dirfree: |
5995 | + kfree(ls->ls_dirtbl); | |
5996 | + out_lkbfree: | |
5997 | + kfree(ls->ls_lkbtbl); | |
5998 | + out_rsbfree: | |
5999 | + kfree(ls->ls_rsbtbl); | |
6000 | + out_lsfree: | |
6001 | + kfree(ls); | |
6002 | + out: | |
4bf12011 | 6003 | + return error; |
6004 | +} | |
6005 | + | |
6006 | +/* | |
6007 | + * Called by a system like GFS which wants independent lock spaces. | |
6008 | + */ | |
6009 | + | |
6010 | +int dlm_new_lockspace(char *name, int namelen, void **lockspace, int flags) | |
6011 | +{ | |
6012 | + int error = -ENOSYS; | |
6013 | + | |
10d56c87 | 6014 | + down(&dlmstate_lock); |
4bf12011 | 6015 | + error = init_internal(); |
6016 | + if (error) | |
6017 | + goto out; | |
6018 | + | |
6019 | + error = new_lockspace(name, namelen, lockspace, flags); | |
10d56c87 AM |
6020 | + out: |
6021 | + up(&dlmstate_lock); | |
4bf12011 | 6022 | + return error; |
6023 | +} | |
6024 | + | |
6025 | +/* Return 1 if the lockspace still has active remote locks, | |
6026 | + * 2 if the lockspace still has active local locks. | |
6027 | + */ | |
10d56c87 AM |
6028 | +static int lockspace_busy(struct dlm_ls *ls) |
6029 | +{ | |
6030 | + int i, lkb_found = 0; | |
6031 | + struct dlm_lkb *lkb; | |
6032 | + | |
6033 | + /* NOTE: We check the lockidtbl here rather than the resource table. | |
6034 | + This is because there may be LKBs queued as ASTs that have been | |
6035 | + unlinked from their RSBs and are pending deletion once the AST has | |
6036 | + been delivered */ | |
6037 | + | |
6038 | + for (i = 0; i < ls->ls_lkbtbl_size; i++) { | |
6039 | + read_lock(&ls->ls_lkbtbl[i].lock); | |
6040 | + if (!list_empty(&ls->ls_lkbtbl[i].list)) { | |
6041 | + lkb_found = 1; | |
6042 | + list_for_each_entry(lkb, &ls->ls_lkbtbl[i].list, | |
6043 | + lkb_idtbl_list) { | |
6044 | + if (!lkb->lkb_nodeid) { | |
6045 | + read_unlock(&ls->ls_lkbtbl[i].lock); | |
6046 | + return 2; | |
6047 | + } | |
6048 | + } | |
4bf12011 | 6049 | + } |
10d56c87 | 6050 | + read_unlock(&ls->ls_lkbtbl[i].lock); |
4bf12011 | 6051 | + } |
10d56c87 | 6052 | + return lkb_found; |
4bf12011 | 6053 | +} |
6054 | + | |
10d56c87 | 6055 | +static int release_lockspace(struct dlm_ls *ls, int force) |
4bf12011 | 6056 | +{ |
10d56c87 AM |
6057 | + struct dlm_lkb *lkb; |
6058 | + struct dlm_rsb *rsb; | |
6059 | + struct dlm_recover *rv; | |
6060 | + struct dlm_csb *csb; | |
4bf12011 | 6061 | + struct list_head *head; |
6062 | + int i; | |
6063 | + int busy = lockspace_busy(ls); | |
6064 | + | |
6065 | + /* Don't destroy a busy lockspace */ | |
6066 | + if (busy > force) | |
6067 | + return -EBUSY; | |
6068 | + | |
6069 | + if (force < 3) { | |
6070 | + kcl_leave_service(ls->ls_local_id); | |
6071 | + kcl_unregister_service(ls->ls_local_id); | |
6072 | + } | |
6073 | + | |
6074 | + spin_lock(&lslist_lock); | |
6075 | + list_del(&ls->ls_list); | |
6076 | + spin_unlock(&lslist_lock); | |
6077 | + | |
6078 | + /* | |
6079 | + * Free resdata structs. | |
6080 | + */ | |
6081 | + | |
10d56c87 AM |
6082 | + dlm_dir_clear(ls); |
6083 | + kfree(ls->ls_dirtbl); | |
4bf12011 | 6084 | + |
6085 | + /* | |
10d56c87 | 6086 | + * Free all lkb's on lkbtbl[] lists. |
4bf12011 | 6087 | + */ |
6088 | + | |
10d56c87 AM |
6089 | + for (i = 0; i < ls->ls_lkbtbl_size; i++) { |
6090 | + head = &ls->ls_lkbtbl[i].list; | |
4bf12011 | 6091 | + while (!list_empty(head)) { |
10d56c87 AM |
6092 | + lkb = list_entry(head->next, struct dlm_lkb, |
6093 | + lkb_idtbl_list); | |
4bf12011 | 6094 | + list_del(&lkb->lkb_idtbl_list); |
6095 | + | |
6096 | + if (lkb->lkb_lockqueue_state) | |
6097 | + remove_from_lockqueue(lkb); | |
6098 | + | |
5cdbd17b | 6099 | + if (lkb->lkb_astflags & (AST_COMP | AST_BAST)) |
4bf12011 | 6100 | + list_del(&lkb->lkb_astqueue); |
6101 | + | |
10d56c87 | 6102 | + if (lkb->lkb_lvbptr && lkb->lkb_flags & GDLM_LKFLG_MSTCPY) |
4bf12011 | 6103 | + free_lvb(lkb->lkb_lvbptr); |
6104 | + | |
6105 | + free_lkb(lkb); | |
6106 | + } | |
6107 | + } | |
6108 | + | |
10d56c87 | 6109 | + kfree(ls->ls_lkbtbl); |
4bf12011 | 6110 | + |
6111 | + /* | |
10d56c87 | 6112 | + * Free all rsb's on rsbtbl[] lists |
4bf12011 | 6113 | + */ |
6114 | + | |
10d56c87 AM |
6115 | + for (i = 0; i < ls->ls_rsbtbl_size; i++) { |
6116 | + head = &ls->ls_rsbtbl[i].list; | |
4bf12011 | 6117 | + while (!list_empty(head)) { |
10d56c87 AM |
6118 | + rsb = list_entry(head->next, struct dlm_rsb, |
6119 | + res_hashchain); | |
4bf12011 | 6120 | + list_del(&rsb->res_hashchain); |
6121 | + | |
6122 | + if (rsb->res_lvbptr) | |
6123 | + free_lvb(rsb->res_lvbptr); | |
6124 | + | |
6125 | + free_rsb(rsb); | |
6126 | + } | |
6127 | + } | |
6128 | + | |
10d56c87 | 6129 | + kfree(ls->ls_rsbtbl); |
4bf12011 | 6130 | + |
6131 | + /* | |
6132 | + * Free structures on any other lists | |
6133 | + */ | |
6134 | + | |
6135 | + head = &ls->ls_recover; | |
6136 | + while (!list_empty(head)) { | |
10d56c87 AM |
6137 | + rv = list_entry(head->next, struct dlm_recover, list); |
6138 | + list_del(&rv->list); | |
6139 | + kfree(rv); | |
4bf12011 | 6140 | + } |
6141 | + | |
6142 | + head = &ls->ls_nodes; | |
6143 | + while (!list_empty(head)) { | |
10d56c87 AM |
6144 | + csb = list_entry(head->next, struct dlm_csb, list); |
6145 | + list_del(&csb->list); | |
4bf12011 | 6146 | + release_csb(csb); |
6147 | + } | |
6148 | + | |
6149 | + head = &ls->ls_nodes_gone; | |
6150 | + while (!list_empty(head)) { | |
10d56c87 AM |
6151 | + csb = list_entry(head->next, struct dlm_csb, list); |
6152 | + list_del(&csb->list); | |
4bf12011 | 6153 | + release_csb(csb); |
6154 | + } | |
6155 | + | |
10d56c87 | 6156 | + kfree(ls); |
4bf12011 | 6157 | + |
6158 | + dlm_release(); | |
6159 | + | |
6160 | + module_put(THIS_MODULE); | |
6161 | + return 0; | |
6162 | +} | |
6163 | + | |
6164 | + | |
6165 | +/* | |
6166 | + * Called when a system has released all its locks and is not going to use the | |
6167 | + * lockspace any longer. We blindly free everything we're managing for this | |
6168 | + * lockspace. Remaining nodes will go through the recovery process as if we'd | |
6169 | + * died. The lockspace must continue to function as usual, participating in | |
6170 | + * recoveries, until kcl_leave_service returns. | |
6171 | + * | |
6172 | + * Force has 4 possible values: | |
6173 | + * 0 - don't destroy lockspace if it has any LKBs | |
6174 | + * 1 - destroy lockspace if it has remote LKBs but not if it has local LKBs | |
6175 | + * 2 - destroy lockspace regardless of LKBs | |
6176 | + * 3 - destroy lockspace as part of a forced shutdown | |
6177 | + */ | |
6178 | + | |
6179 | +int dlm_release_lockspace(void *lockspace, int force) | |
6180 | +{ | |
10d56c87 | 6181 | + struct dlm_ls *ls; |
4bf12011 | 6182 | + |
6183 | + ls = find_lockspace_by_local_id(lockspace); | |
6184 | + if (!ls) | |
10d56c87 | 6185 | + return -EINVAL; |
4bf12011 | 6186 | + |
6187 | + return release_lockspace(ls, force); | |
6188 | +} | |
6189 | + | |
6190 | + | |
6191 | +/* Called when the cluster is being shut down dirtily */ | |
6192 | +void dlm_emergency_shutdown() | |
6193 | +{ | |
10d56c87 AM |
6194 | + struct dlm_ls *ls; |
6195 | + struct dlm_ls *tmp; | |
4bf12011 | 6196 | + |
6197 | + /* Shut lowcomms down to prevent any socket activity */ | |
6198 | + lowcomms_stop_accept(); | |
6199 | + | |
6200 | + /* Delete the devices that belong to the userland | |
6201 | + lockspaces to be deleted. */ | |
6202 | + dlm_device_free_devices(); | |
6203 | + | |
6204 | + /* Now try to clean the lockspaces */ | |
6205 | + spin_lock(&lslist_lock); | |
6206 | + | |
6207 | + list_for_each_entry_safe(ls, tmp, &lslist, ls_list) { | |
6208 | + spin_unlock(&lslist_lock); | |
6209 | + release_lockspace(ls, 3); | |
6210 | + spin_lock(&lslist_lock); | |
6211 | + } | |
6212 | + | |
6213 | + spin_unlock(&lslist_lock); | |
6214 | +} | |
6215 | + | |
10d56c87 | 6216 | +struct dlm_recover *allocate_dlm_recover(void) |
4bf12011 | 6217 | +{ |
10d56c87 | 6218 | + struct dlm_recover *rv; |
4bf12011 | 6219 | + |
10d56c87 AM |
6220 | + rv = kmalloc(sizeof(struct dlm_recover), GFP_KERNEL); |
6221 | + if (rv) | |
6222 | + memset(rv, 0, sizeof(struct dlm_recover)); | |
6223 | + return rv; | |
4bf12011 | 6224 | +} |
6225 | + | |
6226 | +/* | |
6227 | + * Called by CMAN on a specific ls. "stop" means set flag which while set | |
6228 | + * causes all new requests to ls to be queued and not submitted until flag is | |
6229 | + * cleared. stop on a ls also needs to cancel any prior starts on the ls. | |
6230 | + * The recoverd thread carries out any work called for by this event. | |
6231 | + */ | |
6232 | + | |
6233 | +static int dlm_ls_stop(void *servicedata) | |
6234 | +{ | |
10d56c87 | 6235 | + struct dlm_ls *ls = (struct dlm_ls *) servicedata; |
4bf12011 | 6236 | + int new; |
6237 | + | |
6238 | + spin_lock(&ls->ls_recover_lock); | |
6239 | + ls->ls_last_stop = ls->ls_last_start; | |
6240 | + set_bit(LSFL_LS_STOP, &ls->ls_flags); | |
6241 | + new = test_and_clear_bit(LSFL_LS_RUN, &ls->ls_flags); | |
6242 | + spin_unlock(&ls->ls_recover_lock); | |
6243 | + | |
6244 | + /* | |
6245 | + * This in_recovery lock does two things: | |
6246 | + * | |
6247 | + * 1) Keeps this function from returning until all threads are out | |
6248 | + * of locking routines and locking is truly stopped. | |
6249 | + * 2) Keeps any new requests from being processed until it's unlocked | |
6250 | + * when recovery is complete. | |
6251 | + */ | |
6252 | + | |
6253 | + if (new) | |
6254 | + down_write(&ls->ls_in_recovery); | |
6255 | + | |
6256 | + clear_bit(LSFL_RESDIR_VALID, &ls->ls_flags); | |
6257 | + clear_bit(LSFL_ALL_RESDIR_VALID, &ls->ls_flags); | |
6258 | + clear_bit(LSFL_NODES_VALID, &ls->ls_flags); | |
6259 | + clear_bit(LSFL_ALL_NODES_VALID, &ls->ls_flags); | |
6260 | + | |
10d56c87 | 6261 | + dlm_recoverd_kick(ls); |
4bf12011 | 6262 | + |
6263 | + return 0; | |
6264 | +} | |
6265 | + | |
6266 | +/* | |
6267 | + * Called by CMAN on a specific ls. "start" means enable the lockspace to do | |
6268 | + * request processing which first requires that the recovery procedure be | |
6269 | + * stepped through with all nodes sharing the lockspace (nodeids). The first | |
6270 | + * start on the ls after it's created is a special case and requires some extra | |
6271 | + * work like figuring out our own local nodeid. We can't do all this in the | |
6272 | + * calling CMAN context, so we must pass this work off to the recoverd thread | |
10d56c87 | 6273 | + * which was created in dlm_init(). The recoverd thread carries out any work |
4bf12011 | 6274 | + * called for by this event. |
6275 | + */ | |
6276 | + | |
6277 | +static int dlm_ls_start(void *servicedata, uint32_t *nodeids, int count, | |
6278 | + int event_id, int type) | |
6279 | +{ | |
10d56c87 AM |
6280 | + struct dlm_ls *ls = (struct dlm_ls *) servicedata; |
6281 | + struct dlm_recover *rv; | |
4bf12011 | 6282 | + int error = -ENOMEM; |
6283 | + | |
10d56c87 AM |
6284 | + rv = allocate_dlm_recover(); |
6285 | + if (!rv) | |
4bf12011 | 6286 | + goto out; |
6287 | + | |
10d56c87 AM |
6288 | + rv->nodeids = nodeids; |
6289 | + rv->node_count = count; | |
6290 | + rv->event_id = event_id; | |
4bf12011 | 6291 | + |
6292 | + spin_lock(&ls->ls_recover_lock); | |
6293 | + ls->ls_last_start = event_id; | |
10d56c87 | 6294 | + list_add_tail(&rv->list, &ls->ls_recover); |
4bf12011 | 6295 | + set_bit(LSFL_LS_START, &ls->ls_flags); |
6296 | + spin_unlock(&ls->ls_recover_lock); | |
6297 | + | |
10d56c87 | 6298 | + dlm_recoverd_kick(ls); |
4bf12011 | 6299 | + error = 0; |
6300 | + | |
6301 | + out: | |
6302 | + return error; | |
6303 | +} | |
6304 | + | |
6305 | +/* | |
6306 | + * Called by CMAN on a specific ls. "finish" means that all nodes which | |
6307 | + * received a "start" have completed the start and called kcl_start_done. | |
6308 | + * The recoverd thread carries out any work called for by this event. | |
6309 | + */ | |
6310 | + | |
6311 | +static void dlm_ls_finish(void *servicedata, int event_id) | |
6312 | +{ | |
10d56c87 | 6313 | + struct dlm_ls *ls = (struct dlm_ls *) servicedata; |
4bf12011 | 6314 | + |
6315 | + spin_lock(&ls->ls_recover_lock); | |
6316 | + ls->ls_last_finish = event_id; | |
6317 | + set_bit(LSFL_LS_FINISH, &ls->ls_flags); | |
6318 | + spin_unlock(&ls->ls_recover_lock); | |
6319 | + | |
10d56c87 | 6320 | + dlm_recoverd_kick(ls); |
4bf12011 | 6321 | +} |
6322 | + | |
6323 | +struct kcl_service_ops ls_ops = { | |
6324 | + .stop = dlm_ls_stop, | |
6325 | + .start = dlm_ls_start, | |
6326 | + .finish = dlm_ls_finish | |
6327 | +}; | |
6328 | diff -urN linux-orig/cluster/dlm/lockspace.h linux-patched/cluster/dlm/lockspace.h | |
6329 | --- linux-orig/cluster/dlm/lockspace.h 1970-01-01 07:30:00.000000000 +0730 | |
10d56c87 AM |
6330 | +++ linux-patched/cluster/dlm/lockspace.h 2004-07-13 18:57:22.000000000 +0800 |
6331 | @@ -0,0 +1,27 @@ | |
4bf12011 | 6332 | +/****************************************************************************** |
6333 | +******************************************************************************* | |
6334 | +** | |
6335 | +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | |
6336 | +** Copyright (C) 2004 Red Hat, Inc. All rights reserved. | |
6337 | +** | |
6338 | +** This copyrighted material is made available to anyone wishing to use, | |
6339 | +** modify, copy, or redistribute it subject to the terms and conditions | |
6340 | +** of the GNU General Public License v.2. | |
6341 | +** | |
6342 | +******************************************************************************* | |
6343 | +******************************************************************************/ | |
6344 | + | |
6345 | +#ifndef __LOCKSPACE_DOT_H__ | |
6346 | +#define __LOCKSPACE_DOT_H__ | |
6347 | + | |
6348 | +void dlm_lockspace_init(void); | |
6349 | +int dlm_init(void); | |
6350 | +int dlm_release(void); | |
6351 | +int dlm_new_lockspace(char *name, int namelen, void **ls, int flags); | |
6352 | +int dlm_release_lockspace(void *ls, int force); | |
10d56c87 AM |
6353 | +struct dlm_ls *find_lockspace_by_global_id(uint32_t id); |
6354 | +struct dlm_ls *find_lockspace_by_local_id(void *id); | |
6355 | +struct dlm_ls *find_lockspace_by_name(char *name, int namelen); | |
4bf12011 | 6356 | +void dlm_emergency_shutdown(void); |
6357 | + | |
6358 | +#endif /* __LOCKSPACE_DOT_H__ */ | |
6359 | diff -urN linux-orig/cluster/dlm/lowcomms.c linux-patched/cluster/dlm/lowcomms.c | |
6360 | --- linux-orig/cluster/dlm/lowcomms.c 1970-01-01 07:30:00.000000000 +0730 | |
10d56c87 | 6361 | +++ linux-patched/cluster/dlm/lowcomms.c 2004-07-13 18:57:22.000000000 +0800 |
4bf12011 | 6362 | @@ -0,0 +1,1354 @@ |
6363 | +/****************************************************************************** | |
6364 | +******************************************************************************* | |
6365 | +** | |
6366 | +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | |
6367 | +** Copyright (C) 2004 Red Hat, Inc. All rights reserved. | |
6368 | +** | |
6369 | +** This copyrighted material is made available to anyone wishing to use, | |
6370 | +** modify, copy, or redistribute it subject to the terms and conditions | |
6371 | +** of the GNU General Public License v.2. | |
6372 | +** | |
6373 | +******************************************************************************* | |
6374 | +******************************************************************************/ | |
6375 | + | |
6376 | +/* | |
6377 | + * lowcomms.c | |
6378 | + * | |
6379 | + * This is the "low-level" comms layer. | |
6380 | + * | |
6381 | + * It is responsible for sending/receiving messages | |
6382 | + * from other nodes in the cluster. | |
6383 | + * | |
6384 | + * Cluster nodes are referred to by their nodeids. nodeids are | |
6385 | + * simply 32 bit numbers to the locking module - if they need to | |
6386 | + * be expanded for the cluster infrastructure then that is its | |
6387 | + * responsibility. It is this layer's | |
6388 | + * responsibility to resolve these into IP address or | |
6389 | + * whatever it needs for inter-node communication. | |
6390 | + * | |
6391 | + * The comms level is two kernel threads that deal mainly with | |
6392 | + * the receiving of messages from other nodes and passing them | |
6393 | + * up to the mid-level comms layer (which understands the | |
6394 | + * message format) for execution by the locking core, and | |
6395 | + * a send thread which does all the setting up of connections | |
6396 | + * to remote nodes and the sending of data. Threads are not allowed | |
6397 | + * to send their own data because it may cause them to wait in times | |
6398 | + * of high load. Also, this way, the sending thread can collect together | |
6399 | + * messages bound for one node and send them in one block. | |
6400 | + * | |
6401 | + * I don't see any problem with the recv thread executing the locking | |
6402 | + * code on behalf of remote processes as the locking code is | |
6403 | + * short, efficient and never waits. | |
6404 | + * | |
6405 | + */ | |
6406 | + | |
6407 | + | |
6408 | +#include <asm/ioctls.h> | |
6409 | +#include <net/sock.h> | |
6410 | +#include <net/tcp.h> | |
6411 | +#include <linux/pagemap.h> | |
6412 | +#include <cluster/cnxman.h> | |
6413 | + | |
6414 | +#include "dlm_internal.h" | |
6415 | +#include "lowcomms.h" | |
6416 | +#include "midcomms.h" | |
6417 | +#include "config.h" | |
6418 | + | |
6419 | +struct cbuf { | |
6420 | + unsigned base; | |
6421 | + unsigned len; | |
6422 | + unsigned mask; | |
6423 | +}; | |
6424 | + | |
6425 | +#define CBUF_INIT(cb, size) do { (cb)->base = (cb)->len = 0; (cb)->mask = ((size)-1); } while(0) | |
6426 | +#define CBUF_ADD(cb, n) do { (cb)->len += n; } while(0) | |
6427 | +#define CBUF_EMPTY(cb) ((cb)->len == 0) | |
6428 | +#define CBUF_MAY_ADD(cb, n) (((cb)->len + (n)) < ((cb)->mask + 1)) | |
6429 | +#define CBUF_EAT(cb, n) do { (cb)->len -= (n); \ | |
6430 | + (cb)->base += (n); (cb)->base &= (cb)->mask; } while(0) | |
6431 | +#define CBUF_DATA(cb) (((cb)->base + (cb)->len) & (cb)->mask) | |
6432 | + | |
6433 | +struct connection { | |
6434 | + struct socket *sock; /* NULL if not connected */ | |
6435 | + uint32_t nodeid; /* So we know who we are in the list */ | |
6436 | + struct rw_semaphore sock_sem; /* Stop connect races */ | |
6437 | + struct list_head read_list; /* On this list when ready for reading */ | |
6438 | + struct list_head write_list; /* On this list when ready for writing */ | |
6439 | + struct list_head state_list; /* On this list when ready to connect */ | |
6440 | + unsigned long flags; /* bit 1,2 = We are on the read/write lists */ | |
6441 | +#define CF_READ_PENDING 1 | |
6442 | +#define CF_WRITE_PENDING 2 | |
6443 | +#define CF_CONNECT_PENDING 3 | |
6444 | +#define CF_IS_OTHERSOCK 4 | |
6445 | + struct list_head writequeue; /* List of outgoing writequeue_entries */ | |
6446 | + struct list_head listenlist; /* List of allocated listening sockets */ | |
6447 | + spinlock_t writequeue_lock; | |
6448 | + int (*rx_action) (struct connection *); /* What to do when active */ | |
6449 | + struct page *rx_page; | |
6450 | + struct cbuf cb; | |
6451 | + int retries; | |
6452 | +#define MAX_CONNECT_RETRIES 3 | |
6453 | + struct connection *othersock; | |
6454 | +}; | |
6455 | +#define sock2con(x) ((struct connection *)(x)->sk_user_data) | |
6456 | +#define nodeid2con(x) (&connections[(x)]) | |
6457 | + | |
6458 | +/* An entry waiting to be sent */ | |
6459 | +struct writequeue_entry { | |
6460 | + struct list_head list; | |
6461 | + struct page *page; | |
6462 | + int offset; | |
6463 | + int len; | |
6464 | + int end; | |
6465 | + int users; | |
6466 | + struct connection *con; | |
6467 | +}; | |
6468 | + | |
6469 | +/* "Template" structure for IPv4 and IPv6 used to fill | |
6470 | + * in the missing bits when converting between cman (which knows | |
6471 | + * nothing about sockaddr structs) and real life where we actually | |
6472 | + * have to connect to these addresses. Also one of these structs | |
6473 | + * will hold the cached "us" address. | |
6474 | + * | |
6475 | + * It's an in6 sockaddr just so there's enough space for anything | |
6476 | + * we're likely to see here. | |
6477 | + */ | |
6478 | +static struct sockaddr_in6 local_addr; | |
6479 | + | |
6480 | +/* Manage daemons */ | |
6481 | +static struct semaphore thread_lock; | |
6482 | +static struct completion thread_completion; | |
6483 | +static atomic_t send_run; | |
6484 | +static atomic_t recv_run; | |
6485 | + | |
6486 | +/* An array of connections, indexed by NODEID */ | |
6487 | +static struct connection *connections; | |
6488 | +static int conn_array_size; | |
6489 | +static atomic_t writequeue_length; | |
6490 | +static atomic_t accepting; | |
6491 | + | |
6492 | +static wait_queue_t lowcomms_send_waitq_head; | |
6493 | +static wait_queue_head_t lowcomms_send_waitq; | |
6494 | + | |
6495 | +static wait_queue_t lowcomms_recv_waitq_head; | |
6496 | +static wait_queue_head_t lowcomms_recv_waitq; | |
6497 | + | |
6498 | +/* List of sockets that have reads pending */ | |
6499 | +static struct list_head read_sockets; | |
6500 | +static spinlock_t read_sockets_lock; | |
6501 | + | |
6502 | +/* List of sockets which have writes pending */ | |
6503 | +static struct list_head write_sockets; | |
6504 | +static spinlock_t write_sockets_lock; | |
6505 | + | |
6506 | +/* List of sockets which have connects pending */ | |
6507 | +static struct list_head state_sockets; | |
6508 | +static spinlock_t state_sockets_lock; | |
6509 | + | |
6510 | +/* List of allocated listen sockets */ | |
6511 | +static struct list_head listen_sockets; | |
6512 | + | |
6513 | +static int lowcomms_ipaddr_from_nodeid(int nodeid, struct sockaddr *retaddr); | |
6514 | +static int lowcomms_nodeid_from_ipaddr(struct sockaddr *addr, int addr_len); | |
6515 | + | |
6516 | + | |
6517 | +/* Data available on socket or listen socket received a connect */ | |
6518 | +static void lowcomms_data_ready(struct sock *sk, int count_unused) | |
6519 | +{ | |
6520 | + struct connection *con = sock2con(sk); | |
6521 | + | |
6522 | + if (test_and_set_bit(CF_READ_PENDING, &con->flags)) | |
6523 | + return; | |
6524 | + | |
6525 | + spin_lock_bh(&read_sockets_lock); | |
6526 | + list_add_tail(&con->read_list, &read_sockets); | |
6527 | + spin_unlock_bh(&read_sockets_lock); | |
6528 | + | |
6529 | + wake_up_interruptible(&lowcomms_recv_waitq); | |
6530 | +} | |
6531 | + | |
6532 | +static void lowcomms_write_space(struct sock *sk) | |
6533 | +{ | |
6534 | + struct connection *con = sock2con(sk); | |
6535 | + | |
6536 | + if (test_and_set_bit(CF_WRITE_PENDING, &con->flags)) | |
6537 | + return; | |
6538 | + | |
6539 | + spin_lock_bh(&write_sockets_lock); | |
6540 | + list_add_tail(&con->write_list, &write_sockets); | |
6541 | + spin_unlock_bh(&write_sockets_lock); | |
6542 | + | |
6543 | + wake_up_interruptible(&lowcomms_send_waitq); | |
6544 | +} | |
6545 | + | |
6546 | +static inline void lowcomms_connect_sock(struct connection *con) | |
6547 | +{ | |
6548 | + if (test_and_set_bit(CF_CONNECT_PENDING, &con->flags)) | |
6549 | + return; | |
6550 | + if (!atomic_read(&accepting)) | |
6551 | + return; | |
6552 | + | |
6553 | + spin_lock_bh(&state_sockets_lock); | |
6554 | + list_add_tail(&con->state_list, &state_sockets); | |
6555 | + spin_unlock_bh(&state_sockets_lock); | |
6556 | + | |
6557 | + wake_up_interruptible(&lowcomms_send_waitq); | |
6558 | +} | |
6559 | + | |
6560 | +static void lowcomms_state_change(struct sock *sk) | |
6561 | +{ | |
6562 | +/* struct connection *con = sock2con(sk); */ | |
6563 | + | |
6564 | + switch (sk->sk_state) { | |
6565 | + case TCP_ESTABLISHED: | |
6566 | + lowcomms_write_space(sk); | |
6567 | + break; | |
6568 | + | |
6569 | + case TCP_FIN_WAIT1: | |
6570 | + case TCP_FIN_WAIT2: | |
6571 | + case TCP_TIME_WAIT: | |
6572 | + case TCP_CLOSE: | |
6573 | + case TCP_CLOSE_WAIT: | |
6574 | + case TCP_LAST_ACK: | |
6575 | + case TCP_CLOSING: | |
6576 | + /* FIXME: I think this causes more trouble than it solves. | |
6577 | + lowcomms will reconnect anyway when there is something to | |
6578 | + send. This just attempts reconnection if a node goes down! | |
6579 | + */ | |
6580 | + /* lowcomms_connect_sock(con); */ | |
6581 | + break; | |
6582 | + | |
6583 | + default: | |
6584 | + printk("dlm: lowcomms_state_change: state=%d\n", sk->sk_state); | |
6585 | + break; | |
6586 | + } | |
6587 | +} | |
6588 | + | |
6589 | +/* Make a socket active */ | |
6590 | +static int add_sock(struct socket *sock, struct connection *con) | |
6591 | +{ | |
6592 | + con->sock = sock; | |
6593 | + | |
6594 | + /* Install a data_ready callback */ | |
6595 | + con->sock->sk->sk_data_ready = lowcomms_data_ready; | |
6596 | + con->sock->sk->sk_write_space = lowcomms_write_space; | |
6597 | + con->sock->sk->sk_state_change = lowcomms_state_change; | |
6598 | + | |
6599 | + return 0; | |
6600 | +} | |
6601 | + | |
6602 | +/* Set the family (copied from local_addr) and port in an IPv4 or IPv6 | |
6603 | + sockaddr and return the matching address length in *addr_len */ | |
6604 | +static void make_sockaddr(struct sockaddr_in6 *saddr, uint16_t port, | |
6605 | + int *addr_len) | |
6606 | +{ | |
6607 | + saddr->sin6_family = local_addr.sin6_family; | |
6608 | + if (local_addr.sin6_family == AF_INET) { | |
6609 | + struct sockaddr_in *in4_addr = (struct sockaddr_in *)saddr; | |
6610 | + in4_addr->sin_port = cpu_to_be16(port); | |
6611 | + *addr_len = sizeof(struct sockaddr_in); | |
6612 | + } | |
6613 | + else { | |
6614 | + saddr->sin6_port = cpu_to_be16(port); | |
6615 | + *addr_len = sizeof(struct sockaddr_in6); | |
6616 | + } | |
6617 | +} | |
6618 | + | |
6619 | +/* Close a remote connection and tidy up (does nothing for CF_IS_OTHERSOCK) */ | |
6620 | +static void close_connection(struct connection *con) | |
6621 | +{ | |
6622 | + if (test_bit(CF_IS_OTHERSOCK, &con->flags)) | |
6623 | + return; | |
6624 | + | |
6625 | + down_write(&con->sock_sem); | |
6626 | + | |
6627 | + if (con->sock) { | |
6628 | + sock_release(con->sock); | |
6629 | + con->sock = NULL; | |
6630 | + if (con->othersock) { /* also release and free the secondary connection */ | |
6631 | + down_write(&con->othersock->sock_sem); | |
6632 | + sock_release(con->othersock->sock); | |
6633 | + con->othersock->sock = NULL; | |
6634 | + up_write(&con->othersock->sock_sem); | |
6635 | + kfree(con->othersock); | |
6636 | + con->othersock = NULL; | |
6637 | + } | |
6638 | + } | |
6639 | + if (con->rx_page) { /* drop the receive buffer page, if any */ | |
6640 | + __free_page(con->rx_page); | |
6641 | + con->rx_page = NULL; | |
6642 | + } | |
6643 | + up_write(&con->sock_sem); | |
6644 | +} | |
6645 | + | |
6646 | +/* Data received from remote end: read it into con's circular buffer and process it */ | |
6647 | +static int receive_from_sock(struct connection *con) | |
6648 | +{ | |
6649 | + int ret = 0; | |
6650 | + struct msghdr msg; | |
6651 | + struct iovec iov[2]; | |
6652 | + mm_segment_t fs; | |
6653 | + unsigned len; | |
6654 | + int r; | |
6655 | + int call_again_soon = 0; | |
6656 | + | |
6657 | + down_read(&con->sock_sem); | |
6658 | + | |
6659 | + if (con->sock == NULL) | |
6660 | + goto out; | |
6661 | + if (con->rx_page == NULL) { | |
6662 | + /* | |
6663 | + * This doesn't need to be atomic, but I think it should | |
6664 | + * improve performance if it is. | |
6665 | + */ | |
6666 | + con->rx_page = alloc_page(GFP_ATOMIC); | |
6667 | + if (con->rx_page == NULL) | |
6668 | + goto out_resched; | |
6669 | + CBUF_INIT(&con->cb, PAGE_CACHE_SIZE); | |
6670 | + } | |
6671 | + /* | |
6672 | + * To avoid doing too many short reads, we will reschedule for another | |
6673 | + * time if there are less than 32 bytes left in the buffer. | |
6674 | + */ | |
6675 | + if (!CBUF_MAY_ADD(&con->cb, 32)) | |
6676 | + goto out_resched; | |
6677 | + | |
6678 | + msg.msg_control = NULL; | |
6679 | + msg.msg_controllen = 0; | |
6680 | + msg.msg_iovlen = 1; | |
6681 | + msg.msg_iov = iov; | |
6682 | + msg.msg_name = NULL; | |
6683 | + msg.msg_namelen = 0; | |
6684 | + msg.msg_flags = 0; | |
6685 | + | |
6686 | + /* | |
6687 | + * iov[0] is the bit of the circular buffer between the current end | |
6688 | + * point (cb.base + cb.len) and the end of the buffer. | |
6689 | + */ | |
6690 | + iov[0].iov_len = con->cb.base - CBUF_DATA(&con->cb); | |
6691 | + iov[0].iov_base = page_address(con->rx_page) + CBUF_DATA(&con->cb); | |
6692 | + iov[1].iov_len = 0; | |
6693 | + | |
6694 | + /* | |
6695 | + * iov[1] is the bit of the circular buffer between the start of the | |
6696 | + * buffer and the start of the currently used section (cb.base) | |
6697 | + */ | |
6698 | + if (CBUF_DATA(&con->cb) >= con->cb.base) { | |
6699 | + iov[0].iov_len = PAGE_CACHE_SIZE - CBUF_DATA(&con->cb); | |
6700 | + iov[1].iov_len = con->cb.base; | |
6701 | + iov[1].iov_base = page_address(con->rx_page); | |
6702 | + msg.msg_iovlen = 2; | |
6703 | + } | |
6704 | + len = iov[0].iov_len + iov[1].iov_len; | |
6705 | + | |
6706 | + fs = get_fs(); | |
6707 | + set_fs(get_ds()); | |
6708 | + r = ret = sock_recvmsg(con->sock, &msg, len, | |
6709 | + MSG_DONTWAIT | MSG_NOSIGNAL); | |
6710 | + set_fs(fs); | |
6711 | + | |
6712 | + if (ret <= 0) | |
6713 | + goto out_close; | |
6714 | + if (ret == len) /* filled all the free space: more data may be waiting */ | |
6715 | + call_again_soon = 1; | |
6716 | + CBUF_ADD(&con->cb, ret); | |
6717 | + ret = midcomms_process_incoming_buffer(con->nodeid, | |
6718 | + page_address(con->rx_page), | |
6719 | + con->cb.base, con->cb.len, | |
6720 | + PAGE_CACHE_SIZE); | |
6721 | + if (ret == -EBADMSG) { | |
6722 | + printk(KERN_INFO "dlm: lowcomms: addr=%p, base=%u, len=%u, " | |
6723 | + "iov_len=%u, iov_base[0]=%p, read=%d\n", | |
6724 | + page_address(con->rx_page), con->cb.base, con->cb.len, | |
6725 | + len, iov[0].iov_base, r); | |
6726 | + } | |
6727 | + if (ret < 0) | |
6728 | + goto out_close; | |
6729 | + CBUF_EAT(&con->cb, ret); | |
6730 | + | |
6731 | + if (CBUF_EMPTY(&con->cb) && !call_again_soon) { | |
6732 | + __free_page(con->rx_page); | |
6733 | + con->rx_page = NULL; | |
6734 | + } | |
6735 | + out: | |
6736 | + if (call_again_soon) | |
6737 | + goto out_resched; | |
6738 | + up_read(&con->sock_sem); | |
6739 | + ret = 0; | |
6740 | + goto out_ret; | |
6741 | + | |
6742 | + out_resched: | |
6743 | + lowcomms_data_ready(con->sock->sk, 0); | |
6744 | + up_read(&con->sock_sem); | |
6745 | + ret = 0; | |
6746 | + goto out_ret; | |
6747 | + | |
6748 | + out_close: | |
6749 | + up_read(&con->sock_sem); | |
6750 | + if (ret != -EAGAIN && !test_bit(CF_IS_OTHERSOCK, &con->flags)) { | |
6751 | + close_connection(con); | |
6752 | + lowcomms_connect_sock(con); | |
6753 | + } | |
6754 | + | |
6755 | + out_ret: | |
6756 | + return ret; | |
6757 | +} | |
6758 | + | |
6759 | +/* Listening socket is busy, accept a connection */ | |
6760 | +static int accept_from_sock(struct connection *con) | |
6761 | +{ | |
6762 | + int result; | |
6763 | + struct sockaddr_in6 peeraddr; | |
6764 | + struct socket *newsock; | |
6765 | + int len; | |
6766 | + int nodeid; | |
6767 | + struct connection *newcon; | |
6768 | + | |
6769 | + memset(&peeraddr, 0, sizeof(peeraddr)); | |
6770 | + newsock = sock_alloc(); | |
6771 | + if (!newsock) | |
6772 | + return -ENOMEM; | |
6773 | + | |
6774 | + down_read(&con->sock_sem); | |
6775 | + | |
6776 | + result = -ENOTCONN; | |
6777 | + if (con->sock == NULL) | |
6778 | + goto accept_err; | |
6779 | + | |
6780 | + newsock->type = con->sock->type; | |
6781 | + newsock->ops = con->sock->ops; | |
6782 | + | |
6783 | + result = con->sock->ops->accept(con->sock, newsock, O_NONBLOCK); | |
6784 | + if (result < 0) | |
6785 | + goto accept_err; | |
6786 | + | |
6787 | + /* Get the connected socket's peer */ | |
6788 | + if (newsock->ops->getname(newsock, (struct sockaddr *)&peeraddr, | |
6789 | + &len, 2)) { | |
6790 | + result = -ECONNABORTED; | |
6791 | + goto accept_err; | |
6792 | + } | |
6793 | + | |
6794 | + /* Get the new node's NODEID */ | |
6795 | + nodeid = lowcomms_nodeid_from_ipaddr((struct sockaddr *)&peeraddr, len); | |
6796 | + if (nodeid == 0) { | |
6797 | + printk("dlm: connect from non cluster node\n"); | |
6798 | + sock_release(newsock); | |
6799 | + up_read(&con->sock_sem); | |
6800 | + return -1; | |
6801 | + } | |
6802 | + | |
6803 | + log_print("got connection from %d", nodeid); | |
6804 | + | |
6805 | + /* Check to see if we already have a connection to this node. This | |
6806 | + * could happen if the two nodes initiate a connection at roughly | |
6807 | + * the same time and the connections cross on the wire. | |
6808 | + * TEMPORARY FIX: | |
6809 | + * In this case we store the incoming one in "othersock" | |
6810 | + */ | |
6811 | + newcon = nodeid2con(nodeid); | |
6812 | + down_write(&newcon->sock_sem); | |
6813 | + if (newcon->sock) { | |
6814 | + struct connection *othercon; | |
6815 | + | |
6816 | + othercon = kmalloc(sizeof(struct connection), GFP_KERNEL); | |
6817 | + if (!othercon) { | |
6818 | + printk("dlm: failed to allocate incoming socket\n"); | |
6819 | + sock_release(newsock); | |
6820 | + up_write(&newcon->sock_sem); | |
6821 | + up_read(&con->sock_sem); | |
6822 | + return -ENOMEM; | |
6823 | + } | |
6824 | + memset(othercon, 0, sizeof(*othercon)); | |
6825 | + othercon->nodeid = nodeid; | |
6826 | + othercon->rx_action = receive_from_sock; | |
6827 | + init_rwsem(&othercon->sock_sem); | |
6828 | + set_bit(CF_IS_OTHERSOCK, &othercon->flags); | |
6829 | + /* Initialise othercon fully before add_sock installs the callbacks */ | |
6830 | + newsock->sk->sk_user_data = othercon; | |
6831 | + add_sock(newsock, othercon); | |
6832 | + newcon->othersock = othercon; | |
6833 | + | |
6834 | + up_write(&newcon->sock_sem); | |
6835 | + lowcomms_data_ready(newsock->sk, 0); | |
6836 | + up_read(&con->sock_sem); | |
6837 | + goto accept_out; | |
6838 | + } | |
6839 | + | |
6840 | + newsock->sk->sk_user_data = newcon; | |
6841 | + newcon->rx_action = receive_from_sock; | |
6842 | + add_sock(newsock, newcon); | |
6843 | + up_write(&newcon->sock_sem); | |
6844 | + | |
6845 | + /* | |
6846 | + * Add it to the active queue in case we got data | |
6847 | + * between processing the accept and adding the socket | |
6848 | + * to the read_sockets list | |
6849 | + */ | |
6850 | + lowcomms_data_ready(newsock->sk, 0); | |
6851 | + | |
6852 | + up_read(&con->sock_sem); | |
6853 | + | |
6854 | + accept_out: | |
6855 | + return 0; | |
6856 | + | |
6857 | + accept_err: | |
6858 | + up_read(&con->sock_sem); | |
6859 | + sock_release(newsock); | |
6860 | + | |
6861 | + printk("dlm: error accepting connection from node: %d\n", result); | |
6862 | + return result; | |
6863 | +} | |
6864 | + | |
6865 | +/* Connect a new socket to its peer */ | |
6866 | +static int connect_to_sock(struct connection *con) | |
6867 | +{ | |
6868 | + int result = -EHOSTUNREACH; | |
6869 | + struct sockaddr_in6 saddr; | |
6870 | + int addr_len; | |
6871 | + struct socket *sock; | |
6872 | + | |
6873 | + if (con->nodeid == 0) { | |
6874 | + log_print("attempt to connect sock 0 foiled"); | |
6875 | + return 0; | |
6876 | + } | |
6877 | + | |
6878 | + down_write(&con->sock_sem); | |
6879 | + if (con->retries++ > MAX_CONNECT_RETRIES) | |
6880 | + goto out; | |
6881 | + | |
6882 | + // FIXME not sure this should happen, let alone like this. | |
6883 | + if (con->sock) { | |
6884 | + sock_release(con->sock); | |
6885 | + con->sock = NULL; | |
6886 | + } | |
6887 | + | |
6888 | + /* Create a socket to communicate with */ | |
6889 | + result = sock_create_kern(local_addr.sin6_family, SOCK_STREAM, IPPROTO_TCP, &sock); | |
6890 | + if (result < 0) | |
6891 | + goto out_err; | |
6892 | + | |
6893 | + /* Attach the socket to con straight away so out_err releases it on any failure */ | |
6894 | + sock->sk->sk_user_data = con; | |
6895 | + con->rx_action = receive_from_sock; | |
6896 | + add_sock(sock, con); | |
6897 | + | |
6898 | + if (lowcomms_ipaddr_from_nodeid(con->nodeid, (struct sockaddr *)&saddr) < 0) | |
6899 | + goto out_err; | |
6900 | + | |
6901 | + make_sockaddr(&saddr, dlm_config.tcp_port, &addr_len); | |
6902 | + result = | |
6903 | + sock->ops->connect(sock, (struct sockaddr *) &saddr, addr_len, | |
6904 | + O_NONBLOCK); | |
6905 | + if (result == -EINPROGRESS) | |
6906 | + result = 0; | |
6907 | + if (result != 0) | |
6908 | + goto out_err; | |
6909 | + | |
6910 | + out: | |
6911 | + up_write(&con->sock_sem); | |
6912 | + /* | |
6913 | + * Returning an error here means we've given up trying to connect to | |
6914 | + * a remote node, otherwise we return 0 and reschedule the connection | |
6915 | + * attempt | |
6916 | + */ | |
6917 | + return result; | |
6918 | + | |
6919 | + out_err: | |
6920 | + if (con->sock) { | |
6921 | + sock_release(con->sock); | |
6922 | + con->sock = NULL; | |
6923 | + } | |
6924 | + /* | |
6925 | + * Some errors are fatal and this list might need adjusting. For other | |
6926 | + * errors we try again until the max number of retries is reached. | |
6927 | + */ | |
6928 | + if (result != -EHOSTUNREACH && result != -ENETUNREACH && | |
6929 | + result != -ENETDOWN && result != -EINVAL | |
6930 | + && result != -EPROTONOSUPPORT) { | |
6931 | + lowcomms_connect_sock(con); | |
6932 | + result = 0; | |
6933 | + } | |
6934 | + goto out; | |
6935 | +} | |
6936 | + | |
6937 | +static struct socket *create_listen_sock(struct connection *con, char *addr, int addr_len) | |
6938 | +{ | |
6939 | + struct socket *sock = NULL; | |
6940 | + mm_segment_t fs; | |
6941 | + int result = 0; | |
6942 | + int one = 1; | |
6943 | + struct sockaddr_in6 *saddr = (struct sockaddr_in6 *)addr; | |
6944 | + /* Create, bind and listen on a socket for addr; returns NULL on failure */ | |
6945 | + /* Create a socket to communicate with */ | |
6946 | + result = sock_create_kern(local_addr.sin6_family, SOCK_STREAM, IPPROTO_TCP, &sock); | |
6947 | + if (result < 0) { | |
6948 | + printk("dlm: Can't create listening comms socket\n"); | |
6949 | + goto create_out; | |
6950 | + } | |
6951 | + | |
6952 | + fs = get_fs(); | |
6953 | + set_fs(get_ds()); | |
6954 | + result = sock_setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, (char *)&one, sizeof(one)); | |
6955 | + set_fs(fs); | |
6956 | + if (result < 0) { | |
6957 | + printk("dlm: Failed to set SO_REUSEADDR on socket: result=%d\n",result); | |
6958 | + } | |
6959 | + sock->sk->sk_user_data = con; | |
6960 | + con->rx_action = accept_from_sock; | |
6961 | + con->sock = sock; | |
6962 | + | |
6963 | + /* Bind to our port */ | |
6964 | + make_sockaddr(saddr, dlm_config.tcp_port, &addr_len); | |
6965 | + result = sock->ops->bind(sock, (struct sockaddr *) saddr, addr_len); | |
6966 | + if (result < 0) { | |
6967 | + printk("dlm: Can't bind to port %d\n", dlm_config.tcp_port); | |
6968 | + sock_release(sock); | |
6969 | + con->sock = sock = NULL; /* don't leave con pointing at the released socket */ | |
6970 | + goto create_out; | |
6971 | + } | |
6972 | + | |
6973 | + fs = get_fs(); | |
6974 | + set_fs(get_ds()); | |
6975 | + | |
6976 | + result = sock_setsockopt(sock, SOL_SOCKET, SO_KEEPALIVE, (char *)&one, sizeof(one)); | |
6977 | + set_fs(fs); | |
6978 | + if (result < 0) { | |
6979 | + printk("dlm: Set keepalive failed: %d\n", result); | |
6980 | + } | |
6981 | + | |
6982 | + result = sock->ops->listen(sock, 5); | |
6983 | + if (result < 0) { | |
6984 | + printk("dlm: Can't listen on port %d\n", dlm_config.tcp_port); | |
6985 | + sock_release(sock); | |
6986 | + con->sock = sock = NULL; /* don't leave con pointing at the released socket */ | |
6987 | + goto create_out; | |
6988 | + } | |
6989 | + | |
6990 | + create_out: | |
6991 | + return sock; | |
6992 | +} | |
6993 | + | |
6994 | + | |
6995 | +/* Listen on all interfaces */ | |
6996 | +static int listen_for_all(void) | |
6997 | +{ | |
6998 | + int result = 0; | |
6999 | + int nodeid; | |
7000 | + struct socket *sock = NULL; | |
7001 | + struct list_head *addr_list; | |
7002 | + struct connection *con = nodeid2con(0); | |
7003 | + struct cluster_node_addr *node_addr; | |
7004 | + char local_addr[sizeof(struct sockaddr_in6)]; | |
7005 | + | |
7006 | + /* This will also fill in local_addr */ | |
7007 | + nodeid = lowcomms_our_nodeid(); | |
7008 | + | |
7009 | + addr_list = kcl_get_node_addresses(nodeid); | |
7010 | + if (!addr_list) { | |
7011 | + printk("dlm: cannot initialise comms layer\n"); | |
7012 | + result = -ENOTCONN; | |
7013 | + goto create_out; | |
7014 | + } | |
7015 | + | |
7016 | + list_for_each_entry(node_addr, addr_list, list) { | |
7017 | + | |
7018 | + if (!con) { | |
7019 | + con = kmalloc(sizeof(struct connection), GFP_KERNEL); | |
7020 | + if (!con) { | |
7021 | + printk("dlm: failed to allocate listen socket\n"); | |
7022 | + goto create_out; | |
7023 | + } | |
7024 | + memset(con, 0, sizeof(*con)); | |
7025 | + init_rwsem(&con->sock_sem); | |
7026 | + spin_lock_init(&con->writequeue_lock); | |
7027 | + INIT_LIST_HEAD(&con->writequeue); | |
7028 | + set_bit(CF_IS_OTHERSOCK, &con->flags); | |
7029 | + } | |
7030 | + | |
7031 | + memcpy(local_addr, node_addr->addr, node_addr->addr_len); | |
7032 | + sock = create_listen_sock(con, local_addr, | |
7033 | + node_addr->addr_len); | |
7034 | + if (sock) { | |
7035 | + add_sock(sock, con); | |
7036 | + /* Track kmalloc'ed listening sockets so lowcomms_stop() can free them */ | |
7037 | + if (test_bit(CF_IS_OTHERSOCK, &con->flags)) | |
7038 | + list_add_tail(&con->listenlist, &listen_sockets); | |
7039 | + } | |
7040 | + else if (test_bit(CF_IS_OTHERSOCK, &con->flags)) { | |
7041 | + kfree(con); /* kmalloc'ed above; the nodeid2con(0) entry must never be freed here */ | |
7042 | + } | |
7043 | + else { | |
7044 | + con->sock = NULL; /* create_listen_sock may have stored a now-released socket */ | |
7045 | + } | |
7046 | + con = NULL; | |
7047 | + } | |
7048 | + | |
7049 | + create_out: | |
7050 | + return result; | |
7051 | +} | |
7052 | + | |
7053 | + | |
7054 | +/* Allocate an empty writequeue entry backed by one page; returns NULL on failure */ | |
7055 | +static struct writequeue_entry *new_writequeue_entry(struct connection *con, | |
7056 | + int allocation) | |
7057 | +{ | |
7058 | + struct writequeue_entry *entry; | |
7059 | + | |
7060 | + entry = kmalloc(sizeof(struct writequeue_entry), allocation); | |
7061 | + if (!entry) | |
7062 | + return NULL; | |
7063 | + | |
7064 | + entry->page = alloc_page(allocation); | |
7065 | + if (!entry->page) { | |
7066 | + kfree(entry); | |
7067 | + return NULL; | |
7068 | + } | |
7069 | + | |
7070 | + entry->offset = 0; | |
7071 | + entry->len = 0; | |
7072 | + entry->end = 0; | |
7073 | + entry->users = 0; | |
7074 | + entry->con = con; | |
7075 | + | |
7076 | + return entry; | |
7077 | +} | |
7078 | + | |
7079 | +struct writequeue_entry *lowcomms_get_buffer(int nodeid, int len, | |
7080 | + int allocation, char **ppc) | |
7081 | +{ | |
7082 | + struct connection *con = nodeid2con(nodeid); | |
7083 | + struct writequeue_entry *e; | |
7084 | + int offset = 0; | |
7085 | + int users = 0; | |
7086 | + /* Reserve len bytes on nodeid's write queue; *ppc is set to the reserved space */ | |
7087 | + if (!atomic_read(&accepting)) | |
7088 | + return NULL; | |
7089 | + | |
7090 | + spin_lock(&con->writequeue_lock); | |
7091 | + e = list_entry(con->writequeue.prev, struct writequeue_entry, list); | |
7092 | + if (((struct list_head *) e == &con->writequeue) || | |
7093 | + (PAGE_CACHE_SIZE - e->end < len)) { | |
7094 | + e = NULL; /* queue empty, or no room left in the last entry */ | |
7095 | + } else { | |
7096 | + offset = e->end; | |
7097 | + e->end += len; | |
7098 | + users = e->users++; | |
7099 | + } | |
7100 | + spin_unlock(&con->writequeue_lock); | |
7101 | + | |
7102 | + if (e) { | |
7103 | + got_one: | |
7104 | + if (users == 0) /* first user of this entry maps the page */ | |
7105 | + kmap(e->page); | |
7106 | + *ppc = page_address(e->page) + offset; | |
7107 | + return e; | |
7108 | + } | |
7109 | + | |
7110 | + e = new_writequeue_entry(con, allocation); | |
7111 | + if (e) { | |
7112 | + spin_lock(&con->writequeue_lock); | |
7113 | + offset = e->end; | |
7114 | + e->end += len; | |
7115 | + users = e->users++; | |
7116 | + list_add_tail(&e->list, &con->writequeue); | |
7117 | + spin_unlock(&con->writequeue_lock); | |
7118 | + atomic_inc(&writequeue_length); | |
7119 | + goto got_one; | |
7120 | + } | |
7121 | + return NULL; | |
7122 | +} | |
7123 | + | |
7124 | +void lowcomms_commit_buffer(struct writequeue_entry *e) | |
7125 | +{ | |
7126 | + struct connection *con = e->con; | |
7127 | + int users; | |
7128 | + /* Drop one user of e; the last user sets e->len and queues con for sending */ | |
7129 | + if (!atomic_read(&accepting)) | |
7130 | + return; | |
7131 | + | |
7132 | + spin_lock(&con->writequeue_lock); | |
7133 | + users = --e->users; | |
7134 | + if (users) | |
7135 | + goto out; | |
7136 | + e->len = e->end - e->offset; | |
7137 | + kunmap(e->page); | |
7138 | + spin_unlock(&con->writequeue_lock); | |
7139 | + | |
7140 | + if (test_and_set_bit(CF_WRITE_PENDING, &con->flags) == 0) { | |
7141 | + spin_lock_bh(&write_sockets_lock); | |
7142 | + list_add_tail(&con->write_list, &write_sockets); | |
7143 | + spin_unlock_bh(&write_sockets_lock); | |
7144 | + | |
7145 | + wake_up_interruptible(&lowcomms_send_waitq); | |
7146 | + } | |
7147 | + return; | |
7148 | + | |
7149 | + out: | |
7150 | + spin_unlock(&con->writequeue_lock); | |
7151 | + return; | |
7152 | +} | |
7153 | + | |
7154 | +static void free_entry(struct writequeue_entry *e) | |
7155 | +{ | |
7156 | + __free_page(e->page); | |
7157 | + kfree(e); | |
7158 | + atomic_dec(&writequeue_length); /* pairs with atomic_inc in lowcomms_get_buffer */ | |
7159 | +} | |
7160 | + | |
7161 | +/* Send queued writequeue entries on con's socket until it stops accepting data */ | |
7162 | +static int send_to_sock(struct connection *con) | |
7163 | +{ | |
7164 | + int ret = 0; | |
7165 | + ssize_t(*sendpage) (struct socket *, struct page *, int, size_t, int); | |
7166 | + const int msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL; | |
7167 | + struct writequeue_entry *e; | |
7168 | + int len, offset; | |
7169 | + | |
7170 | + down_read(&con->sock_sem); | |
7171 | + if (con->sock == NULL) | |
7172 | + goto out_connect; | |
7173 | + | |
7174 | + sendpage = con->sock->ops->sendpage; | |
7175 | + | |
7176 | + spin_lock(&con->writequeue_lock); | |
7177 | + for (;;) { | |
7178 | + e = list_entry(con->writequeue.next, struct writequeue_entry, | |
7179 | + list); | |
7180 | + if ((struct list_head *) e == &con->writequeue) | |
7181 | + break; | |
7182 | + | |
7183 | + len = e->len; | |
7184 | + offset = e->offset; | |
7185 | + BUG_ON(len == 0 && e->users == 0); | |
7186 | + spin_unlock(&con->writequeue_lock); | |
7187 | + | |
7188 | + ret = 0; | |
7189 | + if (len) { | |
7190 | + ret = sendpage(con->sock, e->page, offset, len, | |
7191 | + msg_flags); | |
7192 | + if (ret == -EAGAIN || ret == 0) /* nothing sent: leave the rest queued */ | |
7193 | + goto out; | |
7194 | + if (ret <= 0) | |
7195 | + goto send_error; | |
7196 | + } | |
7197 | + | |
7198 | + spin_lock(&con->writequeue_lock); | |
7199 | + e->offset += ret; | |
7200 | + e->len -= ret; | |
7201 | + | |
7202 | + if (e->len == 0 && e->users == 0) { | |
7203 | + list_del(&e->list); | |
7204 | + free_entry(e); | |
7205 | + continue; | |
7206 | + } | |
7207 | + } | |
7208 | + spin_unlock(&con->writequeue_lock); | |
7209 | + out: | |
7210 | + up_read(&con->sock_sem); | |
7211 | + return ret; | |
7212 | + | |
7213 | + send_error: | |
7214 | + up_read(&con->sock_sem); | |
7215 | + close_connection(con); | |
7216 | + lowcomms_connect_sock(con); | |
7217 | + return ret; | |
7218 | + | |
7219 | + out_connect: | |
7220 | + up_read(&con->sock_sem); | |
7221 | + lowcomms_connect_sock(con); | |
7222 | + return 0; | |
7223 | +} | |
7224 | + | |
7225 | +/* Called from recoverd when it knows that a node has left the cluster. | |
7226 | + Returns 0 if an open socket was closed, -1 if there was nothing to close */ | |
7227 | +int lowcomms_close(int nodeid) | |
7228 | +{ | |
7229 | + struct connection *con; | |
7230 | + | |
7231 | + if (!connections) | |
7232 | + goto out; | |
7233 | + | |
7234 | + con = nodeid2con(nodeid); | |
7235 | + if (con->sock) { | |
7236 | + close_connection(con); | |
7237 | + return 0; | |
7238 | + } | |
7239 | + | |
7240 | + out: | |
7241 | + return -1; | |
7242 | +} | |
7243 | + | |
7244 | +/* API send message call, may queue the request. Returns 0 or -ENOBUFS */ | |
7245 | +/* N.B. This is the old interface - use the new one for new calls */ | |
7246 | +int lowcomms_send_message(int nodeid, char *buf, int len, int allocation) | |
7247 | +{ | |
7248 | + struct writequeue_entry *e; | |
7249 | + char *b; | |
7250 | + | |
10d56c87 | 7251 | + DLM_ASSERT(nodeid < dlm_config.max_connections, |
4bf12011 | 7252 | + printk("nodeid=%u\n", nodeid);); |
7253 | + | |
7254 | + e = lowcomms_get_buffer(nodeid, len, allocation, &b); | |
7255 | + if (e) { | |
7256 | + memcpy(b, buf, len); | |
7257 | + lowcomms_commit_buffer(e); | |
7258 | + return 0; | |
7259 | + } | |
7260 | + return -ENOBUFS; | |
7261 | +} | |
7262 | + | |
7263 | +/* Run the rx_action of every connection queued on read_sockets */ | |
7264 | +static void process_sockets(void) | |
7265 | +{ | |
7266 | + struct list_head *list; | |
7267 | + struct list_head *temp; | |
7268 | + | |
7269 | + spin_lock_bh(&read_sockets_lock); | |
7270 | + list_for_each_safe(list, temp, &read_sockets) { | |
7271 | + struct connection *con = | |
7272 | + list_entry(list, struct connection, read_list); | |
7273 | + list_del(&con->read_list); | |
7274 | + clear_bit(CF_READ_PENDING, &con->flags); | |
7275 | + | |
7276 | + spin_unlock_bh(&read_sockets_lock); /* rx_action runs without the list lock */ | |
7277 | + | |
7278 | + con->rx_action(con); | |
7279 | + | |
7280 | + /* Don't starve out everyone else */ | |
7281 | + schedule(); | |
7282 | + spin_lock_bh(&read_sockets_lock); | |
7283 | + } | |
7284 | + spin_unlock_bh(&read_sockets_lock); | |
7285 | +} | |
7286 | + | |
7287 | +/* Try to send any messages that are pending | |
7288 | + */ | |
7289 | +static void process_output_queue(void) | |
7290 | +{ | |
7291 | + struct list_head *list; | |
7292 | + struct list_head *temp; | |
7293 | + int ret; | |
7294 | + | |
7295 | + spin_lock_bh(&write_sockets_lock); | |
7296 | + list_for_each_safe(list, temp, &write_sockets) { | |
7297 | + struct connection *con = | |
7298 | + list_entry(list, struct connection, write_list); | |
7299 | + list_del(&con->write_list); | |
7300 | + clear_bit(CF_WRITE_PENDING, &con->flags); | |
7301 | + | |
7302 | + spin_unlock_bh(&write_sockets_lock); /* send without holding the list lock */ | |
7303 | + | |
7304 | + ret = send_to_sock(con); | |
7305 | + if (ret < 0) { /* nothing to do: send_to_sock already closed and requeued con */ | |
7306 | + } | |
7307 | + spin_lock_bh(&write_sockets_lock); | |
7308 | + } | |
7309 | + spin_unlock_bh(&write_sockets_lock); | |
7310 | +} | |
7311 | + | |
7312 | +static void process_state_queue(void) | |
7313 | +{ | |
7314 | + struct list_head *list; | |
7315 | + struct list_head *temp; | |
7316 | + int ret; | |
7317 | + /* Call connect_to_sock() for every connection queued on state_sockets */ | |
7318 | + spin_lock_bh(&state_sockets_lock); | |
7319 | + list_for_each_safe(list, temp, &state_sockets) { | |
7320 | + struct connection *con = | |
7321 | + list_entry(list, struct connection, state_list); | |
7322 | + list_del(&con->state_list); | |
7323 | + clear_bit(CF_CONNECT_PENDING, &con->flags); | |
7324 | + spin_unlock_bh(&state_sockets_lock); | |
7325 | + | |
7326 | + ret = connect_to_sock(con); | |
7327 | + if (ret < 0) { /* connect_to_sock gave up on this node; nothing more is done here */ | |
7328 | + } | |
7329 | + spin_lock_bh(&state_sockets_lock); | |
7330 | + } | |
7331 | + spin_unlock_bh(&state_sockets_lock); | |
7332 | +} | |
7333 | + | |
7334 | +/* Discard all entries on the write queues */ | |
7335 | +static void clean_writequeues(void) | |
7336 | +{ | |
7337 | + struct list_head *list; | |
7338 | + struct list_head *temp; | |
7339 | + int nodeid; | |
7340 | + /* Starts at 1: nodeid 0 is the connection listen_for_all() listens on */ | |
7341 | + for (nodeid = 1; nodeid < dlm_config.max_connections; nodeid++) { | |
7342 | + struct connection *con = nodeid2con(nodeid); | |
7343 | + | |
7344 | + spin_lock(&con->writequeue_lock); | |
7345 | + list_for_each_safe(list, temp, &con->writequeue) { | |
7346 | + struct writequeue_entry *e = | |
7347 | + list_entry(list, struct writequeue_entry, list); | |
7348 | + list_del(&e->list); | |
7349 | + free_entry(e); | |
7350 | + } | |
7351 | + spin_unlock(&con->writequeue_lock); | |
7352 | + } | |
7353 | +} | |
7354 | + | |
7355 | +static int read_list_empty(void) | |
7356 | +{ | |
7357 | + int status; | |
7358 | + /* Returns non-zero when read_sockets is empty (checked under its lock) */ | |
7359 | + spin_lock_bh(&read_sockets_lock); | |
7360 | + status = list_empty(&read_sockets); | |
7361 | + spin_unlock_bh(&read_sockets_lock); | |
7362 | + | |
7363 | + return status; | |
7364 | +} | |
7365 | + | |
7366 | +/* DLM Transport comms receive daemon */ | |
7367 | +static int dlm_recvd(void *data) | |
7368 | +{ | |
7369 | + daemonize("dlm_recvd"); | |
7370 | + atomic_set(&recv_run, 1); | |
7371 | + | |
7372 | + init_waitqueue_head(&lowcomms_recv_waitq); | |
7373 | + init_waitqueue_entry(&lowcomms_recv_waitq_head, current); | |
7374 | + add_wait_queue(&lowcomms_recv_waitq, &lowcomms_recv_waitq_head); | |
7375 | + | |
7376 | + complete(&thread_completion); /* tell daemons_start() we are initialised */ | |
7377 | + | |
7378 | + while (atomic_read(&recv_run)) { | |
7379 | + | |
7380 | + set_task_state(current, TASK_INTERRUPTIBLE); | |
7381 | + | |
7382 | + if (read_list_empty()) /* nothing queued: sleep until woken */ | |
7383 | + schedule(); | |
7384 | + | |
7385 | + set_task_state(current, TASK_RUNNING); | |
7386 | + | |
7387 | + process_sockets(); | |
7388 | + } | |
7389 | + | |
7390 | + down(&thread_lock); /* daemons_stop() holds this while it clears recv_run and wakes us */ | |
7391 | + up(&thread_lock); | |
7392 | + | |
7393 | + complete(&thread_completion); /* daemons_stop() waits for this */ | |
7394 | + | |
7395 | + return 0; | |
7396 | +} | |
7397 | + | |
7398 | +static int write_and_state_lists_empty(void) | |
7399 | +{ | |
7400 | + int status; | |
7401 | + /* Returns non-zero only when both write_sockets and state_sockets are empty */ | |
7402 | + spin_lock_bh(&write_sockets_lock); | |
7403 | + status = list_empty(&write_sockets); | |
7404 | + spin_unlock_bh(&write_sockets_lock); | |
7405 | + | |
7406 | + spin_lock_bh(&state_sockets_lock); | |
7407 | + if (list_empty(&state_sockets) == 0) | |
7408 | + status = 0; | |
7409 | + spin_unlock_bh(&state_sockets_lock); | |
7410 | + | |
7411 | + return status; | |
7412 | +} | |
7413 | + | |
7414 | +/* DLM Transport send daemon */ | |
7415 | +static int dlm_sendd(void *data) | |
7416 | +{ | |
7417 | + daemonize("dlm_sendd"); | |
7418 | + atomic_set(&send_run, 1); | |
7419 | + | |
7420 | + init_waitqueue_head(&lowcomms_send_waitq); | |
7421 | + init_waitqueue_entry(&lowcomms_send_waitq_head, current); | |
7422 | + add_wait_queue(&lowcomms_send_waitq, &lowcomms_send_waitq_head); | |
7423 | + | |
7424 | + complete(&thread_completion); /* tell daemons_start() we are initialised */ | |
7425 | + | |
7426 | + while (atomic_read(&send_run)) { | |
7427 | + | |
7428 | + set_task_state(current, TASK_INTERRUPTIBLE); | |
7429 | + | |
7430 | + if (write_and_state_lists_empty()) /* nothing queued: sleep until woken */ | |
7431 | + schedule(); | |
7432 | + | |
7433 | + set_task_state(current, TASK_RUNNING); | |
7434 | + | |
7435 | + process_state_queue(); | |
7436 | + process_output_queue(); | |
7437 | + } | |
7438 | + | |
7439 | + down(&thread_lock); /* daemons_stop() holds this while it clears send_run and wakes us */ | |
7440 | + up(&thread_lock); | |
7441 | + | |
7442 | + complete(&thread_completion); /* daemons_stop() waits for this */ | |
7443 | + | |
7444 | + return 0; | |
7445 | +} | |
7446 | + | |
7447 | +static void daemons_stop(void) | |
7448 | +{ | |
7449 | + if (atomic_read(&recv_run)) { /* dlm_recvd is running: ask it to exit */ | |
7450 | + down(&thread_lock); | |
7451 | + atomic_set(&recv_run, 0); | |
7452 | + wake_up_interruptible(&lowcomms_recv_waitq); | |
7453 | + up(&thread_lock); | |
7454 | + wait_for_completion(&thread_completion); /* completed by dlm_recvd on exit */ | |
7455 | + } | |
7456 | + | |
7457 | + if (atomic_read(&send_run)) { /* dlm_sendd is running: ask it to exit */ | |
7458 | + down(&thread_lock); | |
7459 | + atomic_set(&send_run, 0); | |
7460 | + wake_up_interruptible(&lowcomms_send_waitq); | |
7461 | + up(&thread_lock); | |
7462 | + wait_for_completion(&thread_completion); /* completed by dlm_sendd on exit */ | |
7463 | + } | |
7464 | +} | |
7465 | + | |
7466 | +static int daemons_start(void) | |
7467 | +{ | |
7468 | + int error; | |
7469 | + /* Start dlm_recvd then dlm_sendd, waiting for each to signal thread_completion */ | |
7470 | + error = kernel_thread(dlm_recvd, NULL, 0); | |
7471 | + if (error < 0) { | |
7472 | + log_print("can't start recvd thread: %d", error); | |
7473 | + goto out; | |
7474 | + } | |
7475 | + wait_for_completion(&thread_completion); | |
7476 | + | |
7477 | + error = kernel_thread(dlm_sendd, NULL, 0); | |
7478 | + if (error < 0) { | |
7479 | + log_print("can't start sendd thread: %d", error); | |
7480 | + daemons_stop(); /* stops the dlm_recvd started above */ | |
7481 | + goto out; | |
7482 | + } | |
7483 | + wait_for_completion(&thread_completion); | |
7484 | + | |
7485 | + error = 0; | |
7486 | + out: | |
7487 | + return error; | |
7488 | +} | |
7489 | + | |
7490 | +/* | |
7491 | + * Return the largest buffer size we can cope with (PAGE_CACHE_SIZE bytes). | |
7492 | + */ | |
7493 | +int lowcomms_max_buffer_size(void) | |
7494 | +{ | |
7495 | + return PAGE_CACHE_SIZE; | |
7496 | +} | |
7497 | + | |
7498 | +void lowcomms_stop(void) | |
7499 | +{ | |
7500 | + int i; | |
7501 | + struct connection *temp; | |
7502 | + struct connection *lcon; | |
7503 | + /* Shut lowcomms down: stop the daemons, discard queued writes, free everything */ | |
7504 | + atomic_set(&accepting, 0); | |
7505 | + | |
7506 | + /* Set all the activity flags to prevent any | |
7507 | + socket activity. | |
7508 | + */ | |
7509 | + for (i = 0; i < conn_array_size; i++) { | |
7510 | + connections[i].flags = 0x7; | |
7511 | + } | |
7512 | + daemons_stop(); | |
7513 | + clean_writequeues(); | |
7514 | + | |
7515 | + for (i = 0; i < conn_array_size; i++) { | |
7516 | + close_connection(nodeid2con(i)); | |
7517 | + } | |
7518 | + | |
7519 | + kfree(connections); | |
7520 | + connections = NULL; | |
7521 | + | |
7522 | + /* Free up any dynamically allocated listening sockets */ | |
7523 | + list_for_each_entry_safe(lcon, temp, &listen_sockets, listenlist) { | |
7524 | + sock_release(lcon->sock); | |
7525 | + kfree(lcon); | |
7526 | + } | |
7527 | + | |
7528 | + kcl_releaseref_cluster(); /* pairs with kcl_addref_cluster() in lowcomms_start() */ | |
7529 | +} | |
7530 | + | |
7531 | +/* This is quite likely to sleep... */ | |
7532 | +int lowcomms_start(void) | |
7533 | +{ | |
7534 | + int error = 0; | |
7535 | + int i; | |
7536 | + | |
7537 | + INIT_LIST_HEAD(&read_sockets); | |
7538 | + INIT_LIST_HEAD(&write_sockets); | |
7539 | + INIT_LIST_HEAD(&state_sockets); | |
7540 | + INIT_LIST_HEAD(&listen_sockets); | |
7541 | + | |
7542 | + spin_lock_init(&read_sockets_lock); | |
7543 | + spin_lock_init(&write_sockets_lock); | |
7544 | + spin_lock_init(&state_sockets_lock); | |
7545 | + | |
7546 | + init_completion(&thread_completion); | |
7547 | + init_MUTEX(&thread_lock); | |
7548 | + atomic_set(&send_run, 0); | |
7549 | + atomic_set(&recv_run, 0); | |
7550 | + | |
7551 | + error = -ENOTCONN; | |
7552 | + if (kcl_addref_cluster()) | |
7553 | + goto out; | |
7554 | + | |
7555 | + /* | |
7556 | + * Temporarily initialise the waitq head so that lowcomms_send_message | |
7557 | + * doesn't crash if it gets called before the thread is fully | |
7558 | + * initialised | |
7559 | + */ | |
7560 | + init_waitqueue_head(&lowcomms_send_waitq); | |
7561 | + | |
7562 | + error = -ENOMEM; | |
7563 | + | |
7564 | + connections = kmalloc(sizeof(struct connection) * | |
7565 | + dlm_config.max_connections, GFP_KERNEL); | |
7566 | + if (!connections) | |
7567 | + goto fail_free_conn; | |
7568 | + | |
7569 | + memset(connections, 0, | |
7570 | + sizeof(struct connection) * dlm_config.max_connections); | |
7571 | + for (i = 0; i < dlm_config.max_connections; i++) { | |
7572 | + connections[i].nodeid = i; | |
7573 | + init_rwsem(&connections[i].sock_sem); | |
7574 | + INIT_LIST_HEAD(&connections[i].writequeue); | |
7575 | + spin_lock_init(&connections[i].writequeue_lock); | |
7576 | + } | |
7577 | + conn_array_size = dlm_config.max_connections; | |
7578 | + | |
7579 | + /* Start listening */ | |
7580 | + error = listen_for_all(); | |
7581 | + if (error) | |
7582 | + goto fail_free_conn; | |
7583 | + | |
7584 | + error = daemons_start(); | |
7585 | + if (error) | |
7586 | + goto fail_free_conn; | |
7587 | + | |
7588 | + atomic_set(&accepting, 1); | |
7589 | + return 0; | |
7590 | + | |
7591 | + fail_free_conn: | |
7592 | + kfree(connections); /* no-op when the allocation itself failed */ | |
7593 | + connections = NULL; /* lowcomms_close() tests this pointer */ | |
7594 | + kcl_releaseref_cluster(); /* undo kcl_addref_cluster() above */ | |
7595 | + out: | |
7596 | + return error; | |
7597 | +} | |
7598 | + | |
7599 | +/* Don't accept any more outgoing work */ | |
7600 | +void lowcomms_stop_accept(void) | |
7601 | +{ | |
7602 | + atomic_set(&accepting, 0); | |
7603 | +} | |
7604 | + | |
7605 | +/* Cluster Manager interface functions for looking up | |
7606 | + nodeids and IP addresses by each other | |
7607 | +*/ | |
7608 | + | |
7609 | +/* Return the IP address of a node given its NODEID */ | |
7610 | +static int lowcomms_ipaddr_from_nodeid(int nodeid, struct sockaddr *retaddr) | |
7611 | +{ | |
7612 | + struct list_head *addrs; | |
7613 | + struct cluster_node_addr *node_addr; | |
7614 | + struct cluster_node_addr *current_addr = NULL; | |
7615 | + struct sockaddr_in6 *saddr; | |
7616 | + int interface; | |
7617 | + int i; | |
7618 | + | |
7619 | + addrs = kcl_get_node_addresses(nodeid); | |
7620 | + if (!addrs) | |
7621 | + return -1; | |
7622 | + | |
7623 | + interface = kcl_get_current_interface(); | |
7624 | + | |
7625 | + /* Look for address number <interface> */ | |
7626 | + i=0; /* i/f numbers start at 1 */ | |
7627 | + list_for_each_entry(node_addr, addrs, list) { | |
7628 | + if (interface == ++i) { | |
7629 | + current_addr = node_addr; | |
7630 | + break; | |
7631 | + } | |
7632 | + } | |
7633 | + | |
7634 | + /* If that failed then just use the first one */ | |
7635 | + if (!current_addr) | |
7636 | + current_addr = (struct cluster_node_addr *)addrs->next; | |
7637 | + | |
7638 | + saddr = (struct sockaddr_in6 *)current_addr->addr; | |
7639 | + | |
7640 | + /* Extract the IP address */ | |
7641 | + if (saddr->sin6_family == AF_INET) { | |
7642 | + struct sockaddr_in *in4 = (struct sockaddr_in *)saddr; | |
7643 | + struct sockaddr_in *ret4 = (struct sockaddr_in *)retaddr; | |
7644 | + ret4->sin_addr.s_addr = in4->sin_addr.s_addr; | |
7645 | + } | |
7646 | + else { | |
7647 | + struct sockaddr_in6 *ret6 = (struct sockaddr_in6 *)retaddr; | |
7648 | + memcpy(&ret6->sin6_addr, &saddr->sin6_addr, sizeof(saddr->sin6_addr)); | |
7649 | + } | |
7650 | + | |
7651 | + return 0; | |
7652 | +} | |
7653 | + | |
7654 | +/* Return the NODEID for a node given its sockaddr */ | |
7655 | +static int lowcomms_nodeid_from_ipaddr(struct sockaddr *addr, int addr_len) | |
7656 | +{ | |
7657 | + struct kcl_cluster_node node; | |
7658 | + struct sockaddr_in6 ipv6_addr; | |
7659 | + struct sockaddr_in ipv4_addr; | |
7660 | + | |
7661 | + if (addr->sa_family == AF_INET) { | |
7662 | + struct sockaddr_in *in4 = (struct sockaddr_in *)addr; | |
7663 | + memcpy(&ipv4_addr, &local_addr, addr_len); | |
7664 | + memcpy(&ipv4_addr.sin_addr, &in4->sin_addr, sizeof(ipv4_addr.sin_addr)); | |
7665 | + | |
7666 | + addr = (struct sockaddr *)&ipv4_addr; | |
7667 | + } | |
7668 | + else { | |
7669 | + struct sockaddr_in6 *in6 = (struct sockaddr_in6 *)addr; | |
7670 | + memcpy(&ipv6_addr, &local_addr, addr_len); | |
7671 | + memcpy(&ipv6_addr.sin6_addr, &in6->sin6_addr, sizeof(ipv6_addr.sin6_addr)); | |
7672 | + | |
7673 | + addr = (struct sockaddr *)&ipv6_addr; | |
7674 | + } | |
7675 | + | |
7676 | + if (kcl_get_node_by_addr((char *)addr, addr_len, &node) == 0) | |
7677 | + return node.node_id; | |
7678 | + else | |
7679 | + return 0; | |
7680 | +} | |
7681 | + | |
7682 | +int lowcomms_our_nodeid(void) | |
7683 | +{ | |
7684 | + struct kcl_cluster_node node; | |
7685 | + struct list_head *addrs; | |
7686 | + struct cluster_node_addr *first_addr; | |
7687 | + static int our_nodeid = 0; | |
7688 | + | |
7689 | + if (our_nodeid) | |
7690 | + return our_nodeid; | |
7691 | + | |
7692 | + if (kcl_get_node_by_nodeid(0, &node) == -1) | |
7693 | + return 0; | |
7694 | + | |
7695 | + /* Fill in the "template" structure */ | |
7696 | + addrs = kcl_get_node_addresses(node.node_id); | |
7697 | + if (!addrs) | |
7698 | + return 0; | |
7699 | + | |
7700 | + first_addr = (struct cluster_node_addr *) addrs->next; | |
7701 | + memcpy(&local_addr, &first_addr->addr, first_addr->addr_len); | |
7702 | + | |
7703 | + /* Cache the nodeid only once local_addr has been filled in */ | |
7704 | + our_nodeid = node.node_id; | |
7705 | + return our_nodeid; | |
7706 | +} | |
7707 | +/* | |
7708 | + * Overrides for Emacs so that we follow Linus's tabbing style. | |
7709 | + * Emacs will notice this stuff at the end of the file and automatically | |
7710 | + * adjust the settings for this buffer only. This must remain at the end | |
7711 | + * of the file. | |
7712 | + * --------------------------------------------------------------------------- | |
7713 | + * Local variables: | |
7714 | + * c-file-style: "linux" | |
7715 | + * End: | |
7716 | + */ | |
7717 | diff -urN linux-orig/cluster/dlm/lowcomms.h linux-patched/cluster/dlm/lowcomms.h | |
7718 | --- linux-orig/cluster/dlm/lowcomms.h 1970-01-01 07:30:00.000000000 +0730 | |
10d56c87 | 7719 | +++ linux-patched/cluster/dlm/lowcomms.h 2004-07-13 18:57:22.000000000 +0800 |
4bf12011 | 7720 | @@ -0,0 +1,34 @@ |
7721 | +/****************************************************************************** | |
7722 | +******************************************************************************* | |
7723 | +** | |
7724 | +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | |
7725 | +** Copyright (C) 2004 Red Hat, Inc. All rights reserved. | |
7726 | +** | |
7727 | +** This copyrighted material is made available to anyone wishing to use, | |
7728 | +** modify, copy, or redistribute it subject to the terms and conditions | |
7729 | +** of the GNU General Public License v.2. | |
7730 | +** | |
7731 | +******************************************************************************* | |
7732 | +******************************************************************************/ | |
7733 | + | |
7734 | +#ifndef __LOWCOMMS_DOT_H__ | |
7735 | +#define __LOWCOMMS_DOT_H__ | |
7736 | + | |
7737 | +/* The old interface */ | |
7738 | +int lowcomms_send_message(int csid, char *buf, int len, int allocation); | |
7739 | + | |
7740 | +/* The new interface */ | |
7741 | +struct writequeue_entry; | |
7742 | +extern struct writequeue_entry *lowcomms_get_buffer(int nodeid, int len, | |
7743 | + int allocation, char **ppc); | |
7744 | +extern void lowcomms_commit_buffer(struct writequeue_entry *e); | |
7745 | + | |
7746 | +int lowcomms_start(void); | |
7747 | +void lowcomms_stop(void); | |
7748 | +void lowcomms_stop_accept(void); | |
7749 | +int lowcomms_close(int nodeid); | |
7750 | +int lowcomms_max_buffer_size(void); | |
7751 | + | |
7752 | +int lowcomms_our_nodeid(void); | |
7753 | + | |
7754 | +#endif /* __LOWCOMMS_DOT_H__ */ | |
7755 | diff -urN linux-orig/cluster/dlm/main.c linux-patched/cluster/dlm/main.c | |
7756 | --- linux-orig/cluster/dlm/main.c 1970-01-01 07:30:00.000000000 +0730 | |
10d56c87 | 7757 | +++ linux-patched/cluster/dlm/main.c 2004-07-13 18:57:22.000000000 +0800 |
4bf12011 | 7758 | @@ -0,0 +1,98 @@ |
7759 | +/****************************************************************************** | |
7760 | +******************************************************************************* | |
7761 | +** | |
7762 | +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | |
7763 | +** Copyright (C) 2004 Red Hat, Inc. All rights reserved. | |
7764 | +** | |
7765 | +** This copyrighted material is made available to anyone wishing to use, | |
7766 | +** modify, copy, or redistribute it subject to the terms and conditions | |
7767 | +** of the GNU General Public License v.2. | |
7768 | +** | |
7769 | +******************************************************************************* | |
7770 | +******************************************************************************/ | |
7771 | + | |
7772 | +#define EXPORT_SYMTAB | |
7773 | + | |
7774 | +#include <linux/init.h> | |
7775 | +#include <linux/proc_fs.h> | |
7776 | +#include <linux/ctype.h> | |
4bf12011 | 7777 | +#include <linux/module.h> |
7778 | +#include <net/sock.h> | |
7779 | + | |
7780 | +#include <cluster/cnxman.h> | |
7781 | + | |
7782 | +#include "dlm_internal.h" | |
7783 | +#include "lockspace.h" | |
7784 | +#include "recoverd.h" | |
7785 | +#include "ast.h" | |
7786 | +#include "lkb.h" | |
7787 | +#include "nodes.h" | |
7788 | +#include "locking.h" | |
7789 | +#include "config.h" | |
7790 | +#include "memory.h" | |
7791 | +#include "recover.h" | |
7792 | +#include "lowcomms.h" | |
7793 | + | |
7794 | +int dlm_device_init(void); | |
7795 | +void dlm_device_exit(void); | |
7796 | +void dlm_proc_init(void); | |
7797 | +void dlm_proc_exit(void); | |
7798 | + | |
7799 | + | |
7800 | +/* Cluster manager callbacks, we want to know if a node dies | |
7801 | + N.B. this is independent of lockspace-specific event callbacks from SM */ | |
7802 | + | |
7803 | +static void cman_callback(kcl_callback_reason reason, long arg) | |
7804 | +{ | |
7805 | + if (reason == DIED) { | |
7806 | + lowcomms_close((int) arg); | |
7807 | + } | |
7808 | + | |
7809 | + /* This is unconditional. so do what we can to tidy up */ | |
7810 | + if (reason == LEAVING) { | |
7811 | + dlm_emergency_shutdown(); | |
7812 | + } | |
7813 | +} | |
7814 | + | |
7815 | +int __init init_dlm(void) | |
7816 | +{ | |
7817 | + dlm_proc_init(); | |
7818 | + dlm_lockspace_init(); | |
7819 | + dlm_recoverd_init(); | |
7820 | + dlm_nodes_init(); | |
7821 | + dlm_device_init(); | |
7822 | + dlm_memory_init(); | |
7823 | + dlm_config_init(); | |
7824 | + | |
7825 | + kcl_add_callback(cman_callback); | |
7826 | + | |
7827 | + printk("DLM %s (built %s %s) installed\n", | |
7828 | + DLM_RELEASE_NAME, __DATE__, __TIME__); | |
7829 | + | |
7830 | + return 0; | |
7831 | +} | |
7832 | + | |
7833 | +void __exit exit_dlm(void) | |
7834 | +{ | |
7835 | + kcl_remove_callback(cman_callback); | |
7836 | + | |
7837 | + dlm_device_exit(); | |
7838 | + dlm_memory_exit(); | |
7839 | + dlm_config_exit(); | |
7840 | + dlm_proc_exit(); | |
7841 | +} | |
7842 | + | |
7843 | +MODULE_DESCRIPTION("Distributed Lock Manager " DLM_RELEASE_NAME); | |
7844 | +MODULE_AUTHOR("Red Hat, Inc."); | |
7845 | +MODULE_LICENSE("GPL"); | |
7846 | + | |
7847 | +module_init(init_dlm); | |
7848 | +module_exit(exit_dlm); | |
7849 | + | |
7850 | +EXPORT_SYMBOL(dlm_init); | |
7851 | +EXPORT_SYMBOL(dlm_release); | |
7852 | +EXPORT_SYMBOL(dlm_new_lockspace); | |
7853 | +EXPORT_SYMBOL(dlm_release_lockspace); | |
7854 | +EXPORT_SYMBOL(dlm_lock); | |
7855 | +EXPORT_SYMBOL(dlm_unlock); | |
10d56c87 | 7856 | +EXPORT_SYMBOL(dlm_debug_dump); |
4bf12011 | 7857 | diff -urN linux-orig/cluster/dlm/memory.c linux-patched/cluster/dlm/memory.c |
7858 | --- linux-orig/cluster/dlm/memory.c 1970-01-01 07:30:00.000000000 +0730 | |
10d56c87 | 7859 | +++ linux-patched/cluster/dlm/memory.c 2004-07-13 18:57:22.000000000 +0800 |
4bf12011 | 7860 | @@ -0,0 +1,238 @@ |
7861 | +/****************************************************************************** | |
7862 | +******************************************************************************* | |
7863 | +** | |
7864 | +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | |
7865 | +** Copyright (C) 2004 Red Hat, Inc. All rights reserved. | |
7866 | +** | |
7867 | +** This copyrighted material is made available to anyone wishing to use, | |
7868 | +** modify, copy, or redistribute it subject to the terms and conditions | |
7869 | +** of the GNU General Public License v.2. | |
7870 | +** | |
7871 | +******************************************************************************* | |
7872 | +******************************************************************************/ | |
7873 | + | |
7874 | +/* memory.c | |
7875 | + * | |
7876 | + * memory allocation routines | |
7877 | + * | |
7878 | + */ | |
7879 | + | |
7880 | +#include "dlm_internal.h" | |
7881 | +#include "memory.h" | |
7882 | +#include "config.h" | |
7883 | + | |
7884 | +/* as the man says...Shouldn't this be in a header file somewhere? */ | |
7885 | +#define BYTES_PER_WORD sizeof(void *) | |
7886 | + | |
7887 | +static kmem_cache_t *rsb_cache_small; | |
7888 | +static kmem_cache_t *rsb_cache_large; | |
7889 | +static kmem_cache_t *lkb_cache; | |
7890 | +static kmem_cache_t *lvb_cache; | |
7891 | +static kmem_cache_t *resdir_cache_large; | |
7892 | +static kmem_cache_t *resdir_cache_small; | |
7893 | + | |
7894 | +/* The thresholds above which we allocate large RSBs/resdatas rather than small | |
7895 | + * ones. This must make the resultant structure end on a word boundary */ | |
7896 | +#define LARGE_RSB_NAME 28 | |
7897 | +#define LARGE_RES_NAME 28 | |
7898 | + | |
7899 | +int dlm_memory_init(void) | |
7900 | +{ | |
7901 | + int ret = -ENOMEM; | |
7902 | + | |
7903 | + | |
7904 | + rsb_cache_small = | |
7905 | + kmem_cache_create("dlm_rsb(small)", | |
10d56c87 AM |
7906 | + (sizeof(struct dlm_rsb) + LARGE_RSB_NAME + BYTES_PER_WORD-1) & ~(BYTES_PER_WORD-1), |
7907 | + __alignof__(struct dlm_rsb), 0, NULL, NULL); | |
4bf12011 | 7908 | + if (!rsb_cache_small) |
7909 | + goto out; | |
7910 | + | |
7911 | + rsb_cache_large = | |
7912 | + kmem_cache_create("dlm_rsb(large)", | |
10d56c87 AM |
7913 | + sizeof(struct dlm_rsb) + DLM_RESNAME_MAXLEN, |
7914 | + __alignof__(struct dlm_rsb), 0, NULL, NULL); | |
4bf12011 | 7915 | + if (!rsb_cache_large) |
7916 | + goto out_free_rsbs; | |
7917 | + | |
10d56c87 AM |
7918 | + lkb_cache = kmem_cache_create("dlm_lkb", sizeof(struct dlm_lkb), |
7919 | + __alignof__(struct dlm_lkb), 0, NULL, NULL); | |
4bf12011 | 7920 | + if (!lkb_cache) |
7921 | + goto out_free_rsbl; | |
7922 | + | |
7923 | + resdir_cache_large = | |
7924 | + kmem_cache_create("dlm_resdir(l)", | |
10d56c87 AM |
7925 | + sizeof(struct dlm_direntry) + DLM_RESNAME_MAXLEN, |
7926 | + __alignof__(struct dlm_direntry), 0, NULL, NULL); | |
4bf12011 | 7927 | + if (!resdir_cache_large) |
7928 | + goto out_free_lkb; | |
7929 | + | |
7930 | + resdir_cache_small = | |
7931 | + kmem_cache_create("dlm_resdir(s)", | |
10d56c87 AM |
7932 | + (sizeof(struct dlm_direntry) + LARGE_RES_NAME + BYTES_PER_WORD-1) & ~(BYTES_PER_WORD-1), |
7933 | + __alignof__(struct dlm_direntry), 0, NULL, NULL); | |
4bf12011 | 7934 | + if (!resdir_cache_small) |
7935 | + goto out_free_resl; | |
7936 | + | |
7937 | + /* LVB cache also holds ranges, so should be 64bit aligned */ | |
7938 | + lvb_cache = kmem_cache_create("dlm_lvb/range", DLM_LVB_LEN, | |
7939 | + __alignof__(uint64_t), 0, NULL, NULL); | |
7940 | + if (!lvb_cache) | |
7941 | + goto out_free_ress; | |
7942 | + | |
7943 | + ret = 0; | |
7944 | + goto out; | |
7945 | + | |
7946 | + out_free_ress: | |
7947 | + kmem_cache_destroy(resdir_cache_small); | |
7948 | + | |
7949 | + out_free_resl: | |
7950 | + kmem_cache_destroy(resdir_cache_large); | |
7951 | + | |
7952 | + out_free_lkb: | |
7953 | + kmem_cache_destroy(lkb_cache); | |
7954 | + | |
7955 | + out_free_rsbl: | |
7956 | + kmem_cache_destroy(rsb_cache_large); | |
7957 | + | |
7958 | + out_free_rsbs: | |
7959 | + kmem_cache_destroy(rsb_cache_small); | |
7960 | + | |
7961 | + out: | |
7962 | + return ret; | |
7963 | +} | |
7964 | + | |
7965 | +void dlm_memory_exit(void) | |
7966 | +{ | |
7967 | + kmem_cache_destroy(rsb_cache_large); | |
7968 | + kmem_cache_destroy(rsb_cache_small); | |
7969 | + kmem_cache_destroy(lkb_cache); | |
7970 | + kmem_cache_destroy(resdir_cache_small); | |
7971 | + kmem_cache_destroy(resdir_cache_large); | |
7972 | + kmem_cache_destroy(lvb_cache); | |
7973 | +} | |
7974 | + | |
10d56c87 | 7975 | +struct dlm_rsb *allocate_rsb(struct dlm_ls *ls, int namelen) |
4bf12011 | 7976 | +{ |
10d56c87 | 7977 | + struct dlm_rsb *r; |
4bf12011 | 7978 | + |
10d56c87 | 7979 | + DLM_ASSERT(namelen <= DLM_RESNAME_MAXLEN,); |
4bf12011 | 7980 | + |
7981 | + if (namelen >= LARGE_RSB_NAME) | |
7982 | + r = kmem_cache_alloc(rsb_cache_large, ls->ls_allocation); | |
7983 | + else | |
7984 | + r = kmem_cache_alloc(rsb_cache_small, ls->ls_allocation); | |
7985 | + | |
7986 | + if (r) | |
10d56c87 | 7987 | + memset(r, 0, sizeof(struct dlm_rsb) + namelen); |
4bf12011 | 7988 | + |
7989 | + return r; | |
7990 | +} | |
7991 | + | |
10d56c87 | 7992 | +void free_rsb(struct dlm_rsb *r) |
4bf12011 | 7993 | +{ |
7994 | + int length = r->res_length; | |
7995 | + | |
7996 | +#ifdef POISON | |
10d56c87 | 7997 | + memset(r, 0x55, sizeof(struct dlm_rsb) + r->res_length); |
4bf12011 | 7998 | +#endif |
7999 | + | |
8000 | + if (length >= LARGE_RSB_NAME) | |
8001 | + kmem_cache_free(rsb_cache_large, r); | |
8002 | + else | |
8003 | + kmem_cache_free(rsb_cache_small, r); | |
8004 | +} | |
8005 | + | |
10d56c87 | 8006 | +struct dlm_lkb *allocate_lkb(struct dlm_ls *ls) |
4bf12011 | 8007 | +{ |
10d56c87 | 8008 | + struct dlm_lkb *l; |
4bf12011 | 8009 | + |
8010 | + l = kmem_cache_alloc(lkb_cache, ls->ls_allocation); | |
8011 | + if (l) | |
10d56c87 | 8012 | + memset(l, 0, sizeof(struct dlm_lkb)); |
4bf12011 | 8013 | + |
8014 | + return l; | |
8015 | +} | |
8016 | + | |
10d56c87 | 8017 | +void free_lkb(struct dlm_lkb *l) |
4bf12011 | 8018 | +{ |
8019 | +#ifdef POISON | |
10d56c87 | 8020 | + memset(l, 0xAA, sizeof(struct dlm_lkb)); |
4bf12011 | 8021 | +#endif |
8022 | + kmem_cache_free(lkb_cache, l); | |
8023 | +} | |
8024 | + | |
10d56c87 | 8025 | +struct dlm_direntry *allocate_resdata(struct dlm_ls *ls, int namelen) |
4bf12011 | 8026 | +{ |
10d56c87 | 8027 | + struct dlm_direntry *rd; |
4bf12011 | 8028 | + |
10d56c87 | 8029 | + DLM_ASSERT(namelen <= DLM_RESNAME_MAXLEN,); |
4bf12011 | 8030 | + |
8031 | + if (namelen >= LARGE_RES_NAME) | |
8032 | + rd = kmem_cache_alloc(resdir_cache_large, ls->ls_allocation); | |
8033 | + else | |
8034 | + rd = kmem_cache_alloc(resdir_cache_small, ls->ls_allocation); | |
8035 | + | |
8036 | + if (rd) | |
10d56c87 | 8037 | + memset(rd, 0, sizeof(struct dlm_direntry)); |
4bf12011 | 8038 | + |
8039 | + return rd; | |
8040 | +} | |
8041 | + | |
10d56c87 | 8042 | +void free_resdata(struct dlm_direntry *de) |
4bf12011 | 8043 | +{ |
10d56c87 AM |
8044 | + if (de->length >= LARGE_RES_NAME) |
8045 | + kmem_cache_free(resdir_cache_large, de); | |
4bf12011 | 8046 | + else |
10d56c87 | 8047 | + kmem_cache_free(resdir_cache_small, de); |
4bf12011 | 8048 | +} |
8049 | + | |
10d56c87 | 8050 | +char *allocate_lvb(struct dlm_ls *ls) |
4bf12011 | 8051 | +{ |
8052 | + char *l; | |
8053 | + | |
8054 | + l = kmem_cache_alloc(lvb_cache, ls->ls_allocation); | |
8055 | + if (l) | |
8056 | + memset(l, 0, DLM_LVB_LEN); | |
8057 | + | |
8058 | + return l; | |
8059 | +} | |
8060 | + | |
8061 | +void free_lvb(char *l) | |
8062 | +{ | |
8063 | + kmem_cache_free(lvb_cache, l); | |
8064 | +} | |
8065 | + | |
8066 | +/* Ranges are allocated from the LVB cache as they are the same size (4x64 | |
8067 | + * bits) */ | |
10d56c87 | 8068 | +uint64_t *allocate_range(struct dlm_ls * ls) |
4bf12011 | 8069 | +{ |
8070 | + uint64_t *l; | |
8071 | + | |
8072 | + l = kmem_cache_alloc(lvb_cache, ls->ls_allocation); | |
8073 | + if (l) | |
8074 | + memset(l, 0, DLM_LVB_LEN); | |
8075 | + | |
8076 | + return l; | |
8077 | +} | |
8078 | + | |
8079 | +void free_range(uint64_t *l) | |
8080 | +{ | |
8081 | + kmem_cache_free(lvb_cache, l); | |
8082 | +} | |
8083 | + | |
10d56c87 | 8084 | +struct dlm_rcom *allocate_rcom_buffer(struct dlm_ls *ls) |
4bf12011 | 8085 | +{ |
10d56c87 | 8086 | + struct dlm_rcom *rc; |
4bf12011 | 8087 | + |
8088 | + rc = kmalloc(dlm_config.buffer_size, ls->ls_allocation); | |
8089 | + if (rc) | |
8090 | + memset(rc, 0, dlm_config.buffer_size); | |
8091 | + | |
8092 | + return rc; | |
8093 | +} | |
8094 | + | |
10d56c87 | 8095 | +void free_rcom_buffer(struct dlm_rcom *rc) |
4bf12011 | 8096 | +{ |
8097 | + kfree(rc); | |
8098 | +} | |
8099 | diff -urN linux-orig/cluster/dlm/memory.h linux-patched/cluster/dlm/memory.h | |
8100 | --- linux-orig/cluster/dlm/memory.h 1970-01-01 07:30:00.000000000 +0730 | |
10d56c87 | 8101 | +++ linux-patched/cluster/dlm/memory.h 2004-07-13 18:57:22.000000000 +0800 |
4bf12011 | 8102 | @@ -0,0 +1,32 @@ |
8103 | +/****************************************************************************** | |
8104 | +******************************************************************************* | |
8105 | +** | |
8106 | +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | |
8107 | +** Copyright (C) 2004 Red Hat, Inc. All rights reserved. | |
8108 | +** | |
8109 | +** This copyrighted material is made available to anyone wishing to use, | |
8110 | +** modify, copy, or redistribute it subject to the terms and conditions | |
8111 | +** of the GNU General Public License v.2. | |
8112 | +** | |
8113 | +******************************************************************************* | |
8114 | +******************************************************************************/ | |
8115 | + | |
8116 | +#ifndef __MEMORY_DOT_H__ | |
8117 | +#define __MEMORY_DOT_H__ | |
8118 | + | |
8119 | +int dlm_memory_init(void); | |
8120 | +void dlm_memory_exit(void); | |
10d56c87 AM |
8121 | +struct dlm_rsb *allocate_rsb(struct dlm_ls *ls, int namelen); |
8122 | +void free_rsb(struct dlm_rsb *r); | |
8123 | +struct dlm_lkb *allocate_lkb(struct dlm_ls *ls); | |
8124 | +void free_lkb(struct dlm_lkb *l); | |
8125 | +struct dlm_direntry *allocate_resdata(struct dlm_ls *ls, int namelen); | |
8126 | +void free_resdata(struct dlm_direntry *de); | |
8127 | +char *allocate_lvb(struct dlm_ls *ls); | |
4bf12011 | 8128 | +void free_lvb(char *l); |
10d56c87 AM |
8129 | +struct dlm_rcom *allocate_rcom_buffer(struct dlm_ls *ls); |
8130 | +void free_rcom_buffer(struct dlm_rcom *rc); | |
8131 | +uint64_t *allocate_range(struct dlm_ls *ls); | |
8132 | +void free_range(uint64_t *l); | |
4bf12011 | 8133 | + |
8134 | +#endif /* __MEMORY_DOT_H__ */ | |
8135 | diff -urN linux-orig/cluster/dlm/midcomms.c linux-patched/cluster/dlm/midcomms.c | |
8136 | --- linux-orig/cluster/dlm/midcomms.c 1970-01-01 07:30:00.000000000 +0730 | |
10d56c87 | 8137 | +++ linux-patched/cluster/dlm/midcomms.c 2004-07-13 18:57:22.000000000 +0800 |
4bf12011 | 8138 | @@ -0,0 +1,351 @@ |
8139 | +/****************************************************************************** | |
8140 | +******************************************************************************* | |
8141 | +** | |
8142 | +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | |
8143 | +** Copyright (C) 2004 Red Hat, Inc. All rights reserved. | |
8144 | +** | |
8145 | +** This copyrighted material is made available to anyone wishing to use, | |
8146 | +** modify, copy, or redistribute it subject to the terms and conditions | |
8147 | +** of the GNU General Public License v.2. | |
8148 | +** | |
8149 | +******************************************************************************* | |
8150 | +******************************************************************************/ | |
8151 | + | |
8152 | +/* | |
8153 | + * midcomms.c | |
8154 | + * | |
8155 | + * This is the appallingly named "mid-level" comms layer. | |
8156 | + * | |
8157 | + * Its purpose is to take packets from the "real" comms layer, | |
8158 | + * split them up into packets and pass them to the interested | |
8159 | + * part of the locking mechanism. | |
8160 | + * | |
8161 | + * It also takes messages from the locking layer, formats them | |
8162 | + * into packets and sends them to the comms layer. | |
8163 | + * | |
8164 | + * It knows the format of the mid-level messages used and nodeids | |
8165 | + * but it does not know how to resolve a nodeid into an IP address | |
8166 | + * or any of the comms channel details | |
8167 | + * | |
8168 | + */ | |
8169 | + | |
8170 | +#include "dlm_internal.h" | |
8171 | +#include "lowcomms.h" | |
8172 | +#include "midcomms.h" | |
8173 | +#include "lockqueue.h" | |
8174 | +#include "nodes.h" | |
8175 | +#include "reccomms.h" | |
8176 | +#include "config.h" | |
8177 | + | |
8178 | +/* Byteorder routines */ | |
8179 | + | |
8180 | +static void host_to_network(void *msg) | |
8181 | +{ | |
10d56c87 AM |
8182 | + struct dlm_header *head = msg; |
8183 | + struct dlm_request *req = msg; | |
8184 | + struct dlm_reply *rep = msg; | |
8185 | + struct dlm_query_request *qreq = msg; | |
8186 | + struct dlm_query_reply *qrep= msg; | |
8187 | + struct dlm_rcom *rc = msg; | |
4bf12011 | 8188 | + |
8189 | + /* Force into network byte order */ | |
8190 | + | |
8191 | + /* | |
8192 | + * Do the common header first | |
8193 | + */ | |
8194 | + | |
8195 | + head->rh_length = cpu_to_le16(head->rh_length); | |
8196 | + head->rh_lockspace = cpu_to_le32(head->rh_lockspace); | |
8197 | + /* Leave the lkid alone as it is transparent at the remote end */ | |
8198 | + | |
8199 | + /* | |
8200 | + * Do the fields in the remlockrequest or remlockreply structs | |
8201 | + */ | |
8202 | + | |
8203 | + switch (req->rr_header.rh_cmd) { | |
8204 | + | |
8205 | + case GDLM_REMCMD_LOCKREQUEST: | |
8206 | + case GDLM_REMCMD_CONVREQUEST: | |
8207 | + req->rr_range_start = cpu_to_le64(req->rr_range_start); | |
8208 | + req->rr_range_end = cpu_to_le64(req->rr_range_end); | |
8209 | + /* Deliberate fall through */ | |
8210 | + case GDLM_REMCMD_UNLOCKREQUEST: | |
8211 | + case GDLM_REMCMD_LOOKUP: | |
8212 | + case GDLM_REMCMD_LOCKGRANT: | |
8213 | + case GDLM_REMCMD_SENDBAST: | |
8214 | + case GDLM_REMCMD_SENDCAST: | |
8215 | + case GDLM_REMCMD_REM_RESDATA: | |
8216 | + req->rr_flags = cpu_to_le32(req->rr_flags); | |
8217 | + req->rr_status = cpu_to_le32(req->rr_status); | |
8218 | + break; | |
8219 | + | |
8220 | + case GDLM_REMCMD_LOCKREPLY: | |
10d56c87 AM |
8221 | + rep->rl_lockstate = cpu_to_le32(rep->rl_lockstate); |
8222 | + rep->rl_nodeid = cpu_to_le32(rep->rl_nodeid); | |
8223 | + rep->rl_status = cpu_to_le32(rep->rl_status); | |
4bf12011 | 8224 | + break; |
8225 | + | |
8226 | + case GDLM_REMCMD_RECOVERMESSAGE: | |
8227 | + case GDLM_REMCMD_RECOVERREPLY: | |
8228 | + rc->rc_msgid = cpu_to_le32(rc->rc_msgid); | |
8229 | + rc->rc_datalen = cpu_to_le16(rc->rc_datalen); | |
8230 | + break; | |
8231 | + | |
8232 | + case GDLM_REMCMD_QUERY: | |
10d56c87 AM |
8233 | + qreq->rq_mstlkid = cpu_to_le32(qreq->rq_mstlkid); |
8234 | + qreq->rq_query = cpu_to_le32(qreq->rq_query); | |
8235 | + qreq->rq_maxlocks = cpu_to_le32(qreq->rq_maxlocks); | |
4bf12011 | 8236 | + break; |
8237 | + | |
8238 | + case GDLM_REMCMD_QUERYREPLY: | |
10d56c87 AM |
8239 | + qrep->rq_numlocks = cpu_to_le32(qrep->rq_numlocks); |
8240 | + qrep->rq_status = cpu_to_le32(qrep->rq_status); | |
8241 | + qrep->rq_grantcount = cpu_to_le32(qrep->rq_grantcount); | |
8242 | + qrep->rq_waitcount = cpu_to_le32(qrep->rq_waitcount); | |
8243 | + qrep->rq_convcount = cpu_to_le32(qrep->rq_convcount); | |
4bf12011 | 8244 | + break; |
8245 | + | |
8246 | + default: | |
8247 | + printk("dlm: warning, unknown REMCMD type %u\n", | |
8248 | + req->rr_header.rh_cmd); | |
8249 | + } | |
8250 | +} | |
8251 | + | |
8252 | +static void network_to_host(void *msg) | |
8253 | +{ | |
10d56c87 AM |
8254 | + struct dlm_header *head = msg; |
8255 | + struct dlm_request *req = msg; | |
8256 | + struct dlm_reply *rep = msg; | |
8257 | + struct dlm_query_request *qreq = msg; | |
8258 | + struct dlm_query_reply *qrep = msg; | |
8259 | + struct dlm_rcom *rc = msg; | |
4bf12011 | 8260 | + |
8261 | + /* Force into host byte order */ | |
8262 | + | |
8263 | + /* | |
8264 | + * Do the common header first | |
8265 | + */ | |
8266 | + | |
8267 | + head->rh_length = le16_to_cpu(head->rh_length); | |
8268 | + head->rh_lockspace = le32_to_cpu(head->rh_lockspace); | |
8269 | + /* Leave the lkid alone as it is transparent at the remote end */ | |
8270 | + | |
8271 | + /* | |
8272 | + * Do the fields in the remlockrequest or remlockreply structs | |
8273 | + */ | |
8274 | + | |
8275 | + switch (req->rr_header.rh_cmd) { | |
8276 | + | |
8277 | + case GDLM_REMCMD_LOCKREQUEST: | |
8278 | + case GDLM_REMCMD_CONVREQUEST: | |
8279 | + req->rr_range_start = le64_to_cpu(req->rr_range_start); | |
8280 | + req->rr_range_end = le64_to_cpu(req->rr_range_end); | |
8281 | + case GDLM_REMCMD_LOOKUP: | |
8282 | + case GDLM_REMCMD_UNLOCKREQUEST: | |
8283 | + case GDLM_REMCMD_LOCKGRANT: | |
8284 | + case GDLM_REMCMD_SENDBAST: | |
8285 | + case GDLM_REMCMD_SENDCAST: | |
8286 | + case GDLM_REMCMD_REM_RESDATA: | |
8287 | + /* Actually, not much to do here as the remote lock IDs are | |
8288 | + * transparent too */ | |
8289 | + req->rr_flags = le32_to_cpu(req->rr_flags); | |
8290 | + req->rr_status = le32_to_cpu(req->rr_status); | |
8291 | + break; | |
8292 | + | |
8293 | + case GDLM_REMCMD_LOCKREPLY: | |
10d56c87 AM |
8294 | + rep->rl_lockstate = le32_to_cpu(rep->rl_lockstate); |
8295 | + rep->rl_nodeid = le32_to_cpu(rep->rl_nodeid); | |
8296 | + rep->rl_status = le32_to_cpu(rep->rl_status); | |
4bf12011 | 8297 | + break; |
8298 | + | |
8299 | + case GDLM_REMCMD_RECOVERMESSAGE: | |
8300 | + case GDLM_REMCMD_RECOVERREPLY: | |
8301 | + rc->rc_msgid = le32_to_cpu(rc->rc_msgid); | |
8302 | + rc->rc_datalen = le16_to_cpu(rc->rc_datalen); | |
8303 | + break; | |
8304 | + | |
8305 | + | |
8306 | + case GDLM_REMCMD_QUERY: | |
10d56c87 AM |
8307 | + qreq->rq_mstlkid = le32_to_cpu(qreq->rq_mstlkid); |
8308 | + qreq->rq_query = le32_to_cpu(qreq->rq_query); | |
8309 | + qreq->rq_maxlocks = le32_to_cpu(qreq->rq_maxlocks); | |
4bf12011 | 8310 | + break; |
8311 | + | |
8312 | + case GDLM_REMCMD_QUERYREPLY: | |
10d56c87 AM |
8313 | + qrep->rq_numlocks = le32_to_cpu(qrep->rq_numlocks); |
8314 | + qrep->rq_status = le32_to_cpu(qrep->rq_status); | |
8315 | + qrep->rq_grantcount = le32_to_cpu(qrep->rq_grantcount); | |
8316 | + qrep->rq_waitcount = le32_to_cpu(qrep->rq_waitcount); | |
8317 | + qrep->rq_convcount = le32_to_cpu(qrep->rq_convcount); | |
4bf12011 | 8318 | + break; |
8319 | + | |
8320 | + default: | |
8321 | + printk("dlm: warning, unknown REMCMD type %u\n", | |
8322 | + req->rr_header.rh_cmd); | |
8323 | + } | |
8324 | +} | |
8325 | + | |
8326 | +static void copy_from_cb(void *dst, const void *base, unsigned offset, | |
8327 | + unsigned len, unsigned limit) | |
8328 | +{ | |
8329 | + unsigned copy = len; | |
8330 | + | |
8331 | + if ((copy + offset) > limit) | |
8332 | + copy = limit - offset; | |
8333 | + memcpy(dst, base + offset, copy); | |
8334 | + len -= copy; | |
8335 | + if (len) | |
8336 | + memcpy(dst + copy, base, len); | |
8337 | +} | |
8338 | + | |
8339 | +static void khexdump(const unsigned char *c, int len) | |
8340 | +{ | |
8341 | + for (; len > 16; len -= 16, c += 16) { | |
8342 | + /* one full row of 16 bytes; c advances with len */ | |
8343 | + printk(KERN_INFO | |
8344 | + "%02x %02x %02x %02x %02x %02x %02x %02x-%02x %02x %02x %02x %02x %02x %02x %02x\n", | |
8345 | + c[0], c[1], c[2], c[3], c[4], c[5], c[6], c[7], c[8], | |
8346 | + c[9], c[10], c[11], c[12], c[13], c[14], c[15]); | |
8347 | + } | |
8348 | + for (; len > 4; len -= 4, c += 4) { | |
8349 | + /* one short row of 4 bytes */ | |
8350 | + printk(KERN_INFO "%02x %02x %02x %02x\n", c[0], c[1], c[2], | |
8351 | + c[3]); | |
8352 | + } | |
8353 | + for (; len > 0; len--, c++) { | |
8354 | + /* remaining bytes, one per line */ | |
8355 | + printk(KERN_INFO "%02x\n", c[0]); | |
8356 | + } | |
8357 | +} | |
8358 | + | |
8359 | +/* | |
8360 | + * Called from the low-level comms layer to process a buffer of | |
8361 | + * commands. | |
8362 | + * | |
8363 | + * Only complete messages are processed here, any "spare" bytes from | |
8364 | + * the end of a buffer are saved and tacked onto the front of the next | |
8365 | + * message that comes in. I doubt this will happen very often but we | |
8366 | + * need to be able to cope with it and I don't want the task to be waiting | |
8367 | + * for packets to come in when there is useful work to be done. | |
8368 | + * | |
8369 | + */ | |
8370 | +int midcomms_process_incoming_buffer(int nodeid, const void *base, | |
8371 | + unsigned offset, unsigned len, | |
8372 | + unsigned limit) | |
8373 | +{ | |
10d56c87 AM |
8374 | + unsigned char __tmp[sizeof(struct dlm_header) + 64]; |
8375 | + struct dlm_header *msg = (struct dlm_header *) __tmp; | |
4bf12011 | 8376 | + int ret = 0; |
8377 | + int err = 0; | |
8378 | + unsigned msglen; | |
8379 | + __u32 id, space; | |
8380 | + | |
10d56c87 | 8381 | + while (len > sizeof(struct dlm_header)) { |
4bf12011 | 8382 | + /* Get message header and check it over */ |
10d56c87 | 8383 | + copy_from_cb(msg, base, offset, sizeof(struct dlm_header), |
4bf12011 | 8384 | + limit); |
8385 | + msglen = le16_to_cpu(msg->rh_length); | |
8386 | + id = msg->rh_lkid; | |
8387 | + space = msg->rh_lockspace; | |
8388 | + | |
8389 | + /* Check message size */ | |
8390 | + err = -EINVAL; | |
10d56c87 | 8391 | + if (msglen < sizeof(struct dlm_header)) |
4bf12011 | 8392 | + break; |
8393 | + err = -E2BIG; | |
8394 | + if (msglen > dlm_config.buffer_size) { | |
8395 | + printk("dlm: message size too big %d\n", msglen); | |
8396 | + break; | |
8397 | + } | |
8398 | + err = 0; | |
8399 | + | |
8400 | + /* Not enough in buffer yet? wait for some more */ | |
8401 | + if (msglen > len) | |
8402 | + break; | |
8403 | + | |
8404 | + /* Make sure our temp buffer is large enough */ | |
8405 | + if (msglen > sizeof(__tmp) && | |
10d56c87 | 8406 | + msg == (struct dlm_header *) __tmp) { |
4bf12011 | 8407 | + msg = kmalloc(dlm_config.buffer_size, GFP_KERNEL); |
8408 | + if (msg == NULL) | |
8409 | + return ret; | |
8410 | + } | |
8411 | + | |
8412 | + copy_from_cb(msg, base, offset, msglen, limit); | |
8413 | + BUG_ON(id != msg->rh_lkid); | |
8414 | + BUG_ON(space != msg->rh_lockspace); | |
8415 | + ret += msglen; | |
8416 | + offset += msglen; | |
8417 | + offset &= (limit - 1); | |
8418 | + len -= msglen; | |
8419 | + network_to_host(msg); | |
8420 | + | |
8421 | + if ((msg->rh_cmd > 32) || | |
8422 | + (msg->rh_cmd == 0) || | |
10d56c87 | 8423 | + (msg->rh_length < sizeof(struct dlm_header)) || |
4bf12011 | 8424 | + (msg->rh_length > dlm_config.buffer_size)) { |
8425 | + | |
8426 | + printk("dlm: midcomms: cmd=%u, flags=%u, length=%hu, " | |
8427 | + "lkid=%u, lockspace=%u\n", | |
8428 | + msg->rh_cmd, msg->rh_flags, msg->rh_length, | |
8429 | + msg->rh_lkid, msg->rh_lockspace); | |
8430 | + | |
8431 | + printk("dlm: midcomms: base=%p, offset=%u, len=%u, " | |
8432 | + "ret=%u, limit=%08x newbuf=%d\n", | |
8433 | + base, offset, len, ret, limit, | |
10d56c87 | 8434 | + ((struct dlm_header *) __tmp == msg)); |
4bf12011 | 8435 | + |
8436 | + khexdump((const unsigned char *) msg, msg->rh_length); | |
8437 | + | |
8438 | + return -EBADMSG; | |
8439 | + } | |
8440 | + | |
8441 | + switch (msg->rh_cmd) { | |
8442 | + case GDLM_REMCMD_RECOVERMESSAGE: | |
8443 | + case GDLM_REMCMD_RECOVERREPLY: | |
8444 | + process_recovery_comm(nodeid, msg); | |
8445 | + break; | |
8446 | + default: | |
8447 | + process_cluster_request(nodeid, msg, FALSE); | |
8448 | + } | |
8449 | + } | |
8450 | + | |
10d56c87 | 8451 | + if (msg != (struct dlm_header *) __tmp) |
4bf12011 | 8452 | + kfree(msg); |
8453 | + | |
8454 | + return err ? err : ret; | |
8455 | +} | |
8456 | + | |
/*
 * Send a lowcomms buffer: run host_to_network() over the message, then
 * commit its writequeue entry with lowcomms_commit_buffer().
 */
void midcomms_send_buffer(struct dlm_header *msg, struct writequeue_entry *e)
{
	host_to_network(msg);

	lowcomms_commit_buffer(e);
}
8466 | + | |
8467 | +/* | |
8468 | + * Make the message into network byte order and send it | |
8469 | + */ | |
8470 | + | |
10d56c87 | 8471 | +int midcomms_send_message(uint32_t nodeid, struct dlm_header *msg, |
4bf12011 | 8472 | + int allocation) |
8473 | +{ | |
8474 | + int len = msg->rh_length; | |
8475 | + | |
8476 | + host_to_network(msg); | |
8477 | + | |
8478 | + /* | |
8479 | + * Loopback. In fact, the locking code pretty much prevents this from | |
8480 | + * being needed but it can happen when the directory node is also the | |
8481 | + * local node. | |
8482 | + */ | |
8483 | + | |
8484 | + if (nodeid == our_nodeid()) | |
8485 | + return midcomms_process_incoming_buffer(nodeid, (char *) msg, 0, | |
8486 | + len, len); | |
8487 | + | |
8488 | + return lowcomms_send_message(nodeid, (char *) msg, len, allocation); | |
8489 | +} | |
8490 | diff -urN linux-orig/cluster/dlm/midcomms.h linux-patched/cluster/dlm/midcomms.h | |
8491 | --- linux-orig/cluster/dlm/midcomms.h 1970-01-01 07:30:00.000000000 +0730 | |
10d56c87 | 8492 | +++ linux-patched/cluster/dlm/midcomms.h 2004-07-13 18:57:22.000000000 +0800 |
4bf12011 | 8493 | @@ -0,0 +1,24 @@ |
8494 | +/****************************************************************************** | |
8495 | +******************************************************************************* | |
8496 | +** | |
8497 | +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | |
8498 | +** Copyright (C) 2004 Red Hat, Inc. All rights reserved. | |
8499 | +** | |
8500 | +** This copyrighted material is made available to anyone wishing to use, | |
8501 | +** modify, copy, or redistribute it subject to the terms and conditions | |
8502 | +** of the GNU General Public License v.2. | |
8503 | +** | |
8504 | +******************************************************************************* | |
8505 | +******************************************************************************/ | |
8506 | + | |
8507 | +#ifndef __MIDCOMMS_DOT_H__ | |
8508 | +#define __MIDCOMMS_DOT_H__ | |
8509 | + | |
10d56c87 | 8510 | +int midcomms_send_message(uint32_t csid, struct dlm_header *msg, |
4bf12011 | 8511 | + int allocation); |
8512 | +int midcomms_process_incoming_buffer(int csid, const void *buf, unsigned offset, | |
8513 | + unsigned len, unsigned limit); | |
10d56c87 | 8514 | +void midcomms_send_buffer(struct dlm_header *msg, |
4bf12011 | 8515 | + struct writequeue_entry *e); |
8516 | + | |
8517 | +#endif /* __MIDCOMMS_DOT_H__ */ | |
8518 | diff -urN linux-orig/cluster/dlm/nodes.c linux-patched/cluster/dlm/nodes.c | |
8519 | --- linux-orig/cluster/dlm/nodes.c 1970-01-01 07:30:00.000000000 +0730 | |
10d56c87 | 8520 | +++ linux-patched/cluster/dlm/nodes.c 2004-07-13 18:57:22.000000000 +0800 |
4bf12011 | 8521 | @@ -0,0 +1,325 @@ |
8522 | +/****************************************************************************** | |
8523 | +******************************************************************************* | |
8524 | +** | |
8525 | +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | |
8526 | +** Copyright (C) 2004 Red Hat, Inc. All rights reserved. | |
8527 | +** | |
8528 | +** This copyrighted material is made available to anyone wishing to use, | |
8529 | +** modify, copy, or redistribute it subject to the terms and conditions | |
8530 | +** of the GNU General Public License v.2. | |
8531 | +** | |
8532 | +******************************************************************************* | |
8533 | +******************************************************************************/ | |
8534 | + | |
8535 | +#include <net/sock.h> | |
8536 | +#include <cluster/cnxman.h> | |
8537 | + | |
8538 | +#include "dlm_internal.h" | |
8539 | +#include "lowcomms.h" | |
8540 | +#include "nodes.h" | |
8541 | +#include "recover.h" | |
8542 | +#include "reccomms.h" | |
8543 | +#include "util.h" | |
8544 | + | |
8545 | +static struct list_head cluster_nodes; | |
8546 | +static spinlock_t node_lock; | |
8547 | +static uint32_t local_nodeid; | |
8548 | +static struct semaphore local_init_lock; | |
8549 | + | |
8550 | + | |
8551 | +void dlm_nodes_init(void) | |
8552 | +{ | |
8553 | + INIT_LIST_HEAD(&cluster_nodes); | |
8554 | + spin_lock_init(&node_lock); | |
8555 | + local_nodeid = 0; | |
8556 | + init_MUTEX(&local_init_lock); | |
8557 | +} | |
8558 | + | |
10d56c87 | 8559 | +static struct dlm_node *search_node(uint32_t nodeid) |
4bf12011 | 8560 | +{ |
10d56c87 | 8561 | + struct dlm_node *node; |
4bf12011 | 8562 | + |
10d56c87 AM |
8563 | + list_for_each_entry(node, &cluster_nodes, list) { |
8564 | + if (node->nodeid == nodeid) | |
4bf12011 | 8565 | + goto out; |
8566 | + } | |
8567 | + node = NULL; | |
8568 | + out: | |
8569 | + return node; | |
8570 | +} | |
8571 | + | |
10d56c87 | 8572 | +static void put_node(struct dlm_node *node) |
4bf12011 | 8573 | +{ |
8574 | + spin_lock(&node_lock); | |
10d56c87 AM |
8575 | + node->refcount--; |
8576 | + if (node->refcount == 0) { | |
8577 | + list_del(&node->list); | |
4bf12011 | 8578 | + spin_unlock(&node_lock); |
8579 | + kfree(node); | |
8580 | + return; | |
8581 | + } | |
8582 | + spin_unlock(&node_lock); | |
8583 | +} | |
8584 | + | |
10d56c87 | 8585 | +static int get_node(uint32_t nodeid, struct dlm_node **ndp) |
4bf12011 | 8586 | +{ |
10d56c87 | 8587 | + struct dlm_node *node, *node2; |
4bf12011 | 8588 | + int error = -ENOMEM; |
8589 | + | |
8590 | + spin_lock(&node_lock); | |
8591 | + node = search_node(nodeid); | |
8592 | + if (node) | |
10d56c87 | 8593 | + node->refcount++; |
4bf12011 | 8594 | + spin_unlock(&node_lock); |
8595 | + | |
8596 | + if (node) | |
8597 | + goto out; | |
8598 | + | |
10d56c87 | 8599 | + node = (struct dlm_node *) kmalloc(sizeof(struct dlm_node), GFP_KERNEL); |
4bf12011 | 8600 | + if (!node) |
8601 | + goto fail; | |
8602 | + | |
10d56c87 AM |
8603 | + memset(node, 0, sizeof(struct dlm_node)); |
8604 | + node->nodeid = nodeid; | |
4bf12011 | 8605 | + |
8606 | + spin_lock(&node_lock); | |
8607 | + node2 = search_node(nodeid); | |
8608 | + if (node2) { | |
10d56c87 | 8609 | + node2->refcount++; |
4bf12011 | 8610 | + spin_unlock(&node_lock); |
8611 | + kfree(node); | |
8612 | + node = node2; | |
8613 | + goto out; | |
8614 | + } | |
8615 | + | |
10d56c87 AM |
8616 | + node->refcount = 1; |
8617 | + list_add_tail(&node->list, &cluster_nodes); | |
4bf12011 | 8618 | + spin_unlock(&node_lock); |
8619 | + | |
8620 | + out: | |
8621 | + *ndp = node; | |
8622 | + return 0; | |
8623 | + | |
8624 | + fail: | |
8625 | + return error; | |
8626 | +} | |
8627 | + | |
10d56c87 | 8628 | +int init_new_csb(uint32_t nodeid, struct dlm_csb **ret_csb) |
4bf12011 | 8629 | +{ |
10d56c87 AM |
8630 | + struct dlm_csb *csb; |
8631 | + struct dlm_node *node; | |
4bf12011 | 8632 | + int error = -ENOMEM; |
8633 | + | |
10d56c87 | 8634 | + csb = (struct dlm_csb *) kmalloc(sizeof(struct dlm_csb), GFP_KERNEL); |
4bf12011 | 8635 | + if (!csb) |
8636 | + goto fail; | |
8637 | + | |
10d56c87 | 8638 | + memset(csb, 0, sizeof(struct dlm_csb)); |
4bf12011 | 8639 | + |
8640 | + error = get_node(nodeid, &node); | |
8641 | + if (error) | |
8642 | + goto fail_free; | |
8643 | + | |
10d56c87 | 8644 | + csb->node = node; |
4bf12011 | 8645 | + |
8646 | + down(&local_init_lock); | |
8647 | + | |
8648 | + if (!local_nodeid) { | |
8649 | + if (nodeid == our_nodeid()) { | |
10d56c87 | 8650 | + local_nodeid = node->nodeid; |
4bf12011 | 8651 | + } |
8652 | + } | |
8653 | + up(&local_init_lock); | |
8654 | + | |
8655 | + *ret_csb = csb; | |
8656 | + return 0; | |
8657 | + | |
8658 | + fail_free: | |
8659 | + kfree(csb); | |
8660 | + fail: | |
8661 | + return error; | |
8662 | +} | |
8663 | + | |
10d56c87 | 8664 | +void release_csb(struct dlm_csb *csb) |
4bf12011 | 8665 | +{ |
10d56c87 | 8666 | + put_node(csb->node); |
4bf12011 | 8667 | + kfree(csb); |
8668 | +} | |
8669 | + | |
8670 | +uint32_t our_nodeid(void) | |
8671 | +{ | |
8672 | + return lowcomms_our_nodeid(); | |
8673 | +} | |
8674 | + | |
10d56c87 | 8675 | +int nodes_reconfig_wait(struct dlm_ls *ls) |
4bf12011 | 8676 | +{ |
8677 | + int error; | |
8678 | + | |
8679 | + if (ls->ls_low_nodeid == our_nodeid()) { | |
10d56c87 | 8680 | + error = dlm_wait_status_all(ls, NODES_VALID); |
4bf12011 | 8681 | + if (!error) |
8682 | + set_bit(LSFL_ALL_NODES_VALID, &ls->ls_flags); | |
8683 | + | |
8684 | + /* Experimental: this delay should allow any final messages | |
8685 | + * from the previous node to be received before beginning | |
8686 | + * recovery. */ | |
8687 | + | |
8688 | + if (ls->ls_num_nodes == 1) { | |
8689 | + current->state = TASK_UNINTERRUPTIBLE; | |
8690 | + schedule_timeout((2) * HZ); | |
8691 | + } | |
8692 | + | |
8693 | + } else | |
10d56c87 | 8694 | + error = dlm_wait_status_low(ls, NODES_ALL_VALID); |
4bf12011 | 8695 | + |
8696 | + return error; | |
8697 | +} | |
8698 | + | |
10d56c87 | 8699 | +static void add_ordered_node(struct dlm_ls *ls, struct dlm_csb *new) |
4bf12011 | 8700 | +{ |
10d56c87 | 8701 | + struct dlm_csb *csb = NULL; |
4bf12011 | 8702 | + struct list_head *tmp; |
10d56c87 | 8703 | + struct list_head *newlist = &new->list; |
4bf12011 | 8704 | + struct list_head *head = &ls->ls_nodes; |
8705 | + | |
8706 | + list_for_each(tmp, head) { | |
10d56c87 | 8707 | + csb = list_entry(tmp, struct dlm_csb, list); |
4bf12011 | 8708 | + |
10d56c87 | 8709 | + if (new->node->nodeid < csb->node->nodeid) |
4bf12011 | 8710 | + break; |
8711 | + } | |
8712 | + | |
8713 | + if (!csb) | |
8714 | + list_add_tail(newlist, head); | |
8715 | + else { | |
8716 | + /* FIXME: can use list macro here */ | |
8717 | + newlist->prev = tmp->prev; | |
8718 | + newlist->next = tmp; | |
8719 | + tmp->prev->next = newlist; | |
8720 | + tmp->prev = newlist; | |
8721 | + } | |
8722 | +} | |
8723 | + | |
10d56c87 | 8724 | +int ls_nodes_reconfig(struct dlm_ls *ls, struct dlm_recover *rv, int *neg_out) |
4bf12011 | 8725 | +{ |
10d56c87 | 8726 | + struct dlm_csb *csb, *safe; |
4bf12011 | 8727 | + int error, i, found, pos = 0, neg = 0; |
8728 | + uint32_t low = (uint32_t) (-1); | |
8729 | + | |
8730 | + /* | |
8731 | + * Remove (and save) departed nodes from lockspace's nodes list | |
8732 | + */ | |
8733 | + | |
10d56c87 | 8734 | + list_for_each_entry_safe(csb, safe, &ls->ls_nodes, list) { |
4bf12011 | 8735 | + found = FALSE; |
10d56c87 AM |
8736 | + for (i = 0; i < rv->node_count; i++) { |
8737 | + if (csb->node->nodeid == rv->nodeids[i]) { | |
4bf12011 | 8738 | + found = TRUE; |
8739 | + break; | |
8740 | + } | |
8741 | + } | |
8742 | + | |
8743 | + if (!found) { | |
8744 | + neg++; | |
10d56c87 AM |
8745 | + csb->gone_event = rv->event_id; |
8746 | + list_del(&csb->list); | |
8747 | + list_add_tail(&csb->list, &ls->ls_nodes_gone); | |
4bf12011 | 8748 | + ls->ls_num_nodes--; |
10d56c87 | 8749 | + log_all(ls, "remove node %u", csb->node->nodeid); |
4bf12011 | 8750 | + } |
8751 | + } | |
8752 | + | |
8753 | + /* | |
8754 | + * Add new nodes to lockspace's nodes list | |
8755 | + */ | |
8756 | + | |
10d56c87 | 8757 | + for (i = 0; i < rv->node_count; i++) { |
4bf12011 | 8758 | + found = FALSE; |
10d56c87 AM |
8759 | + list_for_each_entry(csb, &ls->ls_nodes, list) { |
8760 | + if (csb->node->nodeid == rv->nodeids[i]) { | |
4bf12011 | 8761 | + found = TRUE; |
8762 | + break; | |
8763 | + } | |
8764 | + } | |
8765 | + | |
8766 | + if (!found) { | |
8767 | + pos++; | |
8768 | + | |
10d56c87 AM |
8769 | + error = init_new_csb(rv->nodeids[i], &csb); |
8770 | + DLM_ASSERT(!error,); | |
4bf12011 | 8771 | + |
8772 | + add_ordered_node(ls, csb); | |
8773 | + ls->ls_num_nodes++; | |
10d56c87 | 8774 | + log_all(ls, "add node %u", csb->node->nodeid); |
4bf12011 | 8775 | + } |
8776 | + } | |
8777 | + | |
10d56c87 AM |
8778 | + list_for_each_entry(csb, &ls->ls_nodes, list) { |
8779 | + if (csb->node->nodeid < low) | |
8780 | + low = csb->node->nodeid; | |
4bf12011 | 8781 | + } |
8782 | + | |
8783 | + rcom_log_clear(ls); | |
8784 | + ls->ls_low_nodeid = low; | |
10d56c87 | 8785 | + ls->ls_nodes_mask = dlm_next_power2(ls->ls_num_nodes) - 1; |
4bf12011 | 8786 | + set_bit(LSFL_NODES_VALID, &ls->ls_flags); |
8787 | + *neg_out = neg; | |
8788 | + | |
8789 | + error = nodes_reconfig_wait(ls); | |
8790 | + | |
8791 | + log_all(ls, "total nodes %d", ls->ls_num_nodes); | |
8792 | + | |
8793 | + return error; | |
8794 | +} | |
8795 | + | |
10d56c87 | 8796 | +int ls_nodes_init(struct dlm_ls *ls, struct dlm_recover *rv) |
4bf12011 | 8797 | +{ |
10d56c87 | 8798 | + struct dlm_csb *csb; |
4bf12011 | 8799 | + int i, error; |
8800 | + uint32_t low = (uint32_t) (-1); | |
8801 | + | |
8802 | + log_all(ls, "add nodes"); | |
8803 | + | |
10d56c87 AM |
8804 | + for (i = 0; i < rv->node_count; i++) { |
8805 | + error = init_new_csb(rv->nodeids[i], &csb); | |
4bf12011 | 8806 | + if (error) |
8807 | + goto fail; | |
8808 | + | |
8809 | + add_ordered_node(ls, csb); | |
8810 | + ls->ls_num_nodes++; | |
8811 | + | |
10d56c87 AM |
8812 | + if (csb->node->nodeid < low) |
8813 | + low = csb->node->nodeid; | |
4bf12011 | 8814 | + } |
8815 | + | |
8816 | + ls->ls_low_nodeid = low; | |
10d56c87 | 8817 | + ls->ls_nodes_mask = dlm_next_power2(ls->ls_num_nodes) - 1; |
4bf12011 | 8818 | + set_bit(LSFL_NODES_VALID, &ls->ls_flags); |
8819 | + | |
8820 | + error = nodes_reconfig_wait(ls); | |
8821 | + | |
8822 | + log_all(ls, "total nodes %d", ls->ls_num_nodes); | |
8823 | + | |
8824 | + return error; | |
8825 | + | |
8826 | + fail: | |
8827 | + while (!list_empty(&ls->ls_nodes)) { | |
10d56c87 AM |
8828 | + csb = list_entry(ls->ls_nodes.next, struct dlm_csb, list); |
8829 | + list_del(&csb->list); | |
4bf12011 | 8830 | + release_csb(csb); |
8831 | + } | |
8832 | + ls->ls_num_nodes = 0; | |
8833 | + | |
8834 | + return error; | |
8835 | +} | |
8836 | + | |
10d56c87 | 8837 | +int in_nodes_gone(struct dlm_ls *ls, uint32_t nodeid) |
4bf12011 | 8838 | +{ |
10d56c87 | 8839 | + struct dlm_csb *csb; |
4bf12011 | 8840 | + |
10d56c87 AM |
8841 | + list_for_each_entry(csb, &ls->ls_nodes_gone, list) { |
8842 | + if (csb->node->nodeid == nodeid) | |
4bf12011 | 8843 | + return TRUE; |
8844 | + } | |
8845 | + return FALSE; | |
8846 | +} | |
8847 | diff -urN linux-orig/cluster/dlm/nodes.h linux-patched/cluster/dlm/nodes.h | |
8848 | --- linux-orig/cluster/dlm/nodes.h 1970-01-01 07:30:00.000000000 +0730 | |
10d56c87 | 8849 | +++ linux-patched/cluster/dlm/nodes.h 2004-07-13 18:57:22.000000000 +0800 |
4bf12011 | 8850 | @@ -0,0 +1,25 @@ |
8851 | +/****************************************************************************** | |
8852 | +******************************************************************************* | |
8853 | +** | |
8854 | +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | |
8855 | +** Copyright (C) 2004 Red Hat, Inc. All rights reserved. | |
8856 | +** | |
8857 | +** This copyrighted material is made available to anyone wishing to use, | |
8858 | +** modify, copy, or redistribute it subject to the terms and conditions | |
8859 | +** of the GNU General Public License v.2. | |
8860 | +** | |
8861 | +******************************************************************************* | |
8862 | +******************************************************************************/ | |
8863 | + | |
8864 | +#ifndef __NODES_DOT_H__ | |
8865 | +#define __NODES_DOT_H__ | |
8866 | + | |
8867 | +void dlm_nodes_init(void); | |
10d56c87 AM |
8868 | +int init_new_csb(uint32_t nodeid, struct dlm_csb ** ret_csb); |
8869 | +void release_csb(struct dlm_csb * csb); | |
4bf12011 | 8870 | +uint32_t our_nodeid(void); |
10d56c87 AM |
8871 | +int ls_nodes_reconfig(struct dlm_ls * ls, struct dlm_recover * gr, int *neg); |
8872 | +int ls_nodes_init(struct dlm_ls * ls, struct dlm_recover * gr); | |
8873 | +int in_nodes_gone(struct dlm_ls * ls, uint32_t nodeid); | |
4bf12011 | 8874 | + |
8875 | +#endif /* __NODES_DOT_H__ */ | |
8876 | diff -urN linux-orig/cluster/dlm/proc.c linux-patched/cluster/dlm/proc.c | |
8877 | --- linux-orig/cluster/dlm/proc.c 1970-01-01 07:30:00.000000000 +0730 | |
10d56c87 AM |
8878 | +++ linux-patched/cluster/dlm/proc.c 2004-07-13 18:57:22.000000000 +0800 |
8879 | @@ -0,0 +1,473 @@ | |
4bf12011 | 8880 | +/****************************************************************************** |
8881 | +******************************************************************************* | |
8882 | +** | |
8883 | +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | |
8884 | +** Copyright (C) 2004 Red Hat, Inc. All rights reserved. | |
8885 | +** | |
8886 | +** This copyrighted material is made available to anyone wishing to use, | |
8887 | +** modify, copy, or redistribute it subject to the terms and conditions | |
8888 | +** of the GNU General Public License v.2. | |
8889 | +** | |
8890 | +******************************************************************************* | |
8891 | +******************************************************************************/ | |
8892 | + | |
8893 | +#include <linux/init.h> | |
8894 | +#include <linux/proc_fs.h> | |
8895 | +#include <linux/ctype.h> | |
8896 | +#include <linux/seq_file.h> | |
8897 | +#include <linux/module.h> | |
8898 | + | |
8899 | +#include "dlm_internal.h" | |
8900 | +#include "lockspace.h" | |
8901 | + | |
8902 | +#if defined(DLM_DEBUG) | |
8903 | +#define DLM_DEBUG_SIZE (1024) | |
8904 | +#define MAX_DEBUG_MSG_LEN (64) | |
8905 | +#else | |
8906 | +#define DLM_DEBUG_SIZE (0) | |
8907 | +#define MAX_DEBUG_MSG_LEN (0) | |
8908 | +#endif | |
8909 | + | |
8910 | +static char * debug_buf; | |
8911 | +static unsigned int debug_size; | |
8912 | +static unsigned int debug_point; | |
8913 | +static int debug_wrap; | |
8914 | +static spinlock_t debug_lock; | |
8915 | +static struct proc_dir_entry * debug_proc_entry = NULL; | |
8916 | +static struct proc_dir_entry * rcom_proc_entry = NULL; | |
8917 | +static char proc_ls_name[255] = ""; | |
8918 | + | |
8919 | +#ifdef CONFIG_CLUSTER_DLM_PROCLOCKS | |
8920 | +static struct proc_dir_entry * locks_proc_entry = NULL; | |
8921 | +static struct seq_operations locks_info_op; | |
8922 | + | |
8923 | + | |
8924 | +static int locks_open(struct inode *inode, struct file *file) | |
8925 | +{ | |
8926 | + return seq_open(file, &locks_info_op); | |
8927 | +} | |
8928 | + | |
8929 | +/* Write simply sets the lockspace to use */ | |
8930 | +static ssize_t locks_write(struct file *file, const char *buf, | |
8931 | + size_t count, loff_t * ppos) | |
8932 | +{ | |
8933 | + if (count < sizeof(proc_ls_name)) { | |
8934 | + copy_from_user(proc_ls_name, buf, count); | |
8935 | + proc_ls_name[count] = '\0'; | |
8936 | + | |
8937 | + /* Remove any trailing LF so that lazy users | |
8938 | + can just echo "lsname" > /proc/cluster/dlm_locks */ | |
8939 | + if (proc_ls_name[count - 1] == '\n') | |
8940 | + proc_ls_name[count - 1] = '\0'; | |
8941 | + | |
8942 | + return count; | |
8943 | + } | |
8944 | + return 0; | |
8945 | +} | |
8946 | + | |
8947 | +static struct file_operations locks_fops = { | |
8948 | + open:locks_open, | |
8949 | + write:locks_write, | |
8950 | + read:seq_read, | |
8951 | + llseek:seq_lseek, | |
8952 | + release:seq_release, | |
8953 | +}; | |
8954 | + | |
/* Cursor used by the seq_file iterator while walking a lockspace's rsbs. */
struct ls_dumpinfo {
	int entry;		/* index of the current ls_rsbtbl bucket */
	struct list_head *next;	/* current position in that bucket's list,
				   NULL when a new bucket must be found */
	struct dlm_ls *ls;	/* lockspace being dumped */
	struct dlm_rsb *rsb;	/* resource at the current position */
};
8961 | + | |
10d56c87 | 8962 | +static int print_resource(struct dlm_rsb * res, struct seq_file *s); |
4bf12011 | 8963 | + |
8964 | +static struct ls_dumpinfo *next_rsb(struct ls_dumpinfo *di) | |
8965 | +{ | |
10d56c87 AM |
8966 | + int i; |
8967 | + | |
4bf12011 | 8968 | + if (!di->next) { |
8969 | + /* Find the next non-empty hash bucket */ | |
10d56c87 AM |
8970 | + for (i = di->entry; i < di->ls->ls_rsbtbl_size; i++) { |
8971 | + read_lock(&di->ls->ls_rsbtbl[i].lock); | |
8972 | + if (!list_empty(&di->ls->ls_rsbtbl[i].list)) { | |
8973 | + di->next = di->ls->ls_rsbtbl[i].list.next; | |
8974 | + read_unlock(&di->ls->ls_rsbtbl[i].lock); | |
8975 | + break; | |
8976 | + } | |
8977 | + read_unlock(&di->ls->ls_rsbtbl[i].lock); | |
4bf12011 | 8978 | + } |
10d56c87 | 8979 | + di->entry = i; |
4bf12011 | 8980 | + |
10d56c87 AM |
8981 | + if (di->entry >= di->ls->ls_rsbtbl_size) |
8982 | + return NULL; /* End of hash list */ | |
4bf12011 | 8983 | + } else { /* Find the next entry in the list */ |
10d56c87 AM |
8984 | + i = di->entry; |
8985 | + read_lock(&di->ls->ls_rsbtbl[i].lock); | |
4bf12011 | 8986 | + di->next = di->next->next; |
10d56c87 | 8987 | + if (di->next->next == di->ls->ls_rsbtbl[i].list.next) { |
4bf12011 | 8988 | + /* End of list - move to next bucket */ |
8989 | + di->next = NULL; | |
8990 | + di->entry++; | |
10d56c87 | 8991 | + read_unlock(&di->ls->ls_rsbtbl[i].lock); |
4bf12011 | 8992 | + return next_rsb(di); /* do the top half of this conditional */ |
8993 | + } | |
10d56c87 | 8994 | + read_unlock(&di->ls->ls_rsbtbl[i].lock); |
4bf12011 | 8995 | + } |
10d56c87 | 8996 | + di->rsb = list_entry(di->next, struct dlm_rsb, res_hashchain); |
4bf12011 | 8997 | + |
8998 | + return di; | |
8999 | +} | |
9000 | + | |
9001 | +static void *s_start(struct seq_file *m, loff_t * pos) | |
9002 | +{ | |
9003 | + struct ls_dumpinfo *di; | |
10d56c87 | 9004 | + struct dlm_ls *ls; |
4bf12011 | 9005 | + int i; |
9006 | + | |
9007 | + ls = find_lockspace_by_name(proc_ls_name, strlen(proc_ls_name)); | |
9008 | + if (!ls) | |
9009 | + return NULL; | |
9010 | + | |
9011 | + di = kmalloc(sizeof(struct ls_dumpinfo), GFP_KERNEL); | |
9012 | + if (!di) | |
9013 | + return NULL; | |
9014 | + | |
9015 | + if (*pos == 0) | |
9016 | + seq_printf(m, "DLM lockspace '%s'\n", proc_ls_name); | |
9017 | + | |
9018 | + di->entry = 0; | |
9019 | + di->next = NULL; | |
9020 | + di->ls = ls; | |
9021 | + | |
9022 | + for (i = 0; i < *pos; i++) | |
9023 | + if (next_rsb(di) == NULL) | |
9024 | + return NULL; | |
9025 | + | |
9026 | + return next_rsb(di); | |
9027 | +} | |
9028 | + | |
9029 | +static void *s_next(struct seq_file *m, void *p, loff_t * pos) | |
9030 | +{ | |
9031 | + struct ls_dumpinfo *di = p; | |
9032 | + | |
9033 | + *pos += 1; | |
9034 | + | |
9035 | + return next_rsb(di); | |
9036 | +} | |
9037 | + | |
9038 | +static int s_show(struct seq_file *m, void *p) | |
9039 | +{ | |
9040 | + struct ls_dumpinfo *di = p; | |
9041 | + return print_resource(di->rsb, m); | |
9042 | +} | |
9043 | + | |
/* seq_file stop(): free whatever iterator pointer @p the core hands back. */
static void s_stop(struct seq_file *m, void *p)
{
	void *cursor = p;

	kfree(cursor);
}
9048 | + | |
9049 | +static struct seq_operations locks_info_op = { | |
9050 | + start:s_start, | |
9051 | + next:s_next, | |
9052 | + stop:s_stop, | |
9053 | + show:s_show | |
9054 | +}; | |
9055 | + | |
9056 | +static char *print_lockmode(int mode) | |
9057 | +{ | |
9058 | + switch (mode) { | |
9059 | + case DLM_LOCK_IV: | |
9060 | + return "--"; | |
9061 | + case DLM_LOCK_NL: | |
9062 | + return "NL"; | |
9063 | + case DLM_LOCK_CR: | |
9064 | + return "CR"; | |
9065 | + case DLM_LOCK_CW: | |
9066 | + return "CW"; | |
9067 | + case DLM_LOCK_PR: | |
9068 | + return "PR"; | |
9069 | + case DLM_LOCK_PW: | |
9070 | + return "PW"; | |
9071 | + case DLM_LOCK_EX: | |
9072 | + return "EX"; | |
9073 | + default: | |
9074 | + return "??"; | |
9075 | + } | |
9076 | +} | |
9077 | + | |
10d56c87 | 9078 | +static void print_lock(struct seq_file *s, struct dlm_lkb * lkb, struct dlm_rsb * res) |
4bf12011 | 9079 | +{ |
9080 | + | |
9081 | + seq_printf(s, "%08x %s", lkb->lkb_id, print_lockmode(lkb->lkb_grmode)); | |
9082 | + | |
9083 | + if (lkb->lkb_status == GDLM_LKSTS_CONVERT | |
9084 | + || lkb->lkb_status == GDLM_LKSTS_WAITING) | |
9085 | + seq_printf(s, " (%s)", print_lockmode(lkb->lkb_rqmode)); | |
9086 | + | |
9087 | + if (lkb->lkb_range) { | |
9088 | + /* This warns on Alpha. Tough. Only I see it */ | |
9089 | + if (lkb->lkb_status == GDLM_LKSTS_CONVERT | |
9090 | + || lkb->lkb_status == GDLM_LKSTS_GRANTED) | |
9091 | + seq_printf(s, " %" PRIx64 "-%" PRIx64, | |
9092 | + lkb->lkb_range[GR_RANGE_START], | |
9093 | + lkb->lkb_range[GR_RANGE_END]); | |
9094 | + if (lkb->lkb_status == GDLM_LKSTS_CONVERT | |
9095 | + || lkb->lkb_status == GDLM_LKSTS_WAITING) | |
9096 | + seq_printf(s, " (%" PRIx64 "-%" PRIx64 ")", | |
9097 | + lkb->lkb_range[RQ_RANGE_START], | |
9098 | + lkb->lkb_range[RQ_RANGE_END]); | |
9099 | + } | |
9100 | + | |
9101 | + if (lkb->lkb_nodeid) { | |
9102 | + if (lkb->lkb_nodeid != res->res_nodeid) | |
9103 | + seq_printf(s, " Remote: %3d %08x", lkb->lkb_nodeid, | |
9104 | + lkb->lkb_remid); | |
9105 | + else | |
9106 | + seq_printf(s, " Master: %08x", lkb->lkb_remid); | |
9107 | + } | |
9108 | + | |
9109 | + if (lkb->lkb_status != GDLM_LKSTS_GRANTED) | |
9110 | + seq_printf(s, " LQ: %d", lkb->lkb_lockqueue_state); | |
9111 | + | |
9112 | + seq_printf(s, "\n"); | |
9113 | +} | |
9114 | + | |
10d56c87 | 9115 | +static int print_resource(struct dlm_rsb *res, struct seq_file *s) |
4bf12011 | 9116 | +{ |
9117 | + int i; | |
9118 | + struct list_head *locklist; | |
9119 | + | |
9120 | + seq_printf(s, "\nResource %p (parent %p). Name (len=%d) \"", res, | |
9121 | + res->res_parent, res->res_length); | |
9122 | + for (i = 0; i < res->res_length; i++) { | |
9123 | + if (isprint(res->res_name[i])) | |
9124 | + seq_printf(s, "%c", res->res_name[i]); | |
9125 | + else | |
9126 | + seq_printf(s, "%c", '.'); | |
9127 | + } | |
9128 | + if (res->res_nodeid) | |
9129 | + seq_printf(s, "\" \nLocal Copy, Master is node %d\n", | |
9130 | + res->res_nodeid); | |
9131 | + else | |
9132 | + seq_printf(s, "\" \nMaster Copy\n"); | |
9133 | + | |
9134 | + /* Print the LVB: */ | |
9135 | + if (res->res_lvbptr) { | |
9136 | + seq_printf(s, "LVB: "); | |
9137 | + for (i = 0; i < DLM_LVB_LEN; i++) { | |
9138 | + if (i == DLM_LVB_LEN / 2) | |
9139 | + seq_printf(s, "\n "); | |
9140 | + seq_printf(s, "%02x ", | |
9141 | + (unsigned char) res->res_lvbptr[i]); | |
9142 | + } | |
9143 | + seq_printf(s, "\n"); | |
9144 | + } | |
9145 | + | |
9146 | + /* Print the locks attached to this resource */ | |
9147 | + seq_printf(s, "Granted Queue\n"); | |
9148 | + list_for_each(locklist, &res->res_grantqueue) { | |
10d56c87 AM |
9149 | + struct dlm_lkb *this_lkb = |
9150 | + list_entry(locklist, struct dlm_lkb, lkb_statequeue); | |
4bf12011 | 9151 | + print_lock(s, this_lkb, res); |
9152 | + } | |
9153 | + | |
9154 | + seq_printf(s, "Conversion Queue\n"); | |
9155 | + list_for_each(locklist, &res->res_convertqueue) { | |
10d56c87 AM |
9156 | + struct dlm_lkb *this_lkb = |
9157 | + list_entry(locklist, struct dlm_lkb, lkb_statequeue); | |
4bf12011 | 9158 | + print_lock(s, this_lkb, res); |
9159 | + } | |
9160 | + | |
9161 | + seq_printf(s, "Waiting Queue\n"); | |
9162 | + list_for_each(locklist, &res->res_waitqueue) { | |
10d56c87 AM |
9163 | + struct dlm_lkb *this_lkb = |
9164 | + list_entry(locklist, struct dlm_lkb, lkb_statequeue); | |
4bf12011 | 9165 | + print_lock(s, this_lkb, res); |
9166 | + } | |
9167 | + return 0; | |
9168 | +} | |
9169 | +#endif /* CONFIG_CLUSTER_DLM_PROCLOCKS */ | |
9170 | + | |
10d56c87 | 9171 | +void dlm_debug_log(struct dlm_ls *ls, const char *fmt, ...) /* Append the lockspace name, the formatted message and a newline to the circular debug buffer; does nothing when no buffer is allocated. */ |
4bf12011 | 9172 | +{ |
9173 | + va_list va; | |
9174 | + int i, n, size, len; | |
9175 | + char buf[MAX_DEBUG_MSG_LEN+1]; | |
9176 | + | |
9177 | + spin_lock(&debug_lock); | |
9178 | + | |
9179 | + if (!debug_buf) | |
9180 | + goto out; | |
9181 | + | |
9182 | + size = MAX_DEBUG_MSG_LEN; | |
9183 | + memset(buf, 0, size+1); | |
9184 | + | |
9185 | + n = snprintf(buf, size, "%s ", ls->ls_name); | |
9186 | + if (n >= size) /* snprintf reports the untruncated length; keep n inside buf */ | |
9187 | + n = size - 1; | |
9188 | + va_start(va, fmt); | |
9189 | + vsnprintf(buf+n, size-n, fmt, va); | |
9190 | + va_end(va); | |
9191 | + | |
9192 | + len = strlen(buf); | |
9193 | + if (len > MAX_DEBUG_MSG_LEN-1) | |
9194 | + len = MAX_DEBUG_MSG_LEN-1; | |
9195 | + buf[len] = '\n'; | |
9196 | + buf[len+1] = '\0'; | |
9197 | + | |
9198 | + for (i = 0; i <= len; i++) { /* len+1 bytes: the message plus its newline, without calling strlen on every pass */ | |
9199 | + debug_buf[debug_point++] = buf[i]; | |
9200 | + | |
9201 | + if (debug_point == debug_size) { /* reached the end: restart at 0 and remember that older data follows the write position */ | |
9202 | + debug_point = 0; | |
9203 | + debug_wrap = 1; | |
9204 | + } | |
9205 | + } | |
9206 | + out: | |
9207 | + spin_unlock(&debug_lock); | |
9208 | +} | |
9209 | + | |
9210 | +void dlm_debug_dump(void) /* Print the contents of the debug buffer with printk, one character at a time, while holding debug_lock. */ | |
9211 | +{ | |
9212 | + int i; | |
9213 | + | |
9214 | + spin_lock(&debug_lock); | |
9215 | + if (debug_wrap) { /* buffer has wrapped: bytes from the write position to the end are printed first */ | |
9216 | + for (i = debug_point; i < debug_size; i++) | |
9217 | + printk("%c", debug_buf[i]); | |
9218 | + } | |
9219 | + for (i = 0; i < debug_point; i++) /* then the bytes from the start up to the write position */ | |
9220 | + printk("%c", debug_buf[i]); | |
9221 | + spin_unlock(&debug_lock); | |
9222 | +} | |
9223 | + | |
9224 | +void dlm_debug_setup(int size) /* Replace the debug buffer with a zeroed one of size bytes (capped at PAGE_SIZE); size 0 or a failed allocation leaves logging disabled with no buffer. */ | |
9225 | +{ | |
9226 | + char *b = NULL; | |
9227 | + | |
9228 | + if (size > PAGE_SIZE) | |
9229 | + size = PAGE_SIZE; | |
9230 | + if (size) | |
9231 | + b = kmalloc(size, GFP_KERNEL); /* allocated before taking the spinlock */ | |
9232 | + | |
9233 | + spin_lock(&debug_lock); | |
9234 | + kfree(debug_buf); /* kfree(NULL) is a no-op */ | |
9235 | + debug_buf = NULL; /* never leave a dangling pointer for the log, dump and info paths */ | |
9236 | + debug_point = 0; | |
9237 | + debug_wrap = 0; | |
9238 | + debug_size = b ? size : 0; /* an empty state makes the dump and info loops run zero times */ | |
9239 | + if (!b) /* b is NULL both for size 0 and for a failed allocation */ | |
9240 | + goto out; | |
9241 | + debug_buf = b; | |
9242 | + memset(debug_buf, 0, debug_size); | |
9243 | + out: | |
9244 | + spin_unlock(&debug_lock); | |
9245 | +} | |
9246 | + | |
9247 | +static void dlm_debug_init(void) /* Reset the debug buffer state, initialise debug_lock and allocate the initial buffer of DLM_DEBUG_SIZE bytes. */ | |
9248 | +{ | |
9249 | + debug_buf = NULL; | |
9250 | + debug_size = 0; | |
9251 | + debug_point = 0; | |
9252 | + debug_wrap = 0; | |
9253 | + spin_lock_init(&debug_lock); /* must happen before dlm_debug_setup(), which takes debug_lock */ | |
9254 | + | |
9255 | + dlm_debug_setup(DLM_DEBUG_SIZE); | |
9256 | +} | |
9257 | + | |
9258 | +#ifdef CONFIG_PROC_FS | |
9259 | +int dlm_debug_info(char *b, char **start, off_t offset, int length) /* Installed as the get_info handler of cluster/dlm_debug: copies the debug buffer into b in oldest-first order and returns the number of characters written. start, offset and length are not used. */ | |
9260 | +{ | |
9261 | + int i, n = 0; | |
9262 | + | |
9263 | + spin_lock(&debug_lock); | |
9264 | + | |
9265 | + if (debug_wrap) { /* wrapped: the region after the write position holds the oldest data */ | |
9266 | + for (i = debug_point; i < debug_size; i++) | |
9267 | + n += sprintf(b + n, "%c", debug_buf[i]); /* NOTE(review): nothing here bounds n against the size of b, and sprintf adds a terminating NUL - confirm the caller buffer is larger than debug_size */ | |
9268 | + } | |
9269 | + for (i = 0; i < debug_point; i++) | |
9270 | + n += sprintf(b + n, "%c", debug_buf[i]); | |
9271 | + | |
9272 | + spin_unlock(&debug_lock); | |
9273 | + | |
9274 | + return n; | |
9275 | +} | |
9276 | + | |
9277 | +int dlm_rcom_info(char *b, char **start, off_t offset, int length) /* Installed as the get_info handler of cluster/dlm_rcom: writes a header line and one line of send and receive counters per entry of ls_nodes for the lockspace named by proc_ls_name. Returns the number of characters written, or 0 when that lockspace is not found. */ | |
9278 | +{ | |
10d56c87 AM |
9279 | + struct dlm_ls *ls; |
9280 | + struct dlm_csb *csb; | |
4bf12011 | 9281 | + int n = 0; |
9282 | + | |
9283 | + ls = find_lockspace_by_name(proc_ls_name, strlen(proc_ls_name)); /* proc_ls_name is defined outside this function */ | |
9284 | + if (!ls) | |
9285 | + return 0; | |
9286 | + | |
9287 | + n += sprintf(b + n, "nodeid names_send_count names_send_msgid " | |
9288 | + "names_recv_count names_recv_msgid " | |
9289 | + "locks_send_count locks_send_msgid " | |
9290 | + "locks_recv_count locks_recv_msgid\n"); | |
9291 | + | |
10d56c87 | 9292 | + list_for_each_entry(csb, &ls->ls_nodes, list) { /* NOTE(review): no lock is taken around this walk and the output length is not checked against length - confirm both are safe */ |
4bf12011 | 9293 | + n += sprintf(b + n, "%u %u %u %u %u %u %u %u %u\n", |
10d56c87 AM |
9294 | + csb->node->nodeid, |
9295 | + csb->names_send_count, | |
9296 | + csb->names_send_msgid, | |
9297 | + csb->names_recv_count, | |
9298 | + csb->names_recv_msgid, | |
9299 | + csb->locks_send_count, | |
9300 | + csb->locks_send_msgid, | |
9301 | + csb->locks_recv_count, | |
9302 | + csb->locks_recv_msgid); | |
4bf12011 | 9303 | + } |
9304 | + return n; | |
9305 | +} | |
9306 | +#endif | |
9307 | + | |
9308 | +void dlm_proc_init(void) /* Set up the debug buffer, then create the cluster/dlm_debug, cluster/dlm_rcom and (if configured) cluster/dlm_locks proc entries. Stops at the first entry that cannot be created. */ | |
9309 | +{ | |
9310 | + dlm_debug_init(); /* first: debug_lock and the buffer must be valid even if a proc entry fails, and before get_info can run */ | |
9311 | + | |
9312 | +#ifdef CONFIG_PROC_FS | |
9313 | + debug_proc_entry = create_proc_entry("cluster/dlm_debug", S_IRUGO, | |
9314 | + NULL); | |
9315 | + if (!debug_proc_entry) | |
9316 | + return; | |
9317 | + | |
9318 | + debug_proc_entry->get_info = &dlm_debug_info; | |
9319 | + | |
9320 | + rcom_proc_entry = create_proc_entry("cluster/dlm_rcom", S_IRUGO, NULL); | |
9321 | + if (!rcom_proc_entry) | |
9322 | + return; | |
9323 | + | |
9324 | + rcom_proc_entry->get_info = &dlm_rcom_info; | |
9325 | +#endif | |
9326 | +#ifdef CONFIG_CLUSTER_DLM_PROCLOCKS | |
9327 | + locks_proc_entry = create_proc_read_entry("cluster/dlm_locks", | |
9328 | + S_IFREG | 0400, | |
9329 | + NULL, NULL, NULL); | |
9330 | + if (!locks_proc_entry) | |
9331 | + return; | |
9332 | + locks_proc_entry->proc_fops = &locks_fops; | |
9333 | +#endif | |
9334 | +} | |
9335 | + | |
9336 | +void dlm_proc_exit(void) /* Remove whichever proc entries were created, then free the debug buffer. */ | |
9337 | +{ | |
9338 | +#ifdef CONFIG_PROC_FS | |
9339 | + if (debug_proc_entry) | |
9340 | + remove_proc_entry("cluster/dlm_debug", NULL); | |
9341 | + if (rcom_proc_entry) | |
9342 | + remove_proc_entry("cluster/dlm_rcom", NULL); | |
9343 | +#endif | |
9344 | + | |
9345 | +#ifdef CONFIG_CLUSTER_DLM_PROCLOCKS | |
9346 | + if (locks_proc_entry) | |
9347 | + remove_proc_entry("cluster/dlm_locks", NULL); | |
9348 | +#endif | |
9349 | + | |
9350 | + /* Free the buffer unconditionally: it is allocated whether or not the proc entries exist. */ | |
9351 | + dlm_debug_setup(0); | |
9352 | +} | |
9353 | diff -urN linux-orig/cluster/dlm/queries.c linux-patched/cluster/dlm/queries.c | |
9354 | --- linux-orig/cluster/dlm/queries.c 1970-01-01 07:30:00.000000000 +0730 | |
10d56c87 | 9355 | +++ linux-patched/cluster/dlm/queries.c 2004-07-13 18:57:22.000000000 +0800 |
5cdbd17b | 9356 | @@ -0,0 +1,696 @@ |
4bf12011 | 9357 | +/****************************************************************************** |
9358 | +******************************************************************************* | |
9359 | +** | |
9360 | +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | |
9361 | +** Copyright (C) 2004 Red Hat, Inc. All rights reserved. | |
9362 | +** | |
9363 | +** This copyrighted material is made available to anyone wishing to use, | |
9364 | +** modify, copy, or redistribute it subject to the terms and conditions | |
9365 | +** of the GNU General Public License v.2. | |
9366 | +** | |
9367 | +******************************************************************************* | |
9368 | +******************************************************************************/ | |
9369 | + | |
9370 | +/* | |
9371 | + * queries.c | |
9372 | + * | |
9373 | + * This file provides the kernel query interface to the DLM. | |
9374 | + * | |
9375 | + */ | |
9376 | + | |
9377 | +#define EXPORT_SYMTAB | |
9378 | +#include <linux/module.h> | |
9379 | + | |
9380 | +#include "dlm_internal.h" | |
5cdbd17b | 9381 | +#include "lockspace.h" |
4bf12011 | 9382 | +#include "lockqueue.h" |
9383 | +#include "locking.h" | |
9384 | +#include "lkb.h" | |
9385 | +#include "nodes.h" | |
9386 | +#include "dir.h" | |
9387 | +#include "ast.h" | |
9388 | +#include "memory.h" | |
9389 | +#include "lowcomms.h" | |
9390 | +#include "midcomms.h" | |
9391 | +#include "rsb.h" | |
9392 | + | |
10d56c87 AM |
9393 | +static int query_resource(struct dlm_rsb *rsb, struct dlm_resinfo *resinfo); |
9394 | +static int query_locks(int query, struct dlm_lkb *lkb, struct dlm_queryinfo *qinfo); | |
4bf12011 | 9395 | + |
9396 | +/* | |
9397 | + * API entry point. | |
9398 | + */ | |
9399 | +int dlm_query(void *lockspace, | |
9400 | + struct dlm_lksb *lksb, | |
9401 | + int query, | |
9402 | + struct dlm_queryinfo *qinfo, | |
9403 | + void (ast_routine(void *)), | |
9404 | + void *astarg) | |
9405 | +{ /* Query the resource behind the lock identified by lksb->sb_lkid. Results are written into qinfo and completion is signalled through ast_routine. Returns 0 once an AST has been queued or the remote request has been sent, otherwise -EINVAL, -ENOMEM or -ENOBUFS. */ | |
9406 | + int status = -EINVAL; | |
10d56c87 AM |
9407 | + struct dlm_lkb *target_lkb; |
9408 | + struct dlm_lkb *query_lkb = NULL; /* Our temporary LKB */ |
9409 | + struct dlm_ls *ls = (struct dlm_ls *) find_lockspace_by_local_id(lockspace); |
4bf12011 | 9410 | + |
9411 | + | |
9412 | + if (!qinfo) | |
9413 | + goto out; | |
9414 | + if (!ls) | |
9415 | + goto out; | |
9416 | + if (!ast_routine) | |
9417 | + goto out; | |
9418 | + if (!lksb) | |
9419 | + goto out; | |
9420 | + | |
9421 | + if (!qinfo->gqi_lockinfo) /* no lock array supplied, so no lock entries can be stored */ | |
9422 | + qinfo->gqi_locksize = 0; | |
9423 | + | |
9424 | + /* Find the lkid */ | |
9425 | + target_lkb = find_lock_by_id(ls, lksb->sb_lkid); | |
9426 | + if (!target_lkb) | |
9427 | + goto out; | |
9428 | + | |
9429 | + /* If the user wants a list of locks that are blocking or | |
9430 | + not blocking this lock, then it must be waiting | |
9431 | + for something | |
9432 | + */ | |
9433 | + if (((query & DLM_QUERY_MASK) == DLM_QUERY_LOCKS_BLOCKING || | |
9434 | + (query & DLM_QUERY_MASK) == DLM_QUERY_LOCKS_NOTBLOCK) && | |
9435 | + target_lkb->lkb_status == GDLM_LKSTS_GRANTED) | |
9436 | + return -EINVAL; | |
9437 | + | |
9438 | + /* We now allocate an LKB for our own use (so we can hang | |
9439 | + * things like the AST routine and the lksb from it) */ | |
9440 | + lksb->sb_status = -EBUSY; | |
9441 | + query_lkb = create_lkb(ls); | |
9442 | + if (!query_lkb) { | |
9443 | + status = -ENOMEM; | |
9444 | + goto out; | |
9445 | + } | |
9446 | + query_lkb->lkb_astaddr = ast_routine; | |
9447 | + query_lkb->lkb_astparam = (long)astarg; | |
9448 | + query_lkb->lkb_resource = target_lkb->lkb_resource; | |
9449 | + query_lkb->lkb_lksb = lksb; | |
9450 | + | |
9451 | + /* Don't free the resource while we are querying it. This ref | |
9452 | + * will be dropped when the LKB is freed */ | |
9453 | + hold_rsb(query_lkb->lkb_resource); | |
9454 | + | |
9455 | + /* Fill in the stuff that's always local */ | |
9456 | + if (qinfo->gqi_resinfo) { | |
9457 | + if (target_lkb->lkb_resource->res_nodeid) /* a res_nodeid of 0 is reported as our own node id */ | |
9458 | + qinfo->gqi_resinfo->rsi_masternode = | |
9459 | + target_lkb->lkb_resource->res_nodeid; | |
9460 | + else | |
9461 | + qinfo->gqi_resinfo->rsi_masternode = our_nodeid(); | |
9462 | + qinfo->gqi_resinfo->rsi_length = | |
9463 | + target_lkb->lkb_resource->res_length; | |
9464 | + memcpy(qinfo->gqi_resinfo->rsi_name, | |
9465 | + target_lkb->lkb_resource->res_name, | |
9466 | + qinfo->gqi_resinfo->rsi_length); | |
9467 | + } | |
9468 | + | |
9469 | + /* If the master is local (or the user doesn't want the overhead of a | |
9470 | + * remote call) - fill in the details here */ | |
9471 | + if (target_lkb->lkb_resource->res_nodeid == 0 || | |
9472 | + (query & DLM_QUERY_LOCAL)) { | |
9473 | + | |
9474 | + status = 0; | |
9475 | + /* Resource info */ | |
9476 | + if (qinfo->gqi_resinfo) { | |
9477 | + query_resource(target_lkb->lkb_resource, | |
9478 | + qinfo->gqi_resinfo); | |
9479 | + } | |
9480 | + | |
9481 | + /* Lock lists */ | |
9482 | + if (qinfo->gqi_lockinfo) { | |
9483 | + status = query_locks(query, target_lkb, qinfo); | |
9484 | + } | |
9485 | + | |
9486 | + query_lkb->lkb_retstatus = status; /* the query result travels in the AST, not in the return value */ | |
9487 | + queue_ast(query_lkb, AST_COMP | AST_DEL, 0); |
4bf12011 | 9488 | + wake_astd(); |
9489 | + | |
9490 | + /* An AST will be delivered so we must return success here */ | |
9491 | + status = 0; | |
9492 | + goto out; | |
9493 | + } | |
9494 | + | |
9495 | + /* Remote master */ | |
9496 | + if (target_lkb->lkb_resource->res_nodeid != 0) | |
9497 | + { | |
10d56c87 | 9498 | + struct dlm_query_request *remquery; |
4bf12011 | 9499 | + struct writequeue_entry *e; |
9500 | + | |
9501 | + /* Clear this cos the receiving end adds to it with | |
9502 | + each incoming packet */ | |
9503 | + qinfo->gqi_lockcount = 0; | |
9504 | + | |
9505 | + /* Squirrel a pointer to the query info struct | |
9506 | + somewhere illegal */ | |
10d56c87 | 9507 | + query_lkb->lkb_request = (struct dlm_request *) qinfo; /* read back by remote_query_reply() */ |
4bf12011 | 9508 | + |
9509 | + e = lowcomms_get_buffer(query_lkb->lkb_resource->res_nodeid, | |
10d56c87 | 9510 | + sizeof(struct dlm_query_request), |
4bf12011 | 9511 | + ls->ls_allocation, |
9512 | + (char **) &remquery); | |
9513 | + if (!e) { | |
9514 | + status = -ENOBUFS; | |
9515 | + goto out; /* NOTE(review): query_lkb and the hold_rsb() reference taken above are not released on this path - confirm whether that leaks */ | |
9516 | + } | |
9517 | + | |
9518 | + /* Build remote packet */ | |
10d56c87 | 9519 | + memset(remquery, 0, sizeof(struct dlm_query_request)); |
4bf12011 | 9520 | + |
9521 | + remquery->rq_maxlocks = qinfo->gqi_locksize; | |
9522 | + remquery->rq_query = query; | |
9523 | + remquery->rq_mstlkid = target_lkb->lkb_remid; | |
9524 | + if (qinfo->gqi_lockinfo) /* repeats the assignment made three lines above */ | |
9525 | + remquery->rq_maxlocks = qinfo->gqi_locksize; | |
9526 | + | |
9527 | + remquery->rq_header.rh_cmd = GDLM_REMCMD_QUERY; | |
9528 | + remquery->rq_header.rh_flags = 0; | |
10d56c87 | 9529 | + remquery->rq_header.rh_length = sizeof(struct dlm_query_request); |
4bf12011 | 9530 | + remquery->rq_header.rh_lkid = query_lkb->lkb_id; |
9531 | + remquery->rq_header.rh_lockspace = ls->ls_global_id; | |
9532 | + | |
9533 | + midcomms_send_buffer(&remquery->rq_header, e); | |
9534 | + status = 0; | |
9535 | + } | |
9536 | + | |
9537 | + out: | |
9538 | + | |
9539 | + return status; | |
9540 | +} | |
9541 | + | |
9542 | +static inline int valid_range(struct dlm_range *r) /* Return 1 when r differs from the full range 0 to 0xFFFFFFFFFFFFFFFF, else 0. */ | |
9543 | +{ | |
9544 | + if (r->ra_start != 0ULL || | |
9545 | + r->ra_end != 0xFFFFFFFFFFFFFFFFULL) | |
9546 | + return 1; | |
9547 | + else | |
9548 | + return 0; | |
9549 | +} | |
9550 | + | |
9551 | +static void put_int(int x, char *buf, int *offp) /* Store x at buf + *offp after cpu_to_le32 conversion and advance *offp by sizeof(int). */ | |
9552 | +{ | |
9553 | + x = cpu_to_le32(x); | |
9554 | + memcpy(buf + *offp, &x, sizeof(int)); /* memcpy, so the destination offset need not be aligned */ | |
9555 | + *offp += sizeof(int); | |
9556 | +} | |
9557 | + | |
9558 | +static void put_int64(uint64_t x, char *buf, int *offp) /* Store x at buf + *offp after cpu_to_le64 conversion and advance *offp by sizeof(uint64_t). */ | |
9559 | +{ | |
9560 | + x = cpu_to_le64(x); | |
9561 | + memcpy(buf + *offp, &x, sizeof(uint64_t)); | |
9562 | + *offp += sizeof(uint64_t); | |
9563 | +} | |
9564 | + | |
9565 | +static int get_int(char *buf, int *offp) /* Read an int from buf + *offp, advance *offp by sizeof(int) and return the value after le32_to_cpu conversion. */ | |
9566 | +{ | |
9567 | + int value; | |
9568 | + memcpy(&value, buf + *offp, sizeof(int)); | |
9569 | + *offp += sizeof(int); | |
9570 | + return le32_to_cpu(value); | |
9571 | +} | |
9572 | + | |
9573 | +static uint64_t get_int64(char *buf, int *offp) /* Read a uint64_t from buf + *offp, advance *offp by sizeof(uint64_t) and return the value after le64_to_cpu conversion. */ | |
9574 | +{ | |
9575 | + uint64_t value; | |
9576 | + | |
9577 | + memcpy(&value, buf + *offp, sizeof(uint64_t)); | |
9578 | + *offp += sizeof(uint64_t); | |
9579 | + return le64_to_cpu(value); | |
9580 | +} | |
9581 | + | |
9582 | +#define LOCK_LEN (sizeof(int)*4 + sizeof(uint8_t)*4) | |
9583 | + | |
9584 | +/* Called from recvd to get lock info for a remote node */ | |
10d56c87 | 9585 | +int remote_query(int nodeid, struct dlm_ls *ls, struct dlm_header *msg) /* Answer a query request from nodeid: gather resource and lock information for the lock named in the request and send it back in one or more reply messages, the last one flagged GDLM_REMFLAG_ENDQUERY. Returns the query status, or -ENOBUFS when no send buffer is available. */ |
4bf12011 | 9586 | +{ |
10d56c87 AM |
9587 | + struct dlm_query_request *query = (struct dlm_query_request *) msg; |
9588 | + struct dlm_query_reply *reply; | |
4bf12011 | 9589 | + struct dlm_resinfo resinfo; |
9590 | + struct dlm_queryinfo qinfo; | |
9591 | + struct writequeue_entry *e; | |
9592 | + char *buf; | |
10d56c87 | 9593 | + struct dlm_lkb *lkb; |
4bf12011 | 9594 | + int status = 0; |
9595 | + int bufidx; | |
9596 | + int finished = 0; | |
9597 | + int cur_lock = 0; | |
9598 | + int start_lock = 0; | |
9599 | + | |
9600 | + lkb = find_lock_by_id(ls, query->rq_mstlkid); | |
9601 | + if (!lkb) { | |
9602 | + status = -EINVAL; | |
9603 | + goto send_error; | |
9604 | + } | |
9605 | + memset(&resinfo, 0, sizeof(resinfo)); /* query_resource() leaves rsi_valblk untouched when there is no LVB; never send stack garbage to the peer */ | |
9606 | + qinfo.gqi_resinfo = &resinfo; | |
9607 | + qinfo.gqi_locksize = query->rq_maxlocks; | |
9608 | + | |
9609 | + /* Get the resource bits */ | |
9610 | + query_resource(lkb->lkb_resource, &resinfo); | |
9611 | + | |
9612 | + /* Now get the locks if wanted */ | |
9613 | + if (query->rq_maxlocks) { | |
9614 | + qinfo.gqi_lockinfo = kmalloc(sizeof(struct dlm_lockinfo) * query->rq_maxlocks, | |
9615 | + GFP_KERNEL); | |
9616 | + if (!qinfo.gqi_lockinfo) { | |
9617 | + status = -ENOMEM; | |
9618 | + goto send_error; | |
9619 | + } | |
9620 | + | |
9621 | + status = query_locks(query->rq_query, lkb, &qinfo); | |
9622 | + if (status && status != -E2BIG) { /* -E2BIG still returns the locks that did fit */ | |
9623 | + kfree(qinfo.gqi_lockinfo); | |
9624 | + goto send_error; | |
9625 | + } | |
9626 | + } | |
9627 | + else { | |
9628 | + qinfo.gqi_lockinfo = NULL; | |
9629 | + qinfo.gqi_lockcount = 0; | |
9630 | + } | |
9631 | + | |
9632 | + /* Send as many blocks as needed for all the locks */ | |
9633 | + do { | |
9634 | + int i; | |
10d56c87 | 9635 | + int msg_len = sizeof(struct dlm_query_reply); |
4bf12011 | 9636 | + int last_msg_len = msg_len; /* keeps compiler quiet */ |
9637 | + int last_lock; | |
9638 | + | |
9639 | + /* First work out how many locks we can fit into a block */ | |
9640 | + for (i=cur_lock; i < qinfo.gqi_lockcount && msg_len < PAGE_SIZE; i++) { | |
9641 | + | |
9642 | + last_msg_len = msg_len; | |
9643 | + | |
9644 | + msg_len += LOCK_LEN; | |
9645 | + if (valid_range(&qinfo.gqi_lockinfo[i].lki_grrange) || | |
9646 | + valid_range(&qinfo.gqi_lockinfo[i].lki_rqrange)) { | |
9647 | + | |
9648 | + msg_len += sizeof(uint64_t) * 4; | |
9649 | + } | |
9650 | + } | |
9651 | + | |
9652 | + /* There must be a neater way of doing this... */ | |
9653 | + if (msg_len > PAGE_SIZE) { /* the last lock counted overflowed the page: leave it for the next block */ | |
9654 | + last_lock = i-1; | |
9655 | + msg_len = last_msg_len; | |
9656 | + } | |
9657 | + else { | |
9658 | + last_lock = i; | |
9659 | + } | |
9660 | + | |
9661 | + e = lowcomms_get_buffer(nodeid, | |
9662 | + msg_len, | |
9663 | + ls->ls_allocation, | |
9664 | + (char **) &reply); | |
9665 | + if (!e) { | |
9666 | + kfree(qinfo.gqi_lockinfo); | |
9667 | + status = -ENOBUFS; | |
9668 | + goto out; | |
9669 | + } | |
9670 | + | |
9671 | + reply->rq_header.rh_cmd = GDLM_REMCMD_QUERYREPLY; | |
9672 | + reply->rq_header.rh_length = msg_len; | |
9673 | + reply->rq_header.rh_lkid = msg->rh_lkid; | |
9674 | + reply->rq_header.rh_lockspace = msg->rh_lockspace; | |
9675 | + | |
9676 | + reply->rq_status = status; | |
9677 | + reply->rq_startlock = cur_lock; | |
9678 | + reply->rq_grantcount = qinfo.gqi_resinfo->rsi_grantcount; | |
9679 | + reply->rq_convcount = qinfo.gqi_resinfo->rsi_convcount; | |
9680 | + reply->rq_waitcount = qinfo.gqi_resinfo->rsi_waitcount; | |
9681 | + memcpy(reply->rq_valblk, qinfo.gqi_resinfo->rsi_valblk, DLM_LVB_LEN); | |
9682 | + | |
9683 | + buf = (char *)reply; | |
10d56c87 | 9684 | + bufidx = sizeof(struct dlm_query_reply); |
4bf12011 | 9685 | + |
9686 | + for (; cur_lock < last_lock; cur_lock++) { /* per lock: 3 mode bytes, 4 ints, 1 range flag byte, then 4 optional 64-bit range values */ | |
9687 | + | |
9688 | + buf[bufidx++] = qinfo.gqi_lockinfo[cur_lock].lki_state; | |
9689 | + buf[bufidx++] = qinfo.gqi_lockinfo[cur_lock].lki_grmode; | |
9690 | + buf[bufidx++] = qinfo.gqi_lockinfo[cur_lock].lki_rqmode; | |
9691 | + put_int(qinfo.gqi_lockinfo[cur_lock].lki_lkid, buf, &bufidx); | |
9692 | + put_int(qinfo.gqi_lockinfo[cur_lock].lki_mstlkid, buf, &bufidx); | |
9693 | + put_int(qinfo.gqi_lockinfo[cur_lock].lki_parent, buf, &bufidx); | |
9694 | + put_int(qinfo.gqi_lockinfo[cur_lock].lki_node, buf, &bufidx); | |
9695 | + | |
9696 | + if (valid_range(&qinfo.gqi_lockinfo[cur_lock].lki_grrange) || | |
9697 | + valid_range(&qinfo.gqi_lockinfo[cur_lock].lki_rqrange)) { | |
9698 | + | |
9699 | + buf[bufidx++] = 1; | |
9700 | + put_int64(qinfo.gqi_lockinfo[cur_lock].lki_grrange.ra_start, buf, &bufidx); | |
9701 | + put_int64(qinfo.gqi_lockinfo[cur_lock].lki_grrange.ra_end, buf, &bufidx); | |
9702 | + put_int64(qinfo.gqi_lockinfo[cur_lock].lki_rqrange.ra_start, buf, &bufidx); | |
9703 | + put_int64(qinfo.gqi_lockinfo[cur_lock].lki_rqrange.ra_end, buf, &bufidx); | |
9704 | + } | |
9705 | + else { | |
9706 | + buf[bufidx++] = 0; | |
9707 | + } | |
9708 | + } | |
9709 | + | |
9710 | + if (cur_lock == qinfo.gqi_lockcount) { | |
9711 | + reply->rq_header.rh_flags = GDLM_REMFLAG_ENDQUERY; | |
9712 | + finished = 1; | |
9713 | + } | |
9714 | + else { | |
9715 | + reply->rq_header.rh_flags = 0; | |
9716 | + } | |
9717 | + | |
9718 | + reply->rq_numlocks = cur_lock - start_lock; | |
9719 | + start_lock = cur_lock; | |
9720 | + | |
9721 | + midcomms_send_buffer(&reply->rq_header, e); | |
9722 | + } while (!finished); | |
9723 | + | |
9724 | + kfree(qinfo.gqi_lockinfo); | |
9725 | + out: | |
9726 | + return status; | |
9727 | + | |
9728 | + send_error: | |
9729 | + e = lowcomms_get_buffer(nodeid, | |
10d56c87 | 9730 | + sizeof(struct dlm_query_reply), |
4bf12011 | 9731 | + ls->ls_allocation, |
9732 | + (char **) &reply); | |
9733 | + if (!e) { | |
9734 | + status = -ENOBUFS; | |
9735 | + goto out; | |
9736 | + } | |
9737 | + reply->rq_header.rh_cmd = GDLM_REMCMD_QUERYREPLY; | |
9738 | + reply->rq_header.rh_flags = GDLM_REMFLAG_ENDQUERY; /* Don't support multiple blocks yet */ | |
10d56c87 | 9739 | + reply->rq_header.rh_length = sizeof(struct dlm_query_reply); |
4bf12011 | 9740 | + reply->rq_header.rh_lkid = msg->rh_lkid; |
9741 | + reply->rq_header.rh_lockspace = msg->rh_lockspace; | |
9742 | + reply->rq_status = status; | |
9743 | + reply->rq_numlocks = 0; | |
9744 | + reply->rq_startlock = 0; | |
9745 | + reply->rq_grantcount = 0; | |
9746 | + reply->rq_convcount = 0; | |
9747 | + reply->rq_waitcount = 0; | |
9748 | + | |
9749 | + midcomms_send_buffer(&reply->rq_header, e); | |
9750 | + | |
9751 | + return status; | |
9752 | +} | |
9753 | + | |
9754 | +/* Reply to a remote query */ | |
10d56c87 | 9755 | +int remote_query_reply(int nodeid, struct dlm_ls *ls, struct dlm_header *msg) /* Unpack one reply block into the dlm_queryinfo stashed in lkb_request of the query lkb; when the block carries GDLM_REMFLAG_ENDQUERY, queue the completion AST. Returns 0, or -EINVAL when rh_lkid matches no lock. */ |
4bf12011 | 9756 | +{ |
10d56c87 | 9757 | + struct dlm_lkb *query_lkb; |
4bf12011 | 9758 | + struct dlm_queryinfo *qinfo; |
10d56c87 | 9759 | + struct dlm_query_reply *reply; |
4bf12011 | 9760 | + char *buf; |
9761 | + int i; | |
9762 | + int bufidx; | |
9763 | + | |
9764 | + query_lkb = find_lock_by_id(ls, msg->rh_lkid); | |
9765 | + if (!query_lkb) | |
9766 | + return -EINVAL; | |
9767 | + | |
9768 | + qinfo = (struct dlm_queryinfo *) query_lkb->lkb_request; /* the pointer dlm_query() stored there for the remote case */ | |
10d56c87 | 9769 | + reply = (struct dlm_query_reply *) msg; |
4bf12011 | 9770 | + |
9771 | + /* Copy the easy bits first */ | |
9772 | + qinfo->gqi_lockcount += reply->rq_numlocks; /* accumulates across the blocks of one reply */ | |
9773 | + if (qinfo->gqi_resinfo) { | |
9774 | + qinfo->gqi_resinfo->rsi_grantcount = reply->rq_grantcount; | |
9775 | + qinfo->gqi_resinfo->rsi_convcount = reply->rq_convcount; | |
9776 | + qinfo->gqi_resinfo->rsi_waitcount = reply->rq_waitcount; | |
9777 | + memcpy(qinfo->gqi_resinfo->rsi_valblk, reply->rq_valblk, | |
9778 | + DLM_LVB_LEN); | |
9779 | + } | |
9780 | + | |
9781 | + /* Now unpack the locks */ | |
10d56c87 | 9782 | + bufidx = sizeof(struct dlm_query_reply); |
4bf12011 | 9783 | + buf = (char *) msg; |
9784 | + | |
10d56c87 | 9785 | + DLM_ASSERT(reply->rq_startlock + reply->rq_numlocks <= qinfo->gqi_locksize, |
4bf12011 | 9786 | + printk("start = %d, num + %d. Max= %d\n", |
9787 | + reply->rq_startlock, reply->rq_numlocks, qinfo->gqi_locksize);); | |
9788 | + | |
9789 | + for (i = reply->rq_startlock; | |
9790 | + i < reply->rq_startlock + reply->rq_numlocks; i++) { /* field order mirrors the packing loop in remote_query() */ | |
9791 | + qinfo->gqi_lockinfo[i].lki_state = buf[bufidx++]; | |
9792 | + qinfo->gqi_lockinfo[i].lki_grmode = buf[bufidx++]; | |
9793 | + qinfo->gqi_lockinfo[i].lki_rqmode = buf[bufidx++]; | |
9794 | + qinfo->gqi_lockinfo[i].lki_lkid = get_int(buf, &bufidx); | |
9795 | + qinfo->gqi_lockinfo[i].lki_mstlkid = get_int(buf, &bufidx); | |
9796 | + qinfo->gqi_lockinfo[i].lki_parent = get_int(buf, &bufidx); | |
9797 | + qinfo->gqi_lockinfo[i].lki_node = get_int(buf, &bufidx); | |
9798 | + if (buf[bufidx++]) { /* flag byte: non-zero means four 64-bit range values follow */ | |
9799 | + qinfo->gqi_lockinfo[i].lki_grrange.ra_start = get_int64(buf, &bufidx); | |
9800 | + qinfo->gqi_lockinfo[i].lki_grrange.ra_end = get_int64(buf, &bufidx); | |
9801 | + qinfo->gqi_lockinfo[i].lki_rqrange.ra_start = get_int64(buf, &bufidx); | |
9802 | + qinfo->gqi_lockinfo[i].lki_rqrange.ra_end = get_int64(buf, &bufidx); | |
9803 | + } | |
9804 | + else { | |
9805 | + qinfo->gqi_lockinfo[i].lki_grrange.ra_start = 0ULL; | |
9806 | + qinfo->gqi_lockinfo[i].lki_grrange.ra_end = 0xFFFFFFFFFFFFFFFFULL; | |
9807 | + qinfo->gqi_lockinfo[i].lki_rqrange.ra_start = 0ULL; | |
9808 | + qinfo->gqi_lockinfo[i].lki_rqrange.ra_end = 0xFFFFFFFFFFFFFFFFULL; | |
9809 | + } | |
9810 | + } | |
9811 | + | |
9812 | + /* If this was the last block then now tell the user */ | |
9813 | + if (msg->rh_flags & GDLM_REMFLAG_ENDQUERY) { | |
9814 | + query_lkb->lkb_retstatus = reply->rq_status; | |
5cdbd17b | 9815 | + queue_ast(query_lkb, AST_COMP | AST_DEL, 0); |
4bf12011 | 9816 | + wake_astd(); |
9817 | + } | |
9818 | + | |
9819 | + return 0; | |
9820 | +} | |
9821 | + | |
9822 | +/* Aggregate resource information */ | |
10d56c87 | 9823 | +static int query_resource(struct dlm_rsb *rsb, struct dlm_resinfo *resinfo) /* Copy the LVB of rsb (if it has one) into resinfo and count the entries on its grant, wait and convert queues. Always returns 0. */ |
4bf12011 | 9824 | +{ |
9825 | + struct list_head *tmp; | |
9826 | + | |
9827 | + | |
9828 | + if (rsb->res_lvbptr) /* without an LVB, rsi_valblk is left as the caller passed it */ | |
9829 | + memcpy(resinfo->rsi_valblk, rsb->res_lvbptr, DLM_LVB_LEN); | |
9830 | + | |
9831 | + resinfo->rsi_grantcount = 0; | |
9832 | + list_for_each(tmp, &rsb->res_grantqueue) { | |
9833 | + resinfo->rsi_grantcount++; | |
9834 | + } | |
9835 | + | |
9836 | + resinfo->rsi_waitcount = 0; | |
9837 | + list_for_each(tmp, &rsb->res_waitqueue) { | |
9838 | + resinfo->rsi_waitcount++; | |
9839 | + } | |
9840 | + | |
9841 | + resinfo->rsi_convcount = 0; | |
9842 | + list_for_each(tmp, &rsb->res_convertqueue) { | |
9843 | + resinfo->rsi_convcount++; | |
9844 | + } | |
9845 | + | |
9846 | + return 0; | |
9847 | +} | |
9848 | + | |
10d56c87 | 9849 | +static int add_lock(struct dlm_lkb *lkb, struct dlm_queryinfo *qinfo) /* Append a description of lkb to qinfo->gqi_lockinfo and bump gqi_lockcount. Returns 0, or -E2BIG when the array already holds gqi_locksize entries. */ |
4bf12011 | 9850 | +{ |
9851 | + int entry; | |
9852 | + | |
9853 | + /* Don't fill it in if the buffer is full */ | |
9854 | + if (qinfo->gqi_lockcount == qinfo->gqi_locksize) | |
9855 | + return -E2BIG; | |
9856 | + | |
9857 | + /* gqi_lockcount contains the number of locks we have returned */ | |
9858 | + entry = qinfo->gqi_lockcount++; | |
9859 | + | |
9860 | + /* Fun with master copies */ | |
9861 | + if (lkb->lkb_flags & GDLM_LKFLG_MSTCPY) { /* for a master copy the local and remote ids swap roles */ | |
9862 | + qinfo->gqi_lockinfo[entry].lki_lkid = lkb->lkb_remid; | |
9863 | + qinfo->gqi_lockinfo[entry].lki_mstlkid = lkb->lkb_id; | |
9864 | + } | |
9865 | + else { | |
9866 | + qinfo->gqi_lockinfo[entry].lki_lkid = lkb->lkb_id; | |
9867 | + qinfo->gqi_lockinfo[entry].lki_mstlkid = lkb->lkb_remid; | |
9868 | + } | |
9869 | + | |
9870 | + /* Also make sure we always have a valid nodeid in there, the | |
9871 | + calling end may not know which node "0" is */ | |
9872 | + if (lkb->lkb_nodeid) | |
9873 | + qinfo->gqi_lockinfo[entry].lki_node = lkb->lkb_nodeid; | |
9874 | + else | |
9875 | + qinfo->gqi_lockinfo[entry].lki_node = our_nodeid(); | |
9876 | + | |
9877 | + if (lkb->lkb_parent) | |
9878 | + qinfo->gqi_lockinfo[entry].lki_parent = lkb->lkb_parent->lkb_id; | |
9879 | + else | |
9880 | + qinfo->gqi_lockinfo[entry].lki_parent = 0; | |
9881 | + | |
9882 | + qinfo->gqi_lockinfo[entry].lki_state = lkb->lkb_status; | |
9883 | + qinfo->gqi_lockinfo[entry].lki_rqmode = lkb->lkb_rqmode; | |
9884 | + qinfo->gqi_lockinfo[entry].lki_grmode = lkb->lkb_grmode; | |
9885 | + | |
9886 | + if (lkb->lkb_range) { | |
9887 | + qinfo->gqi_lockinfo[entry].lki_grrange.ra_start = | |
9888 | + lkb->lkb_range[GR_RANGE_START]; | |
9889 | + qinfo->gqi_lockinfo[entry].lki_grrange.ra_end = | |
9890 | + lkb->lkb_range[GR_RANGE_END]; | |
9891 | + qinfo->gqi_lockinfo[entry].lki_rqrange.ra_start = | |
9892 | + lkb->lkb_range[RQ_RANGE_START]; | |
9893 | + qinfo->gqi_lockinfo[entry].lki_rqrange.ra_end = | |
9894 | + lkb->lkb_range[RQ_RANGE_END]; | |
9895 | + } else { /* no range on the lock: report the full range, which valid_range() treats as unset */ | |
9896 | + qinfo->gqi_lockinfo[entry].lki_grrange.ra_start = 0ULL; | |
9897 | + qinfo->gqi_lockinfo[entry].lki_grrange.ra_end = 0xffffffffffffffffULL; | |
9898 | + qinfo->gqi_lockinfo[entry].lki_rqrange.ra_start = 0ULL; | |
9899 | + qinfo->gqi_lockinfo[entry].lki_rqrange.ra_end = 0xffffffffffffffffULL; | |
9900 | + } | |
9901 | + return 0; | |
9902 | +} | |
9903 | + | |
9904 | +static int query_lkb_queue(struct list_head *queue, int query, | |
9905 | + struct dlm_queryinfo *qinfo) | |
9906 | +{ /* Walk one lock queue and add_lock() every lkb whose mode satisfies the comparison selected by query & DLM_QUERY_MASK against the mode in query & DLM_QUERY_MODE_MASK. Returns the status of the last add_lock() call, or 0 if none was made. */ | |
9907 | + struct list_head *tmp; | |
9908 | + int status = 0; | |
9909 | + int mode = query & DLM_QUERY_MODE_MASK; | |
9910 | + | |
9911 | + list_for_each(tmp, queue) { | |
10d56c87 | 9912 | + struct dlm_lkb *lkb = list_entry(tmp, struct dlm_lkb, lkb_statequeue); |
4bf12011 | 9913 | + int lkmode; |
9914 | + | |
9915 | + if (query & DLM_QUERY_RQMODE) /* compare against the requested mode rather than the granted mode */ | |
9916 | + lkmode = lkb->lkb_rqmode; | |
9917 | + else | |
9918 | + lkmode = lkb->lkb_grmode; | |
9919 | + | |
9920 | + /* Add the LKB info to the list if it matches the criteria in | |
9921 | + * the query bitmap */ | |
9922 | + switch (query & DLM_QUERY_MASK) { /* any other query type adds nothing */ | |
9923 | + case DLM_QUERY_LOCKS_ALL: | |
9924 | + status = add_lock(lkb, qinfo); | |
9925 | + break; | |
9926 | + | |
9927 | + case DLM_QUERY_LOCKS_HIGHER: | |
9928 | + if (lkmode > mode) | |
9929 | + status = add_lock(lkb, qinfo); | |
9930 | + break; | |
9931 | + | |
9932 | + case DLM_QUERY_LOCKS_EQUAL: | |
9933 | + if (lkmode == mode) | |
9934 | + status = add_lock(lkb, qinfo); | |
9935 | + break; | |
9936 | + | |
9937 | + case DLM_QUERY_LOCKS_LOWER: | |
9938 | + if (lkmode < mode) | |
9939 | + status = add_lock(lkb, qinfo); | |
9940 | + break; | |
9941 | + } | |
9942 | + } | |
9943 | + return status; | |
9944 | +} | |
9945 | + | |
9946 | +/* | |
9947 | + * Return 1 if the locks' ranges overlap | |
9948 | + * If the lkb has no range then it is assumed to cover 0-ffffffff.ffffffff | |
9949 | + */ | |
10d56c87 | 9950 | +static inline int ranges_overlap(struct dlm_lkb *lkb1, struct dlm_lkb *lkb2) /* Compares the requested range of lkb1 with the granted range of lkb2. */ |
4bf12011 | 9951 | +{ |
9952 | + if (!lkb1->lkb_range || !lkb2->lkb_range) /* a lock without a range covers everything, so it always overlaps */ | |
9953 | + return 1; | |
9954 | + | |
9955 | + if (lkb1->lkb_range[RQ_RANGE_END] <= lkb2->lkb_range[GR_RANGE_START] || | |
9956 | + lkb1->lkb_range[RQ_RANGE_START] >= lkb2->lkb_range[GR_RANGE_END]) /* NOTE(review): ranges that only share an end point count as not overlapping here - confirm range ends are exclusive */ | |
9957 | + return 0; | |
9958 | + | |
9959 | + return 1; | |
9960 | +} | |
9961 | +extern const int __dlm_compat_matrix[8][8]; | |
9962 | + | |
9963 | + | |
10d56c87 | 9964 | +static int get_blocking_locks(struct dlm_lkb *qlkb, struct dlm_queryinfo *qinfo) /* Add every lock on the grant queue of the qlkb resource whose range overlaps qlkb and whose granted mode has a zero __dlm_compat_matrix entry against the requested mode of qlkb. Returns the status of the last add_lock() call, or 0 if none was made. */ |
4bf12011 | 9965 | +{ |
9966 | + struct list_head *tmp; | |
9967 | + int status = 0; | |
9968 | + | |
9969 | + list_for_each(tmp, &qlkb->lkb_resource->res_grantqueue) { | |
10d56c87 | 9970 | + struct dlm_lkb *lkb = list_entry(tmp, struct dlm_lkb, lkb_statequeue); |
4bf12011 | 9971 | + |
9972 | + if (ranges_overlap(lkb, qlkb) && /* NOTE(review): ranges_overlap() reads the requested range of its first argument and the granted range of its second, and the granted lock is passed first here - confirm the argument order */ | |
9973 | + !__dlm_compat_matrix[lkb->lkb_grmode + 1][qlkb->lkb_rqmode + 1]) | |
9974 | + status = add_lock(lkb, qinfo); | |
9975 | + } | |
9976 | + | |
9977 | + return status; | |
9978 | +} | |
9979 | + | |
10d56c87 | 9980 | +static int get_nonblocking_locks(struct dlm_lkb *qlkb, struct dlm_queryinfo *qinfo) /* Complement of get_blocking_locks(): add every grant queue lock for which the overlap-and-incompatible test is false. Returns the status of the last add_lock() call, or 0 if none was made. */ |
4bf12011 | 9981 | +{ |
9982 | + struct list_head *tmp; | |
9983 | + int status = 0; | |
9984 | + | |
9985 | + list_for_each(tmp, &qlkb->lkb_resource->res_grantqueue) { | |
10d56c87 | 9986 | + struct dlm_lkb *lkb = list_entry(tmp, struct dlm_lkb, lkb_statequeue); |
4bf12011 | 9987 | + |
9988 | + if (!(ranges_overlap(lkb, qlkb) && /* exact negation of the condition used in get_blocking_locks() */ | |
9989 | + !__dlm_compat_matrix[lkb->lkb_grmode + 1][qlkb->lkb_rqmode + 1])) | |
9990 | + status = add_lock(lkb, qinfo); | |
9991 | + } | |
9992 | + | |
9993 | + return status; | |
9994 | +} | |
9995 | + | |
9996 | +/* Gather a list of appropriate locks */ | |
10d56c87 | 9997 | +static int query_locks(int query, struct dlm_lkb *lkb, struct dlm_queryinfo *qinfo) /* Reset gqi_lockcount and fill qinfo with the locks selected by query, taken from the queues of the lkb resource. Returns 0 or the status from add_lock(), which is -E2BIG when the array is full. */ |
4bf12011 | 9998 | +{ |
9999 | + int status = 0; | |
10000 | + | |
10001 | + | |
10002 | + /* Mask in the actual granted/requested mode of the lock if LOCK_THIS | |
10003 | + * was requested as the mode | |
10004 | + */ | |
10005 | + if ((query & DLM_QUERY_MODE_MASK) == DLM_LOCK_THIS) { | |
10006 | + query &= ~DLM_QUERY_MODE_MASK; | |
10007 | + if (query & DLM_QUERY_RQMODE) | |
10008 | + query |= lkb->lkb_rqmode; | |
10009 | + else | |
10010 | + query |= lkb->lkb_grmode; | |
10011 | + } | |
10012 | + | |
10013 | + qinfo->gqi_lockcount = 0; | |
10014 | + | |
10015 | + /* BLOCKING/NOTBLOCK only look at the granted queue */ | |
10016 | + if ((query & DLM_QUERY_MASK) == DLM_QUERY_LOCKS_BLOCKING) | |
10017 | + return get_blocking_locks(lkb, qinfo); | |
10018 | + | |
10019 | + if ((query & DLM_QUERY_MASK) == DLM_QUERY_LOCKS_NOTBLOCK) | |
10020 | + return get_nonblocking_locks(lkb, qinfo); | |
10021 | + | |
10022 | + /* Do the lock queues that were requested */ | |
10023 | + if (query & DLM_QUERY_QUEUE_GRANT) { | |
10024 | + status = query_lkb_queue(&lkb->lkb_resource->res_grantqueue, | |
10025 | + query, qinfo); | |
10026 | + } | |
10027 | + | |
10028 | + if (!status && (query & DLM_QUERY_QUEUE_CONVERT)) { /* later queues are skipped once an earlier one reported an error */ | |
10029 | + status = query_lkb_queue(&lkb->lkb_resource->res_convertqueue, | |
10030 | + query, qinfo); | |
10031 | + } | |
10032 | + | |
10033 | + if (!status && (query & DLM_QUERY_QUEUE_WAIT)) { | |
10034 | + status = query_lkb_queue(&lkb->lkb_resource->res_waitqueue, | |
10035 | + query, qinfo); | |
10036 | + } | |
10037 | + | |
10038 | + | |
10039 | + return status; | |
10040 | +} | |
10041 | + | |
10042 | +EXPORT_SYMBOL(dlm_query); | |
10043 | +/* | |
10044 | + * Overrides for Emacs so that we follow Linus's tabbing style. | |
10045 | + * Emacs will notice this stuff at the end of the file and automatically | |
10046 | + * adjust the settings for this buffer only. This must remain at the end | |
10047 | + * of the file. | |
10048 | + * --------------------------------------------------------------------------- | |
10049 | + * Local variables: | |
10050 | + * c-file-style: "linux" | |
10051 | + * End: | |
10052 | + */ | |
10053 | diff -urN linux-orig/cluster/dlm/queries.h linux-patched/cluster/dlm/queries.h | |
10054 | --- linux-orig/cluster/dlm/queries.h 1970-01-01 07:30:00.000000000 +0730 | |
10d56c87 | 10055 | +++ linux-patched/cluster/dlm/queries.h 2004-07-13 18:57:22.000000000 +0800 |
4bf12011 | 10056 | @@ -0,0 +1,20 @@ |
10057 | +/****************************************************************************** | |
10058 | +******************************************************************************* | |
10059 | +** | |
10060 | +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | |
10061 | +** Copyright (C) 2004 Red Hat, Inc. All rights reserved. | |
10062 | +** | |
10063 | +** This copyrighted material is made available to anyone wishing to use, | |
10064 | +** modify, copy, or redistribute it subject to the terms and conditions | |
10065 | +** of the GNU General Public License v.2. | |
10066 | +** | |
10067 | +******************************************************************************* | |
10068 | +******************************************************************************/ | |
10069 | + | |
10070 | +#ifndef __QUERIES_DOT_H__ | |
10071 | +#define __QUERIES_DOT_H__ | |
10072 | + | |
10d56c87 AM |
10073 | +extern int remote_query(int nodeid, struct dlm_ls *ls, struct dlm_header *msg); |
10074 | +extern int remote_query_reply(int nodeid, struct dlm_ls *ls, struct dlm_header *msg); | |
4bf12011 | 10075 | + |
10076 | +#endif /* __QUERIES_DOT_H__ */ | |
10077 | diff -urN linux-orig/cluster/dlm/rebuild.c linux-patched/cluster/dlm/rebuild.c | |
10078 | --- linux-orig/cluster/dlm/rebuild.c 1970-01-01 07:30:00.000000000 +0730 | |
10d56c87 AM |
10079 | +++ linux-patched/cluster/dlm/rebuild.c 2004-07-13 18:57:22.000000000 +0800 |
10080 | @@ -0,0 +1,1254 @@ | |
4bf12011 | 10081 | +/****************************************************************************** |
10082 | +******************************************************************************* | |
10083 | +** | |
10084 | +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | |
10085 | +** Copyright (C) 2004 Red Hat, Inc. All rights reserved. | |
10086 | +** | |
10087 | +** This copyrighted material is made available to anyone wishing to use, | |
10088 | +** modify, copy, or redistribute it subject to the terms and conditions | |
10089 | +** of the GNU General Public License v.2. | |
10090 | +** | |
10091 | +******************************************************************************* | |
10092 | +******************************************************************************/ | |
10093 | + | |
10094 | +/* | |
10095 | + * Rebuild RSB's on new masters. Functions for transferring locks and | |
10096 | + * subresources to new RSB masters during recovery. | |
10097 | + */ | |
10098 | + | |
10099 | +#include "dlm_internal.h" | |
10100 | +#include "reccomms.h" | |
10101 | +#include "lkb.h" | |
10102 | +#include "rsb.h" | |
10103 | +#include "nodes.h" | |
10104 | +#include "config.h" | |
10105 | +#include "memory.h" | |
10106 | +#include "recover.h" | |
10107 | + | |
10108 | + | |
10109 | +/* Types of entity serialised in remastering messages */ | |
10110 | +#define REMASTER_ROOTRSB 1 | |
10111 | +#define REMASTER_RSB 2 | |
10112 | +#define REMASTER_LKB 3 | |
10113 | + | |
10114 | +struct rcom_fill { | |
10115 | + char * outbuf; /* Beginning of data */ | |
10116 | + int offset; /* Current offset into outbuf */ | |
10117 | + int maxlen; /* Max value of offset */ | |
10118 | + int remasterid; | |
10119 | + int count; | |
10d56c87 AM |
10120 | + struct dlm_rsb * rsb; |
10121 | + struct dlm_rsb * subrsb; | |
10122 | + struct dlm_lkb * lkb; | |
4bf12011 | 10123 | + struct list_head * lkbqueue; |
10124 | + char more; | |
10125 | +}; | |
10126 | +typedef struct rcom_fill rcom_fill_t; | |
10127 | + | |
10128 | + | |
10129 | +struct rebuild_node { | |
10130 | + struct list_head list; | |
10131 | + int nodeid; | |
10d56c87 | 10132 | + struct dlm_rsb * rootrsb; |
4bf12011 | 10133 | +}; |
10134 | +typedef struct rebuild_node rebuild_node_t; | |
10135 | + | |
10136 | + | |
10137 | +/* | |
10138 | + * Root rsb passed in for which all lkb's (own and subrsbs) will be sent to new | |
10139 | + * master. The rsb will be "done" with recovery when the new master has | |
10140 | + * replied with all the new remote lockid's for this rsb's lkb's. | |
10141 | + */ | |
10142 | + | |
10d56c87 | 10143 | +void expect_new_lkids(struct dlm_rsb *rsb) |
4bf12011 | 10144 | +{ |
10145 | + rsb->res_newlkid_expect = 0; | |
10146 | + recover_list_add(rsb); | |
10147 | +} | |
10148 | + | |
10149 | +/* | |
10150 | + * This function is called on root rsb or subrsb when another lkb is being sent | |
10151 | + * to the new master for which we expect to receive a corresponding remote lkid | |
10152 | + */ | |
10153 | + | |
10d56c87 | 10154 | +void need_new_lkid(struct dlm_rsb *rsb) |
4bf12011 | 10155 | +{ |
10d56c87 | 10156 | + struct dlm_rsb *root = rsb; |
4bf12011 | 10157 | + |
10158 | + if (rsb->res_parent) | |
10159 | + root = rsb->res_root; | |
10160 | + | |
10161 | + if (!root->res_newlkid_expect) | |
10162 | + recover_list_add(root); | |
10163 | + else | |
10d56c87 | 10164 | + DLM_ASSERT(test_bit(RESFL_RECOVER_LIST, &root->res_flags),); |
4bf12011 | 10165 | + |
10166 | + root->res_newlkid_expect++; | |
10167 | +} | |
10168 | + | |
10169 | +/* | |
10170 | + * This function is called for each lkb for which a new remote lkid is | |
10171 | + * received. Decrement the count of remote lkids still expected for the | |
10172 | + * root rsb. | |
10173 | + */ | |
10174 | + | |
10d56c87 | 10175 | +void have_new_lkid(struct dlm_lkb *lkb) |
4bf12011 | 10176 | +{ |
10d56c87 | 10177 | + struct dlm_rsb *root = lkb->lkb_resource; |
4bf12011 | 10178 | + |
10179 | + if (root->res_parent) | |
10180 | + root = root->res_root; | |
10181 | + | |
10182 | + down_write(&root->res_lock); | |
10183 | + | |
10d56c87 AM |
10184 | + DLM_ASSERT(root->res_newlkid_expect, |
10185 | + printk("newlkid_expect=%d\n", root->res_newlkid_expect);); | |
4bf12011 | 10186 | + |
10187 | + root->res_newlkid_expect--; | |
10188 | + | |
10189 | + if (!root->res_newlkid_expect) { | |
10190 | + clear_bit(RESFL_NEW_MASTER, &root->res_flags); | |
10191 | + recover_list_del(root); | |
10192 | + } | |
10193 | + up_write(&root->res_lock); | |
10194 | +} | |
10195 | + | |
10196 | +/* | |
10197 | + * Return the rebuild struct for a node - will create an entry on the rootrsb | |
10198 | + * list if necessary. | |
10199 | + * | |
10d56c87 | 10200 | + * Currently no locking is needed here as it all happens in the dlm_recvd |
4bf12011 | 10201 | + * thread |
10202 | + */ | |
10203 | + | |
10d56c87 | 10204 | +static rebuild_node_t *find_rebuild_root(struct dlm_ls *ls, int nodeid) |
4bf12011 | 10205 | +{ |
10206 | + rebuild_node_t *node = NULL; | |
10207 | + | |
10208 | + list_for_each_entry(node, &ls->ls_rebuild_rootrsb_list, list) { | |
10209 | + if (node->nodeid == nodeid) | |
10210 | + return node; | |
10211 | + } | |
10212 | + | |
10213 | + /* Not found, add one */ | |
10214 | + node = kmalloc(sizeof(rebuild_node_t), GFP_KERNEL); | |
10215 | + if (!node) | |
10216 | + return NULL; | |
10217 | + | |
10218 | + node->nodeid = nodeid; | |
10219 | + node->rootrsb = NULL; | |
10220 | + list_add(&node->list, &ls->ls_rebuild_rootrsb_list); | |
10221 | + | |
10222 | + return node; | |
10223 | +} | |
10224 | + | |
10225 | +/* | |
10226 | + * Tidy up after a rebuild run. Called when all recovery has finished | |
10227 | + */ | |
10228 | + | |
10d56c87 | 10229 | +void rebuild_freemem(struct dlm_ls *ls) |
4bf12011 | 10230 | +{ |
10231 | + rebuild_node_t *node = NULL, *s; | |
10232 | + | |
10233 | + list_for_each_entry_safe(node, s, &ls->ls_rebuild_rootrsb_list, list) { | |
10234 | + list_del(&node->list); | |
10235 | + kfree(node); | |
10236 | + } | |
10237 | +} | |
10238 | + | |
10239 | +static void put_int(int x, char *buf, int *offp) | |
10240 | +{ | |
10241 | + x = cpu_to_le32(x); | |
10242 | + memcpy(buf + *offp, &x, sizeof(int)); | |
10243 | + *offp += sizeof(int); | |
10244 | +} | |
10245 | + | |
10246 | +static void put_int64(uint64_t x, char *buf, int *offp) | |
10247 | +{ | |
10248 | + x = cpu_to_le64(x); | |
10249 | + memcpy(buf + *offp, &x, sizeof(uint64_t)); | |
10250 | + *offp += sizeof(uint64_t); | |
10251 | +} | |
10252 | + | |
10253 | +static void put_bytes(char *x, int len, char *buf, int *offp) | |
10254 | +{ | |
10255 | + put_int(len, buf, offp); | |
10256 | + memcpy(buf + *offp, x, len); | |
10257 | + *offp += len; | |
10258 | +} | |
10259 | + | |
10260 | +static void put_char(char x, char *buf, int *offp) | |
10261 | +{ | |
10262 | + buf[*offp] = x; | |
10263 | + *offp += 1; | |
10264 | +} | |
10265 | + | |
10266 | +static int get_int(char *buf, int *offp) | |
10267 | +{ | |
10268 | + int value; | |
10269 | + memcpy(&value, buf + *offp, sizeof(int)); | |
10270 | + *offp += sizeof(int); | |
10271 | + return le32_to_cpu(value); | |
10272 | +} | |
10273 | + | |
10274 | +static uint64_t get_int64(char *buf, int *offp) | |
10275 | +{ | |
10276 | + uint64_t value; | |
10277 | + | |
10278 | + memcpy(&value, buf + *offp, sizeof(uint64_t)); | |
10279 | + *offp += sizeof(uint64_t); | |
10280 | + return le64_to_cpu(value); | |
10281 | +} | |
10282 | + | |
10283 | +static char get_char(char *buf, int *offp) | |
10284 | +{ | |
10285 | + char x = buf[*offp]; | |
10286 | + | |
10287 | + *offp += 1; | |
10288 | + return x; | |
10289 | +} | |
10290 | + | |
10291 | +static void get_bytes(char *bytes, int *len, char *buf, int *offp) | |
10292 | +{ | |
10293 | + *len = get_int(buf, offp); | |
10294 | + memcpy(bytes, buf + *offp, *len); | |
10295 | + *offp += *len; | |
10296 | +} | |
10297 | + | |
10d56c87 | 10298 | +static int lkb_length(struct dlm_lkb *lkb) |
4bf12011 | 10299 | +{ |
10300 | + int len = 0; | |
10301 | + | |
10302 | + len += sizeof(int); /* lkb_id */ | |
10303 | + len += sizeof(int); /* lkb_resource->res_remasterid */ | |
10304 | + len += sizeof(int); /* lkb_flags */ | |
10305 | + len += sizeof(int); /* lkb_status */ | |
10306 | + len += sizeof(char); /* lkb_rqmode */ | |
10307 | + len += sizeof(char); /* lkb_grmode */ | |
10308 | + len += sizeof(int); /* lkb_childcnt */ | |
10309 | + len += sizeof(int); /* lkb_parent->lkb_id */ | |
10310 | + len += sizeof(int); /* lkb_bastaddr */ | |
10311 | + | |
10312 | + if (lkb->lkb_flags & GDLM_LKFLG_VALBLK) { | |
10313 | + len += sizeof(int); /* number of lvb bytes */ | |
10314 | + len += DLM_LVB_LEN; | |
10315 | + } | |
10316 | + | |
10317 | + if (lkb->lkb_range) { | |
10318 | + len += sizeof(uint64_t); | |
10319 | + len += sizeof(uint64_t); | |
10320 | + if (lkb->lkb_status == GDLM_LKSTS_CONVERT) { | |
10321 | + len += sizeof(uint64_t); | |
10322 | + len += sizeof(uint64_t); | |
10323 | + } | |
10324 | + } | |
10325 | + | |
10326 | + return len; | |
10327 | +} | |
10328 | + | |
10329 | +/* | |
10330 | + * It's up to the caller to be sure there's enough space in the buffer. | |
10331 | + */ | |
10332 | + | |
10d56c87 | 10333 | +static void serialise_lkb(struct dlm_lkb *lkb, char *buf, int *offp) |
4bf12011 | 10334 | +{ |
10335 | + int flags; | |
10336 | + | |
10337 | + /* Need to tell the remote end if we have a range */ | |
10338 | + flags = lkb->lkb_flags; | |
10339 | + if (lkb->lkb_range) | |
10340 | + flags |= GDLM_LKFLG_RANGE; | |
10341 | + | |
10342 | + /* | |
10343 | + * See lkb_length() | |
10344 | + * Total: 30 (no lvb) or 66 (with lvb) bytes | |
10345 | + */ | |
10346 | + | |
10347 | + put_int(lkb->lkb_id, buf, offp); | |
10348 | + put_int(lkb->lkb_resource->res_remasterid, buf, offp); | |
10349 | + put_int(flags, buf, offp); | |
10350 | + put_int(lkb->lkb_status, buf, offp); | |
10351 | + put_char(lkb->lkb_rqmode, buf, offp); | |
10352 | + put_char(lkb->lkb_grmode, buf, offp); | |
10353 | + put_int(atomic_read(&lkb->lkb_childcnt), buf, offp); | |
10354 | + | |
10355 | + if (lkb->lkb_parent) | |
10356 | + put_int(lkb->lkb_parent->lkb_id, buf, offp); | |
10357 | + else | |
10358 | + put_int(0, buf, offp); | |
10359 | + | |
10360 | + if (lkb->lkb_bastaddr) | |
10361 | + put_int(1, buf, offp); | |
10362 | + else | |
10363 | + put_int(0, buf, offp); | |
10364 | + | |
10365 | + if (lkb->lkb_flags & GDLM_LKFLG_VALBLK) { | |
10d56c87 | 10366 | + DLM_ASSERT(lkb->lkb_lvbptr,); |
4bf12011 | 10367 | + put_bytes(lkb->lkb_lvbptr, DLM_LVB_LEN, buf, offp); |
10368 | + } | |
10369 | + | |
10370 | + /* Only send the range we actually need */ | |
10371 | + if (lkb->lkb_range) { | |
10372 | + switch (lkb->lkb_status) { | |
10373 | + case GDLM_LKSTS_CONVERT: | |
10374 | + put_int64(lkb->lkb_range[RQ_RANGE_START], buf, offp); | |
10375 | + put_int64(lkb->lkb_range[RQ_RANGE_END], buf, offp); | |
10376 | + put_int64(lkb->lkb_range[GR_RANGE_START], buf, offp); | |
10377 | + put_int64(lkb->lkb_range[GR_RANGE_END], buf, offp); | |
10378 | + break; | |
10379 | + case GDLM_LKSTS_WAITING: | |
10380 | + put_int64(lkb->lkb_range[RQ_RANGE_START], buf, offp); | |
10381 | + put_int64(lkb->lkb_range[RQ_RANGE_END], buf, offp); | |
10382 | + break; | |
10383 | + case GDLM_LKSTS_GRANTED: | |
10384 | + put_int64(lkb->lkb_range[GR_RANGE_START], buf, offp); | |
10385 | + put_int64(lkb->lkb_range[GR_RANGE_END], buf, offp); | |
10386 | + break; | |
10387 | + default: | |
10d56c87 | 10388 | + DLM_ASSERT(0,); |
4bf12011 | 10389 | + } |
10390 | + } | |
10391 | +} | |
10392 | + | |
10d56c87 | 10393 | +static int rsb_length(struct dlm_rsb *rsb) |
4bf12011 | 10394 | +{ |
10395 | + int len = 0; | |
10396 | + | |
10397 | + len += sizeof(int); /* number of res_name bytes */ | |
10398 | + len += rsb->res_length; /* res_name */ | |
10399 | + len += sizeof(int); /* res_remasterid */ | |
10400 | + len += sizeof(int); /* res_parent->res_remasterid */ | |
10401 | + | |
10402 | + return len; | |
10403 | +} | |
10404 | + | |
10d56c87 | 10405 | +static inline struct dlm_rsb *next_subrsb(struct dlm_rsb *subrsb) |
4bf12011 | 10406 | +{ |
10407 | + struct list_head *tmp; | |
10d56c87 | 10408 | + struct dlm_rsb *r; |
4bf12011 | 10409 | + |
10410 | + tmp = subrsb->res_subreslist.next; | |
10d56c87 | 10411 | + r = list_entry(tmp, struct dlm_rsb, res_subreslist); |
4bf12011 | 10412 | + |
10413 | + return r; | |
10414 | +} | |
10415 | + | |
10d56c87 | 10416 | +static inline int last_in_list(struct dlm_rsb *r, struct list_head *head) |
4bf12011 | 10417 | +{ |
10d56c87 AM |
10418 | + struct dlm_rsb *last; |
10419 | + last = list_entry(head->prev, struct dlm_rsb, res_subreslist); | |
4bf12011 | 10420 | + if (last == r) |
10421 | + return 1; | |
10422 | + return 0; | |
10423 | +} | |
10424 | + | |
10425 | +/* | |
10426 | + * Used to decide if an rsb should be rebuilt on a new master. An rsb only | |
10427 | + * needs to be rebuilt if we have lkb's queued on it. NOREBUILD lkb's on the | |
10428 | + * wait queue are not rebuilt. | |
10429 | + */ | |
10430 | + | |
10d56c87 | 10431 | +static int lkbs_to_remaster(struct dlm_rsb *r) |
4bf12011 | 10432 | +{ |
10d56c87 AM |
10433 | + struct dlm_lkb *lkb; |
10434 | + struct dlm_rsb *sub; | |
4bf12011 | 10435 | + |
10436 | + if (!list_empty(&r->res_grantqueue) || | |
10437 | + !list_empty(&r->res_convertqueue)) | |
10438 | + return TRUE; | |
10439 | + | |
10440 | + list_for_each_entry(lkb, &r->res_waitqueue, lkb_statequeue) { | |
10441 | + if (lkb->lkb_flags & GDLM_LKFLG_NOREBUILD) | |
10442 | + continue; | |
10443 | + return TRUE; | |
10444 | + } | |
10445 | + | |
10446 | + list_for_each_entry(sub, &r->res_subreslist, res_subreslist) { | |
10447 | + if (!list_empty(&sub->res_grantqueue) || | |
10448 | + !list_empty(&sub->res_convertqueue)) | |
10449 | + return TRUE; | |
10450 | + | |
10451 | + list_for_each_entry(lkb, &sub->res_waitqueue, lkb_statequeue) { | |
10452 | + if (lkb->lkb_flags & GDLM_LKFLG_NOREBUILD) | |
10453 | + continue; | |
10454 | + return TRUE; | |
10455 | + } | |
10456 | + } | |
10457 | + | |
10458 | + return FALSE; | |
10459 | +} | |
10460 | + | |
10d56c87 | 10461 | +static void serialise_rsb(struct dlm_rsb *rsb, char *buf, int *offp) |
4bf12011 | 10462 | +{ |
10463 | + /* | |
10464 | + * See rsb_length() | |
10465 | + * Total: 36 bytes (4 + 24 + 4 + 4) | |
10466 | + */ | |
10467 | + | |
10468 | + put_bytes(rsb->res_name, rsb->res_length, buf, offp); | |
10469 | + put_int(rsb->res_remasterid, buf, offp); | |
10470 | + | |
10471 | + if (rsb->res_parent) | |
10472 | + put_int(rsb->res_parent->res_remasterid, buf, offp); | |
10473 | + else | |
10474 | + put_int(0, buf, offp); | |
10475 | + | |
10d56c87 | 10476 | + DLM_ASSERT(!rsb->res_lvbptr,); |
4bf12011 | 10477 | +} |
10478 | + | |
10479 | +/* | |
10480 | + * Flatten an LKB into a buffer for sending to the new RSB master. As a | |
10481 | + * side-effect the nodeid of the lock is set to the nodeid of the new RSB | |
10482 | + * master. | |
10483 | + */ | |
10484 | + | |
10d56c87 AM |
10485 | +static int pack_one_lkb(struct dlm_rsb *r, struct dlm_lkb *lkb, |
10486 | + rcom_fill_t *fill) | |
4bf12011 | 10487 | +{ |
10488 | + if (fill->offset + 1 + lkb_length(lkb) > fill->maxlen) | |
10489 | + goto nospace; | |
10490 | + | |
10491 | + lkb->lkb_nodeid = r->res_nodeid; | |
10492 | + | |
10493 | + put_char(REMASTER_LKB, fill->outbuf, &fill->offset); | |
10494 | + serialise_lkb(lkb, fill->outbuf, &fill->offset); | |
10495 | + | |
10496 | + fill->count++; | |
10497 | + need_new_lkid(r); | |
10498 | + return 0; | |
10499 | + | |
10500 | + nospace: | |
10501 | + return -ENOSPC; | |
10502 | +} | |
10503 | + | |
10504 | +/* | |
10505 | + * Pack all LKB's from a given queue, except for those with the NOREBUILD flag. | |
10506 | + */ | |
10507 | + | |
10d56c87 | 10508 | +static int pack_lkb_queue(struct dlm_rsb *r, struct list_head *queue, |
4bf12011 | 10509 | + rcom_fill_t *fill) |
10510 | +{ | |
10d56c87 | 10511 | + struct dlm_lkb *lkb; |
4bf12011 | 10512 | + int error; |
10513 | + | |
10514 | + list_for_each_entry(lkb, queue, lkb_statequeue) { | |
10515 | + if (lkb->lkb_flags & GDLM_LKFLG_NOREBUILD) | |
10516 | + continue; | |
10517 | + | |
10518 | + error = pack_one_lkb(r, lkb, fill); | |
10519 | + if (error) | |
10520 | + goto nospace; | |
10521 | + } | |
10522 | + | |
10523 | + return 0; | |
10524 | + | |
10525 | + nospace: | |
10526 | + fill->lkb = lkb; | |
10527 | + fill->lkbqueue = queue; | |
10528 | + | |
10529 | + return error; | |
10530 | +} | |
10531 | + | |
10d56c87 | 10532 | +static int pack_lkb_queues(struct dlm_rsb *r, rcom_fill_t *fill) |
4bf12011 | 10533 | +{ |
10534 | + int error; | |
10535 | + | |
10536 | + error = pack_lkb_queue(r, &r->res_grantqueue, fill); | |
10537 | + if (error) | |
10538 | + goto nospace; | |
10539 | + | |
10540 | + error = pack_lkb_queue(r, &r->res_convertqueue, fill); | |
10541 | + if (error) | |
10542 | + goto nospace; | |
10543 | + | |
10544 | + error = pack_lkb_queue(r, &r->res_waitqueue, fill); | |
10545 | + | |
10546 | + nospace: | |
10547 | + return error; | |
10548 | +} | |
10549 | + | |
10550 | +/* | |
10551 | + * Pack remaining lkb's for rsb or subrsb. This may include a partial lkb | |
10552 | + * queue and full lkb queues. | |
10553 | + */ | |
10554 | + | |
10d56c87 | 10555 | +static int pack_lkb_remaining(struct dlm_rsb *r, rcom_fill_t *fill) |
4bf12011 | 10556 | +{ |
10557 | + struct list_head *tmp, *start, *end; | |
10d56c87 | 10558 | + struct dlm_lkb *lkb; |
4bf12011 | 10559 | + int error; |
10560 | + | |
10561 | + /* | |
10562 | + * Beginning with fill->lkb, pack remaining lkb's on fill->lkbqueue. | |
10563 | + */ | |
10564 | + | |
10565 | + error = pack_one_lkb(r, fill->lkb, fill); | |
10566 | + if (error) | |
10567 | + goto out; | |
10568 | + | |
10569 | + start = fill->lkb->lkb_statequeue.next; | |
10570 | + end = fill->lkbqueue; | |
10571 | + | |
10572 | + for (tmp = start; tmp != end; tmp = tmp->next) { | |
10d56c87 | 10573 | + lkb = list_entry(tmp, struct dlm_lkb, lkb_statequeue); |
4bf12011 | 10574 | + |
10575 | + error = pack_one_lkb(r, lkb, fill); | |
10576 | + if (error) { | |
10577 | + fill->lkb = lkb; | |
10578 | + goto out; | |
10579 | + } | |
10580 | + } | |
10581 | + | |
10582 | + /* | |
10583 | + * Pack all lkb's on r's queues following fill->lkbqueue. | |
10584 | + */ | |
10585 | + | |
10586 | + if (fill->lkbqueue == &r->res_waitqueue) | |
10587 | + goto out; | |
10588 | + if (fill->lkbqueue == &r->res_convertqueue) | |
10589 | + goto skip; | |
10590 | + | |
10d56c87 | 10591 | + DLM_ASSERT(fill->lkbqueue == &r->res_grantqueue,); |
4bf12011 | 10592 | + |
10593 | + error = pack_lkb_queue(r, &r->res_convertqueue, fill); | |
10594 | + if (error) | |
10595 | + goto out; | |
10596 | + skip: | |
10597 | + error = pack_lkb_queue(r, &r->res_waitqueue, fill); | |
10598 | + | |
10599 | + out: | |
10600 | + return error; | |
10601 | +} | |
10602 | + | |
10d56c87 AM |
10603 | +static int pack_one_subrsb(struct dlm_rsb *rsb, struct dlm_rsb *subrsb, |
10604 | + rcom_fill_t *fill) | |
4bf12011 | 10605 | +{ |
10606 | + int error; | |
10607 | + | |
10608 | + down_write(&subrsb->res_lock); | |
10609 | + | |
10610 | + if (fill->offset + 1 + rsb_length(subrsb) > fill->maxlen) | |
10611 | + goto nospace; | |
10612 | + | |
10613 | + subrsb->res_nodeid = rsb->res_nodeid; | |
10614 | + subrsb->res_remasterid = ++fill->remasterid; | |
10615 | + | |
10616 | + put_char(REMASTER_RSB, fill->outbuf, &fill->offset); | |
10617 | + serialise_rsb(subrsb, fill->outbuf, &fill->offset); | |
10618 | + | |
10619 | + error = pack_lkb_queues(subrsb, fill); | |
10620 | + if (error) | |
10621 | + goto nospace; | |
10622 | + | |
10623 | + up_write(&subrsb->res_lock); | |
10624 | + | |
10625 | + return 0; | |
10626 | + | |
10627 | + nospace: | |
10628 | + up_write(&subrsb->res_lock); | |
10629 | + fill->subrsb = subrsb; | |
10630 | + | |
10631 | + return -ENOSPC; | |
10632 | +} | |
10633 | + | |
10d56c87 AM |
10634 | +static int pack_subrsbs(struct dlm_rsb *rsb, struct dlm_rsb *in_subrsb, |
10635 | + rcom_fill_t *fill) | |
4bf12011 | 10636 | +{ |
10d56c87 | 10637 | + struct dlm_rsb *subrsb; |
4bf12011 | 10638 | + int error = 0; |
10639 | + | |
10640 | + /* | |
10641 | + * When an initial subrsb is given, we know it needs to be packed. | |
10642 | + * When no initial subrsb is given, begin with the first (if any exist). | |
10643 | + */ | |
10644 | + | |
10645 | + if (!in_subrsb) { | |
10646 | + if (list_empty(&rsb->res_subreslist)) | |
10647 | + goto out; | |
10648 | + | |
10d56c87 | 10649 | + subrsb = list_entry(rsb->res_subreslist.next, struct dlm_rsb, |
4bf12011 | 10650 | + res_subreslist); |
10651 | + } else | |
10652 | + subrsb = in_subrsb; | |
10653 | + | |
10654 | + for (;;) { | |
10655 | + error = pack_one_subrsb(rsb, subrsb, fill); | |
10656 | + if (error) | |
10657 | + goto out; | |
10658 | + | |
10659 | + if (last_in_list(subrsb, &rsb->res_subreslist)) | |
10660 | + break; | |
10661 | + | |
10662 | + subrsb = next_subrsb(subrsb); | |
10663 | + } | |
10664 | + | |
10665 | + out: | |
10666 | + return error; | |
10667 | +} | |
10668 | + | |
10669 | +/* | |
10670 | + * Finish packing whatever is left in an rsb tree. If space runs out while | |
10671 | + * finishing, save subrsb/lkb and this will be called again for the same rsb. | |
10672 | + * | |
10673 | + * !subrsb && lkb, we left off part way through root rsb's lkbs. | |
10674 | + * subrsb && !lkb, we left off just before starting a new subrsb. | |
10675 | + * subrsb && lkb, we left off part way through a subrsb's lkbs. | |
10676 | + * !subrsb && !lkb, we shouldn't be in this function, but starting | |
10677 | + * a new rsb in pack_rsb_tree(). | |
10678 | + */ | |
10679 | + | |
10d56c87 | 10680 | +static int pack_rsb_tree_remaining(struct dlm_ls *ls, struct dlm_rsb *rsb, |
4bf12011 | 10681 | + rcom_fill_t *fill) |
10682 | +{ | |
10d56c87 | 10683 | + struct dlm_rsb *subrsb = NULL; |
4bf12011 | 10684 | + int error = 0; |
10685 | + | |
10686 | + if (!fill->subrsb && fill->lkb) { | |
10687 | + error = pack_lkb_remaining(rsb, fill); | |
10688 | + if (error) | |
10689 | + goto out; | |
10690 | + | |
10691 | + error = pack_subrsbs(rsb, NULL, fill); | |
10692 | + if (error) | |
10693 | + goto out; | |
10694 | + } | |
10695 | + | |
10696 | + else if (fill->subrsb && !fill->lkb) { | |
10697 | + error = pack_subrsbs(rsb, fill->subrsb, fill); | |
10698 | + if (error) | |
10699 | + goto out; | |
10700 | + } | |
10701 | + | |
10702 | + else if (fill->subrsb && fill->lkb) { | |
10703 | + error = pack_lkb_remaining(fill->subrsb, fill); | |
10704 | + if (error) | |
10705 | + goto out; | |
10706 | + | |
10707 | + if (last_in_list(fill->subrsb, &fill->rsb->res_subreslist)) | |
10708 | + goto out; | |
10709 | + | |
10710 | + subrsb = next_subrsb(fill->subrsb); | |
10711 | + | |
10712 | + error = pack_subrsbs(rsb, subrsb, fill); | |
10713 | + if (error) | |
10714 | + goto out; | |
10715 | + } | |
10716 | + | |
10717 | + fill->subrsb = NULL; | |
10718 | + fill->lkb = NULL; | |
10719 | + | |
10720 | + out: | |
10721 | + return error; | |
10722 | +} | |
10723 | + | |
10724 | +/* | |
10725 | + * Pack an RSB, all its LKB's, all its subrsb's and all their LKB's into a | |
10726 | + * buffer. When the buffer runs out of space, save the place to restart (the | |
10727 | + * queue+lkb, subrsb, or subrsb+queue+lkb which wouldn't fit). | |
10728 | + */ | |
10729 | + | |
10d56c87 AM |
10730 | +static int pack_rsb_tree(struct dlm_ls *ls, struct dlm_rsb *rsb, |
10731 | + rcom_fill_t *fill) | |
4bf12011 | 10732 | +{ |
10733 | + int error = -ENOSPC; | |
10734 | + | |
10735 | + fill->remasterid = 0; | |
10736 | + | |
10737 | + /* | |
10738 | + * Pack the root rsb itself. A 1 byte type precedes the serialised | |
10739 | + * rsb. Then pack the lkb's for the root rsb. | |
10740 | + */ | |
10741 | + | |
10742 | + down_write(&rsb->res_lock); | |
10743 | + | |
10744 | + if (fill->offset + 1 + rsb_length(rsb) > fill->maxlen) | |
10745 | + goto out; | |
10746 | + | |
10747 | + rsb->res_remasterid = ++fill->remasterid; | |
10748 | + put_char(REMASTER_ROOTRSB, fill->outbuf, &fill->offset); | |
10749 | + serialise_rsb(rsb, fill->outbuf, &fill->offset); | |
10750 | + | |
10751 | + error = pack_lkb_queues(rsb, fill); | |
10752 | + if (error) | |
10753 | + goto out; | |
10754 | + | |
10755 | + up_write(&rsb->res_lock); | |
10756 | + | |
10757 | + /* | |
10758 | + * Pack subrsb/lkb's under the root rsb. | |
10759 | + */ | |
10760 | + | |
10761 | + error = pack_subrsbs(rsb, NULL, fill); | |
10762 | + | |
10763 | + return error; | |
10764 | + | |
10765 | + out: | |
10766 | + up_write(&rsb->res_lock); | |
10767 | + return error; | |
10768 | +} | |
10769 | + | |
10770 | +/* | |
10771 | + * Given an RSB, return the next RSB that should be sent to a new master. | |
10772 | + */ | |
10773 | + | |
10d56c87 AM |
10774 | +static struct dlm_rsb *next_remastered_rsb(struct dlm_ls *ls, |
10775 | + struct dlm_rsb *rsb) | |
4bf12011 | 10776 | +{ |
10777 | + struct list_head *tmp, *start, *end; | |
10d56c87 | 10778 | + struct dlm_rsb *r; |
4bf12011 | 10779 | + |
10780 | + if (!rsb) | |
10781 | + start = ls->ls_rootres.next; | |
10782 | + else | |
10783 | + start = rsb->res_rootlist.next; | |
10784 | + | |
10785 | + end = &ls->ls_rootres; | |
10786 | + | |
10787 | + for (tmp = start; tmp != end; tmp = tmp->next) { | |
10d56c87 | 10788 | + r = list_entry(tmp, struct dlm_rsb, res_rootlist); |
4bf12011 | 10789 | + |
10790 | + if (test_bit(RESFL_NEW_MASTER, &r->res_flags)) { | |
10791 | + if (r->res_nodeid && lkbs_to_remaster(r)) { | |
10792 | + expect_new_lkids(r); | |
10793 | + return r; | |
10794 | + } else | |
10795 | + clear_bit(RESFL_NEW_MASTER, &r->res_flags); | |
10796 | + } | |
10797 | + } | |
10798 | + | |
10799 | + return NULL; | |
10800 | +} | |
10801 | + | |
10802 | +/* | |
10803 | + * Given an rcom buffer, fill it with RSB's that need to be sent to a single | |
10804 | + * new master node. In the case where all the data to send to one node | |
10805 | + * requires multiple messages, this function needs to resume filling each | |
10806 | + * successive buffer from the point where it left off when the previous buffer | |
10807 | + * filled up. | |
10808 | + */ | |
10809 | + | |
10d56c87 AM |
10810 | +static void fill_rcom_buffer(struct dlm_ls *ls, rcom_fill_t *fill, |
10811 | + uint32_t *nodeid) | |
4bf12011 | 10812 | +{ |
10d56c87 | 10813 | + struct dlm_rsb *rsb, *prev_rsb = fill->rsb; |
4bf12011 | 10814 | + int error; |
10815 | + | |
10816 | + fill->offset = 0; | |
10817 | + | |
10818 | + if (!prev_rsb) { | |
10819 | + | |
10820 | + /* | |
10821 | + * The first time this function is called. | |
10822 | + */ | |
10823 | + | |
10824 | + rsb = next_remastered_rsb(ls, NULL); | |
10825 | + if (!rsb) | |
10826 | + goto no_more; | |
10827 | + | |
10828 | + } else if (fill->subrsb || fill->lkb) { | |
10829 | + | |
10830 | + /* | |
10831 | + * Continue packing an rsb tree that was partially packed last | |
10832 | + * time (fill->subrsb/lkb indicates where packing of last block | |
10833 | + * left off) | |
10834 | + */ | |
10835 | + | |
10836 | + rsb = prev_rsb; | |
10837 | + *nodeid = rsb->res_nodeid; | |
10838 | + | |
10839 | + error = pack_rsb_tree_remaining(ls, rsb, fill); | |
10840 | + if (error == -ENOSPC) | |
10841 | + goto more; | |
10842 | + | |
10843 | + rsb = next_remastered_rsb(ls, prev_rsb); | |
10844 | + if (!rsb) | |
10845 | + goto no_more; | |
10846 | + | |
10847 | + if (rsb->res_nodeid != prev_rsb->res_nodeid) | |
10848 | + goto more; | |
10849 | + } else { | |
10850 | + rsb = prev_rsb; | |
10851 | + } | |
10852 | + | |
10853 | + /* | |
10854 | + * Pack rsb trees into the buffer until we run out of space, run out of | |
10855 | + * new rsb's or hit a new nodeid. | |
10856 | + */ | |
10857 | + | |
10858 | + *nodeid = rsb->res_nodeid; | |
10859 | + | |
10860 | + for (;;) { | |
10861 | + error = pack_rsb_tree(ls, rsb, fill); | |
10862 | + if (error == -ENOSPC) | |
10863 | + goto more; | |
10864 | + | |
10865 | + prev_rsb = rsb; | |
10866 | + | |
10867 | + rsb = next_remastered_rsb(ls, prev_rsb); | |
10868 | + if (!rsb) | |
10869 | + goto no_more; | |
10870 | + | |
10871 | + if (rsb->res_nodeid != prev_rsb->res_nodeid) | |
10872 | + goto more; | |
10873 | + } | |
10874 | + | |
10875 | + more: | |
10876 | + fill->more = 1; | |
10877 | + fill->rsb = rsb; | |
10878 | + return; | |
10879 | + | |
10880 | + no_more: | |
10881 | + fill->more = 0; | |
10882 | +} | |
10883 | + | |
10884 | +/* | |
10885 | + * Send lkb's (and subrsb/lkbs) for remastered root rsbs to new masters. | |
10886 | + */ | |
10887 | + | |
10d56c87 | 10888 | +int rebuild_rsbs_send(struct dlm_ls *ls) |
4bf12011 | 10889 | +{ |
10d56c87 | 10890 | + struct dlm_rcom *rc; |
4bf12011 | 10891 | + rcom_fill_t fill; |
10892 | + uint32_t nodeid; | |
10893 | + int error; | |
10894 | + | |
10d56c87 | 10895 | + DLM_ASSERT(recover_list_empty(ls),); |
4bf12011 | 10896 | + |
10897 | + log_all(ls, "rebuild locks"); | |
10898 | + | |
10899 | + error = -ENOMEM; | |
10900 | + rc = allocate_rcom_buffer(ls); | |
10901 | + if (!rc) | |
10902 | + goto ret; | |
10903 | + | |
10904 | + error = 0; | |
10905 | + memset(&fill, 0, sizeof(rcom_fill_t)); | |
10906 | + fill.outbuf = rc->rc_buf; | |
10d56c87 | 10907 | + fill.maxlen = dlm_config.buffer_size - sizeof(struct dlm_rcom); |
4bf12011 | 10908 | + |
10909 | + do { | |
10910 | + fill_rcom_buffer(ls, &fill, &nodeid); | |
10911 | + if (!fill.offset) | |
10912 | + break; | |
10913 | + | |
10914 | + rc->rc_datalen = fill.offset; | |
10915 | + error = rcom_send_message(ls, nodeid, RECCOMM_NEWLOCKS, rc, 0); | |
10916 | + if (error) | |
10917 | + goto out; | |
10918 | + | |
10919 | + schedule(); | |
10d56c87 | 10920 | + error = dlm_recovery_stopped(ls); |
4bf12011 | 10921 | + if (error) |
10922 | + goto out; | |
10923 | + } | |
10924 | + while (fill.more); | |
10925 | + | |
10d56c87 | 10926 | + error = dlm_wait_function(ls, &recover_list_empty); |
4bf12011 | 10927 | + |
10928 | + log_all(ls, "rebuilt %d locks", fill.count); | |
10929 | + | |
10930 | + out: | |
10931 | + rebuild_freemem(ls); | |
10932 | + free_rcom_buffer(rc); | |
10933 | + | |
10934 | + ret: | |
10935 | + return error; | |
10936 | +} | |
10937 | + | |
10d56c87 AM |
10938 | +static struct dlm_rsb *find_by_remasterid(struct dlm_ls *ls, int remasterid, |
10939 | + struct dlm_rsb *rootrsb) | |
4bf12011 | 10940 | +{ |
10d56c87 | 10941 | + struct dlm_rsb *rsb; |
4bf12011 | 10942 | + |
10d56c87 | 10943 | + DLM_ASSERT(rootrsb,); |
4bf12011 | 10944 | + |
10945 | + if (rootrsb->res_remasterid == remasterid) { | |
10946 | + rsb = rootrsb; | |
10947 | + goto out; | |
10948 | + } | |
10949 | + | |
10950 | + list_for_each_entry(rsb, &rootrsb->res_subreslist, res_subreslist) { | |
10951 | + if (rsb->res_remasterid == remasterid) | |
10952 | + goto out; | |
10953 | + } | |
10954 | + rsb = NULL; | |
10955 | + | |
10956 | + out: | |
10957 | + return rsb; | |
10958 | +} | |
10959 | + | |
10960 | +/* | |
10961 | + * Search a queue for the given remote lock id (remlkid). | |
10962 | + */ | |
10963 | + | |
10d56c87 AM |
10964 | +static struct dlm_lkb *search_remlkid(struct list_head *statequeue, int nodeid, |
10965 | + int remid) | |
4bf12011 | 10966 | +{ |
10d56c87 | 10967 | + struct dlm_lkb *lkb; |
4bf12011 | 10968 | + |
10969 | + list_for_each_entry(lkb, statequeue, lkb_statequeue) { | |
10970 | + if (lkb->lkb_nodeid == nodeid && lkb->lkb_remid == remid) { | |
10971 | + return lkb; | |
10972 | + } | |
10973 | + } | |
10974 | + | |
10975 | + return NULL; | |
10976 | +} | |
10977 | + | |
10978 | +/* | |
10979 | + * Given a remote lock ID (and a parent resource), return the local LKB for it. | |
10980 | + * Hopefully we don't need to do this too often on deep lock trees. This is | |
10981 | + * VERY suboptimal for anything but the smallest lock trees. It searches the | |
10982 | + * lock tree for an LKB with the remote id "remid" and the node "nodeid" and | |
10983 | + * returns the LKB address. OPTIMISATION: we should keep a list of these while | |
10984 | + * we are building up the remastered LKBs. | |
10985 | + */ | |
10986 | + | |
10d56c87 AM |
10987 | +static struct dlm_lkb *find_by_remlkid(struct dlm_rsb *rootrsb, int nodeid, |
10988 | + int remid) | |
4bf12011 | 10989 | +{ |
10d56c87 AM |
10990 | + struct dlm_lkb *lkb; |
10991 | + struct dlm_rsb *rsb; | |
4bf12011 | 10992 | + |
10993 | + lkb = search_remlkid(&rootrsb->res_grantqueue, nodeid, remid); | |
10994 | + if (lkb) | |
10995 | + goto out; | |
10996 | + | |
10997 | + lkb = search_remlkid(&rootrsb->res_convertqueue, nodeid, remid); | |
10998 | + if (lkb) | |
10999 | + goto out; | |
11000 | + | |
11001 | + lkb = search_remlkid(&rootrsb->res_waitqueue, nodeid, remid); | |
11002 | + if (lkb) | |
11003 | + goto out; | |
11004 | + | |
11005 | + list_for_each_entry(rsb, &rootrsb->res_subreslist, res_subreslist) { | |
11006 | + lkb = search_remlkid(&rsb->res_grantqueue, nodeid, remid); | |
11007 | + if (lkb) | |
11008 | + goto out; | |
11009 | + | |
11010 | + lkb = search_remlkid(&rsb->res_convertqueue, nodeid, remid); | |
11011 | + if (lkb) | |
11012 | + goto out; | |
11013 | + | |
11014 | + lkb = search_remlkid(&rsb->res_waitqueue, nodeid, remid); | |
11015 | + if (lkb) | |
11016 | + goto out; | |
11017 | + } | |
11018 | + lkb = NULL; | |
11019 | + | |
11020 | + out: | |
11021 | + return lkb; | |
11022 | +} | |
11023 | + | |
11024 | +/* | |
11025 | + * Unpack an LKB from a remaster operation, queue it on its rsb and append | |
11026 | + * the (new local lkid, remote lkid) pair to outbuf. Returns 0 or -ENOMEM. | |
11027 | + */ | |
10d56c87 AM |
11028 | +static int deserialise_lkb(struct dlm_ls *ls, int rem_nodeid, |
11029 | + struct dlm_rsb *rootrsb, char *buf, int *ptr, | |
11030 | + char *outbuf, int *outoffp) | |
4bf12011 | 11031 | +{ |
10d56c87 AM |
11032 | + struct dlm_lkb *lkb; |
11033 | + struct dlm_rsb *rsb; | |
4bf12011 | 11034 | + int error = -ENOMEM, parentid, rsb_rmid, remote_lkid, status, temp; |
11035 | + | |
11036 | + remote_lkid = get_int(buf, ptr); | |
11037 | + | |
11038 | + rsb_rmid = get_int(buf, ptr); | |
11039 | + rsb = find_by_remasterid(ls, rsb_rmid, rootrsb); | |
10d56c87 | 11040 | + DLM_ASSERT(rsb, printk("no RSB for remasterid %d\n", rsb_rmid);); |
4bf12011 | 11041 | + |
11042 | + /* | |
11043 | + * We could have received this lkb already from a previous recovery | |
11044 | + * that was interrupted. If so, just return the lkid to the remote | |
11045 | + * node. | |
11046 | + */ | |
11047 | + lkb = find_by_remlkid(rsb, rem_nodeid, remote_lkid); | |
11048 | + if (lkb) | |
11049 | + goto put_lkid; | |
11050 | + | |
11051 | + lkb = create_lkb(rsb->res_ls); | |
11052 | + if (!lkb) | |
11053 | + goto out; | |
11054 | + | |
11055 | + lkb->lkb_remid = remote_lkid; | |
11056 | + lkb->lkb_flags = get_int(buf, ptr); | |
11057 | + status = get_int(buf, ptr); | |
11058 | + lkb->lkb_rqmode = get_char(buf, ptr); | |
11059 | + lkb->lkb_grmode = get_char(buf, ptr); | |
11060 | + atomic_set(&lkb->lkb_childcnt, get_int(buf, ptr)); | |
11061 | + | |
11062 | + parentid = get_int(buf, ptr); | |
11063 | + lkb->lkb_bastaddr = (void *) (long) get_int(buf, ptr); | |
11064 | + | |
11065 | + if (lkb->lkb_flags & GDLM_LKFLG_VALBLK) { | |
11066 | + lkb->lkb_lvbptr = allocate_lvb(ls); | |
11067 | + if (!lkb->lkb_lvbptr) | |
11068 | + goto out; | |
11069 | + get_bytes(lkb->lkb_lvbptr, &temp, buf, ptr); | |
11070 | + } | |
11071 | + | |
11072 | + if (lkb->lkb_flags & GDLM_LKFLG_RANGE) { | |
11073 | + uint64_t start, end; | |
11074 | + | |
11075 | + /* Don't need to keep the range flag, for comms use only */ | |
11076 | + lkb->lkb_flags &= ~GDLM_LKFLG_RANGE; | |
11077 | + start = get_int64(buf, ptr); | |
11078 | + end = get_int64(buf, ptr); | |
11079 | + | |
11080 | + lkb->lkb_range = allocate_range(rsb->res_ls); | |
11081 | + if (!lkb->lkb_range) | |
11082 | + goto out; | |
11083 | + | |
11084 | + switch (status) { | |
11085 | + case GDLM_LKSTS_CONVERT: | |
11086 | + lkb->lkb_range[RQ_RANGE_START] = start; | |
11087 | + lkb->lkb_range[RQ_RANGE_END] = end; | |
11088 | + start = get_int64(buf, ptr); | |
11089 | + end = get_int64(buf, ptr); | |
11090 | + lkb->lkb_range[GR_RANGE_START] = start; | |
11091 | + lkb->lkb_range[GR_RANGE_END] = end; | |
11092 | + break; /* don't let the granted range clobber RQ in WAITING */ | |
11093 | + case GDLM_LKSTS_WAITING: | |
11094 | + lkb->lkb_range[RQ_RANGE_START] = start; | |
11095 | + lkb->lkb_range[RQ_RANGE_END] = end; | |
11096 | + break; | |
11097 | + | |
11098 | + case GDLM_LKSTS_GRANTED: | |
11099 | + lkb->lkb_range[GR_RANGE_START] = start; | |
11100 | + lkb->lkb_range[GR_RANGE_END] = end; | |
11101 | + break; | |
11102 | + default: | |
10d56c87 | 11103 | + DLM_ASSERT(0,); |
4bf12011 | 11104 | + } |
11105 | + } | |
11106 | + | |
11107 | + /* Resolve local lock LKB address from parent ID */ | |
11108 | + if (parentid) | |
11109 | + lkb->lkb_parent = find_by_remlkid(rootrsb, rem_nodeid, | |
11110 | + parentid); | |
11111 | + | |
11112 | + atomic_inc(&rsb->res_ref); | |
11113 | + lkb->lkb_resource = rsb; | |
11114 | + | |
11115 | + lkb->lkb_flags |= GDLM_LKFLG_MSTCPY; | |
11116 | + lkb->lkb_nodeid = rem_nodeid; | |
11117 | + | |
11118 | + /* | |
11119 | + * Put the lkb on an RSB queue. An lkb that's in the midst of a | |
11120 | + * conversion request (on the requesting node's lockqueue and has | |
11121 | + * LQCONVERT set) should be put on the granted queue. The convert | |
11122 | + * request will be resent by the requesting node. | |
11123 | + */ | |
11124 | + | |
11125 | + if (lkb->lkb_flags & GDLM_LKFLG_LQCONVERT) { | |
11126 | + lkb->lkb_flags &= ~GDLM_LKFLG_LQCONVERT; | |
10d56c87 | 11127 | + DLM_ASSERT(status == GDLM_LKSTS_CONVERT, |
4bf12011 | 11128 | + printk("status=%d\n", status);); |
11129 | + lkb->lkb_rqmode = DLM_LOCK_IV; | |
11130 | + status = GDLM_LKSTS_GRANTED; | |
11131 | + } | |
11132 | + | |
11133 | + lkb_enqueue(rsb, lkb, status); | |
11134 | + | |
11135 | + /* | |
11136 | + * Update the rsb lvb if the lkb's lvb is up to date (grmode > NL). | |
11137 | + */ | |
11138 | + | |
11139 | + if ((lkb->lkb_flags & GDLM_LKFLG_VALBLK) | |
11140 | + && lkb->lkb_grmode > DLM_LOCK_NL) { | |
11141 | + if (!rsb->res_lvbptr) | |
11142 | + rsb->res_lvbptr = allocate_lvb(rsb->res_ls); | |
11143 | + if (!rsb->res_lvbptr) | |
11144 | + goto out; | |
11145 | + memcpy(rsb->res_lvbptr, lkb->lkb_lvbptr, DLM_LVB_LEN); | |
11146 | + } | |
11147 | + | |
11148 | + /* | |
11149 | + * Clear flags that may have been sent over that are only relevant in | |
11150 | + * the context of the sender. | |
11151 | + */ | |
11152 | + | |
5cdbd17b AM |
11153 | + lkb->lkb_flags &= ~(GDLM_LKFLG_DELETED | GDLM_LKFLG_LQRESEND | |
11154 | + GDLM_LKFLG_NOREBUILD | GDLM_LKFLG_DEMOTED); | |
4bf12011 | 11155 | + |
11156 | + put_lkid: | |
11157 | + /* Return the new LKID to the caller's buffer */ | |
11158 | + put_int(lkb->lkb_id, outbuf, outoffp); | |
11159 | + put_int(lkb->lkb_remid, outbuf, outoffp); | |
11160 | + error = 0; | |
11161 | + | |
11162 | + out: | |
11163 | + return error; | |
11164 | +} | |
11165 | + | |
10d56c87 AM |
11166 | +static struct dlm_rsb *deserialise_rsb(struct dlm_ls *ls, int nodeid, |
11167 | + struct dlm_rsb *rootrsb, char *buf, | |
11168 | + int *ptr) | |
4bf12011 | 11169 | +{ |
11170 | + int length; | |
11171 | + int remasterid; | |
11172 | + int parent_remasterid; | |
11173 | + char name[DLM_RESNAME_MAXLEN]; | |
11174 | + int error; | |
10d56c87 AM |
11175 | + struct dlm_rsb *parent = NULL; |
11176 | + struct dlm_rsb *rsb; | |
4bf12011 | 11177 | + |
11178 | + get_bytes(name, &length, buf, ptr); | |
11179 | + remasterid = get_int(buf, ptr); | |
11180 | + parent_remasterid = get_int(buf, ptr); | |
11181 | + | |
11182 | + if (parent_remasterid) | |
11183 | + parent = find_by_remasterid(ls, parent_remasterid, rootrsb); | |
11184 | + | |
11185 | + /* | |
11186 | + * The rsb reference from this find_or_create_rsb() will keep the rsb | |
11187 | + * around while we add new lkb's to it from deserialise_lkb. Each of | |
11188 | + * the lkb's will add an rsb reference. The reference added here is | |
11189 | + * removed by release_rsb() after all lkb's are added. | |
11190 | + */ | |
11191 | + | |
11192 | + error = find_or_create_rsb(ls, parent, name, length, 1, &rsb); | |
10d56c87 | 11193 | + DLM_ASSERT(!error,); |
4bf12011 | 11194 | + |
11195 | + /* There is a case where the above needs to create the RSB. */ | |
11196 | + if (rsb->res_nodeid == -1) | |
11197 | + rsb->res_nodeid = our_nodeid(); | |
11198 | + | |
11199 | + rsb->res_remasterid = remasterid; | |
11200 | + | |
11201 | + return rsb; | |
11202 | +} | |
11203 | + | |
11204 | +/* | |
11205 | + * Processing at the receiving end of a NEWLOCKS message from a node in | |
11206 | + * rebuild_rsbs_send(). Rebuild a remastered lock tree. Nodeid is the remote | |
11207 | + * node whose locks we are now mastering. For a reply we need to send back the | |
11208 | + * new lockids of the remastered locks so that remote ops can find them. | |
11209 | + */ | |
11210 | + | |
10d56c87 | 11211 | +int rebuild_rsbs_recv(struct dlm_ls *ls, int nodeid, char *buf, int len) |
4bf12011 | 11212 | +{ |
10d56c87 AM |
11213 | + struct dlm_rcom *rc; |
11214 | + struct dlm_rsb *rsb = NULL; | |
4bf12011 | 11215 | + rebuild_node_t *rnode; |
11216 | + char *outbuf; | |
11217 | + int outptr, ptr = 0, error = -ENOMEM; | |
11218 | + | |
11219 | + rnode = find_rebuild_root(ls, nodeid); | |
11220 | + if (!rnode) | |
11221 | + goto out; | |
11222 | + | |
11223 | + /* | |
11224 | + * Allocate a buffer for the reply message which is a list of remote | |
11225 | + * lock IDs and their (new) local lock ids. It will always be big | |
11226 | + * enough to fit <n> ID pairs if it already fit <n> LKBs. | |
11227 | + */ | |
11228 | + | |
11229 | + rc = allocate_rcom_buffer(ls); | |
11230 | + if (!rc) | |
11231 | + goto out; | |
11232 | + outbuf = rc->rc_buf; | |
11233 | + outptr = 0; | |
11234 | + | |
11235 | + /* | |
11236 | + * Unpack RSBs and LKBs, saving new LKB id's in outbuf as they're | |
11237 | + * created. Each deserialise_rsb adds an rsb reference that must be | |
11238 | + * removed with release_rsb once all new lkb's for an rsb have been | |
11239 | + * added. | |
11240 | + */ | |
11241 | + | |
11242 | + while (ptr < len) { | |
11243 | + int type; | |
11244 | + | |
11245 | + type = get_char(buf, &ptr); | |
11246 | + | |
11247 | + switch (type) { | |
11248 | + case REMASTER_ROOTRSB: | |
11249 | + if (rsb) | |
11250 | + release_rsb(rsb); | |
11251 | + rsb = deserialise_rsb(ls, nodeid, rnode->rootrsb, buf, | |
11252 | + &ptr); | |
11253 | + rnode->rootrsb = rsb; | |
11254 | + break; | |
11255 | + | |
11256 | + case REMASTER_RSB: | |
11257 | + if (rsb) | |
11258 | + release_rsb(rsb); | |
11259 | + rsb = deserialise_rsb(ls, nodeid, rnode->rootrsb, buf, | |
11260 | + &ptr); | |
11261 | + break; | |
11262 | + | |
11263 | + case REMASTER_LKB: | |
11264 | + deserialise_lkb(ls, nodeid, rnode->rootrsb, buf, &ptr, | |
11265 | + outbuf, &outptr); | |
11266 | + break; | |
11267 | + | |
11268 | + default: | |
10d56c87 | 11269 | + DLM_ASSERT(0, printk("type=%d nodeid=%u ptr=%d " |
4bf12011 | 11270 | + "len=%d\n", type, nodeid, ptr, |
11271 | + len);); | |
11272 | + } | |
11273 | + } | |
11274 | + | |
11275 | + if (rsb) | |
11276 | + release_rsb(rsb); | |
11277 | + | |
11278 | + /* | |
11279 | + * Reply with the new lock IDs. | |
11280 | + */ | |
11281 | + | |
11282 | + rc->rc_datalen = outptr; | |
11283 | + error = rcom_send_message(ls, nodeid, RECCOMM_NEWLOCKIDS, rc, 0); | |
11284 | + | |
11285 | + free_rcom_buffer(rc); | |
11286 | + | |
11287 | + out: | |
11288 | + return error; | |
11289 | +} | |
11290 | + | |
11291 | +/* | |
11292 | + * Processing for a NEWLOCKIDS message. Called when we get the reply from the | |
11293 | + * new master telling us what the new remote lock IDs are for the remastered | |
11294 | + * locks | |
11295 | + */ | |
11296 | + | |
10d56c87 | 11297 | +int rebuild_rsbs_lkids_recv(struct dlm_ls *ls, int nodeid, char *buf, int len) |
4bf12011 | 11298 | +{ |
11299 | + int offset = 0; | |
11300 | + | |
11301 | + if (len == 1) | |
11302 | + len = 0; | |
11303 | + | |
11304 | + while (offset < len) { | |
11305 | + int remote_id; | |
11306 | + int local_id; | |
10d56c87 | 11307 | + struct dlm_lkb *lkb; |
4bf12011 | 11308 | + |
11309 | + if (offset + 8 > len) { | |
11310 | + log_error(ls, "rebuild_rsbs_lkids_recv: bad data " | |
11311 | + "length nodeid=%d offset=%d len=%d", | |
11312 | + nodeid, offset, len); | |
11313 | + break; | |
11314 | + } | |
11315 | + | |
11316 | + remote_id = get_int(buf, &offset); | |
11317 | + local_id = get_int(buf, &offset); | |
11318 | + | |
11319 | + lkb = find_lock_by_id(ls, local_id); | |
11320 | + if (lkb) { | |
11321 | + lkb->lkb_remid = remote_id; | |
11322 | + have_new_lkid(lkb); | |
11323 | + } else { | |
11324 | + log_error(ls, "rebuild_rsbs_lkids_recv: unknown lkid " | |
11325 | + "nodeid=%d id=%x remid=%x offset=%d len=%d", | |
11326 | + nodeid, local_id, remote_id, offset, len); | |
11327 | + } | |
11328 | + } | |
11329 | + | |
11330 | + if (recover_list_empty(ls)) | |
11331 | + wake_up(&ls->ls_wait_general); | |
11332 | + | |
11333 | + return 0; | |
11334 | +} | |
11335 | diff -urN linux-orig/cluster/dlm/rebuild.h linux-patched/cluster/dlm/rebuild.h | |
11336 | --- linux-orig/cluster/dlm/rebuild.h 1970-01-01 07:30:00.000000000 +0730 | |
10d56c87 | 11337 | +++ linux-patched/cluster/dlm/rebuild.h 2004-07-13 18:57:22.000000000 +0800 |
4bf12011 | 11338 | @@ -0,0 +1,22 @@ |
11339 | +/****************************************************************************** | |
11340 | +******************************************************************************* | |
11341 | +** | |
11342 | +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | |
11343 | +** Copyright (C) 2004 Red Hat, Inc. All rights reserved. | |
11344 | +** | |
11345 | +** This copyrighted material is made available to anyone wishing to use, | |
11346 | +** modify, copy, or redistribute it subject to the terms and conditions | |
11347 | +** of the GNU General Public License v.2. | |
11348 | +** | |
11349 | +******************************************************************************* | |
11350 | +******************************************************************************/ | |
11351 | + | |
11352 | +#ifndef __REBUILD_DOT_H__ | |
11353 | +#define __REBUILD_DOT_H__ | |
11354 | + | |
10d56c87 AM |
11355 | +int rebuild_rsbs_send(struct dlm_ls *ls); |
11356 | +int rebuild_rsbs_recv(struct dlm_ls *ls, int nodeid, char *buf, int len); | |
11357 | +int rebuild_rsbs_lkids_recv(struct dlm_ls *ls, int nodeid, char *buf, int len); | |
11358 | +int rebuild_freemem(struct dlm_ls *ls); | |
4bf12011 | 11359 | + |
11360 | +#endif /* __REBUILD_DOT_H__ */ | |
11361 | diff -urN linux-orig/cluster/dlm/reccomms.c linux-patched/cluster/dlm/reccomms.c | |
11362 | --- linux-orig/cluster/dlm/reccomms.c 1970-01-01 07:30:00.000000000 +0730 | |
10d56c87 AM |
11363 | +++ linux-patched/cluster/dlm/reccomms.c 2004-07-13 18:57:22.000000000 +0800 |
11364 | @@ -0,0 +1,504 @@ | |
4bf12011 | 11365 | +/****************************************************************************** |
11366 | +******************************************************************************* | |
11367 | +** | |
11368 | +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | |
11369 | +** Copyright (C) 2004 Red Hat, Inc. All rights reserved. | |
11370 | +** | |
11371 | +** This copyrighted material is made available to anyone wishing to use, | |
11372 | +** modify, copy, or redistribute it subject to the terms and conditions | |
11373 | +** of the GNU General Public License v.2. | |
11374 | +** | |
11375 | +******************************************************************************* | |
11376 | +******************************************************************************/ | |
11377 | + | |
11378 | +#include "dlm_internal.h" | |
11379 | +#include "lowcomms.h" | |
11380 | +#include "midcomms.h" | |
11381 | +#include "reccomms.h" | |
11382 | +#include "nodes.h" | |
11383 | +#include "lockspace.h" | |
11384 | +#include "recover.h" | |
11385 | +#include "dir.h" | |
11386 | +#include "config.h" | |
11387 | +#include "rebuild.h" | |
11388 | +#include "memory.h" | |
11389 | + | |
11390 | +/* Running on the basis that only a single recovery communication will be done | |
11391 | + * at a time per lockspace */ | |
11392 | + | |
10d56c87 | 11393 | +static void rcom_process_message(struct dlm_ls * ls, uint32_t nodeid, struct dlm_rcom * rc); |
4bf12011 | 11394 | + |
11395 | +/* | |
11396 | + * Track per-node progress/stats during recovery to help debugging. | |
11397 | + */ | |
11398 | + | |
10d56c87 | 11399 | +void rcom_log(struct dlm_ls *ls, int nodeid, struct dlm_rcom *rc, int send) |
4bf12011 | 11400 | +{ |
10d56c87 | 11401 | + struct dlm_csb *csb; |
4bf12011 | 11402 | + int found = 0; |
11403 | + | |
10d56c87 AM |
11404 | + list_for_each_entry(csb, &ls->ls_nodes, list) { |
11405 | + if (csb->node->nodeid == nodeid) { | |
4bf12011 | 11406 | + found = TRUE; |
11407 | + break; | |
11408 | + } | |
11409 | + } | |
11410 | + | |
11411 | + if (!found) | |
11412 | + return; | |
11413 | + | |
11414 | + if (rc->rc_subcmd == RECCOMM_RECOVERNAMES) { | |
11415 | + if (send) { | |
10d56c87 AM |
11416 | + csb->names_send_count++; |
11417 | + csb->names_send_msgid = rc->rc_msgid; | |
4bf12011 | 11418 | + } else { |
10d56c87 AM |
11419 | + csb->names_recv_count++; |
11420 | + csb->names_recv_msgid = rc->rc_msgid; | |
4bf12011 | 11421 | + } |
11422 | + } else if (rc->rc_subcmd == RECCOMM_NEWLOCKS) { | |
11423 | + if (send) { | |
10d56c87 AM |
11424 | + csb->locks_send_count++; |
11425 | + csb->locks_send_msgid = rc->rc_msgid; | |
4bf12011 | 11426 | + } else { |
10d56c87 AM |
11427 | + csb->locks_recv_count++; |
11428 | + csb->locks_recv_msgid = rc->rc_msgid; | |
4bf12011 | 11429 | + } |
11430 | + } | |
11431 | +} | |
11432 | + | |
10d56c87 | 11433 | +void rcom_log_clear(struct dlm_ls *ls) |
4bf12011 | 11434 | +{ |
10d56c87 | 11435 | + struct dlm_csb *csb; |
4bf12011 | 11436 | + |
10d56c87 AM |
11437 | + list_for_each_entry(csb, &ls->ls_nodes, list) { |
11438 | + csb->names_send_count = 0; | |
11439 | + csb->names_send_msgid = 0; | |
11440 | + csb->names_recv_count = 0; | |
11441 | + csb->names_recv_msgid = 0; | |
11442 | + csb->locks_send_count = 0; | |
11443 | + csb->locks_send_msgid = 0; | |
11444 | + csb->locks_recv_count = 0; | |
11445 | + csb->locks_recv_msgid = 0; | |
4bf12011 | 11446 | + } |
11447 | +} | |
11448 | + | |
10d56c87 | 11449 | +static int rcom_response(struct dlm_ls *ls) |
4bf12011 | 11450 | +{ |
11451 | + return test_bit(LSFL_RECCOMM_READY, &ls->ls_flags); | |
11452 | +} | |
11453 | + | |
11454 | +/** | |
11455 | + * rcom_send_message - send or request recovery data | |
11456 | + * @ls: the lockspace | |
11457 | + * @nodeid: node to which the message is sent | |
11458 | + * @type: type of recovery message | |
11459 | + * @rc: the rc buffer to send | |
11460 | + * @need_reply: wait for reply if this is set | |
11461 | + * | |
11462 | + * Using this interface | |
11463 | + * i) Allocate an rc buffer: | |
11464 | + * rc = allocate_rcom_buffer(ls); | |
11465 | + * ii) Copy data to send beginning at rc->rc_buf: | |
11466 | + * memcpy(rc->rc_buf, mybuf, mylen); | |
11467 | + * iii) Set rc->rc_datalen to the number of bytes copied in (ii): | |
11468 | + * rc->rc_datalen = mylen | |
11469 | + * iv) Submit the rc to this function: | |
11470 | + * rcom_send_message(ls, nodeid, type, rc, need_reply); | |
11471 | + * | |
10d56c87 AM |
11472 | + * The max value of "mylen" is dlm_config.buffer_size - sizeof(struct |
11473 | + * dlm_rcom). If more data must be passed in one send, use | |
11474 | + * rcom_expand_buffer() which incrementally increases the size of the rc buffer | |
11475 | + * by dlm_config.buffer_size bytes. | |
4bf12011 | 11476 | + * |
11477 | + * Any data returned for the message (when need_reply is set) will be saved in | |
11478 | + * rc->rc_buf when this function returns and rc->rc_datalen will be set to the | |
11479 | + * number of bytes copied into rc->rc_buf. | |
11480 | + * | |
11481 | + * Returns: 0 on success, -EXXX on failure | |
11482 | + */ | |
11483 | + | |
10d56c87 AM |
11484 | +int rcom_send_message(struct dlm_ls *ls, uint32_t nodeid, int type, |
11485 | + struct dlm_rcom *rc, int need_reply) | |
4bf12011 | 11486 | +{ |
11487 | + int error = 0; | |
11488 | + | |
11489 | + if (!rc->rc_datalen) | |
11490 | + rc->rc_datalen = 1; | |
11491 | + | |
11492 | + /* | |
11493 | + * Fill in the header. | |
11494 | + */ | |
11495 | + | |
11496 | + rc->rc_header.rh_cmd = GDLM_REMCMD_RECOVERMESSAGE; | |
11497 | + rc->rc_header.rh_lockspace = ls->ls_global_id; | |
10d56c87 | 11498 | + rc->rc_header.rh_length = sizeof(struct dlm_rcom) + rc->rc_datalen - 1; |
4bf12011 | 11499 | + rc->rc_subcmd = type; |
11500 | + rc->rc_msgid = ++ls->ls_rcom_msgid; | |
11501 | + | |
11502 | + rcom_log(ls, nodeid, rc, 1); | |
11503 | + | |
11504 | + /* | |
11505 | + * When a reply is received, the reply data goes back into this buffer. | |
11506 | + * Synchronous rcom requests (need_reply=1) are serialised because of | |
11507 | + * the single ls_rcom. | |
11508 | + */ | |
11509 | + | |
11510 | + if (need_reply) { | |
11511 | + down(&ls->ls_rcom_lock); | |
11512 | + ls->ls_rcom = rc; | |
11513 | + } | |
11514 | + | |
11515 | + /* | |
11516 | + * After sending the message we'll wait at the end of this function to | |
11517 | + * get a reply. The READY flag will be set when the reply has been | |
11518 | + * received and requested data has been copied into | |
11519 | + * ls->ls_rcom->rc_buf; | |
11520 | + */ | |
11521 | + | |
10d56c87 | 11522 | + DLM_ASSERT(!test_bit(LSFL_RECCOMM_READY, &ls->ls_flags),); |
4bf12011 | 11523 | + |
11524 | + /* | |
11525 | + * The WAIT bit indicates that we're waiting for and willing to accept a | |
11526 | + * reply. Any replies are ignored unless this bit is set. | |
11527 | + */ | |
11528 | + | |
11529 | + set_bit(LSFL_RECCOMM_WAIT, &ls->ls_flags); | |
11530 | + | |
11531 | + /* | |
11532 | + * Process the message locally. | |
11533 | + */ | |
11534 | + | |
11535 | + if (nodeid == our_nodeid()) { | |
11536 | + rcom_process_message(ls, nodeid, rc); | |
11537 | + goto out; | |
11538 | + } | |
11539 | + | |
11540 | + /* | |
11541 | + * Send the message. | |
11542 | + */ | |
11543 | + | |
11544 | + log_debug(ls, "rcom send %d to %u id %u", type, nodeid, rc->rc_msgid); | |
11545 | + | |
10d56c87 | 11546 | + error = midcomms_send_message(nodeid, (struct dlm_header *) rc, |
4bf12011 | 11547 | + GFP_KERNEL); |
10d56c87 | 11548 | + DLM_ASSERT(error >= 0, printk("error = %d\n", error);); |
4bf12011 | 11549 | + error = 0; |
11550 | + | |
11551 | + /* | |
11552 | + * Wait for a reply. Once a reply is processed from midcomms, the | |
10d56c87 | 11553 | + * READY bit will be set and we'll be awoken (dlm_wait_function will |
4bf12011 | 11554 | + * return 0). |
11555 | + */ | |
11556 | + | |
11557 | + if (need_reply) { | |
10d56c87 | 11558 | + error = dlm_wait_function(ls, &rcom_response); |
4bf12011 | 11559 | + if (error) |
11560 | + log_debug(ls, "rcom wait error %d", error); | |
11561 | + } | |
11562 | + | |
11563 | + out: | |
11564 | + clear_bit(LSFL_RECCOMM_WAIT, &ls->ls_flags); | |
11565 | + clear_bit(LSFL_RECCOMM_READY, &ls->ls_flags); | |
11566 | + | |
11567 | + if (need_reply) | |
11568 | + up(&ls->ls_rcom_lock); | |
11569 | + | |
11570 | + return error; | |
11571 | +} | |
11572 | + | |
11573 | +/* | |
11574 | + * Runs in same context as midcomms. | |
11575 | + */ | |
11576 | + | |
10d56c87 | 11577 | +static void rcom_process_message(struct dlm_ls *ls, uint32_t nodeid, struct dlm_rcom *rc) |
4bf12011 | 11578 | +{ |
10d56c87 AM |
11579 | + struct dlm_rcom rc_stack; |
11580 | + struct dlm_rcom *reply = NULL; | |
4bf12011 | 11581 | + int status, datalen, maxlen; |
10d56c87 | 11582 | + uint32_t r_nodeid, be_nodeid; |
4bf12011 | 11583 | + |
11584 | + if (!ls) | |
11585 | + return; | |
11586 | + | |
11587 | + rcom_log(ls, nodeid, rc, 0); | |
11588 | + | |
10d56c87 | 11589 | + if (dlm_recovery_stopped(ls) && (rc->rc_subcmd != RECCOMM_STATUS)) { |
4bf12011 | 11590 | + log_error(ls, "ignoring recovery message %x from %u", |
11591 | + rc->rc_subcmd, nodeid); | |
11592 | + return; | |
11593 | + } | |
11594 | + | |
11595 | + switch (rc->rc_subcmd) { | |
11596 | + | |
11597 | + case RECCOMM_STATUS: | |
11598 | + | |
10d56c87 | 11599 | + memset(&rc_stack, 0, sizeof(struct dlm_rcom)); |
4bf12011 | 11600 | + reply = &rc_stack; |
11601 | + | |
11602 | + reply->rc_header.rh_cmd = GDLM_REMCMD_RECOVERREPLY; | |
11603 | + reply->rc_header.rh_lockspace = rc->rc_header.rh_lockspace; | |
11604 | + reply->rc_subcmd = rc->rc_subcmd; | |
11605 | + reply->rc_msgid = rc->rc_msgid; | |
11606 | + reply->rc_buf[0] = 0; | |
11607 | + | |
11608 | + if (test_bit(LSFL_RESDIR_VALID, &ls->ls_flags)) | |
11609 | + reply->rc_buf[0] |= RESDIR_VALID; | |
11610 | + | |
11611 | + if (test_bit(LSFL_ALL_RESDIR_VALID, &ls->ls_flags)) | |
11612 | + reply->rc_buf[0] |= RESDIR_ALL_VALID; | |
11613 | + | |
11614 | + if (test_bit(LSFL_NODES_VALID, &ls->ls_flags)) | |
11615 | + reply->rc_buf[0] |= NODES_VALID; | |
11616 | + | |
11617 | + if (test_bit(LSFL_ALL_NODES_VALID, &ls->ls_flags)) | |
11618 | + reply->rc_buf[0] |= NODES_ALL_VALID; | |
11619 | + | |
11620 | + reply->rc_datalen = 1; | |
11621 | + reply->rc_header.rh_length = | |
10d56c87 | 11622 | + sizeof(struct dlm_rcom) + reply->rc_datalen - 1; |
4bf12011 | 11623 | + |
11624 | + log_debug(ls, "rcom status %x to %u", reply->rc_buf[0], nodeid); | |
11625 | + break; | |
11626 | + | |
11627 | + case RECCOMM_RECOVERNAMES: | |
11628 | + | |
11629 | + reply = allocate_rcom_buffer(ls); | |
10d56c87 AM |
11630 | + DLM_ASSERT(reply,); |
11631 | + maxlen = dlm_config.buffer_size - sizeof(struct dlm_rcom); | |
4bf12011 | 11632 | + |
11633 | + reply->rc_header.rh_cmd = GDLM_REMCMD_RECOVERREPLY; | |
11634 | + reply->rc_header.rh_lockspace = rc->rc_header.rh_lockspace; | |
11635 | + reply->rc_subcmd = rc->rc_subcmd; | |
11636 | + reply->rc_msgid = rc->rc_msgid; | |
11637 | + | |
11638 | + /* | |
11639 | + * The other node wants a bunch of resource names. The name of | |
11640 | + * the resource to begin with is in rc->rc_buf. | |
11641 | + */ | |
11642 | + | |
10d56c87 AM |
11643 | + datalen = dlm_dir_rebuild_send(ls, rc->rc_buf, rc->rc_datalen, |
11644 | + reply->rc_buf, maxlen, nodeid); | |
4bf12011 | 11645 | + |
11646 | + reply->rc_datalen = datalen; | |
11647 | + reply->rc_header.rh_length = | |
10d56c87 | 11648 | + sizeof(struct dlm_rcom) + reply->rc_datalen - 1; |
4bf12011 | 11649 | + |
11650 | + log_debug(ls, "rcom names len %d to %u id %u", datalen, nodeid, | |
11651 | + reply->rc_msgid); | |
11652 | + break; | |
11653 | + | |
11654 | + case RECCOMM_GETMASTER: | |
11655 | + | |
11656 | + reply = allocate_rcom_buffer(ls); | |
10d56c87 | 11657 | + DLM_ASSERT(reply,); |
4bf12011 | 11658 | + |
11659 | + reply->rc_header.rh_cmd = GDLM_REMCMD_RECOVERREPLY; | |
11660 | + reply->rc_header.rh_lockspace = rc->rc_header.rh_lockspace; | |
11661 | + reply->rc_subcmd = rc->rc_subcmd; | |
11662 | + reply->rc_msgid = rc->rc_msgid; | |
11663 | + | |
11664 | + /* | |
11665 | + * The other node wants to know the master of a named resource. | |
11666 | + */ | |
11667 | + | |
10d56c87 AM |
11668 | + status = dlm_dir_lookup_recovery(ls, nodeid, rc->rc_buf, |
11669 | + rc->rc_datalen, &r_nodeid); | |
4bf12011 | 11670 | + if (status != 0) { |
11671 | + free_rcom_buffer(reply); | |
11672 | + reply = NULL; | |
11673 | + return; | |
11674 | + } | |
10d56c87 | 11675 | + be_nodeid = cpu_to_be32(r_nodeid); |
4bf12011 | 11676 | + memcpy(reply->rc_buf, &be_nodeid, sizeof(uint32_t)); |
11677 | + reply->rc_datalen = sizeof(uint32_t); | |
11678 | + reply->rc_header.rh_length = | |
10d56c87 | 11679 | + sizeof(struct dlm_rcom) + reply->rc_datalen - 1; |
4bf12011 | 11680 | + break; |
11681 | + | |
11682 | + case RECCOMM_BULKLOOKUP: | |
11683 | + | |
11684 | + reply = allocate_rcom_buffer(ls); | |
10d56c87 | 11685 | + DLM_ASSERT(reply,); |
4bf12011 | 11686 | + |
11687 | + reply->rc_header.rh_cmd = GDLM_REMCMD_RECOVERREPLY; | |
11688 | + reply->rc_header.rh_lockspace = rc->rc_header.rh_lockspace; | |
11689 | + reply->rc_subcmd = rc->rc_subcmd; | |
11690 | + reply->rc_msgid = rc->rc_msgid; | |
11691 | + | |
11692 | + /* | |
11693 | + * This is a bulk version of the above and just returns a | |
11694 | + * buffer full of node ids to match the resources | |
11695 | + */ | |
11696 | + | |
11697 | + datalen = bulk_master_lookup(ls, nodeid, rc->rc_buf, | |
11698 | + rc->rc_datalen, reply->rc_buf); | |
11699 | + if (datalen < 0) { | |
11700 | + free_rcom_buffer(reply); | |
11701 | + reply = NULL; | |
11702 | + return; | |
11703 | + } | |
11704 | + | |
11705 | + reply->rc_datalen = datalen; | |
11706 | + reply->rc_header.rh_length = | |
10d56c87 | 11707 | + sizeof(struct dlm_rcom) + reply->rc_datalen - 1; |
4bf12011 | 11708 | + break; |
11709 | + | |
11710 | + /* | |
11711 | + * These RECCOMM messages don't need replies. | |
11712 | + */ | |
11713 | + | |
11714 | + case RECCOMM_NEWLOCKS: | |
11715 | + rebuild_rsbs_recv(ls, nodeid, rc->rc_buf, rc->rc_datalen); | |
11716 | + break; | |
11717 | + | |
11718 | + case RECCOMM_NEWLOCKIDS: | |
11719 | + rebuild_rsbs_lkids_recv(ls, nodeid, rc->rc_buf, rc->rc_datalen); | |
11720 | + break; | |
11721 | + | |
11722 | + case RECCOMM_REMRESDATA: | |
10d56c87 | 11723 | + remove_resdata(ls, nodeid, rc->rc_buf, rc->rc_datalen); |
4bf12011 | 11724 | + break; |
11725 | + | |
11726 | + default: | |
10d56c87 | 11727 | + DLM_ASSERT(0, printk("cmd=%x\n", rc->rc_subcmd);); |
4bf12011 | 11728 | + } |
11729 | + | |
11730 | + if (reply) { | |
11731 | + if (nodeid == our_nodeid()) { | |
10d56c87 | 11732 | + DLM_ASSERT(rc == ls->ls_rcom,); |
4bf12011 | 11733 | + memcpy(rc->rc_buf, reply->rc_buf, reply->rc_datalen); |
11734 | + rc->rc_datalen = reply->rc_datalen; | |
11735 | + } else { | |
11736 | + midcomms_send_message(nodeid, | |
10d56c87 | 11737 | + (struct dlm_header *) reply, |
4bf12011 | 11738 | + GFP_KERNEL); |
11739 | + } | |
11740 | + | |
11741 | + if (reply != &rc_stack) | |
11742 | + free_rcom_buffer(reply); | |
11743 | + } | |
11744 | +} | |
11745 | + /* Complete a synchronous exchange: if LSFL_RECCOMM_WAIT is set and the reply msgid matches ls->ls_rcom, copy the reply data into ls->ls_rcom, set LSFL_RECCOMM_READY and wake ls_wait_general; otherwise log the reply and drop it. NOTE(review): reply->rc_datalen is copied unchecked - confirm it cannot exceed the ls_rcom buffer. */ | |
10d56c87 AM |
11746 | +static void process_reply_sync(struct dlm_ls *ls, uint32_t nodeid, |
11747 | + struct dlm_rcom *reply) | |
4bf12011 | 11748 | +{ |
10d56c87 | 11749 | + struct dlm_rcom *rc = ls->ls_rcom; |
4bf12011 | 11750 | + |
11751 | + if (!test_bit(LSFL_RECCOMM_WAIT, &ls->ls_flags)) { | |
11752 | + log_error(ls, "unexpected rcom reply nodeid=%u", nodeid); | |
11753 | + return; | |
11754 | + } | |
11755 | + | |
11756 | + if (reply->rc_msgid != le32_to_cpu(rc->rc_msgid)) { | |
11757 | + log_error(ls, "unexpected rcom msgid %x/%x nodeid=%u", | |
11758 | + reply->rc_msgid, le32_to_cpu(rc->rc_msgid), nodeid); | |
11759 | + return; | |
11760 | + } | |
11761 | + | |
11762 | + memcpy(rc->rc_buf, reply->rc_buf, reply->rc_datalen); | |
11763 | + rc->rc_datalen = reply->rc_datalen; | |
11764 | + | |
11765 | + /* | |
11766 | + * Tell the thread waiting in rcom_send_message() that it can go ahead. | |
11767 | + */ | |
11768 | + | |
11769 | + set_bit(LSFL_RECCOMM_READY, &ls->ls_flags); | |
11770 | + wake_up(&ls->ls_wait_general); | |
11771 | +} | |
11772 | + /* Pass an asynchronous reply (buffer, length and msgid) to restbl_rsb_update_recv(); its return value is ignored. */ | |
10d56c87 AM |
11773 | +static void process_reply_async(struct dlm_ls *ls, uint32_t nodeid, |
11774 | + struct dlm_rcom *reply) | |
4bf12011 | 11775 | +{ |
11776 | + restbl_rsb_update_recv(ls, nodeid, reply->rc_buf, reply->rc_datalen, | |
11777 | + reply->rc_msgid); | |
11778 | +} | |
11779 | + | |
11780 | +/* | |
11781 | + * Dispatch a recovery reply on rc_subcmd: GETMASTER goes to process_reply_async(); STATUS, NEWLOCKS, NEWLOCKIDS and RECOVERNAMES go to process_reply_sync(); anything else is logged. Replies are logged and dropped while recovery is stopped. | |
11782 | + * Runs in same context as midcomms. | |
11783 | + */ | |
10d56c87 AM |
11784 | +static void rcom_process_reply(struct dlm_ls *ls, uint32_t nodeid, |
11785 | + struct dlm_rcom *reply) | |
4bf12011 | 11786 | +{ |
10d56c87 | 11787 | + if (dlm_recovery_stopped(ls)) { |
4bf12011 | 11788 | + log_error(ls, "ignoring recovery reply %x from %u", |
11789 | + reply->rc_subcmd, nodeid); | |
11790 | + return; | |
11791 | + } | |
11792 | + | |
11793 | + switch (reply->rc_subcmd) { | |
11794 | + case RECCOMM_GETMASTER: | |
11795 | + process_reply_async(ls, nodeid, reply); | |
11796 | + break; | |
11797 | + case RECCOMM_STATUS: | |
11798 | + case RECCOMM_NEWLOCKS: | |
11799 | + case RECCOMM_NEWLOCKIDS: | |
11800 | + case RECCOMM_RECOVERNAMES: | |
11801 | + process_reply_sync(ls, nodeid, reply); | |
11802 | + break; | |
11803 | + default: | |
11804 | + log_error(ls, "unknown rcom reply subcmd=%x nodeid=%u", | |
11805 | + reply->rc_subcmd, nodeid); | |
11806 | + } | |
11807 | +} | |
11808 | + | |
11809 | + /* Answer a recovery message for a lockspace that was not found: builds a RECOVERREPLY echoing the request's lockspace, subcmd and msgid with a single data byte of 0. Returns 0, or -ENOMEM if lowcomms_get_buffer() gave no buffer. */ | |
10d56c87 | 11810 | +static int send_ls_not_ready(uint32_t nodeid, struct dlm_header *header) |
4bf12011 | 11811 | +{ |
11812 | + struct writequeue_entry *wq; | |
10d56c87 AM |
11813 | + struct dlm_rcom *rc = (struct dlm_rcom *) header; |
11814 | + struct dlm_rcom *reply; | |
4bf12011 | 11815 | + |
10d56c87 | 11816 | + wq = lowcomms_get_buffer(nodeid, sizeof(struct dlm_rcom), GFP_KERNEL, |
4bf12011 | 11817 | + (char **)&reply); |
11818 | + if (!wq) | |
11819 | + return -ENOMEM; | |
11820 | + | |
11821 | + reply->rc_header.rh_cmd = GDLM_REMCMD_RECOVERREPLY; | |
11822 | + reply->rc_header.rh_lockspace = rc->rc_header.rh_lockspace; | |
11823 | + reply->rc_subcmd = rc->rc_subcmd; | |
11824 | + reply->rc_msgid = rc->rc_msgid; | |
11825 | + reply->rc_buf[0] = 0; | |
11826 | + | |
11827 | + reply->rc_datalen = 1; | |
10d56c87 | 11828 | + reply->rc_header.rh_length = sizeof(struct dlm_rcom) + reply->rc_datalen - 1; |
4bf12011 | 11829 | + |
10d56c87 | 11830 | + midcomms_send_buffer((struct dlm_header *)reply, wq); |
4bf12011 | 11831 | + return 0; |
11832 | +} | |
11833 | + | |
11834 | + | |
11835 | +/* | |
11836 | + * Runs in same context as midcomms. Both recovery requests and recovery | |
11837 | + * replies come through this function and are dispatched on rh_cmd; requests are processed with ls_rec_rsblist held for read. | |
11838 | + */ | |
11839 | + | |
10d56c87 | 11840 | +void process_recovery_comm(uint32_t nodeid, struct dlm_header *header) |
4bf12011 | 11841 | +{ |
10d56c87 AM |
11842 | + struct dlm_ls *ls = find_lockspace_by_global_id(header->rh_lockspace); |
11843 | + struct dlm_rcom *rc = (struct dlm_rcom *) header; | |
4bf12011 | 11844 | + |
11845 | + /* If the lockspace doesn't exist then still send a status message | |
11846 | + back; it's possible that it just doesn't have its global_id | |
11847 | + yet. The result of the send is ignored. */ | |
11848 | + if (!ls) { | |
11849 | + send_ls_not_ready(nodeid, header); | |
11850 | + return; | |
11851 | + } | |
11852 | + | |
11853 | + switch (header->rh_cmd) { | |
11854 | + case GDLM_REMCMD_RECOVERMESSAGE: | |
11855 | + down_read(&ls->ls_rec_rsblist); | |
11856 | + rcom_process_message(ls, nodeid, rc); | |
11857 | + up_read(&ls->ls_rec_rsblist); | |
11858 | + break; | |
11859 | + | |
11860 | + case GDLM_REMCMD_RECOVERREPLY: | |
11861 | + rcom_process_reply(ls, nodeid, rc); | |
11862 | + break; | |
11863 | + | |
11864 | + default: | |
10d56c87 | 11865 | + DLM_ASSERT(0, printk("cmd=%x\n", header->rh_cmd);); |
4bf12011 | 11866 | + } |
11867 | +} | |
11868 | + | |
11869 | diff -urN linux-orig/cluster/dlm/reccomms.h linux-patched/cluster/dlm/reccomms.h | |
11870 | --- linux-orig/cluster/dlm/reccomms.h 1970-01-01 07:30:00.000000000 +0730 | |
10d56c87 | 11871 | +++ linux-patched/cluster/dlm/reccomms.h 2004-07-13 18:57:22.000000000 +0800 |
4bf12011 | 11872 | @@ -0,0 +1,37 @@ |
11873 | +/****************************************************************************** | |
11874 | +******************************************************************************* | |
11875 | +** | |
11876 | +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | |
11877 | +** Copyright (C) 2004 Red Hat, Inc. All rights reserved. | |
11878 | +** | |
11879 | +** This copyrighted material is made available to anyone wishing to use, | |
11880 | +** modify, copy, or redistribute it subject to the terms and conditions | |
11881 | +** of the GNU General Public License v.2. | |
11882 | +** | |
11883 | +******************************************************************************* | |
11884 | +******************************************************************************/ | |
11885 | + | |
11886 | +#ifndef __RECCOMMS_DOT_H__ | |
11887 | +#define __RECCOMMS_DOT_H__ | |
11888 | + | |
11889 | +/* Bit flags; presumably the recovery status bits reported in the first data byte of a RECCOMM_STATUS reply - TODO confirm */ | |
11890 | + | |
11891 | +#define RESDIR_VALID (1) | |
11892 | +#define RESDIR_ALL_VALID (2) | |
11893 | +#define NODES_VALID (4) | |
11894 | +#define NODES_ALL_VALID (8) | |
11895 | + /* Sub-command codes carried in rc_subcmd of recovery messages and replies */ | |
11896 | +#define RECCOMM_STATUS (1) | |
11897 | +#define RECCOMM_RECOVERNAMES (2) | |
11898 | +#define RECCOMM_GETMASTER (3) | |
11899 | +#define RECCOMM_BULKLOOKUP (4) | |
11900 | +#define RECCOMM_NEWLOCKS (5) | |
11901 | +#define RECCOMM_NEWLOCKIDS (6) | |
11902 | +#define RECCOMM_REMRESDATA (7) | |
11903 | + | |
10d56c87 AM |
11904 | +int rcom_send_message(struct dlm_ls *ls, uint32_t nodeid, int type, |
11905 | + struct dlm_rcom *rc, int need_reply); | |
11906 | +void process_recovery_comm(uint32_t nodeid, struct dlm_header *header); | |
11907 | +void rcom_log_clear(struct dlm_ls *ls); | |
4bf12011 | 11908 | + |
11909 | +#endif /* __RECCOMMS_DOT_H__ */ | |
11910 | diff -urN linux-orig/cluster/dlm/recover.c linux-patched/cluster/dlm/recover.c | |
11911 | --- linux-orig/cluster/dlm/recover.c 1970-01-01 07:30:00.000000000 +0730 | |
10d56c87 AM |
11912 | +++ linux-patched/cluster/dlm/recover.c 2004-07-13 18:57:22.000000000 +0800 |
11913 | @@ -0,0 +1,610 @@ | |
4bf12011 | 11914 | +/****************************************************************************** |
11915 | +******************************************************************************* | |
11916 | +** | |
11917 | +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | |
11918 | +** Copyright (C) 2004 Red Hat, Inc. All rights reserved. | |
11919 | +** | |
11920 | +** This copyrighted material is made available to anyone wishing to use, | |
11921 | +** modify, copy, or redistribute it subject to the terms and conditions | |
11922 | +** of the GNU General Public License v.2. | |
11923 | +** | |
11924 | +******************************************************************************* | |
11925 | +******************************************************************************/ | |
11926 | + | |
11927 | +#include "dlm_internal.h" | |
11928 | +#include "reccomms.h" | |
11929 | +#include "dir.h" | |
11930 | +#include "locking.h" | |
11931 | +#include "rsb.h" | |
11932 | +#include "lockspace.h" | |
11933 | +#include "lkb.h" | |
11934 | +#include "nodes.h" | |
11935 | +#include "config.h" | |
11936 | +#include "ast.h" | |
11937 | +#include "memory.h" | |
11938 | + | |
11939 | +/* | |
11940 | + * Called in recovery routines to check whether the recovery process has been | |
11941 | + * interrupted/stopped by another transition. A recovery in progress will abort | |
11942 | + * if the lockspace is "stopped" so that a new recovery process can start from | |
11943 | + * the beginning when the lockspace is "started" again. Returns the result of testing LSFL_LS_STOP in ls_flags, i.e. non-zero when stopped. | |
11944 | + */ | |
11945 | + | |
10d56c87 | 11946 | +int dlm_recovery_stopped(struct dlm_ls *ls) |
4bf12011 | 11947 | +{ |
11948 | + return test_bit(LSFL_LS_STOP, &ls->ls_flags); | |
11949 | +} | |
11950 | + /* Timer callback installed by dlm_wait_function(): data is the lockspace pointer; wakes ls_wait_general so the waiter re-evaluates its condition. */ | |
10d56c87 | 11951 | +static void dlm_wait_timer_fn(unsigned long data) |
4bf12011 | 11952 | +{ |
10d56c87 | 11953 | + struct dlm_ls *ls = (struct dlm_ls *) data; |
4bf12011 | 11954 | + |
11955 | + wake_up(&ls->ls_wait_general); | |
11956 | +} | |
11957 | + | |
11958 | +/* | |
11959 | + * Wait until given function returns non-zero or lockspace is stopped (LS_STOP | |
11960 | + * set due to failure of a node in ls_nodes). When another function thinks it | |
11961 | + * could have completed the waited-on task, they should wake up ls_wait_general | |
11962 | + * to get an immediate response rather than waiting for the timer to detect the | |
11963 | + * result. A timer wakes us up periodically while waiting to see if we should | |
11964 | + * abort due to a node failure. Returns 0 once testfn() is satisfied, -1 if the lockspace was stopped first. | |
11965 | + */ | |
11966 | + | |
10d56c87 | 11967 | +int dlm_wait_function(struct dlm_ls *ls, int (*testfn) (struct dlm_ls * ls)) |
4bf12011 | 11968 | +{ |
11969 | + struct timer_list timer; | |
11970 | + int error = 0; | |
11971 | + | |
11972 | + init_timer(&timer); | |
10d56c87 | 11973 | + timer.function = dlm_wait_timer_fn; |
4bf12011 | 11974 | + timer.data = (unsigned long) ls; |
11975 | + | |
11976 | + for (;;) { | |
11977 | + mod_timer(&timer, jiffies + (5 * HZ)); | |
11978 | + | |
11979 | + wchan_cond_sleep_intr(ls->ls_wait_general, | |
11980 | + !testfn(ls) && | |
11981 | + !test_bit(LSFL_LS_STOP, &ls->ls_flags)); | |
11982 | + | |
11983 | + /* The timer lives on our stack: wait for a running handler to finish before the frame can go away. */ | |
11984 | + del_timer_sync(&timer); | |
11985 | + | |
11986 | + if (testfn(ls)) | |
11987 | + break; | |
11988 | + | |
11989 | + if (test_bit(LSFL_LS_STOP, &ls->ls_flags)) { | |
11990 | + error = -1; | |
11991 | + break; | |
11992 | + } | |
11993 | + } | |
11994 | + | |
11995 | + return error; | |
11996 | +} | |
11997 | + /* Poll each node on ls_nodes with RECCOMM_STATUS until the first reply byte has a bit of wait_status set, sleeping HZ/2 between polls. Returns 0 on success, the non-zero dlm_recovery_stopped() value if recovery was stopped, or the rcom_send_message() error. */ | |
10d56c87 | 11998 | +int dlm_wait_status_all(struct dlm_ls *ls, unsigned int wait_status) |
4bf12011 | 11999 | +{ |
10d56c87 AM |
12000 | + struct dlm_rcom rc_stack, *rc; |
12001 | + struct dlm_csb *csb; | |
4bf12011 | 12002 | + int status; |
12003 | + int error = 0; | |
12004 | + | |
10d56c87 | 12005 | + memset(&rc_stack, 0, sizeof(struct dlm_rcom)); |
4bf12011 | 12006 | + rc = &rc_stack; |
12007 | + rc->rc_datalen = 0; | |
12008 | + | |
10d56c87 | 12009 | + list_for_each_entry(csb, &ls->ls_nodes, list) { |
4bf12011 | 12010 | + for (;;) { |
10d56c87 | 12011 | + error = dlm_recovery_stopped(ls); |
4bf12011 | 12012 | + if (error) |
12013 | + goto out; | |
12014 | + | |
10d56c87 | 12015 | + error = rcom_send_message(ls, csb->node->nodeid, |
4bf12011 | 12016 | + RECCOMM_STATUS, rc, 1); |
12017 | + if (error) | |
12018 | + goto out; | |
12019 | + | |
12020 | + status = rc->rc_buf[0]; | |
12021 | + if (status & wait_status) | |
12022 | + break; | |
12023 | + else { | |
12024 | + set_current_state(TASK_INTERRUPTIBLE); | |
12025 | + schedule_timeout(HZ >> 1); | |
12026 | + } | |
12027 | + } | |
12028 | + } | |
12029 | + | |
12030 | + out: | |
12031 | + return error; | |
12032 | +} | |
12033 | + /* Same polling loop as dlm_wait_status_all() but against the single node ls_low_nodeid. Returns 0 on success, the non-zero dlm_recovery_stopped() value if recovery was stopped, or the rcom_send_message() error. */ | |
10d56c87 | 12034 | +int dlm_wait_status_low(struct dlm_ls *ls, unsigned int wait_status) |
4bf12011 | 12035 | +{ |
10d56c87 | 12036 | + struct dlm_rcom rc_stack, *rc; |
4bf12011 | 12037 | + uint32_t nodeid = ls->ls_low_nodeid; |
12038 | + int status; | |
12039 | + int error = 0; | |
12040 | + | |
10d56c87 | 12041 | + memset(&rc_stack, 0, sizeof(struct dlm_rcom)); |
4bf12011 | 12042 | + rc = &rc_stack; |
12043 | + rc->rc_datalen = 0; | |
12044 | + | |
12045 | + for (;;) { | |
10d56c87 | 12046 | + error = dlm_recovery_stopped(ls); |
4bf12011 | 12047 | + if (error) |
12048 | + goto out; | |
12049 | + | |
12050 | + error = rcom_send_message(ls, nodeid, RECCOMM_STATUS, rc, 1); | |
12051 | + if (error) | |
12052 | + break; | |
12053 | + | |
12054 | + status = rc->rc_buf[0]; | |
12055 | + if (status & wait_status) | |
12056 | + break; | |
12057 | + else { | |
12058 | + set_current_state(TASK_INTERRUPTIBLE); | |
12059 | + schedule_timeout(HZ >> 1); | |
12060 | + } | |
12061 | + } | |
12062 | + | |
12063 | + out: | |
12064 | + return error; | |
12065 | +} | |
12066 | + /* Remove from one state queue every lkb whose lkb_nodeid is a departed node, releasing the lkb and its rsb reference; returns the number removed. Assumes lkb_duetime is a scalar that is non-zero only while the lkb is on the deadlock queue - TODO confirm. */ | |
10d56c87 | 12067 | +static int purge_queue(struct dlm_ls *ls, struct list_head *queue) |
4bf12011 | 12068 | +{ |
10d56c87 AM |
12069 | + struct dlm_lkb *lkb, *safe; |
12070 | + struct dlm_rsb *rsb; | |
4bf12011 | 12071 | + int count = 0; |
12072 | + | |
12073 | + list_for_each_entry_safe(lkb, safe, queue, lkb_statequeue) { | |
12074 | + if (!lkb->lkb_nodeid) | |
12075 | + continue; | |
12076 | + | |
10d56c87 | 12077 | + DLM_ASSERT(lkb->lkb_flags & GDLM_LKFLG_MSTCPY,); |
4bf12011 | 12078 | + |
12079 | + if (in_nodes_gone(ls, lkb->lkb_nodeid)) { | |
12080 | + list_del(&lkb->lkb_statequeue); | |
12081 | + | |
12082 | + rsb = lkb->lkb_resource; | |
12083 | + /* Test the queue state before lkb_status is cleared, and test the due time itself, not its address. */ | |
12084 | + if (lkb->lkb_status == GDLM_LKSTS_CONVERT | |
12085 | + && lkb->lkb_duetime) | |
12086 | + remove_from_deadlockqueue(lkb); | |
12087 | + lkb->lkb_status = 0; | |
12088 | + | |
12089 | + release_lkb(ls, lkb); | |
12090 | + release_rsb(rsb); | |
12091 | + count++; | |
12092 | + } | |
12093 | + } | |
12094 | + | |
12095 | + return count; | |
12096 | +} | |
12097 | + | |
12098 | +/* | |
12099 | + * Go through local restbl and for each rsb we're master of, clear out any | |
12100 | + * lkb's held by departed nodes. Locks purged from sub-resources are included in the logged total. Always returns 0. | |
12101 | + */ | |
12102 | + | |
10d56c87 | 12103 | +int restbl_lkb_purge(struct dlm_ls *ls) |
4bf12011 | 12104 | +{ |
12105 | + struct list_head *tmp2, *safe2; | |
12106 | + int count = 0; | |
10d56c87 | 12107 | + struct dlm_rsb *rootrsb, *safe, *rsb; |
4bf12011 | 12108 | + |
12109 | + log_all(ls, "purge locks of departed nodes"); | |
12110 | + | |
12111 | + list_for_each_entry_safe(rootrsb, safe, &ls->ls_rootres, res_rootlist) { | |
12112 | + | |
4bf12011 | 12113 | + if (rootrsb->res_nodeid) |
12114 | + continue; | |
12115 | + | |
12116 | + hold_rsb(rootrsb); | |
12117 | + down_write(&rootrsb->res_lock); | |
12118 | + | |
12119 | + /* This traverses the subreslist in reverse order so we purge | |
12120 | + * the children before their parents. */ | |
12121 | + | |
12122 | + for (tmp2 = rootrsb->res_subreslist.prev, safe2 = tmp2->prev; | |
12123 | + tmp2 != &rootrsb->res_subreslist; | |
12124 | + tmp2 = safe2, safe2 = safe2->prev) { | |
10d56c87 | 12125 | + rsb = list_entry(tmp2, struct dlm_rsb, res_subreslist); |
4bf12011 | 12126 | + |
12127 | + hold_rsb(rsb); | |
12128 | + count += purge_queue(ls, &rsb->res_grantqueue); | |
12129 | + count += purge_queue(ls, &rsb->res_convertqueue); | |
12130 | + count += purge_queue(ls, &rsb->res_waitqueue); | |
12131 | + release_rsb(rsb); | |
12132 | + } | |
12133 | + count += purge_queue(ls, &rootrsb->res_grantqueue); | |
12134 | + count += purge_queue(ls, &rootrsb->res_convertqueue); | |
12135 | + count += purge_queue(ls, &rootrsb->res_waitqueue); | |
12136 | + | |
12137 | + up_write(&rootrsb->res_lock); | |
12138 | + release_rsb(rootrsb); | |
12139 | + } | |
12140 | + | |
12141 | + log_all(ls, "purged %d locks", count); | |
12142 | + | |
12143 | + return 0; | |
12144 | +} | |
12145 | + | |
12146 | +/* | |
12147 | + * Grant any locks that have become grantable after a purge. Walks ls_rootres with ls_gap_rsblist held for write, visiting root rsbs with res_nodeid == 0 and their sub-rsbs. Returns 0 after waking astd, or -EINTR (without waking astd) if LSFL_LS_RUN is found clear. | |
12148 | + */ | |
12149 | + | |
10d56c87 | 12150 | +int restbl_grant_after_purge(struct dlm_ls *ls) |
4bf12011 | 12151 | +{ |
10d56c87 | 12152 | + struct dlm_rsb *root, *rsb, *safe; |
4bf12011 | 12153 | + int error = 0; |
12154 | + | |
12155 | + down_write(&ls->ls_gap_rsblist); | |
12156 | + | |
12157 | + list_for_each_entry_safe(root, safe, &ls->ls_rootres, res_rootlist) { | |
12158 | + /* only the rsb master grants locks */ | |
12159 | + if (root->res_nodeid) | |
12160 | + continue; | |
12161 | + | |
12162 | + if (!test_bit(LSFL_LS_RUN, &ls->ls_flags)) { | |
12163 | + log_debug(ls, "restbl_grant_after_purge aborted"); | |
12164 | + error = -EINTR; | |
12165 | + up_write(&ls->ls_gap_rsblist); | |
12166 | + goto out; | |
12167 | + } | |
12168 | + | |
12169 | + down_write(&root->res_lock); | |
12170 | + grant_pending_locks(root); | |
12171 | + up_write(&root->res_lock); | |
12172 | + | |
12173 | + list_for_each_entry(rsb, &root->res_subreslist, res_subreslist){ | |
12174 | + down_write(&rsb->res_lock); | |
12175 | + grant_pending_locks(rsb); | |
12176 | + up_write(&rsb->res_lock); | |
12177 | + } | |
12178 | + } | |
12179 | + up_write(&ls->ls_gap_rsblist); | |
12180 | + wake_astd(); | |
12181 | + out: | |
12182 | + return error; | |
12183 | +} | |
12184 | + | |
12185 | +/* | |
12186 | + * Set the lock master (lkb_nodeid) for all LKBs in a lock queue, skipping those flagged GDLM_LKFLG_MSTCPY | |
12187 | + */ | |
12188 | + | |
12189 | +static void set_lock_master(struct list_head *queue, int nodeid) | |
12190 | +{ | |
10d56c87 | 12191 | + struct dlm_lkb *lkb; |
4bf12011 | 12192 | + |
12193 | + list_for_each_entry(lkb, queue, lkb_statequeue) { | |
12194 | + /* Don't muck around with pre-existing sublocks */ | |
12195 | + if (!(lkb->lkb_flags & GDLM_LKFLG_MSTCPY)) | |
12196 | + lkb->lkb_nodeid = nodeid; | |
12197 | + } | |
12198 | +} | |
12199 | + /* Apply rsb->res_nodeid as the lock master to the grant, convert and wait queues of rsb. */ | |
10d56c87 | 12200 | +static void set_master_lkbs(struct dlm_rsb *rsb) |
4bf12011 | 12201 | +{ |
12202 | + set_lock_master(&rsb->res_grantqueue, rsb->res_nodeid); | |
12203 | + set_lock_master(&rsb->res_convertqueue, rsb->res_nodeid); | |
12204 | + set_lock_master(&rsb->res_waitqueue, rsb->res_nodeid); | |
12205 | +} | |
12206 | + | |
12207 | +/* | |
12208 | + * This rsb struct is now the master so it is responsible for keeping the | |
12209 | + * latest lvb. Find if any current lkb's have an up to date copy of the lvb to | |
12210 | + * be used as the rsb copy. An equivalent step occurs as new lkb's arrive for | |
12211 | + * this rsb in deserialise_lkb. The grant queue is searched before the convert queue; the first match is used, and the copy is skipped if no lvb could be allocated. | |
12212 | + */ | |
12213 | + | |
10d56c87 | 12214 | +static void set_rsb_lvb(struct dlm_rsb *rsb) |
4bf12011 | 12215 | +{ |
10d56c87 | 12216 | + struct dlm_lkb *lkb; |
4bf12011 | 12217 | + |
12218 | + list_for_each_entry(lkb, &rsb->res_grantqueue, lkb_statequeue) { | |
12219 | + | |
12220 | + if (!(lkb->lkb_flags & GDLM_LKFLG_DELETED) && | |
12221 | + (lkb->lkb_flags & GDLM_LKFLG_VALBLK) && | |
12222 | + (lkb->lkb_grmode > DLM_LOCK_NL)) | |
12223 | + { | |
12224 | + if (!rsb->res_lvbptr) | |
12225 | + rsb->res_lvbptr = allocate_lvb(rsb->res_ls); | |
12226 | + if (rsb->res_lvbptr) /* never copy into a failed allocation */ | |
12227 | + memcpy(rsb->res_lvbptr, lkb->lkb_lvbptr, DLM_LVB_LEN); | |
12228 | + return; | |
12229 | + } | |
12230 | + } | |
12231 | + | |
12232 | + list_for_each_entry(lkb, &rsb->res_convertqueue, lkb_statequeue) { | |
12233 | + | |
12234 | + if (!(lkb->lkb_flags & GDLM_LKFLG_DELETED) && | |
12235 | + (lkb->lkb_flags & GDLM_LKFLG_VALBLK) && | |
12236 | + (lkb->lkb_grmode > DLM_LOCK_NL)) | |
12237 | + { | |
12238 | + if (!rsb->res_lvbptr) | |
12239 | + rsb->res_lvbptr = allocate_lvb(rsb->res_ls); | |
12240 | + if (rsb->res_lvbptr) /* never copy into a failed allocation */ | |
12241 | + memcpy(rsb->res_lvbptr, lkb->lkb_lvbptr, DLM_LVB_LEN); | |
12242 | + return; | |
12243 | + } | |
12244 | + } | |
12245 | +} | |
12246 | + | |
12247 | +/* | |
12248 | + * Propagate the new master nodeid to locks, subrsbs, sublocks. If the new master is this node, res_nodeid becomes 0 and the rsb lvb is refreshed first. | |
12249 | + * The NEW_MASTER flag tells rebuild_rsbs_send() which rsb's to consider. | |
12250 | + */ | |
12251 | + | |
10d56c87 | 12252 | +static void set_new_master(struct dlm_rsb *rsb) |
4bf12011 | 12253 | +{ |
10d56c87 | 12254 | + struct dlm_rsb *subrsb; |
4bf12011 | 12255 | + |
12256 | + down_write(&rsb->res_lock); | |
12257 | + | |
12258 | + if (rsb->res_nodeid == our_nodeid()) { | |
12259 | + rsb->res_nodeid = 0; | |
12260 | + set_rsb_lvb(rsb); | |
12261 | + } | |
12262 | + | |
12263 | + set_master_lkbs(rsb); | |
12264 | + | |
12265 | + list_for_each_entry(subrsb, &rsb->res_subreslist, res_subreslist) { | |
12266 | + subrsb->res_nodeid = rsb->res_nodeid; | |
12267 | + set_master_lkbs(subrsb); | |
12268 | + } | |
12269 | + | |
12270 | + up_write(&rsb->res_lock); | |
12271 | + | |
12272 | + set_bit(RESFL_NEW_MASTER, &rsb->res_flags); | |
12273 | +} | |
12274 | + | |
12275 | +/* | |
12276 | + * The recover_list contains all the rsb's for which we've requested the new | |
12277 | + * master nodeid. As replies are returned from the resource directories the | |
12278 | + * rsb's are removed from the list. When the list is empty we're done. | |
12279 | + * | |
12280 | + * The recover_list is later similarly used for all rsb's for which we've sent | |
12281 | + * new lkb's and need to receive new corresponding lkid's. | |
12282 | + */ | |
12283 | + /* Returns non-zero when ls_recover_list is empty; sampled under ls_recover_list_lock. */ | |
10d56c87 | 12284 | +int recover_list_empty(struct dlm_ls *ls) |
4bf12011 | 12285 | +{ |
12286 | + int empty; | |
12287 | + | |
12288 | + spin_lock(&ls->ls_recover_list_lock); | |
12289 | + empty = list_empty(&ls->ls_recover_list); | |
12290 | + spin_unlock(&ls->ls_recover_list_lock); | |
12291 | + | |
12292 | + return empty; | |
12293 | +} | |
12294 | + /* Returns ls_recover_list_count, read under ls_recover_list_lock. */ | |
10d56c87 | 12295 | +int recover_list_count(struct dlm_ls *ls) |
4bf12011 | 12296 | +{ |
12297 | + int count; | |
12298 | + | |
12299 | + spin_lock(&ls->ls_recover_list_lock); | |
12300 | + count = ls->ls_recover_list_count; | |
12301 | + spin_unlock(&ls->ls_recover_list_lock); | |
12302 | + | |
12303 | + return count; | |
12304 | +} | |
12305 | + /* Queue rsb on its lockspace's recover list unless RESFL_RECOVER_LIST is already set; a newly queued rsb is counted and held with hold_rsb(). */ | |
10d56c87 | 12306 | +void recover_list_add(struct dlm_rsb *rsb) |
4bf12011 | 12307 | +{ |
10d56c87 | 12308 | + struct dlm_ls *ls = rsb->res_ls; |
4bf12011 | 12309 | + |
12310 | + spin_lock(&ls->ls_recover_list_lock); | |
12311 | + if (!test_and_set_bit(RESFL_RECOVER_LIST, &rsb->res_flags)) { | |
12312 | + list_add_tail(&rsb->res_recover_list, &ls->ls_recover_list); | |
12313 | + ls->ls_recover_list_count++; | |
12314 | + hold_rsb(rsb); | |
12315 | + } | |
12316 | + spin_unlock(&ls->ls_recover_list_lock); | |
12317 | +} | |
12318 | + /* Take rsb off the recover list, clear RESFL_RECOVER_LIST, decrement the count and release the rsb. NOTE(review): unlike recover_list_add() this does not test the flag first, so the caller must know the rsb is on the list. */ | |
10d56c87 | 12319 | +void recover_list_del(struct dlm_rsb *rsb) |
4bf12011 | 12320 | +{ |
10d56c87 | 12321 | + struct dlm_ls *ls = rsb->res_ls; |
4bf12011 | 12322 | + |
12323 | + spin_lock(&ls->ls_recover_list_lock); | |
12324 | + clear_bit(RESFL_RECOVER_LIST, &rsb->res_flags); | |
12325 | + list_del(&rsb->res_recover_list); | |
12326 | + ls->ls_recover_list_count--; | |
12327 | + spin_unlock(&ls->ls_recover_list_lock); | |
12328 | + | |
12329 | + release_rsb(rsb); | |
12330 | +} | |
12331 | + /* Return the rsb on ls_recover_list whose res_recover_msgid equals msgid, or NULL if there is none. The list lock is dropped before returning and no extra hold is taken here. */ | |
10d56c87 | 12332 | +static struct dlm_rsb *recover_list_find(struct dlm_ls *ls, int msgid) |
4bf12011 | 12333 | +{ |
10d56c87 | 12334 | + struct dlm_rsb *rsb = NULL; |
4bf12011 | 12335 | + |
12336 | + spin_lock(&ls->ls_recover_list_lock); | |
12337 | + | |
12338 | + list_for_each_entry(rsb, &ls->ls_recover_list, res_recover_list) { | |
12339 | + if (rsb->res_recover_msgid == msgid) | |
12340 | + goto rec_found; | |
12341 | + } | |
12342 | + rsb = NULL; | |
12343 | + | |
12344 | + rec_found: | |
12345 | + spin_unlock(&ls->ls_recover_list_lock); | |
12346 | + return rsb; | |
12347 | +} | |
12348 | + /* Compiled out. Empties ls_recover_list and adjusts the count; note that it neither clears RESFL_RECOVER_LIST nor releases the hold taken by recover_list_add(). */ | |
12349 | +#if 0 | |
10d56c87 | 12350 | +static void recover_list_clear(struct dlm_ls *ls) |
4bf12011 | 12351 | +{ |
10d56c87 | 12352 | + struct dlm_rsb *rsb; |
4bf12011 | 12353 | + |
12354 | + | |
12355 | + spin_lock(&ls->ls_recover_list_lock); | |
12356 | + | |
12357 | + while (!list_empty(&ls->ls_recover_list)) { | |
10d56c87 | 12358 | + rsb = list_entry(ls->ls_recover_list.next, struct dlm_rsb, |
4bf12011 | 12359 | + res_recover_list); |
12360 | + list_del(&rsb->res_recover_list); | |
12361 | + ls->ls_recover_list_count--; | |
12362 | + } | |
12363 | + spin_unlock(&ls->ls_recover_list_lock); | |
12364 | + | |
12365 | +} | |
12366 | +#endif | |
12367 | + /* Find the new master of rsb. If this node is the directory node the lookup is done locally and applied at once; otherwise the rsb is put on the recover list and a RECCOMM_GETMASTER request is sent without waiting for the reply. Returns 0 or the lookup/send error. NOTE(review): on a send error the rsb stays on the recover list - confirm a later pass cleans it up. */ | |
10d56c87 | 12368 | +static int rsb_master_lookup(struct dlm_rsb *rsb, struct dlm_rcom *rc) |
4bf12011 | 12369 | +{ |
10d56c87 AM |
12370 | + struct dlm_ls *ls = rsb->res_ls; |
12371 | + uint32_t dir_nodeid, r_nodeid; | |
4bf12011 | 12372 | + int error; |
12373 | + | |
12374 | + dir_nodeid = get_directory_nodeid(rsb); | |
12375 | + | |
12376 | + if (dir_nodeid == our_nodeid()) { | |
10d56c87 AM |
12377 | + error = dlm_dir_lookup_recovery(ls, dir_nodeid, rsb->res_name, |
12378 | + rsb->res_length, &r_nodeid); | |
4bf12011 | 12379 | + if (error) |
12380 | + goto fail; | |
12381 | + | |
10d56c87 | 12382 | + rsb->res_nodeid = r_nodeid; |
4bf12011 | 12383 | + set_new_master(rsb); |
12384 | + } else { | |
12385 | + /* As we are the only thread doing recovery this | |
12386 | + should be safe. If not then we need to use a different | |
12387 | + ID somehow. We must set it in the RSB before rcom_send_message | |
12388 | + completes because we may get a reply quite quickly. | |
12389 | + */ | |
12390 | + rsb->res_recover_msgid = ls->ls_rcom_msgid + 1; | |
12391 | + | |
12392 | + recover_list_add(rsb); | |
12393 | + | |
12394 | + memcpy(rc->rc_buf, rsb->res_name, rsb->res_length); | |
12395 | + rc->rc_datalen = rsb->res_length; | |
12396 | + | |
12397 | + error = rcom_send_message(ls, dir_nodeid, RECCOMM_GETMASTER, | |
12398 | + rc, 0); | |
12399 | + if (error) | |
12400 | + goto fail; | |
12401 | + } | |
12402 | + | |
12403 | + fail: | |
12404 | + return error; | |
12405 | +} | |
12406 | + | |
12407 | +/* | |
12408 | + * Go through local root resources and for each rsb which has a master which | |
12409 | + * has departed, get the new master nodeid from the resdir. The resdir will | |
12410 | + * assign mastery to the first node to look up the new master. That means | |
12411 | + * we'll discover in this lookup if we're the new master of any rsb's. | |
12412 | + * | |
12413 | + * We fire off all the resdir requests individually and asynchronously to the | |
12414 | + * correct resdir node. The replies are processed in restbl_rsb_update_recv(). Returns 0 once the recover list has drained, -ENOMEM if no rcom buffer is available, or the first non-zero result from dlm_recovery_stopped(), rsb_master_lookup() or dlm_wait_function(). | |
12415 | + */ | |
12416 | + | |
10d56c87 | 12417 | +int restbl_rsb_update(struct dlm_ls *ls) |
4bf12011 | 12418 | +{ |
10d56c87 AM |
12419 | + struct dlm_rsb *rsb, *safe; |
12420 | + struct dlm_rcom *rc; | |
4bf12011 | 12421 | + int error = -ENOMEM; |
12422 | + int count = 0; | |
12423 | + | |
12424 | + log_all(ls, "update remastered resources"); | |
12425 | + | |
12426 | + rc = allocate_rcom_buffer(ls); | |
12427 | + if (!rc) | |
12428 | + goto out; | |
12429 | + | |
12430 | + list_for_each_entry_safe(rsb, safe, &ls->ls_rootres, res_rootlist) { | |
12431 | + if (!rsb->res_nodeid) | |
12432 | + continue; | |
12433 | + | |
10d56c87 | 12434 | + error = dlm_recovery_stopped(ls); |
4bf12011 | 12435 | + if (error) |
12436 | + goto out_free; | |
12437 | + | |
12438 | + if (in_nodes_gone(ls, rsb->res_nodeid)) { | |
12439 | + error = rsb_master_lookup(rsb, rc); | |
12440 | + if (error) | |
12441 | + goto out_free; | |
12442 | + count++; | |
12443 | + } | |
12444 | + } | |
12445 | + | |
10d56c87 | 12446 | + error = dlm_wait_function(ls, &recover_list_empty); |
4bf12011 | 12447 | + |
12448 | + log_all(ls, "updated %d resources", count); | |
12449 | + | |
12450 | + out_free: | |
12451 | + free_rcom_buffer(rc); | |
12452 | + | |
12453 | + out: | |
12454 | + return error; | |
12455 | +} | |
12456 | + /* Apply a master-lookup reply: find the rsb waiting on msgid, take the big-endian nodeid from the first four bytes of buf as its new master, drop it from the recover list and wake the waiter once the list is empty. Always returns 0. NOTE(review): length is not checked before the four bytes are read - confirm senders always supply them. */ | |
10d56c87 AM |
12457 | +int restbl_rsb_update_recv(struct dlm_ls *ls, uint32_t nodeid, char *buf, |
12458 | + int length, int msgid) | |
4bf12011 | 12459 | +{ |
10d56c87 | 12460 | + struct dlm_rsb *rsb; |
4bf12011 | 12461 | + uint32_t be_nodeid; |
12462 | + | |
12463 | + rsb = recover_list_find(ls, msgid); | |
12464 | + if (!rsb) { | |
12465 | + log_error(ls, "restbl_rsb_update_recv rsb not found %d", msgid); | |
12466 | + goto out; | |
12467 | + } | |
12468 | + | |
12469 | + memcpy(&be_nodeid, buf, sizeof(uint32_t)); | |
12470 | + rsb->res_nodeid = be32_to_cpu(be_nodeid); | |
12471 | + set_new_master(rsb); | |
12472 | + recover_list_del(rsb); | |
12473 | + | |
12474 | + if (recover_list_empty(ls)) | |
12475 | + wake_up(&ls->ls_wait_general); | |
12476 | + | |
12477 | + out: | |
12478 | + return 0; | |
12479 | +} | |
12480 | + | |
12481 | +/* | |
12482 | + * Serves RECCOMM_BULKLOOKUP requests in rcom_process_message(); the original note here says the function is no longer used. | |
12483 | + * Returns the number of bytes written to outbuf, or -1 if a lookup fails or a packed name runs past the end of inbuf. | |
12484 | + */ | |
10d56c87 | 12485 | +int bulk_master_lookup(struct dlm_ls *ls, int nodeid, char *inbuf, int inlen, |
4bf12011 | 12486 | + char *outbuf) |
12487 | +{ | |
12488 | + char *inbufptr, *outbufptr; | |
12489 | + | |
12490 | + /* | |
12491 | + * The other node wants nodeids matching the resource names in inbuf. | |
12492 | + * The resource names are packed into inbuf as | |
12493 | + * [len1][name1][len2][name2]... where lenX is 1 byte and nameX is | |
12494 | + * lenX bytes. Matching nodeids are packed into outbuf in order | |
12495 | + * [nodeid1][nodeid2]... | |
12496 | + */ | |
12497 | + | |
12498 | + inbufptr = inbuf; | |
12499 | + outbufptr = outbuf; | |
12500 | + | |
12501 | + while (inbufptr < inbuf + inlen) { | |
10d56c87 | 12502 | + uint32_t r_nodeid, be_nodeid; |
4bf12011 | 12503 | + int status, namelen = (unsigned char) *inbufptr; /* length byte is unsigned */ |
12504 | + if (namelen >= inbuf + inlen - inbufptr) /* name would overrun inbuf */ | |
10d56c87 AM |
12505 | + goto fail; |
12506 | + status = dlm_dir_lookup_recovery(ls, nodeid, inbufptr + 1, | |
4bf12011 | 12507 | + namelen, &r_nodeid); |
12508 | + if (status != 0) | |
12509 | + goto fail; | |
12510 | + inbufptr += namelen + 1; | |
12511 | + | |
10d56c87 | 12512 | + be_nodeid = cpu_to_be32(r_nodeid); |
4bf12011 | 12513 | + memcpy(outbufptr, &be_nodeid, sizeof(uint32_t)); |
12514 | + outbufptr += sizeof(uint32_t); | |
12515 | + | |
12516 | + /* add assertion that outbufptr - outbuf is not > than ... (the capacity of outbuf is not passed in) */ | |
12517 | + } | |
12518 | + | |
12519 | + return (outbufptr - outbuf); | |
12520 | + | |
12521 | + fail: | |
12522 | + return -1; | |
12523 | +} | |
12524 | diff -urN linux-orig/cluster/dlm/recover.h linux-patched/cluster/dlm/recover.h | |
12525 | --- linux-orig/cluster/dlm/recover.h 1970-01-01 07:30:00.000000000 +0730 | |
10d56c87 AM |
12526 | +++ linux-patched/cluster/dlm/recover.h 2004-07-13 18:57:22.000000000 +0800 |
12527 | @@ -0,0 +1,33 @@ | |
4bf12011 | 12528 | +/****************************************************************************** |
12529 | +******************************************************************************* | |
12530 | +** | |
12531 | +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | |
12532 | +** Copyright (C) 2004 Red Hat, Inc. All rights reserved. | |
12533 | +** | |
12534 | +** This copyrighted material is made available to anyone wishing to use, | |
12535 | +** modify, copy, or redistribute it subject to the terms and conditions | |
12536 | +** of the GNU General Public License v.2. | |
12537 | +** | |
12538 | +******************************************************************************* | |
12539 | +******************************************************************************/ | |
12540 | + | |
12541 | +#ifndef __RECOVER_DOT_H__ | |
12542 | +#define __RECOVER_DOT_H__ | |
12543 | + | |
10d56c87 AM |
12544 | +int dlm_wait_function(struct dlm_ls *ls, int (*testfn) (struct dlm_ls * ls)); |
12545 | +int dlm_wait_status_all(struct dlm_ls *ls, unsigned int wait_status); | |
12546 | +int dlm_wait_status_low(struct dlm_ls *ls, unsigned int wait_status); | |
12547 | +int dlm_recovery_stopped(struct dlm_ls *ls); | |
12548 | +int recover_list_empty(struct dlm_ls *ls); | |
12549 | +int recover_list_count(struct dlm_ls *ls); | |
12550 | +void recover_list_add(struct dlm_rsb *rsb); | |
12551 | +void recover_list_del(struct dlm_rsb *rsb); | |
12552 | +int restbl_lkb_purge(struct dlm_ls *ls); | |
12553 | +void restbl_grant_after_purge(struct dlm_ls *ls); | |
12554 | +int restbl_rsb_update(struct dlm_ls *ls); | |
12555 | +int restbl_rsb_update_recv(struct dlm_ls *ls, int nodeid, char *buf, int len, | |
4bf12011 | 12556 | + int msgid); |
10d56c87 | 12557 | +int bulk_master_lookup(struct dlm_ls *ls, int nodeid, char *inbuf, int inlen, |
4bf12011 | 12558 | + char *outbuf); |
12559 | + | |
12560 | +#endif /* __RECOVER_DOT_H__ */ | |
12561 | diff -urN linux-orig/cluster/dlm/recoverd.c linux-patched/cluster/dlm/recoverd.c | |
12562 | --- linux-orig/cluster/dlm/recoverd.c 1970-01-01 07:30:00.000000000 +0730 | |
10d56c87 AM |
12563 | +++ linux-patched/cluster/dlm/recoverd.c 2004-07-13 18:57:22.000000000 +0800 |
12564 | @@ -0,0 +1,693 @@ | |
4bf12011 | 12565 | +/****************************************************************************** |
12566 | +******************************************************************************* | |
12567 | +** | |
12568 | +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | |
12569 | +** Copyright (C) 2004 Red Hat, Inc. All rights reserved. | |
12570 | +** | |
12571 | +** This copyrighted material is made available to anyone wishing to use, | |
12572 | +** modify, copy, or redistribute it subject to the terms and conditions | |
12573 | +** of the GNU General Public License v.2. | |
12574 | +** | |
12575 | +******************************************************************************* | |
12576 | +******************************************************************************/ | |
12577 | + | |
12578 | +#include "dlm_internal.h" | |
12579 | +#include "nodes.h" | |
12580 | +#include "dir.h" | |
12581 | +#include "ast.h" | |
12582 | +#include "recover.h" | |
12583 | +#include "lockspace.h" | |
12584 | +#include "lowcomms.h" | |
12585 | +#include "lockqueue.h" | |
12586 | +#include "lkb.h" | |
12587 | +#include "rebuild.h" | |
12588 | + | |
12589 | +/* | |
12590 | + * next_move actions | |
12591 | + */ | |
12592 | + | |
12593 | +#define DO_STOP (1) | |
12594 | +#define DO_START (2) | |
12595 | +#define DO_FINISH (3) | |
12596 | +#define DO_FINISH_STOP (4) | |
12597 | +#define DO_FINISH_START (5) | |
12598 | + | |
12599 | +/* | |
12600 | + * recoverd_flags for thread | |
12601 | + */ | |
12602 | + | |
12603 | +#define THREAD_STOP (0) | |
12604 | + | |
12605 | +/* | |
12606 | + * local thread variables | |
12607 | + */ | |
12608 | + | |
12609 | +static unsigned long recoverd_flags; | |
12610 | +static struct completion recoverd_run; | |
12611 | +static wait_queue_head_t recoverd_wait; | |
12612 | +static struct task_struct *recoverd_task; | |
12613 | + | |
12614 | +/* | |
10d56c87 | 12615 | + * Queue of lockspaces (dlm_recover structs) which need to be |
4bf12011 | 12616 | + * started/recovered |
12617 | + */ | |
12618 | + | |
12619 | +static struct list_head recoverd_start_queue; | |
12620 | +static atomic_t recoverd_start_count; | |
12621 | + | |
12622 | +extern struct list_head lslist; | |
12623 | +extern spinlock_t lslist_lock; | |
12624 | + | |
12625 | +void dlm_recoverd_init(void) | |
12626 | +{ | |
12627 | + INIT_LIST_HEAD(&recoverd_start_queue); | |
12628 | + atomic_set(&recoverd_start_count, 0); | |
12629 | + | |
12630 | + init_completion(&recoverd_run); | |
12631 | + init_waitqueue_head(&recoverd_wait); | |
12632 | + memset(&recoverd_flags, 0, sizeof(unsigned long)); | |
12633 | +} | |
12634 | + | |
10d56c87 | 12635 | +static int enable_locking(struct dlm_ls *ls, int event_id) |
4bf12011 | 12636 | +{ |
12637 | + int error = 0; | |
12638 | + | |
12639 | + spin_lock(&ls->ls_recover_lock); | |
12640 | + if (ls->ls_last_stop < event_id) { | |
12641 | + set_bit(LSFL_LS_RUN, &ls->ls_flags); | |
12642 | + up_write(&ls->ls_in_recovery); | |
12643 | + } else { | |
12644 | + error = -EINTR; | |
12645 | + log_debug(ls, "enable_locking: abort %d", event_id); | |
12646 | + } | |
12647 | + spin_unlock(&ls->ls_recover_lock); | |
12648 | + return error; | |
12649 | +} | |
12650 | + | |
10d56c87 | 12651 | +static int ls_first_start(struct dlm_ls *ls, struct dlm_recover *rv) |
4bf12011 | 12652 | +{ |
12653 | + int error; | |
12654 | + | |
10d56c87 | 12655 | + log_all(ls, "recover event %u (first)", rv->event_id); |
4bf12011 | 12656 | + |
12657 | + kcl_global_service_id(ls->ls_local_id, &ls->ls_global_id); | |
12658 | + | |
10d56c87 | 12659 | + error = ls_nodes_init(ls, rv); |
4bf12011 | 12660 | + if (error) { |
12661 | + log_error(ls, "nodes_init failed %d", error); | |
12662 | + goto out; | |
12663 | + } | |
12664 | + | |
10d56c87 | 12665 | + error = dlm_dir_rebuild_local(ls); |
4bf12011 | 12666 | + if (error) { |
10d56c87 | 12667 | + log_error(ls, "dlm_dir_rebuild_local failed %d", error); |
4bf12011 | 12668 | + goto out; |
12669 | + } | |
12670 | + | |
10d56c87 | 12671 | + error = dlm_dir_rebuild_wait(ls); |
4bf12011 | 12672 | + if (error) { |
10d56c87 | 12673 | + log_error(ls, "dlm_dir_rebuild_wait failed %d", error); |
4bf12011 | 12674 | + goto out; |
12675 | + } | |
12676 | + | |
10d56c87 AM |
12677 | + log_all(ls, "recover event %u done", rv->event_id); |
12678 | + kcl_start_done(ls->ls_local_id, rv->event_id); | |
4bf12011 | 12679 | + |
12680 | + out: | |
12681 | + return error; | |
12682 | +} | |
12683 | + | |
12684 | +/* | |
12685 | + * We are given here a new group of nodes which are in the lockspace. We first | |
12686 | + * figure out the differences in ls membership from when we were last running. | |
12687 | + * If nodes from before are gone, then there will be some lock recovery to do. | |
12688 | + * If there are only nodes which have joined, then there's no lock recovery. | |
12689 | + * | |
12690 | + * note: cman requires an rc to finish starting on an revent (where nodes die) | |
12691 | + * before it allows an sevent (where nodes join) to be processed. This means | |
12692 | + * that we won't get start1 with nodeA gone, stop/cancel, start2 with nodeA | |
12693 | + * joined. | |
12694 | + */ | |
12695 | + | |
10d56c87 | 12696 | +static int ls_reconfig(struct dlm_ls *ls, struct dlm_recover *rv) |
4bf12011 | 12697 | +{ |
12698 | + int error, neg = 0; | |
12699 | + | |
10d56c87 | 12700 | + log_all(ls, "recover event %u", rv->event_id); |
4bf12011 | 12701 | + |
12702 | + /* | |
12703 | + * Add or remove nodes from the lockspace's ls_nodes list. | |
12704 | + */ | |
12705 | + | |
10d56c87 | 12706 | + error = ls_nodes_reconfig(ls, rv, &neg); |
4bf12011 | 12707 | + if (error) { |
12708 | + log_error(ls, "nodes_reconfig failed %d", error); | |
12709 | + goto fail; | |
12710 | + } | |
12711 | + | |
12712 | + /* | |
12713 | + * Rebuild our own share of the resdir by collecting from all other | |
12714 | + * nodes rsb name/master pairs for which the name hashes to us. | |
12715 | + */ | |
12716 | + | |
10d56c87 | 12717 | + error = dlm_dir_rebuild_local(ls); |
4bf12011 | 12718 | + if (error) { |
10d56c87 | 12719 | + log_error(ls, "dlm_dir_rebuild_local failed %d", error); |
4bf12011 | 12720 | + goto fail; |
12721 | + } | |
12722 | + | |
12723 | + /* | |
12724 | + * Purge resdir-related requests that are being held in requestqueue. | |
12725 | + * All resdir requests from before recovery started are invalid now due | |
12726 | + * to the resdir rebuild and will be resent by the requesting nodes. | |
12727 | + */ | |
12728 | + | |
12729 | + purge_requestqueue(ls); | |
12730 | + set_bit(LSFL_REQUEST_WARN, &ls->ls_flags); | |
12731 | + | |
12732 | + /* | |
12733 | + * Wait for all nodes to complete resdir rebuild. | |
12734 | + */ | |
12735 | + | |
10d56c87 | 12736 | + error = dlm_dir_rebuild_wait(ls); |
4bf12011 | 12737 | + if (error) { |
10d56c87 | 12738 | + log_error(ls, "dlm_dir_rebuild_wait failed %d", error); |
4bf12011 | 12739 | + goto fail; |
12740 | + } | |
12741 | + | |
12742 | + /* | |
12743 | + * Mark our own lkb's waiting in the lockqueue for remote replies from | |
12744 | + * nodes that are now departed. These will be resent to the new | |
12745 | + * masters in resend_cluster_requests. Also mark resdir lookup | |
12746 | + * requests for resending. | |
12747 | + */ | |
12748 | + | |
12749 | + lockqueue_lkb_mark(ls); | |
12750 | + | |
10d56c87 | 12751 | + error = dlm_recovery_stopped(ls); |
4bf12011 | 12752 | + if (error) |
12753 | + goto fail; | |
12754 | + | |
12755 | + if (neg) { | |
12756 | + /* | |
12757 | + * Clear lkb's for departed nodes. This can't fail since it | |
12758 | + * doesn't involve communicating with other nodes. | |
12759 | + */ | |
12760 | + | |
12761 | + down_write(&ls->ls_rec_rsblist); | |
12762 | + restbl_lkb_purge(ls); | |
12763 | + up_write(&ls->ls_rec_rsblist); | |
12764 | + | |
12765 | + down_read(&ls->ls_rec_rsblist); | |
12766 | + | |
12767 | + /* | |
12768 | + * Get new master id's for rsb's of departed nodes. This fails | |
12769 | + * if we can't communicate with other nodes. | |
12770 | + */ | |
12771 | + | |
12772 | + error = restbl_rsb_update(ls); | |
12773 | + if (error) { | |
12774 | + log_error(ls, "restbl_rsb_update failed %d", error); | |
12775 | + goto fail_up; | |
12776 | + } | |
12777 | + | |
12778 | + /* | |
12779 | + * Send our lkb info to new masters. This fails if we can't | |
12780 | + * communicate with a node. | |
12781 | + */ | |
12782 | + | |
12783 | + error = rebuild_rsbs_send(ls); | |
12784 | + if (error) { | |
12785 | + log_error(ls, "rebuild_rsbs_send failed %d", error); | |
12786 | + goto fail_up; | |
12787 | + } | |
12788 | + up_read(&ls->ls_rec_rsblist); | |
12789 | + } | |
12790 | + | |
12791 | + clear_bit(LSFL_REQUEST_WARN, &ls->ls_flags); | |
12792 | + | |
10d56c87 AM |
12793 | + log_all(ls, "recover event %u done", rv->event_id); |
12794 | + kcl_start_done(ls->ls_local_id, rv->event_id); | |
4bf12011 | 12795 | + return 0; |
12796 | + | |
12797 | + fail_up: | |
12798 | + up_read(&ls->ls_rec_rsblist); | |
12799 | + fail: | |
10d56c87 | 12800 | + log_all(ls, "recover event %d error %d", rv->event_id, error); |
4bf12011 | 12801 | + return error; |
12802 | +} | |
12803 | + | |
10d56c87 | 12804 | +static void clear_finished_nodes(struct dlm_ls *ls, int finish_event) |
4bf12011 | 12805 | +{ |
10d56c87 | 12806 | + struct dlm_csb *csb, *safe; |
4bf12011 | 12807 | + |
10d56c87 AM |
12808 | + list_for_each_entry_safe(csb, safe, &ls->ls_nodes_gone, list) { |
12809 | + if (csb->gone_event <= finish_event) { | |
12810 | + list_del(&csb->list); | |
4bf12011 | 12811 | + release_csb(csb); |
12812 | + } | |
12813 | + } | |
12814 | +} | |
12815 | + | |
12816 | +/* | |
12817 | + * Between calls to this routine for a ls, there can be multiple stop/start | |
12818 | + * events from cman where every start but the latest is cancelled by stops. | |
12819 | + * There can only be a single finish from cman because every finish requires us | |
12820 | + * to call start_done. A single finish event could be followed by multiple | |
12821 | + * stop/start events. This routine takes any combination of events from cman | |
12822 | + * and boils them down to one course of action. | |
12823 | + */ | |
12824 | + | |
10d56c87 AM |
12825 | +static int next_move(struct dlm_ls *ls, struct dlm_recover **rv_out, |
12826 | + int *finish_out) | |
4bf12011 | 12827 | +{ |
12828 | + LIST_HEAD(events); | |
12829 | + unsigned int cmd = 0, stop, start, finish; | |
12830 | + unsigned int last_stop, last_start, last_finish; | |
10d56c87 | 12831 | + struct dlm_recover *rv = NULL, *start_rv = NULL; |
4bf12011 | 12832 | + |
12833 | + /* | |
12834 | + * Grab the current state of cman/sm events. | |
12835 | + */ | |
12836 | + | |
12837 | + spin_lock(&ls->ls_recover_lock); | |
12838 | + | |
12839 | + stop = test_and_clear_bit(LSFL_LS_STOP, &ls->ls_flags) ? 1 : 0; | |
12840 | + start = test_and_clear_bit(LSFL_LS_START, &ls->ls_flags) ? 1 : 0; | |
12841 | + finish = test_and_clear_bit(LSFL_LS_FINISH, &ls->ls_flags) ? 1 : 0; | |
12842 | + | |
12843 | + last_stop = ls->ls_last_stop; | |
12844 | + last_start = ls->ls_last_start; | |
12845 | + last_finish = ls->ls_last_finish; | |
12846 | + | |
12847 | + while (!list_empty(&ls->ls_recover)) { | |
10d56c87 AM |
12848 | + rv = list_entry(ls->ls_recover.next, struct dlm_recover, list); |
12849 | + list_del(&rv->list); | |
12850 | + list_add_tail(&rv->list, &events); | |
4bf12011 | 12851 | + } |
12852 | + spin_unlock(&ls->ls_recover_lock); | |
12853 | + | |
12854 | + log_debug(ls, "move flags %u,%u,%u ids %u,%u,%u", stop, start, finish, | |
12855 | + last_stop, last_start, last_finish); | |
12856 | + | |
12857 | + /* | |
12858 | + * Toss start events which have since been cancelled. | |
12859 | + */ | |
12860 | + | |
12861 | + while (!list_empty(&events)) { | |
10d56c87 AM |
12862 | + DLM_ASSERT(start,); |
12863 | + rv = list_entry(events.next, struct dlm_recover, list); | |
12864 | + list_del(&rv->list); | |
12865 | + | |
12866 | + if (rv->event_id <= last_stop) { | |
12867 | + log_debug(ls, "move skip event %u", rv->event_id); | |
12868 | + kfree(rv->nodeids); | |
12869 | + kfree(rv); | |
12870 | + rv = NULL; | |
4bf12011 | 12871 | + } else { |
10d56c87 AM |
12872 | + log_debug(ls, "move use event %u", rv->event_id); |
12873 | + DLM_ASSERT(!start_rv,); | |
12874 | + start_rv = rv; | |
4bf12011 | 12875 | + } |
12876 | + } | |
12877 | + | |
12878 | + /* | |
12879 | + * Eight possible combinations of events. | |
12880 | + */ | |
12881 | + | |
12882 | + /* 0 */ | |
12883 | + if (!stop && !start && !finish) { | |
10d56c87 | 12884 | + DLM_ASSERT(!start_rv,); |
4bf12011 | 12885 | + cmd = 0; |
12886 | + goto out; | |
12887 | + } | |
12888 | + | |
12889 | + /* 1 */ | |
12890 | + if (!stop && !start && finish) { | |
10d56c87 AM |
12891 | + DLM_ASSERT(!start_rv,); |
12892 | + DLM_ASSERT(last_start > last_stop,); | |
12893 | + DLM_ASSERT(last_finish == last_start,); | |
4bf12011 | 12894 | + cmd = DO_FINISH; |
12895 | + *finish_out = last_finish; | |
12896 | + goto out; | |
12897 | + } | |
12898 | + | |
12899 | + /* 2 */ | |
12900 | + if (!stop && start && !finish) { | |
10d56c87 AM |
12901 | + DLM_ASSERT(start_rv,); |
12902 | + DLM_ASSERT(last_start > last_stop,); | |
4bf12011 | 12903 | + cmd = DO_START; |
10d56c87 | 12904 | + *rv_out = start_rv; |
4bf12011 | 12905 | + goto out; |
12906 | + } | |
12907 | + | |
12908 | + /* 3 */ | |
12909 | + if (!stop && start && finish) { | |
10d56c87 | 12910 | + DLM_ASSERT(0, printk("finish and start with no stop\n");); |
4bf12011 | 12911 | + } |
12912 | + | |
12913 | + /* 4 */ | |
12914 | + if (stop && !start && !finish) { | |
10d56c87 AM |
12915 | + DLM_ASSERT(!start_rv,); |
12916 | + DLM_ASSERT(last_start == last_stop,); | |
4bf12011 | 12917 | + cmd = DO_STOP; |
12918 | + goto out; | |
12919 | + } | |
12920 | + | |
12921 | + /* 5 */ | |
12922 | + if (stop && !start && finish) { | |
10d56c87 AM |
12923 | + DLM_ASSERT(!start_rv,); |
12924 | + DLM_ASSERT(last_finish == last_start,); | |
12925 | + DLM_ASSERT(last_stop == last_start,); | |
4bf12011 | 12926 | + cmd = DO_FINISH_STOP; |
12927 | + *finish_out = last_finish; | |
12928 | + goto out; | |
12929 | + } | |
12930 | + | |
12931 | + /* 6 */ | |
12932 | + if (stop && start && !finish) { | |
10d56c87 AM |
12933 | + if (start_rv) { |
12934 | + DLM_ASSERT(last_start > last_stop,); | |
4bf12011 | 12935 | + cmd = DO_START; |
10d56c87 | 12936 | + *rv_out = start_rv; |
4bf12011 | 12937 | + } else { |
10d56c87 | 12938 | + DLM_ASSERT(last_stop == last_start,); |
4bf12011 | 12939 | + cmd = DO_STOP; |
12940 | + } | |
12941 | + goto out; | |
12942 | + } | |
12943 | + | |
12944 | + /* 7 */ | |
12945 | + if (stop && start && finish) { | |
10d56c87 AM |
12946 | + if (start_rv) { |
12947 | + DLM_ASSERT(last_start > last_stop,); | |
12948 | + DLM_ASSERT(last_start > last_finish,); | |
4bf12011 | 12949 | + cmd = DO_FINISH_START; |
12950 | + *finish_out = last_finish; | |
10d56c87 | 12951 | + *rv_out = start_rv; |
4bf12011 | 12952 | + } else { |
10d56c87 AM |
12953 | + DLM_ASSERT(last_start == last_stop,); |
12954 | + DLM_ASSERT(last_start > last_finish,); | |
4bf12011 | 12955 | + cmd = DO_FINISH_STOP; |
12956 | + *finish_out = last_finish; | |
12957 | + } | |
12958 | + goto out; | |
12959 | + } | |
12960 | + | |
12961 | + out: | |
12962 | + return cmd; | |
12963 | +} | |
12964 | + | |
12965 | +/* | |
12966 | + * This function decides what to do given every combination of current | |
12967 | + * lockspace state and next lockspace state. | |
12968 | + */ | |
12969 | + | |
10d56c87 | 12970 | +static void do_ls_recovery(struct dlm_ls *ls) |
4bf12011 | 12971 | +{ |
10d56c87 | 12972 | + struct dlm_recover *rv = NULL; |
4bf12011 | 12973 | + int error, cur_state, next_state = 0, do_now, finish_event = 0; |
12974 | + | |
10d56c87 | 12975 | + do_now = next_move(ls, &rv, &finish_event); |
4bf12011 | 12976 | + if (!do_now) |
12977 | + goto out; | |
12978 | + | |
12979 | + cur_state = ls->ls_state; | |
12980 | + next_state = 0; | |
12981 | + | |
10d56c87 | 12982 | + DLM_ASSERT(!test_bit(LSFL_LS_RUN, &ls->ls_flags), |
4bf12011 | 12983 | + log_error(ls, "curstate=%d donow=%d", cur_state, do_now);); |
12984 | + | |
12985 | + /* | |
12986 | + * LSST_CLEAR - we're not in any recovery state. We can get a stop or | |
12987 | + * a stop and start which equates with a START. | |
12988 | + */ | |
12989 | + | |
12990 | + if (cur_state == LSST_CLEAR) { | |
12991 | + switch (do_now) { | |
12992 | + case DO_STOP: | |
12993 | + next_state = LSST_WAIT_START; | |
12994 | + break; | |
12995 | + | |
12996 | + case DO_START: | |
10d56c87 | 12997 | + error = ls_reconfig(ls, rv); |
4bf12011 | 12998 | + if (error) |
12999 | + next_state = LSST_WAIT_START; | |
13000 | + else | |
13001 | + next_state = LSST_RECONFIG_DONE; | |
13002 | + break; | |
13003 | + | |
13004 | + case DO_FINISH: /* invalid */ | |
13005 | + case DO_FINISH_STOP: /* invalid */ | |
13006 | + case DO_FINISH_START: /* invalid */ | |
13007 | + default: | |
10d56c87 | 13008 | + DLM_ASSERT(0,); |
4bf12011 | 13009 | + } |
13010 | + goto out; | |
13011 | + } | |
13012 | + | |
13013 | + /* | |
13014 | + * LSST_WAIT_START - we're not running because of getting a stop or | |
13015 | + * failing a start. We wait in this state for another stop/start or | |
13016 | + * just the next start to begin another reconfig attempt. | |
13017 | + */ | |
13018 | + | |
13019 | + if (cur_state == LSST_WAIT_START) { | |
13020 | + switch (do_now) { | |
13021 | + case DO_STOP: | |
13022 | + break; | |
13023 | + | |
13024 | + case DO_START: | |
10d56c87 | 13025 | + error = ls_reconfig(ls, rv); |
4bf12011 | 13026 | + if (error) |
13027 | + next_state = LSST_WAIT_START; | |
13028 | + else | |
13029 | + next_state = LSST_RECONFIG_DONE; | |
13030 | + break; | |
13031 | + | |
13032 | + case DO_FINISH: /* invalid */ | |
13033 | + case DO_FINISH_STOP: /* invalid */ | |
13034 | + case DO_FINISH_START: /* invalid */ | |
13035 | + default: | |
10d56c87 | 13036 | + DLM_ASSERT(0,); |
4bf12011 | 13037 | + } |
13038 | + goto out; | |
13039 | + } | |
13040 | + | |
13041 | + /* | |
13042 | + * LSST_RECONFIG_DONE - we entered this state after successfully | |
13043 | + * completing ls_reconfig and calling kcl_start_done. We expect to get | |
13044 | + * a finish if everything goes ok. A finish could be followed by stop | |
13045 | + * or stop/start before we get here to check it. Or a finish may never | |
13046 | + * happen, only stop or stop/start. | |
13047 | + */ | |
13048 | + | |
13049 | + if (cur_state == LSST_RECONFIG_DONE) { | |
13050 | + switch (do_now) { | |
13051 | + case DO_FINISH: | |
13052 | + clear_finished_nodes(ls, finish_event); | |
13053 | + next_state = LSST_CLEAR; | |
13054 | + | |
13055 | + error = enable_locking(ls, finish_event); | |
13056 | + if (error) | |
13057 | + break; | |
13058 | + | |
13059 | + error = process_requestqueue(ls); | |
13060 | + if (error) | |
13061 | + break; | |
13062 | + | |
13063 | + error = resend_cluster_requests(ls); | |
13064 | + if (error) | |
13065 | + break; | |
13066 | + | |
13067 | + restbl_grant_after_purge(ls); | |
13068 | + | |
13069 | + log_all(ls, "recover event %u finished", finish_event); | |
13070 | + break; | |
13071 | + | |
13072 | + case DO_STOP: | |
13073 | + next_state = LSST_WAIT_START; | |
13074 | + break; | |
13075 | + | |
13076 | + case DO_FINISH_STOP: | |
13077 | + clear_finished_nodes(ls, finish_event); | |
13078 | + next_state = LSST_WAIT_START; | |
13079 | + break; | |
13080 | + | |
13081 | + case DO_FINISH_START: | |
13082 | + clear_finished_nodes(ls, finish_event); | |
13083 | + /* fall into DO_START */ | |
13084 | + | |
13085 | + case DO_START: | |
10d56c87 | 13086 | + error = ls_reconfig(ls, rv); |
4bf12011 | 13087 | + if (error) |
13088 | + next_state = LSST_WAIT_START; | |
13089 | + else | |
13090 | + next_state = LSST_RECONFIG_DONE; | |
13091 | + break; | |
13092 | + | |
13093 | + default: | |
10d56c87 | 13094 | + DLM_ASSERT(0,); |
4bf12011 | 13095 | + } |
13096 | + goto out; | |
13097 | + } | |
13098 | + | |
13099 | + /* | |
13100 | + * LSST_INIT - state after ls is created and before it has been | |
13101 | + * started. A start operation will cause the ls to be started for the | |
13102 | + * first time. A failed start will cause to just wait in INIT for | |
13103 | + * another stop/start. | |
13104 | + */ | |
13105 | + | |
13106 | + if (cur_state == LSST_INIT) { | |
13107 | + switch (do_now) { | |
13108 | + case DO_START: | |
10d56c87 | 13109 | + error = ls_first_start(ls, rv); |
4bf12011 | 13110 | + if (!error) |
13111 | + next_state = LSST_INIT_DONE; | |
13112 | + break; | |
13113 | + | |
13114 | + case DO_STOP: | |
13115 | + break; | |
13116 | + | |
13117 | + case DO_FINISH: /* invalid */ | |
13118 | + case DO_FINISH_STOP: /* invalid */ | |
13119 | + case DO_FINISH_START: /* invalid */ | |
13120 | + default: | |
10d56c87 | 13121 | + DLM_ASSERT(0,); |
4bf12011 | 13122 | + } |
13123 | + goto out; | |
13124 | + } | |
13125 | + | |
13126 | + /* | |
13127 | + * LSST_INIT_DONE - after the first start operation is completed | |
13128 | + * successfully and kcl_start_done() called. If there are no errors, a | |
13129 | + * finish will arrive next and we'll move to LSST_CLEAR. | |
13130 | + */ | |
13131 | + | |
13132 | + if (cur_state == LSST_INIT_DONE) { | |
13133 | + switch (do_now) { | |
13134 | + case DO_STOP: | |
13135 | + case DO_FINISH_STOP: | |
13136 | + next_state = LSST_WAIT_START; | |
13137 | + break; | |
13138 | + | |
13139 | + case DO_START: | |
13140 | + case DO_FINISH_START: | |
10d56c87 | 13141 | + error = ls_reconfig(ls, rv); |
4bf12011 | 13142 | + if (error) |
13143 | + next_state = LSST_WAIT_START; | |
13144 | + else | |
13145 | + next_state = LSST_RECONFIG_DONE; | |
13146 | + break; | |
13147 | + | |
13148 | + case DO_FINISH: | |
13149 | + next_state = LSST_CLEAR; | |
13150 | + enable_locking(ls, finish_event); | |
13151 | + log_all(ls, "recover event %u finished", finish_event); | |
13152 | + break; | |
13153 | + | |
13154 | + default: | |
10d56c87 | 13155 | + DLM_ASSERT(0,); |
4bf12011 | 13156 | + } |
13157 | + goto out; | |
13158 | + } | |
13159 | + | |
13160 | + out: | |
13161 | + if (next_state) | |
13162 | + ls->ls_state = next_state; | |
13163 | + | |
10d56c87 AM |
13164 | + if (rv) { |
13165 | + kfree(rv->nodeids); | |
13166 | + kfree(rv); | |
4bf12011 | 13167 | + } |
13168 | +} | |
13169 | + | |
10d56c87 | 13170 | +static __inline__ struct dlm_ls *get_work(int clear) |
4bf12011 | 13171 | +{ |
10d56c87 | 13172 | + struct dlm_ls *ls; |
4bf12011 | 13173 | + |
13174 | + spin_lock(&lslist_lock); | |
13175 | + | |
13176 | + list_for_each_entry(ls, &lslist, ls_list) { | |
13177 | + if (clear) { | |
13178 | + if (test_and_clear_bit(LSFL_WORK, &ls->ls_flags)) | |
13179 | + goto got_work; | |
13180 | + | |
13181 | + } else { | |
13182 | + if (test_bit(LSFL_WORK, &ls->ls_flags)) | |
13183 | + goto got_work; | |
13184 | + } | |
13185 | + } | |
13186 | + ls = NULL; | |
13187 | + | |
13188 | + got_work: | |
13189 | + spin_unlock(&lslist_lock); | |
13190 | + | |
13191 | + return ls; | |
13192 | +} | |
13193 | + | |
13194 | +/* | |
13195 | + * Thread which does recovery for all lockspaces. | |
13196 | + */ | |
13197 | + | |
13198 | +static int dlm_recoverd(void *arg) | |
13199 | +{ | |
10d56c87 | 13200 | + struct dlm_ls *ls; |
4bf12011 | 13201 | + |
13202 | + daemonize("dlm_recoverd"); | |
13203 | + recoverd_task = current; | |
13204 | + complete(&recoverd_run); | |
13205 | + | |
13206 | + while (!test_bit(THREAD_STOP, &recoverd_flags)) { | |
13207 | + wchan_cond_sleep_intr(recoverd_wait, !get_work(0)); | |
13208 | + if ((ls = get_work(1))) | |
13209 | + do_ls_recovery(ls); | |
13210 | + } | |
13211 | + | |
13212 | + complete(&recoverd_run); | |
13213 | + return 0; | |
13214 | +} | |
13215 | + | |
13216 | +/* | |
13217 | + * Mark a specific lockspace as needing work and wake up the thread to do it. | |
13218 | + */ | |
13219 | + | |
10d56c87 | 13220 | +void dlm_recoverd_kick(struct dlm_ls *ls) |
4bf12011 | 13221 | +{ |
13222 | + set_bit(LSFL_WORK, &ls->ls_flags); | |
13223 | + wake_up(&recoverd_wait); | |
13224 | +} | |
13225 | + | |
13226 | +/* | |
10d56c87 | 13227 | + * Start the recoverd thread when dlm is started (before any lockspaces). |
4bf12011 | 13228 | + */ |
13229 | + | |
10d56c87 | 13230 | +int dlm_recoverd_start(void) |
4bf12011 | 13231 | +{ |
13232 | + int error; | |
13233 | + | |
13234 | + clear_bit(THREAD_STOP, &recoverd_flags); | |
13235 | + error = kernel_thread(dlm_recoverd, NULL, 0); | |
13236 | + if (error < 0) | |
13237 | + goto out; | |
13238 | + | |
13239 | + error = 0; | |
13240 | + wait_for_completion(&recoverd_run); | |
13241 | + | |
13242 | + out: | |
13243 | + return error; | |
13244 | +} | |
13245 | + | |
13246 | +/* | |
10d56c87 | 13247 | + * Stop the recoverd thread when dlm is shut down (all lockspaces are gone). |
4bf12011 | 13248 | + */ |
13249 | + | |
10d56c87 | 13250 | +int dlm_recoverd_stop(void) |
4bf12011 | 13251 | +{ |
13252 | + set_bit(THREAD_STOP, &recoverd_flags); | |
13253 | + wake_up(&recoverd_wait); | |
13254 | + wait_for_completion(&recoverd_run); | |
13255 | + | |
13256 | + return 0; | |
13257 | +} | |
13258 | diff -urN linux-orig/cluster/dlm/recoverd.h linux-patched/cluster/dlm/recoverd.h | |
13259 | --- linux-orig/cluster/dlm/recoverd.h 1970-01-01 07:30:00.000000000 +0730 | |
10d56c87 | 13260 | +++ linux-patched/cluster/dlm/recoverd.h 2004-07-13 18:57:22.000000000 +0800 |
4bf12011 | 13261 | @@ -0,0 +1,22 @@ |
13262 | +/****************************************************************************** | |
13263 | +******************************************************************************* | |
13264 | +** | |
13265 | +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | |
13266 | +** Copyright (C) 2004 Red Hat, Inc. All rights reserved. | |
13267 | +** | |
13268 | +** This copyrighted material is made available to anyone wishing to use, | |
13269 | +** modify, copy, or redistribute it subject to the terms and conditions | |
13270 | +** of the GNU General Public License v.2. | |
13271 | +** | |
13272 | +******************************************************************************* | |
13273 | +******************************************************************************/ | |
13274 | + | |
13275 | +#ifndef __RECOVERD_DOT_H__ | |
13276 | +#define __RECOVERD_DOT_H__ | |
13277 | + | |
13278 | +void dlm_recoverd_init(void); | |
10d56c87 AM |
13279 | +void dlm_recoverd_kick(struct dlm_ls *ls); |
13280 | +int dlm_recoverd_start(void); | |
13281 | +int dlm_recoverd_stop(void); | |
4bf12011 | 13282 | + |
13283 | +#endif /* __RECOVERD_DOT_H__ */ | |
13284 | diff -urN linux-orig/cluster/dlm/rsb.c linux-patched/cluster/dlm/rsb.c | |
13285 | --- linux-orig/cluster/dlm/rsb.c 1970-01-01 07:30:00.000000000 +0730 | |
10d56c87 AM |
13286 | +++ linux-patched/cluster/dlm/rsb.c 2004-07-13 18:57:22.000000000 +0800 |
13287 | @@ -0,0 +1,319 @@ | |
4bf12011 | 13288 | +/****************************************************************************** |
13289 | +******************************************************************************* | |
13290 | +** | |
13291 | +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | |
13292 | +** Copyright (C) 2004 Red Hat, Inc. All rights reserved. | |
13293 | +** | |
13294 | +** This copyrighted material is made available to anyone wishing to use, | |
13295 | +** modify, copy, or redistribute it subject to the terms and conditions | |
13296 | +** of the GNU General Public License v.2. | |
13297 | +** | |
13298 | +******************************************************************************* | |
13299 | +******************************************************************************/ | |
13300 | + | |
13301 | +#include "dlm_internal.h" | |
13302 | +#include "locking.h" | |
13303 | +#include "memory.h" | |
13304 | +#include "lockqueue.h" | |
13305 | +#include "nodes.h" | |
13306 | +#include "dir.h" | |
13307 | +#include "util.h" | |
13308 | + | |
10d56c87 AM |
13309 | +static struct dlm_rsb *search_hashchain(struct list_head *head, |
13310 | + struct dlm_rsb *parent, | |
13311 | + char *name, int namelen) | |
4bf12011 | 13312 | +{ |
10d56c87 | 13313 | + struct dlm_rsb *r; |
4bf12011 | 13314 | + |
13315 | + list_for_each_entry(r, head, res_hashchain) { | |
13316 | + if ((parent == r->res_parent) && (namelen == r->res_length) && | |
13317 | + (memcmp(name, r->res_name, namelen) == 0)) { | |
13318 | + atomic_inc(&r->res_ref); | |
13319 | + return r; | |
13320 | + } | |
13321 | + } | |
13322 | + | |
13323 | + return NULL; | |
13324 | +} | |
13325 | + | |
13326 | +/* | |
13327 | + * A way to arbitrarily hold onto an rsb which we already have a reference to | |
13328 | + * to make sure it doesn't go away. Opposite of release_rsb(). | |
13329 | + */ | |
13330 | + | |
10d56c87 | 13331 | +void hold_rsb(struct dlm_rsb *r) |
4bf12011 | 13332 | +{ |
13333 | + atomic_inc(&r->res_ref); | |
13334 | +} | |
13335 | + | |
13336 | +/* | |
13337 | + * release_rsb() - Decrement reference count on rsb struct. Free the rsb | |
13338 | + * struct when there are zero references. Every lkb for the rsb adds a | |
13339 | + * reference. When ref is zero there can be no more lkb's for the rsb, on the | |
13340 | + * queue's or anywhere else. | |
13341 | + */ | |
13342 | + | |
10d56c87 | 13343 | +void release_rsb(struct dlm_rsb *r) |
4bf12011 | 13344 | +{ |
10d56c87 | 13345 | + struct dlm_ls *ls = r->res_ls; |
4bf12011 | 13346 | + int removed = FALSE; |
13347 | + | |
10d56c87 AM |
13348 | + write_lock(&ls->ls_rsbtbl[r->res_bucket].lock); |
13349 | + if (atomic_dec_and_test(&r->res_ref)) { | |
13350 | + DLM_ASSERT(list_empty(&r->res_grantqueue), print_rsb(r);); | |
13351 | + DLM_ASSERT(list_empty(&r->res_waitqueue), print_rsb(r);); | |
13352 | + DLM_ASSERT(list_empty(&r->res_convertqueue), print_rsb(r);); | |
4bf12011 | 13353 | + removed = TRUE; |
13354 | + list_del(&r->res_hashchain); | |
13355 | + } | |
10d56c87 | 13356 | + write_unlock(&ls->ls_rsbtbl[r->res_bucket].lock); |
4bf12011 | 13357 | + |
10d56c87 AM |
13358 | + if (!removed) |
13359 | + return; | |
4bf12011 | 13360 | + |
10d56c87 AM |
13361 | + down_read(&ls->ls_gap_rsblist); |
13362 | + if (r->res_parent) | |
13363 | + list_del(&r->res_subreslist); | |
13364 | + else | |
13365 | + list_del(&r->res_rootlist); | |
13366 | + up_read(&ls->ls_gap_rsblist); | |
4bf12011 | 13367 | + |
10d56c87 AM |
13368 | + if (r->res_parent) |
13369 | + goto out; | |
13370 | + if (r->res_nodeid && r->res_nodeid != -1) | |
13371 | + goto out; | |
13372 | + if (r->res_nodeid == -1 && !test_bit(RESFL_MASTER, &r->res_flags)) | |
13373 | + goto out; | |
4bf12011 | 13374 | + |
10d56c87 AM |
13375 | + if (get_directory_nodeid(r) != our_nodeid()) |
13376 | + remote_remove_resdata(r->res_ls, get_directory_nodeid(r), | |
13377 | + r->res_name, r->res_length); | |
13378 | + else | |
13379 | + remove_resdata(r->res_ls, our_nodeid(), r->res_name, | |
13380 | + r->res_length); | |
13381 | + out: | |
13382 | + if (r->res_lvbptr) | |
13383 | + free_lvb(r->res_lvbptr); | |
13384 | + | |
13385 | + free_rsb(r); | |
13386 | +} | |
13387 | + | |
13388 | +struct dlm_rsb *find_rsb_to_unlock(struct dlm_ls *ls, struct dlm_lkb *lkb) | |
13389 | +{ | |
13390 | + struct dlm_rsb *r = lkb->lkb_resource; | |
13391 | + | |
13392 | + write_lock(&ls->ls_rsbtbl[r->res_bucket].lock); | |
13393 | + if (!r->res_parent && atomic_read(&r->res_ref) == 1) | |
13394 | + r->res_nodeid = -1; | |
13395 | + write_unlock(&ls->ls_rsbtbl[r->res_bucket].lock); | |
13396 | + | |
13397 | + return r; | |
4bf12011 | 13398 | +} |
13399 | + | |
13400 | +/* | |
13401 | + * find_or_create_rsb() - Get an rsb struct, or create one if it doesn't exist. | |
13402 | + * If the rsb exists, its ref count is incremented by this function. If it | |
13403 | + * doesn't exist, it's created with a ref count of one. | |
13404 | + */ | |
13405 | + | |
10d56c87 AM |
13406 | +int find_or_create_rsb(struct dlm_ls *ls, struct dlm_rsb *parent, char *name, |
13407 | + int namelen, int create, struct dlm_rsb **rp) | |
4bf12011 | 13408 | +{ |
10d56c87 AM |
13409 | + uint32_t bucket; |
13410 | + struct dlm_rsb *r, *tmp; | |
4bf12011 | 13411 | + int error = -ENOMEM; |
13412 | + | |
10d56c87 | 13413 | + DLM_ASSERT(namelen <= DLM_RESNAME_MAXLEN,); |
4bf12011 | 13414 | + |
10d56c87 AM |
13415 | + bucket = dlm_hash(name, namelen); |
13416 | + bucket &= (ls->ls_rsbtbl_size - 1); | |
4bf12011 | 13417 | + |
10d56c87 AM |
13418 | + read_lock(&ls->ls_rsbtbl[bucket].lock); |
13419 | + r = search_hashchain(&ls->ls_rsbtbl[bucket].list, parent, name, namelen); | |
13420 | + read_unlock(&ls->ls_rsbtbl[bucket].lock); | |
4bf12011 | 13421 | + |
13422 | + if (r) | |
13423 | + goto out_set; | |
13424 | + if (!create) { | |
13425 | + *rp = NULL; | |
13426 | + goto out; | |
13427 | + } | |
13428 | + | |
13429 | + r = allocate_rsb(ls, namelen); | |
13430 | + if (!r) | |
13431 | + goto fail; | |
13432 | + | |
13433 | + INIT_LIST_HEAD(&r->res_subreslist); | |
13434 | + INIT_LIST_HEAD(&r->res_grantqueue); | |
13435 | + INIT_LIST_HEAD(&r->res_convertqueue); | |
13436 | + INIT_LIST_HEAD(&r->res_waitqueue); | |
13437 | + | |
13438 | + memcpy(r->res_name, name, namelen); | |
13439 | + r->res_length = namelen; | |
13440 | + r->res_ls = ls; | |
13441 | + init_rwsem(&r->res_lock); | |
13442 | + atomic_set(&r->res_ref, 1); | |
10d56c87 | 13443 | + r->res_bucket = bucket; |
4bf12011 | 13444 | + |
13445 | + if (parent) { | |
13446 | + r->res_parent = parent; | |
13447 | + r->res_depth = parent->res_depth + 1; | |
13448 | + r->res_root = parent->res_root; | |
13449 | + r->res_nodeid = parent->res_nodeid; | |
13450 | + } else { | |
13451 | + r->res_parent = NULL; | |
13452 | + r->res_depth = 1; | |
13453 | + r->res_root = r; | |
13454 | + r->res_nodeid = -1; | |
13455 | + } | |
13456 | + | |
10d56c87 AM |
13457 | + write_lock(&ls->ls_rsbtbl[bucket].lock); |
13458 | + tmp = search_hashchain(&ls->ls_rsbtbl[bucket].list, parent, name, namelen); | |
4bf12011 | 13459 | + if (tmp) { |
10d56c87 | 13460 | + write_unlock(&ls->ls_rsbtbl[bucket].lock); |
4bf12011 | 13461 | + free_rsb(r); |
13462 | + r = tmp; | |
13463 | + } else { | |
10d56c87 AM |
13464 | + list_add(&r->res_hashchain, &ls->ls_rsbtbl[bucket].list); |
13465 | + write_unlock(&ls->ls_rsbtbl[bucket].lock); | |
4bf12011 | 13466 | + |
13467 | + down_read(&ls->ls_gap_rsblist); | |
13468 | + if (parent) | |
13469 | + list_add_tail(&r->res_subreslist, | |
13470 | + &r->res_root->res_subreslist); | |
13471 | + else | |
13472 | + list_add(&r->res_rootlist, &ls->ls_rootres); | |
13473 | + up_read(&ls->ls_gap_rsblist); | |
13474 | + } | |
13475 | + | |
13476 | + out_set: | |
13477 | + *rp = r; | |
13478 | + | |
13479 | + out: | |
13480 | + error = 0; | |
13481 | + | |
13482 | + fail: | |
13483 | + return error; | |
13484 | +} | |
13485 | + | |
13486 | +/* | |
13487 | + * Add an LKB to a resource's grant/convert/wait queue, in mode order | |
13488 | + */ | |
13489 | + | |
13490 | +void lkb_add_ordered(struct list_head *new, struct list_head *head, int mode) | |
13491 | +{ | |
10d56c87 | 13492 | + struct dlm_lkb *lkb = NULL; |
4bf12011 | 13493 | + |
13494 | + list_for_each_entry(lkb, head, lkb_statequeue) { | |
13495 | + if (lkb->lkb_rqmode < mode) | |
13496 | + break; | |
13497 | + } | |
13498 | + | |
13499 | + if (!lkb) { | |
13500 | + /* No entries in the queue, we are alone */ | |
13501 | + list_add_tail(new, head); | |
13502 | + } else { | |
13503 | + __list_add(new, lkb->lkb_statequeue.prev, &lkb->lkb_statequeue); | |
13504 | + } | |
13505 | +} | |
13506 | + | |
13507 | +/* | |
13508 | + * The rsb res_lock must be held in write when this function is called. | |
13509 | + */ | |
13510 | + | |
10d56c87 | 13511 | +void lkb_enqueue(struct dlm_rsb *r, struct dlm_lkb *lkb, int type) |
4bf12011 | 13512 | +{ |
10d56c87 AM |
13513 | + DLM_ASSERT(!lkb->lkb_status, |
13514 | + print_lkb(lkb); | |
13515 | + print_rsb(r);); | |
4bf12011 | 13516 | + |
13517 | + lkb->lkb_status = type; | |
13518 | + | |
13519 | + switch (type) { | |
13520 | + case GDLM_LKSTS_WAITING: | |
13521 | + list_add_tail(&lkb->lkb_statequeue, &r->res_waitqueue); | |
13522 | + break; | |
13523 | + | |
13524 | + case GDLM_LKSTS_GRANTED: | |
13525 | + lkb_add_ordered(&lkb->lkb_statequeue, &r->res_grantqueue, | |
13526 | + lkb->lkb_grmode); | |
13527 | + break; | |
13528 | + | |
13529 | + case GDLM_LKSTS_CONVERT: | |
13530 | + if (lkb->lkb_lockqueue_flags & DLM_LKF_EXPEDITE) | |
13531 | + list_add(&lkb->lkb_statequeue, &r->res_convertqueue); | |
13532 | + | |
13533 | + else | |
13534 | + if (lkb->lkb_lockqueue_flags & DLM_LKF_QUECVT) | |
13535 | + list_add_tail(&lkb->lkb_statequeue, | |
13536 | + &r->res_convertqueue); | |
13537 | + else | |
13538 | + lkb_add_ordered(&lkb->lkb_statequeue, | |
13539 | + &r->res_convertqueue, lkb->lkb_rqmode); | |
13540 | + break; | |
13541 | + | |
13542 | + default: | |
10d56c87 | 13543 | + DLM_ASSERT(0,); |
4bf12011 | 13544 | + } |
13545 | +} | |
13546 | + | |
10d56c87 | 13547 | +void res_lkb_enqueue(struct dlm_rsb *r, struct dlm_lkb *lkb, int type) |
4bf12011 | 13548 | +{ |
13549 | + down_write(&r->res_lock); | |
13550 | + lkb_enqueue(r, lkb, type); | |
13551 | + up_write(&r->res_lock); | |
13552 | +} | |
13553 | + | |
13554 | +/* | |
13555 | + * The rsb res_lock must be held in write when this function is called. | |
13556 | + */ | |
13557 | + | |
10d56c87 | 13558 | +int lkb_dequeue(struct dlm_lkb *lkb) |
4bf12011 | 13559 | +{ |
13560 | + int status = lkb->lkb_status; | |
13561 | + | |
13562 | + if (!status) | |
13563 | + goto out; | |
13564 | + | |
13565 | + lkb->lkb_status = 0; | |
13566 | + list_del(&lkb->lkb_statequeue); | |
13567 | + | |
13568 | + out: | |
13569 | + return status; | |
13570 | +} | |
13571 | + | |
10d56c87 | 13572 | +int res_lkb_dequeue(struct dlm_lkb *lkb) |
4bf12011 | 13573 | +{ |
13574 | + int status; | |
13575 | + | |
13576 | + down_write(&lkb->lkb_resource->res_lock); | |
13577 | + status = lkb_dequeue(lkb); | |
13578 | + up_write(&lkb->lkb_resource->res_lock); | |
13579 | + | |
13580 | + return status; | |
13581 | +} | |
13582 | + | |
13583 | +/* | |
13584 | + * The rsb res_lock must be held in write when this function is called. | |
13585 | + */ | |
13586 | + | |
10d56c87 | 13587 | +int lkb_swqueue(struct dlm_rsb *r, struct dlm_lkb *lkb, int type) |
4bf12011 | 13588 | +{ |
13589 | + int status; | |
13590 | + | |
13591 | + status = lkb_dequeue(lkb); | |
13592 | + lkb_enqueue(r, lkb, type); | |
13593 | + | |
13594 | + return status; | |
13595 | +} | |
13596 | + | |
10d56c87 | 13597 | +int res_lkb_swqueue(struct dlm_rsb *r, struct dlm_lkb *lkb, int type) |
4bf12011 | 13598 | +{ |
13599 | + int status; | |
13600 | + | |
13601 | + down_write(&r->res_lock); | |
13602 | + status = lkb_swqueue(r, lkb, type); | |
13603 | + up_write(&r->res_lock); | |
13604 | + | |
13605 | + return status; | |
13606 | +} | |
13607 | diff -urN linux-orig/cluster/dlm/rsb.h linux-patched/cluster/dlm/rsb.h | |
13608 | --- linux-orig/cluster/dlm/rsb.h 1970-01-01 07:30:00.000000000 +0730 | |
10d56c87 | 13609 | +++ linux-patched/cluster/dlm/rsb.h 2004-07-13 18:57:22.000000000 +0800 |
4bf12011 | 13610 | @@ -0,0 +1,30 @@ |
13611 | +/****************************************************************************** | |
13612 | +******************************************************************************* | |
13613 | +** | |
13614 | +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | |
13615 | +** Copyright (C) 2004 Red Hat, Inc. All rights reserved. | |
13616 | +** | |
13617 | +** This copyrighted material is made available to anyone wishing to use, | |
13618 | +** modify, copy, or redistribute it subject to the terms and conditions | |
13619 | +** of the GNU General Public License v.2. | |
13620 | +** | |
13621 | +******************************************************************************* | |
13622 | +******************************************************************************/ | |
13623 | + | |
13624 | +#ifndef __RSB_DOT_H__ | |
13625 | +#define __RSB_DOT_H__ | |
13626 | + | |
13627 | +void lkb_add_ordered(struct list_head *new, struct list_head *head, int mode); | |
10d56c87 AM |
13628 | +void release_rsb(struct dlm_rsb *r); |
13629 | +void hold_rsb(struct dlm_rsb *r); | |
13630 | +int find_or_create_rsb(struct dlm_ls *ls, struct dlm_rsb *parent, char *name, | |
13631 | + int namelen, int create, struct dlm_rsb **rp); | |
13632 | +struct dlm_rsb *find_rsb_to_unlock(struct dlm_ls *ls, struct dlm_lkb *lkb); | |
13633 | +void lkb_enqueue(struct dlm_rsb *r, struct dlm_lkb *lkb, int type); | |
13634 | +void res_lkb_enqueue(struct dlm_rsb *r, struct dlm_lkb *lkb, int type); | |
13635 | +int lkb_dequeue(struct dlm_lkb *lkb); | |
13636 | +int res_lkb_dequeue(struct dlm_lkb *lkb); | |
13637 | +int lkb_swqueue(struct dlm_rsb *r, struct dlm_lkb *lkb, int type); | |
13638 | +int res_lkb_swqueue(struct dlm_rsb *r, struct dlm_lkb *lkb, int type); | |
4bf12011 | 13639 | + |
13640 | +#endif /* __RSB_DOT_H__ */ | |
13641 | diff -urN linux-orig/cluster/dlm/util.c linux-patched/cluster/dlm/util.c | |
13642 | --- linux-orig/cluster/dlm/util.c 1970-01-01 07:30:00.000000000 +0730 | |
10d56c87 AM |
13643 | +++ linux-patched/cluster/dlm/util.c 2004-07-13 18:57:22.000000000 +0800 |
13644 | @@ -0,0 +1,190 @@ | |
4bf12011 | 13645 | +/****************************************************************************** |
13646 | +******************************************************************************* | |
13647 | +** | |
13648 | +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | |
13649 | +** Copyright (C) 2004 Red Hat, Inc. All rights reserved. | |
13650 | +** | |
13651 | +** This copyrighted material is made available to anyone wishing to use, | |
13652 | +** modify, copy, or redistribute it subject to the terms and conditions | |
13653 | +** of the GNU General Public License v.2. | |
13654 | +** | |
13655 | +******************************************************************************* | |
13656 | +******************************************************************************/ | |
13657 | + | |
13658 | +#include "dlm_internal.h" | |
13659 | + | |
13660 | +static const uint32_t crc_32_tab[] = { | |
13661 | + 0x00000000, 0x77073096, 0xee0e612c, 0x990951ba, 0x076dc419, 0x706af48f, | |
13662 | + 0xe963a535, 0x9e6495a3, | |
13663 | + 0x0edb8832, 0x79dcb8a4, 0xe0d5e91e, 0x97d2d988, 0x09b64c2b, 0x7eb17cbd, | |
13664 | + 0xe7b82d07, 0x90bf1d91, | |
13665 | + 0x1db71064, 0x6ab020f2, 0xf3b97148, 0x84be41de, 0x1adad47d, 0x6ddde4eb, | |
13666 | + 0xf4d4b551, 0x83d385c7, | |
13667 | + 0x136c9856, 0x646ba8c0, 0xfd62f97a, 0x8a65c9ec, 0x14015c4f, 0x63066cd9, | |
13668 | + 0xfa0f3d63, 0x8d080df5, | |
13669 | + 0x3b6e20c8, 0x4c69105e, 0xd56041e4, 0xa2677172, 0x3c03e4d1, 0x4b04d447, | |
13670 | + 0xd20d85fd, 0xa50ab56b, | |
13671 | + 0x35b5a8fa, 0x42b2986c, 0xdbbbc9d6, 0xacbcf940, 0x32d86ce3, 0x45df5c75, | |
13672 | + 0xdcd60dcf, 0xabd13d59, | |
13673 | + 0x26d930ac, 0x51de003a, 0xc8d75180, 0xbfd06116, 0x21b4f4b5, 0x56b3c423, | |
13674 | + 0xcfba9599, 0xb8bda50f, | |
13675 | + 0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924, 0x2f6f7c87, 0x58684c11, | |
13676 | + 0xc1611dab, 0xb6662d3d, | |
13677 | + 0x76dc4190, 0x01db7106, 0x98d220bc, 0xefd5102a, 0x71b18589, 0x06b6b51f, | |
13678 | + 0x9fbfe4a5, 0xe8b8d433, | |
13679 | + 0x7807c9a2, 0x0f00f934, 0x9609a88e, 0xe10e9818, 0x7f6a0dbb, 0x086d3d2d, | |
13680 | + 0x91646c97, 0xe6635c01, | |
13681 | + 0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e, 0x6c0695ed, 0x1b01a57b, | |
13682 | + 0x8208f4c1, 0xf50fc457, | |
13683 | + 0x65b0d9c6, 0x12b7e950, 0x8bbeb8ea, 0xfcb9887c, 0x62dd1ddf, 0x15da2d49, | |
13684 | + 0x8cd37cf3, 0xfbd44c65, | |
13685 | + 0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2, 0x4adfa541, 0x3dd895d7, | |
13686 | + 0xa4d1c46d, 0xd3d6f4fb, | |
13687 | + 0x4369e96a, 0x346ed9fc, 0xad678846, 0xda60b8d0, 0x44042d73, 0x33031de5, | |
13688 | + 0xaa0a4c5f, 0xdd0d7cc9, | |
13689 | + 0x5005713c, 0x270241aa, 0xbe0b1010, 0xc90c2086, 0x5768b525, 0x206f85b3, | |
13690 | + 0xb966d409, 0xce61e49f, | |
13691 | + 0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4, 0x59b33d17, 0x2eb40d81, | |
13692 | + 0xb7bd5c3b, 0xc0ba6cad, | |
13693 | + 0xedb88320, 0x9abfb3b6, 0x03b6e20c, 0x74b1d29a, 0xead54739, 0x9dd277af, | |
13694 | + 0x04db2615, 0x73dc1683, | |
13695 | + 0xe3630b12, 0x94643b84, 0x0d6d6a3e, 0x7a6a5aa8, 0xe40ecf0b, 0x9309ff9d, | |
13696 | + 0x0a00ae27, 0x7d079eb1, | |
13697 | + 0xf00f9344, 0x8708a3d2, 0x1e01f268, 0x6906c2fe, 0xf762575d, 0x806567cb, | |
13698 | + 0x196c3671, 0x6e6b06e7, | |
13699 | + 0xfed41b76, 0x89d32be0, 0x10da7a5a, 0x67dd4acc, 0xf9b9df6f, 0x8ebeeff9, | |
13700 | + 0x17b7be43, 0x60b08ed5, | |
13701 | + 0xd6d6a3e8, 0xa1d1937e, 0x38d8c2c4, 0x4fdff252, 0xd1bb67f1, 0xa6bc5767, | |
13702 | + 0x3fb506dd, 0x48b2364b, | |
13703 | + 0xd80d2bda, 0xaf0a1b4c, 0x36034af6, 0x41047a60, 0xdf60efc3, 0xa867df55, | |
13704 | + 0x316e8eef, 0x4669be79, | |
13705 | + 0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236, 0xcc0c7795, 0xbb0b4703, | |
13706 | + 0x220216b9, 0x5505262f, | |
13707 | + 0xc5ba3bbe, 0xb2bd0b28, 0x2bb45a92, 0x5cb36a04, 0xc2d7ffa7, 0xb5d0cf31, | |
13708 | + 0x2cd99e8b, 0x5bdeae1d, | |
13709 | + 0x9b64c2b0, 0xec63f226, 0x756aa39c, 0x026d930a, 0x9c0906a9, 0xeb0e363f, | |
13710 | + 0x72076785, 0x05005713, | |
13711 | + 0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38, 0x92d28e9b, 0xe5d5be0d, | |
13712 | + 0x7cdcefb7, 0x0bdbdf21, | |
13713 | + 0x86d3d2d4, 0xf1d4e242, 0x68ddb3f8, 0x1fda836e, 0x81be16cd, 0xf6b9265b, | |
13714 | + 0x6fb077e1, 0x18b74777, | |
13715 | + 0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c, 0x8f659eff, 0xf862ae69, | |
13716 | + 0x616bffd3, 0x166ccf45, | |
13717 | + 0xa00ae278, 0xd70dd2ee, 0x4e048354, 0x3903b3c2, 0xa7672661, 0xd06016f7, | |
13718 | + 0x4969474d, 0x3e6e77db, | |
13719 | + 0xaed16a4a, 0xd9d65adc, 0x40df0b66, 0x37d83bf0, 0xa9bcae53, 0xdebb9ec5, | |
13720 | + 0x47b2cf7f, 0x30b5ffe9, | |
13721 | + 0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6, 0xbad03605, 0xcdd70693, | |
13722 | + 0x54de5729, 0x23d967bf, | |
13723 | + 0xb3667a2e, 0xc4614ab8, 0x5d681b02, 0x2a6f2b94, 0xb40bbe37, 0xc30c8ea1, | |
13724 | + 0x5a05df1b, 0x2d02ef8d | |
13725 | +}; | |
13726 | + | |
13727 | +/** | |
10d56c87 | 13728 | + * dlm_hash - hash an array of data |
4bf12011 | 13729 | + * @data: the data to be hashed |
13730 | + * @len: the length of data to be hashed | |
13731 | + * | |
13732 | + * Copied from GFS. | |
13733 | + * | |
13734 | + * Take some data and convert it to a 32-bit hash. | |
13735 | + * | |
13736 | + * The hash function is a 32-bit CRC of the data. The algorithm uses | |
13737 | + * the crc_32_tab table above. | |
13738 | + * | |
13739 | + * This may not be the fastest hash function, but it does a fair bit better | |
13740 | + * at providing uniform results than the others I've looked at. That's | |
13741 | + * really important for efficient directories. | |
13742 | + * | |
13743 | + * Returns: the hash | |
13744 | + */ | |
13745 | + | |
10d56c87 | 13746 | +uint32_t dlm_hash(const char *data, int len) |
4bf12011 | 13747 | +{ |
13748 | + uint32_t hash = 0xFFFFFFFF; | |
13749 | + | |
13750 | + for (; len--; data++) | |
13751 | + hash = crc_32_tab[(hash ^ *data) & 0xFF] ^ (hash >> 8); | |
13752 | + | |
13753 | + hash = ~hash; | |
13754 | + | |
13755 | + return hash; | |
13756 | +} | |
13757 | + | |
10d56c87 | 13758 | +uint32_t dlm_next_power2(uint32_t val) |
4bf12011 | 13759 | +{ |
13760 | + uint32_t x; | |
13761 | + | |
13762 | + for (x = 1; x < val; x <<= 1) ; | |
13763 | + | |
13764 | + return x; | |
13765 | +} | |
13766 | + | |
10d56c87 AM |
13767 | +void print_lkb(struct dlm_lkb *lkb) |
13768 | +{ | |
13769 | + printk("dlm: lkb\n" | |
13770 | + "id %x\n" | |
13771 | + "remid %x\n" | |
13772 | + "flags %x\n" | |
13773 | + "status %x\n" | |
13774 | + "rqmode %d\n" | |
13775 | + "grmode %d\n" | |
13776 | + "nodeid %u\n" | |
13777 | + "lqstate %x\n" | |
13778 | + "lqflags %x\n", | |
13779 | + lkb->lkb_id, | |
13780 | + lkb->lkb_remid, | |
13781 | + lkb->lkb_flags, | |
13782 | + lkb->lkb_status, | |
13783 | + lkb->lkb_rqmode, | |
13784 | + lkb->lkb_grmode, | |
13785 | + lkb->lkb_nodeid, | |
13786 | + lkb->lkb_lockqueue_state, | |
13787 | + lkb->lkb_lockqueue_flags); | |
13788 | +} | |
13789 | + | |
13790 | +void print_rsb(struct dlm_rsb *r) | |
13791 | +{ | |
13792 | + printk("dlm: rsb\n" | |
13793 | + "name \"%s\"\n" | |
13794 | + "nodeid %u\n" | |
13795 | + "ref %u\n", | |
13796 | + r->res_name, | |
13797 | + r->res_nodeid, | |
13798 | + atomic_read(&r->res_ref)); | |
13799 | +} | |
13800 | + | |
13801 | +void print_request(struct dlm_request *req) | |
13802 | +{ | |
13803 | + printk("dlm: request\n" | |
13804 | + "rh_cmd %u\n" | |
13805 | + "rh_lkid %x\n" | |
13806 | + "remlkid %x\n" | |
13807 | + "flags %x\n" | |
13808 | + "status %u\n" | |
13809 | + "rqmode %u\n", | |
13810 | + req->rr_header.rh_cmd, | |
13811 | + req->rr_header.rh_lkid, | |
13812 | + req->rr_remlkid, | |
13813 | + req->rr_flags, | |
13814 | + req->rr_status, | |
13815 | + req->rr_rqmode); | |
13816 | +} | |
13817 | + | |
13818 | +void print_reply(struct dlm_reply *rp) | |
13819 | +{ | |
13820 | + printk("dlm: reply\n" | |
13821 | + "rh_cmd %u\n" | |
13822 | + "rh_lkid %x\n" | |
13823 | + "lockstate %u\n" | |
13824 | + "nodeid %u\n" | |
13825 | + "status %u\n" | |
13826 | + "lkid %x\n", | |
13827 | + rp->rl_header.rh_cmd, | |
13828 | + rp->rl_header.rh_lkid, | |
13829 | + rp->rl_lockstate, | |
13830 | + rp->rl_nodeid, | |
13831 | + rp->rl_status, | |
13832 | + rp->rl_lkid); | |
4bf12011 | 13833 | +} |
10d56c87 | 13834 | + |
4bf12011 | 13835 | diff -urN linux-orig/cluster/dlm/util.h linux-patched/cluster/dlm/util.h |
13836 | --- linux-orig/cluster/dlm/util.h 1970-01-01 07:30:00.000000000 +0730 | |
10d56c87 AM |
13837 | +++ linux-patched/cluster/dlm/util.h 2004-07-13 18:57:22.000000000 +0800 |
13838 | @@ -0,0 +1,25 @@ | |
4bf12011 | 13839 | +/****************************************************************************** |
13840 | +******************************************************************************* | |
13841 | +** | |
13842 | +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | |
13843 | +** Copyright (C) 2004 Red Hat, Inc. All rights reserved. | |
13844 | +** | |
13845 | +** This copyrighted material is made available to anyone wishing to use, | |
13846 | +** modify, copy, or redistribute it subject to the terms and conditions | |
13847 | +** of the GNU General Public License v.2. | |
13848 | +** | |
13849 | +******************************************************************************* | |
13850 | +******************************************************************************/ | |
13851 | + | |
13852 | +#ifndef __UTIL_DOT_H__ | |
13853 | +#define __UTIL_DOT_H__ | |
13854 | + | |
10d56c87 AM |
13855 | +uint32_t dlm_hash(const char *data, int len); |
13856 | +uint32_t dlm_next_power2(uint32_t val); | |
4bf12011 | 13857 | + |
10d56c87 AM |
13858 | +void print_lkb(struct dlm_lkb *lkb); |
13859 | +void print_rsb(struct dlm_rsb *r); | |
13860 | +void print_request(struct dlm_request *req); | |
13861 | +void print_reply(struct dlm_reply *rp); | |
4bf12011 | 13862 | + |
13863 | +#endif | |
13864 | diff -urN linux-orig/include/cluster/dlm.h linux-patched/include/cluster/dlm.h | |
13865 | --- linux-orig/include/cluster/dlm.h 1970-01-01 07:30:00.000000000 +0730 | |
10d56c87 AM |
13866 | +++ linux-patched/include/cluster/dlm.h 2004-07-13 18:57:22.000000000 +0800 |
13867 | @@ -0,0 +1,412 @@ | |
4bf12011 | 13868 | +/****************************************************************************** |
13869 | +******************************************************************************* | |
13870 | +** | |
13871 | +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | |
13872 | +** Copyright (C) 2004 Red Hat, Inc. All rights reserved. | |
13873 | +** | |
13874 | +** This copyrighted material is made available to anyone wishing to use, | |
13875 | +** modify, copy, or redistribute it subject to the terms and conditions | |
13876 | +** of the GNU General Public License v.2. | |
13877 | +** | |
13878 | +******************************************************************************* | |
13879 | +******************************************************************************/ | |
13880 | + | |
13881 | +#ifndef __DLM_DOT_H__ | |
13882 | +#define __DLM_DOT_H__ | |
13883 | + | |
13884 | +/* | |
13885 | + * Interface to DLM - routines and structures to use DLM lockspaces. | |
13886 | + */ | |
13887 | + | |
13888 | +/* | |
13889 | + * Lock Modes | |
13890 | + */ | |
13891 | + | |
13892 | +#define DLM_LOCK_IV (-1) /* invalid */ | |
13893 | +#define DLM_LOCK_NL (0) /* null */ | |
13894 | +#define DLM_LOCK_CR (1) /* concurrent read */ | |
13895 | +#define DLM_LOCK_CW (2) /* concurrent write */ | |
13896 | +#define DLM_LOCK_PR (3) /* protected read */ | |
13897 | +#define DLM_LOCK_PW (4) /* protected write */ | |
13898 | +#define DLM_LOCK_EX (5) /* exclusive */ | |
13899 | + | |
13900 | +/* | |
13901 | + * Maximum size in bytes of a dlm_lock name | |
13902 | + */ | |
13903 | + | |
13904 | +#define DLM_RESNAME_MAXLEN (64) | |
13905 | + | |
13906 | +/* | |
13907 | + * Size in bytes of Lock Value Block | |
13908 | + */ | |
13909 | + | |
13910 | +#define DLM_LVB_LEN (32) | |
13911 | + | |
13912 | +/* | |
13913 | + * Flags to dlm_new_lockspace | |
13914 | + * | |
13915 | + * DLM_LSF_NOTIMERS | |
13916 | + * | |
13917 | + * Do not subject locks in this lockspace to time-outs. | |
13918 | + * | |
10d56c87 AM |
13919 | + * DLM_LSF_NOCONVGRANT |
13920 | + * | |
13921 | + * Do not grant new locks unless the conversion queue is empty. | |
13922 | + * | |
4bf12011 | 13923 | + */ |
13924 | + | |
13925 | +#define DLM_LSF_NOTIMERS (1) | |
10d56c87 | 13926 | +#define DLM_LSF_NOCONVGRANT (2) |
4bf12011 | 13927 | + |
13928 | +/* | |
13929 | + * Flags to dlm_lock | |
13930 | + * | |
13931 | + * DLM_LKF_NOQUEUE | |
13932 | + * | |
13933 | + * Do not queue the lock request on the wait queue if it cannot be granted | |
13934 | + * immediately. If the lock cannot be granted because of this flag, DLM will | |
13935 | + * either return -EAGAIN from the dlm_lock call or will return 0 from | |
13936 | + * dlm_lock and -EAGAIN in the lock status block when the AST is executed. | |
13937 | + * | |
13938 | + * DLM_LKF_CONVERT | |
13939 | + * | |
13940 | + * Indicates a lock conversion request. For conversions the name and namelen | |
13941 | + * are ignored and the lock ID in the LKSB is used to identify the lock. | |
13942 | + * | |
13943 | + * DLM_LKF_VALBLK | |
13944 | + * | |
13945 | + * Requests DLM to return the current contents of the lock value block in the | |
13946 | + * lock status block. When this flag is set in a lock conversion from PW or EX | |
13947 | + * modes, DLM assigns the value specified in the lock status block to the lock | |
13948 | + * value block of the lock resource. The LVB is a DLM_LVB_LEN size array | |
13949 | + * containing application-specific information. | |
13950 | + * | |
13951 | + * DLM_LKF_QUECVT | |
13952 | + * | |
13953 | + * Force a conversion lock request to the back of the convert queue. All other | |
13954 | + * conversion requests ahead of it must be granted before it can be granted. | |
13955 | + * This enforces a FIFO ordering on the convert queue. When this flag is set, | |
13956 | + * indefinite postponement is averted. This flag is allowed only when | |
13957 | + * converting a lock to a more restrictive mode. | |
13958 | + * | |
13959 | + * DLM_LKF_CANCEL | |
13960 | + * | |
13961 | + * Used to cancel a pending conversion (with dlm_unlock). Lock is returned to | |
13962 | + * previously granted mode. | |
13963 | + * | |
13964 | + * DLM_LKF_IVVALBLK | |
13965 | + * | |
13966 | + * Invalidate/clear the lock value block. | |
13967 | + * | |
13968 | + * DLM_LKF_CONVDEADLK | |
13969 | + * | |
13970 | + * The granted mode of a lock being converted (from a non-NL mode) can be | |
13971 | + * changed to NL in the process of acquiring the requested mode to avoid | |
13972 | + * conversion deadlock. | |
13973 | + * | |
13974 | + * DLM_LKF_PERSISTENT | |
13975 | + * | |
13976 | + * Only relevant to locks originating in userspace. Signals to the ioctl.c code | |
13977 | + * that this lock should not be unlocked when the process exits. | |
13978 | + * | |
13979 | + * DLM_LKF_NODLKWT | |
13980 | + * | |
13981 | + * This lock is not to be checked for conversion deadlocks. | |
13982 | + * | |
13983 | + * DLM_LKF_NODLCKWT | |
13984 | + * | |
13985 | + * not yet implemented | |
13986 | + * | |
13987 | + * DLM_LKF_EXPEDITE | |
13988 | + * | |
13989 | + * If this lock conversion cannot be granted immediately it is to go to the | |
13990 | + * head of the conversion queue regardless of its requested lock mode. | |
13991 | + * | |
13992 | + * DLM_LKF_NOQUEUEBAST | |
13993 | + * | |
13994 | + * Send blocking AST's before returning -EAGAIN to the caller. It is only | |
13995 | + * used along with the NOQUEUE flag. Blocking AST's are not sent for failed | |
13996 | + * NOQUEUE requests otherwise. | |
13997 | + * | |
13998 | + */ | |
13999 | + | |
14000 | +#define DLM_LKF_NOQUEUE (0x00000001) | |
14001 | +#define DLM_LKF_CANCEL (0x00000002) | |
14002 | +#define DLM_LKF_CONVERT (0x00000004) | |
14003 | +#define DLM_LKF_VALBLK (0x00000008) | |
14004 | +#define DLM_LKF_QUECVT (0x00000010) | |
14005 | +#define DLM_LKF_IVVALBLK (0x00000020) | |
14006 | +#define DLM_LKF_CONVDEADLK (0x00000040) | |
14007 | +#define DLM_LKF_PERSISTENT (0x00000080) | |
14008 | +#define DLM_LKF_NODLCKWT (0x00000100) | |
14009 | +#define DLM_LKF_NODLCKBLK (0x00000200) | |
14010 | +#define DLM_LKF_EXPEDITE (0x00000400) | |
14011 | +#define DLM_LKF_NOQUEUEBAST (0x00000800) | |
14012 | + | |
14013 | +/* | |
14014 | + * Some return codes that are not in errno.h | |
14015 | + */ | |
14016 | + | |
14017 | +#define DLM_ECANCEL (0x10001) | |
14018 | +#define DLM_EUNLOCK (0x10002) | |
14019 | + | |
14020 | +typedef void dlm_lockspace_t; | |
14021 | + | |
14022 | +/* | |
14023 | + * Lock range structure | |
14024 | + */ | |
14025 | + | |
14026 | +struct dlm_range { | |
14027 | + uint64_t ra_start; | |
14028 | + uint64_t ra_end; | |
14029 | +}; | |
14030 | + | |
14031 | +/* | |
14032 | + * Lock status block | |
14033 | + * | |
14034 | + * Use this structure to specify the contents of the lock value block. For a | |
14035 | + * conversion request, this structure is used to specify the lock ID of the | |
14036 | + * lock. DLM writes the status of the lock request and the lock ID assigned | |
14037 | + * to the request in the lock status block. | |
14038 | + * | |
14039 | + * sb_lkid: the returned lock ID. It is set on new (non-conversion) requests. | |
14040 | + * It is available when dlm_lock returns. | |
14041 | + * | |
14042 | + * sb_lvbptr: saves or returns the contents of the lock's LVB according to rules | |
14043 | + * shown for the DLM_LKF_VALBLK flag. | |
14044 | + * | |
14045 | + * sb_flags: DLM_SBF_DEMOTED is returned if in the process of promoting a lock, | |
14046 | + * it was first demoted to NL to avoid conversion deadlock. | |
14047 | + * | |
14048 | + * sb_status: the returned status of the lock request set prior to AST | |
14049 | + * execution. Possible return values: | |
14050 | + * | |
14051 | + * 0 if lock request was successful | |
14052 | + * -EAGAIN if request would block and is flagged DLM_LKF_NOQUEUE | |
14053 | + * -ENOMEM if there is no memory to process request | |
14054 | + * -EINVAL if there are invalid parameters | |
14055 | + * -DLM_EUNLOCK if unlock request was successful | |
14056 | + * -DLM_ECANCEL ? | |
14057 | + */ | |
14058 | + | |
14059 | +#define DLM_SBF_DEMOTED (0x01) | |
14060 | + | |
14061 | +struct dlm_lksb { | |
14062 | + int sb_status; | |
14063 | + uint32_t sb_lkid; | |
14064 | + char sb_flags; | |
14065 | + char * sb_lvbptr; | |
14066 | +}; | |
14067 | + | |
14068 | +/* | |
14069 | + * These defines are the bits that make up the | |
14070 | + * query code. | |
14071 | + */ | |
14072 | + | |
14073 | +/* Bits 0, 1, 2, the lock mode or DLM_LOCK_THIS, see DLM_LOCK_NL etc in | |
14074 | + * dlm.h. Ignored for DLM_QUERY_LOCKS_ALL */ | |
14075 | +#define DLM_LOCK_THIS 0x0007 | |
14076 | +#define DLM_QUERY_MODE_MASK 0x0007 | |
14077 | + | |
14078 | +/* Bits 3, 4, 5 bitmap of queue(s) to query */ | |
14079 | +#define DLM_QUERY_QUEUE_WAIT 0x0008 | |
14080 | +#define DLM_QUERY_QUEUE_CONVERT 0x0010 | |
14081 | +#define DLM_QUERY_QUEUE_GRANT 0x0020 | |
14082 | +#define DLM_QUERY_QUEUE_GRANTED 0x0030 /* Shorthand */ | |
14083 | +#define DLM_QUERY_QUEUE_ALL 0x0038 /* Shorthand */ | |
14084 | + | |
14085 | +/* Bit 6, Return only the information that can be established without a network | |
14086 | + * round-trip. The caller must be aware of the implications of this. Useful for | |
14087 | + * just getting the master node id or resource name. */ | |
14088 | +#define DLM_QUERY_LOCAL 0x0040 | |
14089 | + | |
14090 | +/* Bits 8 up, query type */ | |
14091 | +#define DLM_QUERY_LOCKS_HIGHER 0x0100 | |
14092 | +#define DLM_QUERY_LOCKS_LOWER 0x0200 | |
14093 | +#define DLM_QUERY_LOCKS_EQUAL 0x0300 | |
14094 | +#define DLM_QUERY_LOCKS_BLOCKING 0x0400 | |
14095 | +#define DLM_QUERY_LOCKS_NOTBLOCK 0x0500 | |
14096 | +#define DLM_QUERY_LOCKS_ALL 0x0600 | |
14097 | +#define DLM_QUERY_MASK 0x0F00 | |
14098 | + | |
14099 | +/* GRMODE is the default for mode comparisons, | |
14100 | + RQMODE might also be handy */ | |
14101 | +#define DLM_QUERY_GRMODE 0x0000 | |
14102 | +#define DLM_QUERY_RQMODE 0x1000 | |
14103 | + | |
14104 | +/* Structures passed into and out of the query */ | |
14105 | + | |
14106 | +struct dlm_lockinfo { | |
14107 | + int lki_lkid; /* Lock ID on originating node */ | |
14108 | + int lki_mstlkid; /* Lock ID on master node */ | |
14109 | + int lki_parent; | |
14110 | + int lki_node; /* Originating node (not master) */ | |
14111 | + uint8_t lki_state; /* Queue the lock is on */ | |
14112 | + uint8_t lki_grmode; /* Granted mode */ | |
14113 | + uint8_t lki_rqmode; /* Requested mode */ | |
14114 | + struct dlm_range lki_grrange; /* Granted range, if applicable */ | |
14115 | + struct dlm_range lki_rqrange; /* Requested range, if applicable */ | |
14116 | +}; | |
14117 | + | |
14118 | +struct dlm_resinfo { | |
14119 | + int rsi_length; | |
14120 | + int rsi_grantcount; /* No. of nodes on grant queue */ | |
14121 | + int rsi_convcount; /* No. of nodes on convert queue */ | |
14122 | + int rsi_waitcount; /* No. of nodes on wait queue */ | |
14123 | + int rsi_masternode; /* Master for this resource */ | |
14124 | + char rsi_name[DLM_RESNAME_MAXLEN]; /* Resource name */ | |
14125 | + char rsi_valblk[DLM_LVB_LEN]; /* Master's LVB contents, if applicable | |
14126 | + */ | |
14127 | +}; | |
14128 | + | |
14129 | +struct dlm_queryinfo { | |
14130 | + struct dlm_resinfo *gqi_resinfo; | |
14131 | + struct dlm_lockinfo *gqi_lockinfo; /* This points to an array | |
14132 | + * of structs */ | |
14133 | + int gqi_locksize; /* input */ | |
14134 | + int gqi_lockcount; /* output */ | |
14135 | +}; | |
14136 | + | |
14137 | +#ifdef __KERNEL__ | |
14138 | +/* | |
14139 | + * dlm_init | |
14140 | + * | |
14141 | + * Starts and initializes DLM threads and structures. Creation of the first | |
14142 | + * lockspace will call this if it has not been called already. | |
14143 | + * | |
14144 | + * Returns: 0 if successful, -EXXX on error | |
14145 | + */ | |
14146 | + | |
14147 | +int dlm_init(void); | |
14148 | + | |
14149 | +/* | |
14150 | + * dlm_release | |
14151 | + * | |
14152 | + * Stops DLM threads. | |
14153 | + * | |
14154 | + * Returns: 0 if successful, -EXXX on error | |
14155 | + */ | |
14156 | + | |
14157 | +int dlm_release(void); | |
14158 | + | |
14159 | +/* | |
14160 | + * dlm_new_lockspace | |
14161 | + * | |
14162 | + * Starts a lockspace with the given name. If the named lockspace exists in | |
14163 | + * the cluster, the calling node joins it. | |
14164 | + */ | |
14165 | + | |
14166 | +int dlm_new_lockspace(char *name, int namelen, dlm_lockspace_t **lockspace, | |
14167 | + int flags); | |
14168 | + | |
14169 | +/* | |
14170 | + * dlm_release_lockspace | |
14171 | + * | |
14172 | + * Stop a lockspace. | |
14173 | + */ | |
14174 | + | |
14175 | +int dlm_release_lockspace(dlm_lockspace_t *lockspace, int force); | |
14176 | + | |
14177 | +/* | |
14178 | + * dlm_lock | |
14179 | + * | |
14180 | + * Make an asynchronous request to acquire or convert a lock on a named | |
14181 | + * resource. | |
14182 | + * | |
14183 | + * lockspace: context for the request | |
14184 | + * mode: the requested mode of the lock (DLM_LOCK_) | |
14185 | + * lksb: lock status block for input and async return values | |
14186 | + * flags: input flags (DLM_LKF_) | |
14187 | + * name: name of the resource to lock, can be binary | |
14188 | + * namelen: the length in bytes of the resource name (DLM_RESNAME_MAXLEN) | |
14189 | + * parent: the lock ID of a parent lock or 0 if none | |
14190 | + * lockast: function DLM executes when it completes processing the request | |
14191 | + * astarg: argument passed to lockast and bast functions | |
14192 | + * bast: function DLM executes when this lock later blocks another request | |
14193 | + * | |
14194 | + * Returns: | |
14195 | + * 0 if request is successfully queued for processing | |
14196 | + * -EINVAL if any input parameters are invalid | |
14197 | + * -EAGAIN if request would block and is flagged DLM_LKF_NOQUEUE | |
14198 | + * -ENOMEM if there is no memory to process request | |
14199 | + * -ENOTCONN if there is a communication error | |
14200 | + * | |
14201 | + * If the call to dlm_lock returns an error then the operation has failed and | |
14202 | + * the AST routine will not be called. If dlm_lock returns 0 it is still | |
14203 | + * possible that the lock operation will fail. The AST routine will be called | |
14204 | + * when the locking is complete and the status is returned in the lksb. | |
14205 | + * | |
14206 | + * If the AST routines or parameter are passed to a conversion operation then | |
14207 | + * they will overwrite those values that were passed to a previous dlm_lock | |
14208 | + * call. | |
14209 | + * | |
14210 | + * AST routines should not block (at least not for long), but may make | |
14211 | + * any locking calls they please. | |
14212 | + */ | |
14213 | + | |
14214 | +int dlm_lock(dlm_lockspace_t *lockspace, | |
14215 | + uint32_t mode, | |
14216 | + struct dlm_lksb *lksb, | |
14217 | + uint32_t flags, | |
14218 | + void *name, | |
14219 | + unsigned int namelen, | |
14220 | + uint32_t parent, | |
14221 | + void (*lockast) (void *astarg), | |
14222 | + void *astarg, | |
14223 | + void (*bast) (void *astarg, int mode), | |
14224 | + struct dlm_range *range); | |
14225 | + | |
14226 | +/* | |
14227 | + * dlm_unlock | |
14228 | + * | |
14229 | + * Asynchronously release a lock on a resource. The AST routine is called | |
14230 | + * when the resource is successfully unlocked. | |
14231 | + * | |
14232 | + * lockspace: context for the request | |
14233 | + * lkid: the lock ID as returned in the lksb | |
14234 | + * flags: input flags (DLM_LKF_) | |
14235 | + * lksb: if NULL the lksb parameter passed to last lock request is used | |
14236 | + * astarg: if NULL, astarg in last lock request is used | |
14237 | + * | |
14238 | + * Returns: | |
14239 | + * 0 if request is successfully queued for processing | |
14240 | + * -EINVAL if any input parameters are invalid | |
14241 | + * -ENOTEMPTY if the lock still has sublocks | |
14242 | + * -EBUSY if the lock is waiting for a remote lock operation | |
14243 | + * -ENOTCONN if there is a communication error | |
14244 | + */ | |
14245 | + | |
14246 | +extern int dlm_unlock(dlm_lockspace_t *lockspace, | |
14247 | + uint32_t lkid, | |
14248 | + uint32_t flags, | |
14249 | + struct dlm_lksb *lksb, | |
14250 | + void *astarg); | |
14251 | + | |
14252 | +/* Query interface | |
14253 | + * | |
14254 | + * Query the other holders of a resource, given a known lock ID | |
14255 | + * | |
14256 | + * lockspace: context for the request | |
14257 | + * lksb: LKSB, sb_lkid contains the lock ID of a valid lock | |
14258 | + * on the resource. sb_status will contain the status | |
14259 | + * of the request on completion. | |
14260 | + * query: query bitmap see DLM_QUERY_* above | |
14261 | + * qinfo: pointer to dlm_queryinfo structure | |
14262 | + * ast_routine: AST routine to call on completion | |
14263 | + * astarg: argument to AST routine. It is "traditional" | |
14264 | + * to put the qinfo pointer into lksb->sb_lvbptr | |
14265 | + * and pass the lksb in here. | |
14266 | + */ | |
14267 | +extern int dlm_query(dlm_lockspace_t *lockspace, | |
14268 | + struct dlm_lksb *lksb, | |
14269 | + int query, | |
14270 | + struct dlm_queryinfo *qinfo, | |
14271 | + void (ast_routine(void *)), | |
14272 | + void *astarg); | |
14273 | + | |
10d56c87 AM |
14274 | + |
14275 | +void dlm_debug_dump(void); | |
14276 | + | |
4bf12011 | 14277 | +#endif /* __KERNEL__ */ |
14278 | + | |
14279 | +#endif /* __DLM_DOT_H__ */ | |
14280 | diff -urN linux-orig/include/cluster/dlm_device.h linux-patched/include/cluster/dlm_device.h | |
14281 | --- linux-orig/include/cluster/dlm_device.h 1970-01-01 07:30:00.000000000 +0730 | |
10d56c87 | 14282 | +++ linux-patched/include/cluster/dlm_device.h 2004-07-13 18:57:22.000000000 +0800 |
4bf12011 | 14283 | @@ -0,0 +1,63 @@ |
14284 | +/****************************************************************************** | |
14285 | +******************************************************************************* | |
14286 | +** | |
14287 | +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | |
14288 | +** Copyright (C) 2004 Red Hat, Inc. All rights reserved. | |
14289 | +** | |
14290 | +** This copyrighted material is made available to anyone wishing to use, | |
14291 | +** modify, copy, or redistribute it subject to the terms and conditions | |
14292 | +** of the GNU General Public License v.2. | |
14293 | +** | |
14294 | +******************************************************************************* | |
14295 | +******************************************************************************/ | |
14296 | + | |
14297 | +/* This is the device interface for dlm, most users will use a library | |
14298 | + * interface. | |
14299 | + */ | |
14300 | + | |
14301 | +/* Version of the device interface */ | |
14302 | +#define DLM_DEVICE_VERSION_MAJOR 2 | |
14303 | +#define DLM_DEVICE_VERSION_MINOR 0 | |
14304 | +#define DLM_DEVICE_VERSION_PATCH 0 | |
14305 | + | |
14306 | +/* struct passed to the lock write */ | |
14307 | +struct dlm_lock_params { | |
14308 | + uint32_t version[3]; | |
14309 | + uint8_t cmd; | |
14310 | + uint8_t mode; | |
14311 | + uint16_t flags; | |
14312 | + uint32_t lkid; | |
14313 | + uint32_t parent; | |
14314 | + struct dlm_range range; | |
14315 | + uint8_t namelen; | |
14316 | + void *astparam; | |
14317 | + void *astaddr; | |
14318 | + void *bastaddr; | |
14319 | + struct dlm_lksb *lksb; | |
14320 | + char name[1]; | |
14321 | +}; | |
14322 | + | |
14323 | + | |
14324 | +/* struct read from the "device" fd, | |
14325 | + consists mainly of userspace pointers for the library to use */ | |
14326 | +struct dlm_lock_result { | |
14327 | + uint8_t cmd; | |
14328 | + void *astparam; | |
14329 | + void (*astaddr)(void *astparam); | |
14330 | + struct dlm_lksb *user_lksb; | |
14331 | + struct dlm_lksb lksb; /* But this has real data in it */ | |
14332 | + uint8_t bast_mode; /* Not yet used */ | |
14333 | +}; | |
14334 | + | |
14335 | +/* commands passed to the device */ | |
14336 | +#define DLM_USER_LOCK 1 | |
14337 | +#define DLM_USER_UNLOCK 2 | |
14338 | +#define DLM_USER_QUERY 3 | |
14339 | + | |
14340 | +/* Arbitrary length restriction */ | |
14341 | +#define MAX_LS_NAME_LEN 64 | |
14342 | + | |
14343 | +/* ioctls on the device */ | |
14344 | +#define DLM_CREATE_LOCKSPACE _IOW('D', 0x01, char *) | |
14345 | +#define DLM_RELEASE_LOCKSPACE _IOW('D', 0x02, char *) | |
14346 | +#define DLM_FORCE_RELEASE_LOCKSPACE _IOW('D', 0x03, char *) |