]>
Commit | Line | Data |
---|---|---|
bb1d8b11 AM |
1 | # Add DLM to the build system |
2 | diff -urN -p linux-2.6.8.1/cluster/Kconfig linux/cluster/Kconfig | |
3 | --- linux-2.6.8.1/cluster/Kconfig 2004-08-24 13:23:09.000000000 +0800 | |
4 | +++ linux/cluster/Kconfig 2004-08-24 13:23:32.000000000 +0800 | |
5 | @@ -10,4 +10,22 @@ config CLUSTER | |
6 | needed by all the other components. It provides membership services | |
7 | for those other subsystems. | |
8 | ||
9 | +config CLUSTER_DLM | |
10 | + tristate "Distributed Lock Manager" | |
11 | + depends on CLUSTER | |
12 | + ---help--- | |
13 | + A fully distributed lock manager, providing cluster-wide locking services | |
14 | + and protected lock namespaces for kernel and userland applications. | |
15 | + | |
16 | +config CLUSTER_DLM_PROCLOCKS | |
17 | + boolean "/proc/locks support for DLM" | |
18 | + depends on CLUSTER_DLM | |
19 | + depends on PROC_FS | |
20 | + ---help--- | |
21 | + If this option is enabled a file will appear in /proc/cluster/dlm_locks. | |
22 | + Write into this "file" the name of a lockspace known to the DLM and then | |
23 | + read out a list of all the resources and locks in that lockspace that are | |
24 | + known to the local node. Note that because the DLM is distributed this may not | |
25 | + be the full lock picture. | |
26 | + | |
27 | endmenu | |
28 | diff -urN -p linux-2.6.8.1/cluster/Makefile linux/cluster/Makefile | |
29 | --- linux-2.6.8.1/cluster/Makefile 2004-08-24 13:23:09.000000000 +0800 | |
30 | +++ linux/cluster/Makefile 2004-08-24 13:23:32.000000000 +0800 | |
31 | @@ -1,3 +1,4 @@ | |
32 | obj-y := nocluster.o | |
33 | ||
34 | obj-$(CONFIG_CLUSTER) += cman/ | |
35 | +obj-$(CONFIG_CLUSTER_DLM) += dlm/ | |
36 | diff -urN -p linux-2.6.8.1/cluster/dlm/Makefile linux/cluster/dlm/Makefile | |
37 | --- linux-2.6.8.1/cluster/dlm/Makefile 1970-01-01 07:30:00.000000000 +0730 | |
38 | +++ linux/cluster/dlm/Makefile 2004-08-24 13:23:32.000000000 +0800 | |
39 | @@ -0,0 +1,23 @@ | |
40 | +dlm-objs := ast.o \ | |
41 | + config.o \ | |
42 | + device.o \ | |
43 | + dir.o \ | |
44 | + lkb.o \ | |
45 | + locking.o \ | |
46 | + lockqueue.o \ | |
47 | + lockspace.o \ | |
48 | + lowcomms.o \ | |
49 | + main.o \ | |
50 | + memory.o \ | |
51 | + midcomms.o \ | |
52 | + nodes.o \ | |
53 | + proc.o \ | |
54 | + queries.o \ | |
55 | + rebuild.o \ | |
56 | + reccomms.o \ | |
57 | + recover.o \ | |
58 | + recoverd.o \ | |
59 | + rsb.o \ | |
60 | + util.o \ | |
61 | + | |
62 | +obj-$(CONFIG_CLUSTER_DLM) += dlm.o | |
c1c6733f AM |
63 | diff -urN linux-orig/cluster/dlm/ast.c linux-patched/cluster/dlm/ast.c |
64 | --- linux-orig/cluster/dlm/ast.c 1970-01-01 07:30:00.000000000 +0730 | |
bb1d8b11 AM |
65 | +++ linux-patched/cluster/dlm/ast.c 2004-11-03 11:31:56.000000000 +0800 |
66 | @@ -0,0 +1,618 @@ | |
c1c6733f AM |
67 | +/****************************************************************************** |
68 | +******************************************************************************* | |
69 | +** | |
70 | +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | |
71 | +** Copyright (C) 2004 Red Hat, Inc. All rights reserved. | |
72 | +** | |
73 | +** This copyrighted material is made available to anyone wishing to use, | |
74 | +** modify, copy, or redistribute it subject to the terms and conditions | |
75 | +** of the GNU General Public License v.2. | |
76 | +** | |
77 | +******************************************************************************* | |
78 | +******************************************************************************/ | |
79 | + | |
80 | +/* | |
81 | + * This delivers ASTs and checks for dead remote requests and deadlocks. | |
82 | + */ | |
83 | + | |
84 | +#include <linux/timer.h> | |
85 | + | |
86 | +#include "dlm_internal.h" | |
87 | +#include "rsb.h" | |
88 | +#include "lockqueue.h" | |
89 | +#include "dir.h" | |
90 | +#include "locking.h" | |
91 | +#include "lkb.h" | |
92 | +#include "lowcomms.h" | |
93 | +#include "midcomms.h" | |
94 | +#include "ast.h" | |
95 | +#include "nodes.h" | |
96 | +#include "config.h" | |
b7b72b66 | 97 | +#include "util.h" |
c1c6733f AM |
98 | + |
99 | +/* Wake up flags for astd */ | |
b7b72b66 AM |
100 | +#define WAKE_ASTS 1 |
101 | +#define WAKE_TIMER 2 | |
102 | + | |
103 | +static struct list_head ast_queue; | |
104 | +static struct semaphore ast_queue_lock; | |
105 | +static wait_queue_head_t astd_waitchan; | |
106 | +struct task_struct * astd_task; | |
107 | +static unsigned long astd_wakeflags; | |
108 | + | |
109 | +static struct list_head _deadlockqueue; | |
110 | +static struct semaphore _deadlockqueue_lock; | |
111 | +static struct list_head _lockqueue; | |
112 | +static struct semaphore _lockqueue_lock; | |
113 | +static struct timer_list _lockqueue_timer; | |
114 | + | |
115 | +void add_to_lockqueue(struct dlm_lkb *lkb) | |
c1c6733f AM |
116 | +{ |
117 | + /* Time stamp the entry so we know if it's been waiting too long */ | |
118 | + lkb->lkb_lockqueue_time = jiffies; | |
119 | + | |
120 | + down(&_lockqueue_lock); | |
121 | + list_add(&lkb->lkb_lockqueue, &_lockqueue); | |
122 | + up(&_lockqueue_lock); | |
123 | +} | |
124 | + | |
b7b72b66 | 125 | +void remove_from_lockqueue(struct dlm_lkb *lkb) |
c1c6733f AM |
126 | +{ |
127 | + down(&_lockqueue_lock); | |
128 | + list_del(&lkb->lkb_lockqueue); | |
129 | + up(&_lockqueue_lock); | |
c783755a AM |
130 | + |
131 | +#ifdef CONFIG_DLM_STATS | |
132 | + dlm_stats.lockqueue_time[lkb->lkb_lockqueue_state] += (jiffies - lkb->lkb_lockqueue_time); | |
133 | + dlm_stats.lockqueue_locks[lkb->lkb_lockqueue_state]++; | |
134 | +#endif | |
135 | + lkb->lkb_lockqueue_state = 0; | |
c1c6733f AM |
136 | +} |
137 | + | |
b7b72b66 | 138 | +void add_to_deadlockqueue(struct dlm_lkb *lkb) |
c1c6733f AM |
139 | +{ |
140 | + if (test_bit(LSFL_NOTIMERS, &lkb->lkb_resource->res_ls->ls_flags)) | |
141 | + return; | |
142 | + lkb->lkb_duetime = jiffies; | |
143 | + down(&_deadlockqueue_lock); | |
144 | + list_add(&lkb->lkb_deadlockq, &_deadlockqueue); | |
145 | + up(&_deadlockqueue_lock); | |
146 | +} | |
147 | + | |
b7b72b66 | 148 | +void remove_from_deadlockqueue(struct dlm_lkb *lkb) |
c1c6733f AM |
149 | +{ |
150 | + if (test_bit(LSFL_NOTIMERS, &lkb->lkb_resource->res_ls->ls_flags)) | |
151 | + return; | |
152 | + | |
153 | + down(&_deadlockqueue_lock); | |
154 | + list_del(&lkb->lkb_deadlockq); | |
155 | + up(&_deadlockqueue_lock); | |
156 | + | |
157 | + /* Invalidate the due time */ | |
158 | + memset(&lkb->lkb_duetime, 0, sizeof(lkb->lkb_duetime)); | |
159 | +} | |
160 | + | |
c1c6733f AM |
161 | +/* |
162 | + * Queue an AST for delivery, this will only deal with | |
163 | + * kernel ASTs, usermode API will piggyback on top of this. | |
164 | + * | |
165 | + * This can be called in either the user or DLM context. | |
b7b72b66 | 166 | + * ASTs are queued EVEN IF we are already running in dlm_astd |
c1c6733f AM |
167 | + * context as we don't know what other locks are held (eg we could |
168 | + * be being called from a lock operation that was called from | |
169 | + * another AST!) | |
170 | + * If the AST is to be queued remotely then a message is sent to | |
171 | + * the target system via midcomms. | |
172 | + */ | |
173 | + | |
b7b72b66 | 174 | +void queue_ast(struct dlm_lkb *lkb, uint16_t flags, uint8_t rqmode) |
c1c6733f | 175 | +{ |
b7b72b66 | 176 | + struct dlm_request req; |
c1c6733f AM |
177 | + |
178 | + if (lkb->lkb_flags & GDLM_LKFLG_MSTCPY) { | |
179 | + /* | |
180 | + * Send a message to have an ast queued remotely. Note: we do | |
181 | + * not send remote completion asts, they are handled as part of | |
182 | + * remote lock granting. | |
183 | + */ | |
b7b72b66 | 184 | + if (flags & AST_BAST) { |
c1c6733f AM |
185 | + req.rr_header.rh_cmd = GDLM_REMCMD_SENDBAST; |
186 | + req.rr_header.rh_length = sizeof(req); | |
187 | + req.rr_header.rh_flags = 0; | |
188 | + req.rr_header.rh_lkid = lkb->lkb_id; | |
189 | + req.rr_header.rh_lockspace = | |
190 | + lkb->lkb_resource->res_ls->ls_global_id; | |
191 | + req.rr_status = lkb->lkb_retstatus; | |
192 | + req.rr_remlkid = lkb->lkb_remid; | |
193 | + req.rr_rqmode = rqmode; | |
194 | + | |
195 | + midcomms_send_message(lkb->lkb_nodeid, &req.rr_header, | |
b7b72b66 | 196 | + lkb->lkb_resource->res_ls->ls_allocation); |
c1c6733f AM |
197 | + } else if (lkb->lkb_retstatus == -EDEADLOCK) { |
198 | + /* | |
199 | + * We only queue remote Completion ASTs here for error | |
200 | + * completions that happen out of band. | |
201 | + * DEADLOCK is one such. | |
202 | + */ | |
c1c6733f AM |
203 | + req.rr_header.rh_cmd = GDLM_REMCMD_SENDCAST; |
204 | + req.rr_header.rh_length = sizeof(req); | |
205 | + req.rr_header.rh_flags = 0; | |
206 | + req.rr_header.rh_lkid = lkb->lkb_id; | |
207 | + req.rr_header.rh_lockspace = | |
208 | + lkb->lkb_resource->res_ls->ls_global_id; | |
209 | + req.rr_status = lkb->lkb_retstatus; | |
210 | + req.rr_remlkid = lkb->lkb_remid; | |
211 | + req.rr_rqmode = rqmode; | |
212 | + | |
213 | + midcomms_send_message(lkb->lkb_nodeid, &req.rr_header, | |
b7b72b66 | 214 | + lkb->lkb_resource->res_ls->ls_allocation); |
c1c6733f AM |
215 | + } |
216 | + } else { | |
217 | + /* | |
b7b72b66 | 218 | + * Prepare info that will be returned in ast/bast. |
c1c6733f AM |
219 | + */ |
220 | + | |
b7b72b66 | 221 | + if (flags & AST_BAST) { |
c1c6733f AM |
222 | + lkb->lkb_bastmode = rqmode; |
223 | + } else { | |
224 | + lkb->lkb_lksb->sb_status = lkb->lkb_retstatus; | |
c1c6733f AM |
225 | + if (lkb->lkb_flags & GDLM_LKFLG_DEMOTED) |
226 | + lkb->lkb_lksb->sb_flags = DLM_SBF_DEMOTED; | |
227 | + else | |
228 | + lkb->lkb_lksb->sb_flags = 0; | |
229 | + } | |
230 | + | |
b7b72b66 AM |
231 | + down(&ast_queue_lock); |
232 | + if (!(lkb->lkb_astflags & (AST_COMP | AST_BAST))) | |
233 | + list_add_tail(&lkb->lkb_astqueue, &ast_queue); | |
234 | + lkb->lkb_astflags |= flags; | |
235 | + up(&ast_queue_lock); | |
c1c6733f AM |
236 | + |
237 | + /* It is the responsibility of the caller to call wake_astd() | |
238 | + * once it has finished any other locking operations that must | |
239 | + * complete before the queued ASTs are delivered */ | |
240 | + } | |
241 | +} | |
242 | + | |
243 | +/* | |
b7b72b66 | 244 | + * Process any LKBs on the AST queue. |
c1c6733f AM |
245 | + */ |
246 | + | |
247 | +static void process_asts(void) | |
248 | +{ | |
b7b72b66 AM |
249 | + struct dlm_ls *ls; |
250 | + struct dlm_rsb *rsb; | |
251 | + struct dlm_lkb *lkb; | |
252 | + void (*cast) (long param); | |
253 | + void (*bast) (long param, int mode); | |
254 | + long astparam; | |
255 | + uint16_t flags; | |
256 | + | |
257 | + for (;;) { | |
258 | + down(&ast_queue_lock); | |
259 | + if (list_empty(&ast_queue)) { | |
260 | + up(&ast_queue_lock); | |
261 | + break; | |
262 | + } | |
263 | + | |
264 | + lkb = list_entry(ast_queue.next, struct dlm_lkb, lkb_astqueue); | |
265 | + list_del(&lkb->lkb_astqueue); | |
266 | + flags = lkb->lkb_astflags; | |
267 | + lkb->lkb_astflags = 0; | |
268 | + up(&ast_queue_lock); | |
c1c6733f | 269 | + |
b7b72b66 AM |
270 | + cast = lkb->lkb_astaddr; |
271 | + bast = lkb->lkb_bastaddr; | |
272 | + astparam = lkb->lkb_astparam; | |
273 | + rsb = lkb->lkb_resource; | |
274 | + ls = rsb->res_ls; | |
c1c6733f | 275 | + |
b7b72b66 AM |
276 | + if (flags & AST_COMP) { |
277 | + if (flags & AST_DEL) { | |
278 | + DLM_ASSERT(lkb->lkb_astflags == 0,); | |
c1c6733f | 279 | + |
b7b72b66 AM |
280 | + /* FIXME: we don't want to block asts for other |
281 | + lockspaces while one is being recovered */ | |
c1c6733f | 282 | + |
b7b72b66 AM |
283 | + down_read(&ls->ls_in_recovery); |
284 | + release_lkb(ls, lkb); | |
285 | + release_rsb(rsb); | |
286 | + up_read(&ls->ls_in_recovery); | |
287 | + } | |
c1c6733f | 288 | + |
c783755a AM |
289 | + if (cast) { |
290 | +#ifdef CONFIG_DLM_STATS | |
291 | + dlm_stats.cast++; | |
292 | +#endif | |
b7b72b66 | 293 | + cast(astparam); |
c783755a | 294 | + } |
b7b72b66 AM |
295 | + } |
296 | + | |
297 | + if (flags & AST_BAST && !(flags & AST_DEL)) { | |
298 | + int bmode = lkb->lkb_bastmode; | |
299 | + | |
300 | + /* gr or rq mode of the lock may have changed since the | |
301 | + ast was queued making the delivery unnecessary */ | |
302 | + | |
303 | + if (!bast || dlm_modes_compat(lkb->lkb_grmode, bmode)) | |
304 | + continue; | |
305 | + | |
306 | + if (lkb->lkb_rqmode == DLM_LOCK_IV || | |
c783755a | 307 | + !dlm_modes_compat(lkb->lkb_rqmode, bmode)) { |
b7b72b66 | 308 | + bast(astparam, bmode); |
c783755a AM |
309 | +#ifdef CONFIG_DLM_STATS |
310 | + dlm_stats.bast++; | |
311 | +#endif | |
312 | + } | |
b7b72b66 | 313 | + } |
c1c6733f | 314 | + |
b7b72b66 | 315 | + schedule(); |
c1c6733f | 316 | + } |
c1c6733f AM |
317 | +} |
318 | + | |
b7b72b66 | 319 | +void lockqueue_lkb_mark(struct dlm_ls *ls) |
c1c6733f | 320 | +{ |
b7b72b66 | 321 | + struct dlm_lkb *lkb, *safe; |
c1c6733f AM |
322 | + int count = 0; |
323 | + | |
324 | + log_all(ls, "mark waiting requests"); | |
325 | + | |
326 | + down(&_lockqueue_lock); | |
327 | + | |
328 | + list_for_each_entry_safe(lkb, safe, &_lockqueue, lkb_lockqueue) { | |
329 | + | |
330 | + if (lkb->lkb_resource->res_ls != ls) | |
331 | + continue; | |
332 | + | |
b7b72b66 AM |
333 | + log_debug(ls, "mark %x lq %d nodeid %d", lkb->lkb_id, |
334 | + lkb->lkb_lockqueue_state, lkb->lkb_nodeid); | |
335 | + | |
336 | + /* | |
c1c6733f AM |
337 | + * These lkb's are new and the master is being looked up. Mark |
338 | + * the lkb request to be resent. Even if the destination node | |
339 | + * for the request is still living and has our request, it will | |
340 | + * purge all resdir requests in purge_requestqueue. If there's | |
341 | + * a reply to the LOOKUP request in our requestqueue (the reply | |
342 | + * arrived after ls_stop), it is invalid and will be discarded | |
343 | + * in purge_requestqueue, too. | |
344 | + */ | |
345 | + | |
346 | + if (lkb->lkb_lockqueue_state == GDLM_LQSTATE_WAIT_RSB) { | |
b7b72b66 AM |
347 | + DLM_ASSERT(lkb->lkb_nodeid == -1, |
348 | + print_lkb(lkb); | |
349 | + print_rsb(lkb->lkb_resource);); | |
c1c6733f AM |
350 | + |
351 | + lkb->lkb_flags |= GDLM_LKFLG_LQRESEND; | |
352 | + count++; | |
353 | + continue; | |
354 | + } | |
355 | + | |
b7b72b66 AM |
356 | + /* |
357 | + * We're waiting for an unlock reply and the master node from | |
358 | + * whom we're expecting the reply has failed. If there's a | |
359 | + * reply in the requestqueue do nothing and process it later in | |
360 | + * process_requestqueue. If there's no reply, don't rebuild | |
361 | + * the lkb on a new master, but just assume we've gotten an | |
362 | + * unlock completion reply from the prev master (this also | |
363 | + * means not resending the unlock request). If the unlock is | |
364 | + * for the last lkb on the rsb, the rsb has nodeid of -1 and | |
365 | + * the rsb won't be rebuilt on the new master either. | |
366 | + * | |
367 | + * If we're waiting for an unlock reply and the master node is | |
368 | + * still alive, we should either have a reply in the | |
369 | + * requestqueue from the master already, or we should get one | |
370 | + * from the master once recovery is complete. There is no | |
371 | + * rebuilding of the rsb/lkb in this case and no resending of | |
372 | + * the request. | |
373 | + */ | |
374 | + | |
375 | + if (lkb->lkb_lockqueue_state == GDLM_LQSTATE_WAIT_UNLOCK) { | |
376 | + if (in_nodes_gone(ls, lkb->lkb_nodeid)) { | |
377 | + if (reply_in_requestqueue(ls, lkb->lkb_id)) { | |
378 | + lkb->lkb_flags |= GDLM_LKFLG_NOREBUILD; | |
379 | + log_debug(ls, "mark %x unlock have rep", | |
380 | + lkb->lkb_id); | |
381 | + } else { | |
382 | + /* assume we got a reply from the old master */ | |
383 | + lkb->lkb_flags |= GDLM_LKFLG_NOREBUILD; | |
384 | + lkb->lkb_flags |= GDLM_LKFLG_UNLOCKDONE; | |
385 | + log_debug(ls, "mark %x unlock no rep", | |
386 | + lkb->lkb_id); | |
387 | + } | |
388 | + } | |
389 | + count++; | |
390 | + continue; | |
391 | + } | |
392 | + | |
393 | + /* | |
c1c6733f AM |
394 | + * These lkb's have an outstanding request to a bygone node. |
395 | + * The request will be redirected to the new master node in | |
396 | + * resend_cluster_requests(). Don't mark the request for | |
397 | + * resending if there's a reply for it saved in the | |
398 | + * requestqueue. | |
399 | + */ | |
400 | + | |
401 | + if (in_nodes_gone(ls, lkb->lkb_nodeid) && | |
402 | + !reply_in_requestqueue(ls, lkb->lkb_id)) { | |
403 | + | |
404 | + lkb->lkb_flags |= GDLM_LKFLG_LQRESEND; | |
405 | + | |
406 | + /* | |
407 | + * Don't rebuild this lkb on a new rsb in | |
408 | + * rebuild_rsbs_send(). | |
409 | + */ | |
410 | + | |
b7b72b66 AM |
411 | + if (lkb->lkb_lockqueue_state == GDLM_LQSTATE_WAIT_CONDGRANT) { |
412 | + DLM_ASSERT(lkb->lkb_status == GDLM_LKSTS_WAITING, | |
413 | + print_lkb(lkb); | |
414 | + print_rsb(lkb->lkb_resource);); | |
c1c6733f AM |
415 | + lkb->lkb_flags |= GDLM_LKFLG_NOREBUILD; |
416 | + } | |
417 | + | |
418 | + /* | |
419 | + * This flag indicates to the new master that his lkb | |
420 | + * is in the midst of a convert request and should be | |
421 | + * placed on the granted queue rather than the convert | |
422 | + * queue. We will resend this convert request to the | |
423 | + * new master. | |
424 | + */ | |
425 | + | |
b7b72b66 AM |
426 | + else if (lkb->lkb_lockqueue_state == GDLM_LQSTATE_WAIT_CONVERT) { |
427 | + DLM_ASSERT(lkb->lkb_status == GDLM_LKSTS_CONVERT, | |
428 | + print_lkb(lkb); | |
429 | + print_rsb(lkb->lkb_resource);); | |
c1c6733f AM |
430 | + lkb->lkb_flags |= GDLM_LKFLG_LQCONVERT; |
431 | + } | |
432 | + | |
433 | + count++; | |
434 | + } | |
435 | + } | |
436 | + up(&_lockqueue_lock); | |
437 | + | |
438 | + log_all(ls, "marked %d requests", count); | |
439 | +} | |
440 | + | |
b7b72b66 | 441 | +int resend_cluster_requests(struct dlm_ls *ls) |
c1c6733f | 442 | +{ |
b7b72b66 AM |
443 | + struct dlm_lkb *lkb, *safe; |
444 | + struct dlm_rsb *r; | |
c1c6733f AM |
445 | + int error = 0, state, count = 0; |
446 | + | |
447 | + log_all(ls, "resend marked requests"); | |
448 | + | |
449 | + down(&_lockqueue_lock); | |
450 | + | |
451 | + list_for_each_entry_safe(lkb, safe, &_lockqueue, lkb_lockqueue) { | |
452 | + | |
453 | + if (!test_bit(LSFL_LS_RUN, &ls->ls_flags)) { | |
454 | + log_debug(ls, "resend_cluster_requests: aborted"); | |
455 | + error = -EINTR; | |
456 | + break; | |
457 | + } | |
458 | + | |
b7b72b66 AM |
459 | + r = lkb->lkb_resource; |
460 | + | |
461 | + if (r->res_ls != ls) | |
c1c6733f AM |
462 | + continue; |
463 | + | |
b7b72b66 AM |
464 | + log_debug(ls, "resend %x lq %d flg %x node %d/%d \"%s\"", |
465 | + lkb->lkb_id, lkb->lkb_lockqueue_state, lkb->lkb_flags, | |
466 | + lkb->lkb_nodeid, r->res_nodeid, r->res_name); | |
c1c6733f | 467 | + |
b7b72b66 AM |
468 | + if (lkb->lkb_flags & GDLM_LKFLG_UNLOCKDONE) { |
469 | + log_debug(ls, "unlock done %x", lkb->lkb_id); | |
470 | + list_del(&lkb->lkb_lockqueue); | |
471 | + res_lkb_dequeue(lkb); | |
472 | + lkb->lkb_retstatus = -DLM_EUNLOCK; | |
473 | + queue_ast(lkb, AST_COMP | AST_DEL, 0); | |
474 | + count++; | |
475 | + continue; | |
476 | + } | |
477 | + | |
478 | + /* | |
c1c6733f AM |
479 | + * Resend/process the lockqueue lkb's (in-progress requests) |
480 | + * that were flagged at the start of recovery in | |
481 | + * lockqueue_lkb_mark(). | |
482 | + */ | |
483 | + | |
484 | + if (lkb->lkb_flags & GDLM_LKFLG_LQRESEND) { | |
485 | + lkb->lkb_flags &= ~GDLM_LKFLG_LQRESEND; | |
486 | + lkb->lkb_flags &= ~GDLM_LKFLG_NOREBUILD; | |
487 | + lkb->lkb_flags &= ~GDLM_LKFLG_LQCONVERT; | |
488 | + | |
489 | + if (lkb->lkb_nodeid == -1) { | |
490 | + /* | |
491 | + * Send lookup to new resdir node. | |
492 | + */ | |
493 | + lkb->lkb_lockqueue_time = jiffies; | |
494 | + send_cluster_request(lkb, | |
495 | + lkb->lkb_lockqueue_state); | |
496 | + } | |
497 | + | |
498 | + else if (lkb->lkb_nodeid != 0) { | |
499 | + /* | |
500 | + * There's a new RSB master (that's not us.) | |
501 | + */ | |
502 | + lkb->lkb_lockqueue_time = jiffies; | |
503 | + send_cluster_request(lkb, | |
504 | + lkb->lkb_lockqueue_state); | |
505 | + } | |
506 | + | |
507 | + else { | |
508 | + /* | |
509 | + * We are the new RSB master for this lkb | |
510 | + * request. | |
511 | + */ | |
512 | + state = lkb->lkb_lockqueue_state; | |
513 | + lkb->lkb_lockqueue_state = 0; | |
514 | + /* list_del equals remove_from_lockqueue() */ | |
515 | + list_del(&lkb->lkb_lockqueue); | |
b7b72b66 | 516 | + process_remastered_lkb(ls, lkb, state); |
c1c6733f AM |
517 | + } |
518 | + | |
519 | + count++; | |
520 | + } | |
521 | + } | |
522 | + up(&_lockqueue_lock); | |
523 | + | |
524 | + log_all(ls, "resent %d requests", count); | |
525 | + return error; | |
526 | +} | |
527 | + | |
528 | +/* | |
529 | + * Process any LKBs on the Lock queue, this | |
530 | + * just looks at the entries to see if they have been | |
531 | + * on the queue too long and fails the requests if so. | |
532 | + */ | |
533 | + | |
534 | +static void process_lockqueue(void) | |
535 | +{ | |
b7b72b66 AM |
536 | + struct dlm_lkb *lkb, *safe; |
537 | + struct dlm_ls *ls; | |
c1c6733f AM |
538 | + int count = 0; |
539 | + | |
540 | + down(&_lockqueue_lock); | |
541 | + | |
542 | + list_for_each_entry_safe(lkb, safe, &_lockqueue, lkb_lockqueue) { | |
543 | + ls = lkb->lkb_resource->res_ls; | |
544 | + | |
545 | + if (test_bit(LSFL_NOTIMERS, &ls->ls_flags)) | |
546 | + continue; | |
547 | + | |
548 | + /* Don't time out locks that are in transition */ | |
549 | + if (!test_bit(LSFL_LS_RUN, &ls->ls_flags)) | |
550 | + continue; | |
551 | + | |
552 | + if (check_timeout(lkb->lkb_lockqueue_time, | |
553 | + dlm_config.lock_timeout)) { | |
554 | + count++; | |
555 | + list_del(&lkb->lkb_lockqueue); | |
556 | + up(&_lockqueue_lock); | |
557 | + cancel_lockop(lkb, -ETIMEDOUT); | |
558 | + down(&_lockqueue_lock); | |
559 | + } | |
560 | + } | |
561 | + up(&_lockqueue_lock); | |
562 | + | |
563 | + if (count) | |
564 | + wake_astd(); | |
565 | + | |
b7b72b66 AM |
566 | + mod_timer(&_lockqueue_timer, |
567 | + jiffies + ((dlm_config.lock_timeout >> 1) * HZ)); | |
c1c6733f AM |
568 | +} |
569 | + | |
570 | +/* Look for deadlocks */ | |
571 | +static void process_deadlockqueue(void) | |
572 | +{ | |
b7b72b66 | 573 | + struct dlm_lkb *lkb, *safe; |
c1c6733f AM |
574 | + |
575 | + down(&_deadlockqueue_lock); | |
576 | + | |
577 | + list_for_each_entry_safe(lkb, safe, &_deadlockqueue, lkb_deadlockq) { | |
b7b72b66 | 578 | + struct dlm_lkb *kill_lkb; |
c1c6733f AM |
579 | + |
580 | + /* Only look at "due" locks */ | |
581 | + if (!check_timeout(lkb->lkb_duetime, dlm_config.deadlocktime)) | |
582 | + break; | |
583 | + | |
584 | + /* Don't look at locks that are in transition */ | |
585 | + if (!test_bit(LSFL_LS_RUN, | |
586 | + &lkb->lkb_resource->res_ls->ls_flags)) | |
587 | + continue; | |
588 | + | |
589 | + up(&_deadlockqueue_lock); | |
590 | + | |
591 | + /* Lock has hit due time, check for conversion deadlock */ | |
592 | + kill_lkb = conversion_deadlock_check(lkb); | |
593 | + if (kill_lkb) | |
594 | + cancel_conversion(kill_lkb, -EDEADLOCK); | |
595 | + | |
596 | + down(&_deadlockqueue_lock); | |
597 | + } | |
598 | + up(&_deadlockqueue_lock); | |
599 | +} | |
600 | + | |
601 | +static __inline__ int no_asts(void) | |
602 | +{ | |
603 | + int ret; | |
604 | + | |
b7b72b66 AM |
605 | + down(&ast_queue_lock); |
606 | + ret = list_empty(&ast_queue); | |
607 | + up(&ast_queue_lock); | |
c1c6733f AM |
608 | + return ret; |
609 | +} | |
610 | + | |
611 | +static void lockqueue_timer_fn(unsigned long arg) | |
612 | +{ | |
b7b72b66 AM |
613 | + set_bit(WAKE_TIMER, &astd_wakeflags); |
614 | + wake_up(&astd_waitchan); | |
c1c6733f AM |
615 | +} |
616 | + | |
617 | +/* | |
618 | + * DLM daemon which delivers asts. | |
619 | + */ | |
620 | + | |
621 | +static int dlm_astd(void *data) | |
622 | +{ | |
b7b72b66 AM |
623 | + /* |
624 | + * Set a timer to check the lockqueue for dead locks (and deadlocks). | |
625 | + */ | |
c1c6733f AM |
626 | + INIT_LIST_HEAD(&_lockqueue); |
627 | + init_MUTEX(&_lockqueue_lock); | |
628 | + INIT_LIST_HEAD(&_deadlockqueue); | |
629 | + init_MUTEX(&_deadlockqueue_lock); | |
c1c6733f AM |
630 | + init_timer(&_lockqueue_timer); |
631 | + _lockqueue_timer.function = lockqueue_timer_fn; | |
632 | + _lockqueue_timer.data = 0; | |
633 | + mod_timer(&_lockqueue_timer, | |
634 | + jiffies + ((dlm_config.lock_timeout >> 1) * HZ)); | |
635 | + | |
b7b72b66 | 636 | + while (!kthread_should_stop()) { |
bb1d8b11 | 637 | + wchan_cond_sleep_intr(astd_waitchan, !test_bit(WAKE_ASTS, &astd_wakeflags)); |
c1c6733f | 638 | + |
b7b72b66 | 639 | + if (test_and_clear_bit(WAKE_ASTS, &astd_wakeflags)) |
c1c6733f AM |
640 | + process_asts(); |
641 | + | |
b7b72b66 | 642 | + if (test_and_clear_bit(WAKE_TIMER, &astd_wakeflags)) { |
c1c6733f AM |
643 | + process_lockqueue(); |
644 | + if (dlm_config.deadlocktime) | |
645 | + process_deadlockqueue(); | |
646 | + } | |
647 | + } | |
648 | + | |
649 | + if (timer_pending(&_lockqueue_timer)) | |
650 | + del_timer(&_lockqueue_timer); | |
651 | + | |
c1c6733f AM |
652 | + return 0; |
653 | +} | |
654 | + | |
655 | +void wake_astd(void) | |
656 | +{ | |
bb1d8b11 AM |
657 | + if (!no_asts()) { |
658 | + set_bit(WAKE_ASTS, &astd_wakeflags); | |
659 | + wake_up(&astd_waitchan); | |
660 | + } | |
c1c6733f AM |
661 | +} |
662 | + | |
b7b72b66 | 663 | +int astd_start(void) |
c1c6733f | 664 | +{ |
b7b72b66 AM |
665 | + struct task_struct *p; |
666 | + int error = 0; | |
667 | + | |
668 | + INIT_LIST_HEAD(&ast_queue); | |
669 | + init_MUTEX(&ast_queue_lock); | |
670 | + init_waitqueue_head(&astd_waitchan); | |
671 | + | |
d3b4771f | 672 | + p = kthread_run(dlm_astd, NULL, 0, "dlm_astd"); |
b7b72b66 AM |
673 | + if (IS_ERR(p)) |
674 | + error = PTR_ERR(p); | |
675 | + else | |
676 | + astd_task = p; | |
677 | + return error; | |
c1c6733f AM |
678 | +} |
679 | + | |
b7b72b66 | 680 | +void astd_stop(void) |
c1c6733f | 681 | +{ |
b7b72b66 AM |
682 | + kthread_stop(astd_task); |
683 | + wake_up(&astd_waitchan); | |
c1c6733f AM |
684 | +} |
685 | diff -urN linux-orig/cluster/dlm/ast.h linux-patched/cluster/dlm/ast.h | |
686 | --- linux-orig/cluster/dlm/ast.h 1970-01-01 07:30:00.000000000 +0730 | |
bb1d8b11 | 687 | +++ linux-patched/cluster/dlm/ast.h 2004-11-03 11:31:56.000000000 +0800 |
b7b72b66 | 688 | @@ -0,0 +1,28 @@ |
c1c6733f AM |
689 | +/****************************************************************************** |
690 | +******************************************************************************* | |
691 | +** | |
692 | +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | |
693 | +** Copyright (C) 2004 Red Hat, Inc. All rights reserved. | |
694 | +** | |
695 | +** This copyrighted material is made available to anyone wishing to use, | |
696 | +** modify, copy, or redistribute it subject to the terms and conditions | |
697 | +** of the GNU General Public License v.2. | |
698 | +** | |
699 | +******************************************************************************* | |
700 | +******************************************************************************/ | |
701 | + | |
702 | +#ifndef __AST_DOT_H__ | |
703 | +#define __AST_DOT_H__ | |
704 | + | |
b7b72b66 AM |
705 | +void lockqueue_lkb_mark(struct dlm_ls *ls); |
706 | +int resend_cluster_requests(struct dlm_ls *ls); | |
707 | +void add_to_lockqueue(struct dlm_lkb *lkb); | |
708 | +void remove_from_lockqueue(struct dlm_lkb *lkb); | |
709 | +void add_to_deadlockqueue(struct dlm_lkb *lkb); | |
710 | +void remove_from_deadlockqueue(struct dlm_lkb *lkb); | |
711 | +void queue_ast(struct dlm_lkb *lkb, uint16_t astflags, uint8_t rqmode); | |
c1c6733f AM |
712 | +void wake_astd(void); |
713 | +int astd_start(void); | |
714 | +void astd_stop(void); | |
715 | + | |
716 | +#endif /* __AST_DOT_H__ */ | |
717 | diff -urN linux-orig/cluster/dlm/config.c linux-patched/cluster/dlm/config.c | |
718 | --- linux-orig/cluster/dlm/config.c 1970-01-01 07:30:00.000000000 +0730 | |
bb1d8b11 | 719 | +++ linux-patched/cluster/dlm/config.c 2004-11-03 11:31:56.000000000 +0800 |
b7b72b66 | 720 | @@ -0,0 +1,137 @@ |
c1c6733f AM |
721 | +/****************************************************************************** |
722 | +******************************************************************************* | |
723 | +** | |
724 | +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | |
725 | +** Copyright (C) 2004 Red Hat, Inc. All rights reserved. | |
726 | +** | |
727 | +** This copyrighted material is made available to anyone wishing to use, | |
728 | +** modify, copy, or redistribute it subject to the terms and conditions | |
729 | +** of the GNU General Public License v.2. | |
730 | +** | |
731 | +******************************************************************************* | |
732 | +******************************************************************************/ | |
733 | + | |
734 | +#include <linux/module.h> | |
735 | +#include <linux/proc_fs.h> | |
736 | + | |
737 | +#include "dlm_internal.h" | |
738 | +#include "lowcomms.h" | |
739 | +#include "config.h" | |
740 | + | |
741 | +/* Config file defaults */ | |
742 | +#define DEFAULT_TCP_PORT 21064 | |
743 | +#define DEFAULT_LOCK_TIMEOUT 30 | |
744 | +#define DEFAULT_BUFFER_SIZE 4096 | |
b7b72b66 AM |
745 | +#define DEFAULT_RSBTBL_SIZE 256 |
746 | +#define DEFAULT_LKBTBL_SIZE 1024 | |
747 | +#define DEFAULT_DIRTBL_SIZE 512 | |
c783755a | 748 | +#define DEFAULT_CONN_INCREMENT 32 |
c1c6733f | 749 | +#define DEFAULT_DEADLOCKTIME 10 |
b7b72b66 | 750 | +#define DEFAULT_RECOVER_TIMER 5 |
c1c6733f AM |
751 | + |
752 | +struct config_info dlm_config = { | |
753 | + .tcp_port = DEFAULT_TCP_PORT, | |
754 | + .lock_timeout = DEFAULT_LOCK_TIMEOUT, | |
755 | + .buffer_size = DEFAULT_BUFFER_SIZE, | |
b7b72b66 AM |
756 | + .rsbtbl_size = DEFAULT_RSBTBL_SIZE, |
757 | + .lkbtbl_size = DEFAULT_LKBTBL_SIZE, | |
758 | + .dirtbl_size = DEFAULT_DIRTBL_SIZE, | |
c783755a | 759 | + .conn_increment = DEFAULT_CONN_INCREMENT, |
c1c6733f | 760 | + .deadlocktime = DEFAULT_DEADLOCKTIME, |
b7b72b66 | 761 | + .recover_timer = DEFAULT_RECOVER_TIMER |
c1c6733f AM |
762 | +}; |
763 | + | |
764 | + | |
765 | +static struct config_proc_info { | |
766 | + char *name; | |
767 | + int *value; | |
768 | +} config_proc[] = { | |
769 | + { | |
770 | + .name = "tcp_port", | |
771 | + .value = &dlm_config.tcp_port, | |
772 | + }, | |
773 | + { | |
774 | + .name = "lock_timeout", | |
775 | + .value = &dlm_config.lock_timeout, | |
776 | + }, | |
777 | + { | |
778 | + .name = "buffer_size", | |
779 | + .value = &dlm_config.buffer_size, | |
780 | + }, | |
781 | + { | |
b7b72b66 AM |
782 | + .name = "rsbtbl_size", |
783 | + .value = &dlm_config.rsbtbl_size, | |
784 | + }, | |
785 | + { | |
786 | + .name = "lkbtbl_size", | |
787 | + .value = &dlm_config.lkbtbl_size, | |
c1c6733f AM |
788 | + }, |
789 | + { | |
b7b72b66 AM |
790 | + .name = "dirtbl_size", |
791 | + .value = &dlm_config.dirtbl_size, | |
c1c6733f AM |
792 | + }, |
793 | + { | |
c783755a AM |
794 | + .name = "conn_increment", |
795 | + .value = &dlm_config.conn_increment, | |
c1c6733f AM |
796 | + }, |
797 | + { | |
798 | + .name = "deadlocktime", | |
799 | + .value = &dlm_config.deadlocktime, | |
800 | + }, | |
b7b72b66 AM |
801 | + { |
802 | + .name = "recover_timer", | |
803 | + .value = &dlm_config.recover_timer, | |
804 | + } | |
c1c6733f AM |
805 | +}; |
806 | +static struct proc_dir_entry *dlm_dir; | |
807 | + | |
808 | +static int dlm_config_read_proc(char *page, char **start, off_t off, int count, | |
809 | + int *eof, void *data) | |
810 | +{ | |
811 | + struct config_proc_info *cinfo = data; | |
812 | + return snprintf(page, count, "%d\n", *cinfo->value); | |
813 | +} | |
814 | + | |
815 | +static int dlm_config_write_proc(struct file *file, const char *buffer, | |
816 | + unsigned long count, void *data) | |
817 | +{ | |
818 | + struct config_proc_info *cinfo = data; | |
819 | + int value; | |
820 | + char *end; | |
821 | + | |
822 | + value = simple_strtoul(buffer, &end, 10); | |
823 | + if (*end) | |
824 | + *cinfo->value = value; | |
825 | + return count; | |
826 | +} | |
827 | + | |
828 | +int dlm_config_init(void) | |
829 | +{ | |
830 | + int i; | |
831 | + struct proc_dir_entry *pde; | |
832 | + | |
833 | + dlm_dir = proc_mkdir("cluster/config/dlm", 0); | |
834 | + if (!dlm_dir) | |
835 | + return -1; | |
836 | + | |
837 | + dlm_dir->owner = THIS_MODULE; | |
838 | + | |
839 | + for (i=0; i<sizeof(config_proc)/sizeof(struct config_proc_info); i++) { | |
840 | + pde = create_proc_entry(config_proc[i].name, 0660, dlm_dir); | |
841 | + if (pde) { | |
842 | + pde->data = &config_proc[i]; | |
843 | + pde->write_proc = dlm_config_write_proc; | |
844 | + pde->read_proc = dlm_config_read_proc; | |
845 | + } | |
846 | + } | |
847 | + return 0; | |
848 | +} | |
849 | + | |
850 | +void dlm_config_exit(void) | |
851 | +{ | |
852 | + int i; | |
853 | + | |
854 | + for (i=0; i<sizeof(config_proc)/sizeof(struct config_proc_info); i++) | |
855 | + remove_proc_entry(config_proc[i].name, dlm_dir); | |
856 | + remove_proc_entry("cluster/config/dlm", NULL); | |
857 | +} | |
858 | diff -urN linux-orig/cluster/dlm/config.h linux-patched/cluster/dlm/config.h | |
859 | --- linux-orig/cluster/dlm/config.h 1970-01-01 07:30:00.000000000 +0730 | |
bb1d8b11 | 860 | +++ linux-patched/cluster/dlm/config.h 2004-11-03 11:31:56.000000000 +0800 |
b7b72b66 | 861 | @@ -0,0 +1,33 @@ |
c1c6733f AM |
862 | +/****************************************************************************** |
863 | +******************************************************************************* | |
864 | +** | |
865 | +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | |
866 | +** Copyright (C) 2004 Red Hat, Inc. All rights reserved. | |
867 | +** | |
868 | +** This copyrighted material is made available to anyone wishing to use, | |
869 | +** modify, copy, or redistribute it subject to the terms and conditions | |
870 | +** of the GNU General Public License v.2. | |
871 | +** | |
872 | +******************************************************************************* | |
873 | +******************************************************************************/ | |
874 | + | |
875 | +#ifndef __CONFIG_DOT_H__ | |
876 | +#define __CONFIG_DOT_H__ | |
877 | + | |
/*
 * Run-time tunables of the DLM.  Each field is exposed as a proc file of
 * the same name by the config_proc[] table in config.c.
 * NOTE(review): units and valid ranges are not visible from this header -
 * check the DEFAULT_* definitions and the users of each field.
 */
struct config_info {
	int tcp_port;
	int lock_timeout;
	int buffer_size;
	int rsbtbl_size;
	int lkbtbl_size;
	int dirtbl_size;
	int conn_increment;
	int deadlocktime;
	int recover_timer;
};

/* The single instance of the tunables, defined in config.c */
extern struct config_info dlm_config;
/* Create / remove the proc files that expose dlm_config */
extern int dlm_config_init(void);
extern void dlm_config_exit(void);
893 | + | |
894 | +#endif /* __CONFIG_DOT_H__ */ | |
895 | diff -urN linux-orig/cluster/dlm/device.c linux-patched/cluster/dlm/device.c | |
896 | --- linux-orig/cluster/dlm/device.c 1970-01-01 07:30:00.000000000 +0730 | |
bb1d8b11 | 897 | +++ linux-patched/cluster/dlm/device.c 2004-11-03 11:31:56.000000000 +0800 |
c783755a | 898 | @@ -0,0 +1,1212 @@ |
c1c6733f AM |
899 | +/****************************************************************************** |
900 | +******************************************************************************* | |
901 | +** | |
902 | +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | |
903 | +** Copyright (C) 2004 Red Hat, Inc. All rights reserved. | |
904 | +** | |
905 | +** This copyrighted material is made available to anyone wishing to use, | |
906 | +** modify, copy, or redistribute it subject to the terms and conditions | |
907 | +** of the GNU General Public License v.2. | |
908 | +** | |
909 | +******************************************************************************* | |
910 | +******************************************************************************/ | |
911 | + | |
912 | +/* | |
913 | + * device.c | |
914 | + * | |
915 | + * This is the userland interface to the DLM. | |
916 | + * | |
917 | + * The locking is done via a misc char device (find the | |
918 | + * registered minor number in /proc/misc). | |
919 | + * | |
920 | + * User code should not use this interface directly but | |
921 | + * call the library routines in libdlm.a instead. | |
922 | + * | |
923 | + */ | |
924 | + | |
925 | +#include <linux/miscdevice.h> | |
926 | +#include <linux/init.h> | |
927 | +#include <linux/wait.h> | |
928 | +#include <linux/module.h> | |
929 | +#include <linux/file.h> | |
930 | +#include <linux/fs.h> | |
931 | +#include <linux/poll.h> | |
932 | +#include <linux/signal.h> | |
933 | +#include <linux/spinlock.h> | |
934 | +#include <asm/ioctls.h> | |
935 | + | |
936 | +#include "dlm_internal.h" | |
937 | +#include "device.h" | |
938 | + | |
/* Looks up a lock block from a lockspace and a lock id; may return NULL */
extern struct dlm_lkb *dlm_get_lkb(struct dlm_ls *, int);
/* File operations installed on every per-lockspace misc device */
static struct file_operations _dlm_fops;
/* Per-lockspace devices are named "<name_prefix>_<lockspace name>" */
static const char *name_prefix="dlm";
/* All userland lockspaces (struct user_ls), linked through ls_list */
static struct list_head user_ls_list;
/* Held while user_ls_list is searched or modified */
static struct semaphore user_ls_lock;

/* Flags in li_flags: bit numbers (not masks) for set_bit()/test_bit() */
#define LI_FLAG_COMPLETE 1
#define LI_FLAG_FIRSTLOCK 2

/* NOTE(review): presumably the value kept in lock_info.li_magic - confirm */
#define LOCKINFO_MAGIC 0x53595324
950 | + | |
/*
 * Per-request state for a userland lock, unlock or query operation.  A
 * pointer to one of these is the parameter that ast_routine() and
 * bast_routine() receive from the DLM.
 */
struct lock_info {
	uint32_t li_magic;	/* NOTE(review): presumably LOCKINFO_MAGIC - confirm */
	uint8_t li_cmd;		/* user command, e.g. DLM_USER_QUERY */
	struct dlm_lksb li_lksb;	/* kernel-side lock status block */
	wait_queue_head_t li_waitq;	/* woken by ast_routine() for synchronous requests */
	unsigned long li_flags;		/* LI_FLAG_* bits */
	void __user *li_castparam;	/* completion AST argument reported to userland */
	void __user *li_castaddr;	/* completion AST routine; NULL marks a synchronous request */
	void __user *li_bastparam;	/* blocking AST argument reported to userland */
	void __user *li_bastaddr;	/* blocking AST routine; NULL means no BAST is queued */
	void __user *li_pend_bastparam;	/* blocking AST args that ast_routine() makes */
	void __user *li_pend_bastaddr;	/*   current once a conversion has succeeded */
	void __user *li_user_lvbptr;	/* userland buffer dlm_read() copies the LVB into */
	struct list_head li_ownerqueue;	/* link in the owning file_info's fi_lkb_list */
	struct file_info *li_file;	/* owning open file; a reference is held on it */
	struct dlm_lksb __user *li_user_lksb;	/* userland lksb, reported back with each AST */
	struct semaphore li_firstlock;	/* ast_routine() takes this to wait for dlm_lock() to finish */
	struct dlm_queryinfo *li_queryinfo;	/* kernel query buffer (queries only) */
	struct dlm_queryinfo __user *li_user_queryinfo;	/* userland query buffer (queries only) */
};
971 | + | |
/*
 * A queued AST: one completion or blocking notification waiting on a
 * file_info's fi_ast_list until userland collects it with read().
 */
struct ast_info {
	struct dlm_lock_result result;	/* copied to the reader's buffer as-is */
	struct dlm_queryinfo *queryinfo;	/* kernel query results; NULL unless a query */
	struct dlm_queryinfo __user *user_queryinfo;	/* where the query results go in userland */
	struct list_head list;		/* link in file_info.fi_ast_list */
	void __user *user_lvbptr;	/* userland LVB buffer, may be NULL */
	uint32_t ast_reason;   /* AST_COMP or AST_BAST from dlm_internal.h */
};
981 | + | |
/* One of these per userland lockspace */
struct user_ls {
	void *ls_lockspace;	/* DLM lockspace handle; NULL once it has been released */
	atomic_t ls_refcnt;	/* raised by each open of the lockspace device */
	long ls_flags; /* bit 1 means LS has been deleted */

	/* Passed into misc_register() */
	struct miscdevice ls_miscinfo;
	struct list_head ls_list;	/* link in user_ls_list */
};

/* misc_device info for the control device */
static struct miscdevice ctl_device;
995 | + | |
/*
 * Stuff we hang off the file struct.
 * The first two are to cope with unlocking all the
 * locks held by a process when it dies.
 */
struct file_info {
	struct list_head fi_lkb_list;     /* List of active lkbs */
	spinlock_t fi_lkb_lock;           /* Protects fi_lkb_list */
	struct list_head fi_ast_list;     /* Queue of ASTs to be delivered */
	spinlock_t fi_ast_lock;           /* Protects fi_ast_list */
	wait_queue_head_t fi_wait;        /* Readers and pollers waiting for an AST */
	struct user_ls *fi_ls;            /* Lockspace this file was opened on */
	atomic_t fi_refcnt;               /* Number of users */
	unsigned long fi_flags;           /* Bit 1 means the device is open */
};
1011 | + | |
1012 | + | |
1013 | +/* get and put ops for file_info. | |
1014 | + Actually I don't really like "get" and "put", but everyone | |
1015 | + else seems to use them and I can't think of anything | |
1016 | + nicer at the moment */ | |
1017 | +static void get_file_info(struct file_info *f) | |
1018 | +{ | |
1019 | + atomic_inc(&f->fi_refcnt); | |
1020 | +} | |
1021 | + | |
1022 | +static void put_file_info(struct file_info *f) | |
1023 | +{ | |
1024 | + if (atomic_dec_and_test(&f->fi_refcnt)) | |
1025 | + kfree(f); | |
1026 | +} | |
1027 | + | |
b7b72b66 AM |
1028 | +static void release_lockinfo(struct lock_info *li) |
1029 | +{ | |
1030 | + put_file_info(li->li_file); | |
1031 | + if (li->li_lksb.sb_lvbptr && li->li_cmd != DLM_USER_QUERY) | |
1032 | + kfree(li->li_lksb.sb_lvbptr); | |
1033 | + kfree(li); | |
1034 | +} | |
1035 | + | |
1036 | +static struct user_ls *__find_lockspace(int minor) | |
c1c6733f AM |
1037 | +{ |
1038 | + struct user_ls *lsinfo; | |
1039 | + | |
1040 | + list_for_each_entry(lsinfo, &user_ls_list, ls_list) { | |
1041 | + | |
1042 | + if (lsinfo->ls_miscinfo.minor == minor) | |
1043 | + return lsinfo; | |
1044 | + } | |
1045 | + return NULL; | |
1046 | +} | |
1047 | + | |
b7b72b66 AM |
1048 | +/* Find a lockspace struct given the device minor number */ |
1049 | +static struct user_ls *find_lockspace(int minor) | |
1050 | +{ | |
1051 | + struct user_ls *lsinfo; | |
1052 | + | |
1053 | + down(&user_ls_lock); | |
1054 | + lsinfo = __find_lockspace(minor); | |
1055 | + up(&user_ls_lock); | |
1056 | + | |
1057 | + return lsinfo; | |
1058 | +} | |
1059 | + | |
c1c6733f AM |
1060 | +static void add_lockspace_to_list(struct user_ls *lsinfo) |
1061 | +{ | |
b7b72b66 | 1062 | + down(&user_ls_lock); |
c1c6733f | 1063 | + list_add(&lsinfo->ls_list, &user_ls_list); |
b7b72b66 | 1064 | + up(&user_ls_lock); |
c1c6733f AM |
1065 | +} |
1066 | + | |
1067 | +/* Register a lockspace with the DLM and create a misc | |
1068 | + device for userland to access it */ | |
1069 | +static int register_lockspace(char *name, struct user_ls **ls) | |
1070 | +{ | |
1071 | + struct user_ls *newls; | |
1072 | + int status; | |
1073 | + int namelen; | |
1074 | + | |
1075 | + namelen = strlen(name)+strlen(name_prefix)+2; | |
1076 | + | |
1077 | + newls = kmalloc(sizeof(struct user_ls), GFP_KERNEL); | |
1078 | + if (!newls) | |
1079 | + return -ENOMEM; | |
1080 | + memset(newls, 0, sizeof(struct user_ls)); | |
1081 | + | |
1082 | + newls->ls_miscinfo.name = kmalloc(namelen, GFP_KERNEL); | |
1083 | + if (!newls->ls_miscinfo.name) { | |
1084 | + kfree(newls); | |
1085 | + return -ENOMEM; | |
1086 | + } | |
c783755a AM |
1087 | + status = dlm_new_lockspace(name, strlen(name), |
1088 | + &newls->ls_lockspace, 0); | |
c1c6733f AM |
1089 | + |
1090 | + if (status != 0) { | |
1091 | + kfree(newls->ls_miscinfo.name); | |
1092 | + kfree(newls); | |
1093 | + return status; | |
1094 | + } | |
1095 | + | |
c783755a AM |
1096 | + snprintf((char*)newls->ls_miscinfo.name, namelen, "%s_%s", name_prefix, name); |
1097 | + | |
c1c6733f AM |
1098 | + newls->ls_miscinfo.fops = &_dlm_fops; |
1099 | + newls->ls_miscinfo.minor = MISC_DYNAMIC_MINOR; | |
1100 | + | |
1101 | + status = misc_register(&newls->ls_miscinfo); | |
1102 | + if (status) { | |
1103 | + log_print("failed to register misc device for %s", name); | |
1104 | + dlm_release_lockspace(newls->ls_lockspace, 0); | |
1105 | + kfree(newls->ls_miscinfo.name); | |
1106 | + kfree(newls); | |
1107 | + return status; | |
1108 | + } | |
1109 | + | |
1110 | + | |
1111 | + add_lockspace_to_list(newls); | |
1112 | + *ls = newls; | |
1113 | + return 0; | |
1114 | +} | |
1115 | + | |
b7b72b66 | 1116 | +/* Called with the user_ls_lock semaphore held */ |
c1c6733f AM |
1117 | +static int unregister_lockspace(struct user_ls *lsinfo, int force) |
1118 | +{ | |
1119 | + int status; | |
1120 | + | |
1121 | + status = dlm_release_lockspace(lsinfo->ls_lockspace, force); | |
1122 | + if (status) | |
1123 | + return status; | |
1124 | + | |
1125 | + status = misc_deregister(&lsinfo->ls_miscinfo); | |
1126 | + if (status) | |
1127 | + return status; | |
1128 | + | |
1129 | + list_del(&lsinfo->ls_list); | |
b7b72b66 AM |
1130 | + set_bit(1, &lsinfo->ls_flags); /* LS has been deleted */ |
1131 | + lsinfo->ls_lockspace = NULL; | |
1132 | + if (atomic_dec_and_test(&lsinfo->ls_refcnt)) { | |
1133 | + kfree(lsinfo->ls_miscinfo.name); | |
1134 | + kfree(lsinfo); | |
1135 | + } | |
c1c6733f AM |
1136 | + |
1137 | + return 0; | |
1138 | +} | |
1139 | + | |
1140 | +/* Add it to userland's AST queue */ | |
b7b72b66 | 1141 | +static void add_to_astqueue(struct lock_info *li, void *astaddr, void *astparam, uint32_t reason) |
c1c6733f AM |
1142 | +{ |
1143 | + struct ast_info *ast = kmalloc(sizeof(struct ast_info), GFP_KERNEL); | |
1144 | + if (!ast) | |
1145 | + return; | |
1146 | + | |
b7b72b66 | 1147 | + ast->result.astparam = astparam; |
c1c6733f AM |
1148 | + ast->result.astaddr = astaddr; |
1149 | + ast->result.user_lksb = li->li_user_lksb; | |
1150 | + ast->result.cmd = li->li_cmd; | |
b7b72b66 AM |
1151 | + ast->user_lvbptr = li->li_user_lvbptr; |
1152 | + ast->ast_reason = reason; | |
c1c6733f AM |
1153 | + memcpy(&ast->result.lksb, &li->li_lksb, sizeof(struct dlm_lksb)); |
1154 | + | |
1155 | + /* These two will both be NULL for anything other than queries */ | |
1156 | + ast->queryinfo = li->li_queryinfo; | |
1157 | + ast->user_queryinfo = li->li_user_queryinfo; | |
1158 | + | |
1159 | + spin_lock(&li->li_file->fi_ast_lock); | |
1160 | + list_add_tail(&ast->list, &li->li_file->fi_ast_list); | |
1161 | + spin_unlock(&li->li_file->fi_ast_lock); | |
1162 | + wake_up_interruptible(&li->li_file->fi_wait); | |
1163 | +} | |
1164 | + | |
1165 | +static void bast_routine(void *param, int mode) | |
1166 | +{ | |
1167 | + struct lock_info *li = param; | |
1168 | + | |
b7b72b66 AM |
1169 | + if (li && li->li_bastaddr) { |
1170 | + add_to_astqueue(li, li->li_bastaddr, li->li_bastparam, AST_BAST); | |
c1c6733f AM |
1171 | + } |
1172 | +} | |
1173 | + | |
/*
 * This is the kernel's AST routine.
 * All lock, unlock & query operations complete here.
 * The only synchronous ops are those done during device close.
 *
 * @param is the lock_info of the request that has completed.
 *
 * Asynchronous requests (li_castaddr set) have their result queued for
 * userland, and the lock_info is freed here when the request was an
 * unlock, a query, or a new lock that failed and whose LKB is gone.
 * Synchronous requests just mark the lock_info complete and wake the
 * waiter; the waiter owns the lock_info.
 */
static void ast_routine(void *param)
{
	struct lock_info *li = param;

	/* Param may be NULL if a persistent lock is unlocked by someone else */
	if (!li)
		return;

	/* If this is a successful conversion then activate the blocking ast
	 * args from the conversion request */
	if (!test_bit(LI_FLAG_FIRSTLOCK, &li->li_flags) &&
	    li->li_lksb.sb_status == 0) {

		li->li_bastparam = li->li_pend_bastparam;
		li->li_bastaddr = li->li_pend_bastaddr;
		li->li_pend_bastaddr = NULL;
	}

	/* If it's an async request then post data to the user's AST queue. */
	if (li->li_castaddr) {

		/* Only queue AST if the device is still open */
		if (test_bit(1, &li->li_file->fi_flags))
			add_to_astqueue(li, li->li_castaddr, li->li_castparam, AST_COMP);

		/* If it's a new lock operation that failed, then
		 * remove it from the owner queue and free the
		 * lock_info. The DLM will not free the LKB until this
		 * AST has completed.
		 */
		if (test_and_clear_bit(LI_FLAG_FIRSTLOCK, &li->li_flags) &&
		    li->li_lksb.sb_status != 0) {
			struct dlm_lkb *lkb;

			/* Wait till dlm_lock() has finished */
			/* (taken and dropped at once: only the waiting matters) */
			down(&li->li_firstlock);
			up(&li->li_firstlock);

			/* If the LKB has been freed then we need to tidy up too */
			lkb = dlm_get_lkb(li->li_file->fi_ls->ls_lockspace, li->li_lksb.sb_lkid);
			if (!lkb) {
				spin_lock(&li->li_file->fi_lkb_lock);
				list_del(&li->li_ownerqueue);
				spin_unlock(&li->li_file->fi_lkb_lock);

				release_lockinfo(li);
			}
			/* li may have been freed above; do not touch it again */
			return;
		}
		/* Free unlocks & queries */
		if (li->li_lksb.sb_status == -DLM_EUNLOCK ||
		    li->li_cmd == DLM_USER_QUERY) {
			release_lockinfo(li);
		}
	}
	else {
		/* Synchronous request, just wake up the caller */
		/* The flag is set before the wake-up so the waiter sees it */
		set_bit(LI_FLAG_COMPLETE, &li->li_flags);
		wake_up_interruptible(&li->li_waitq);
	}
}
1240 | + | |
1241 | +/* | |
1242 | + * Wait for the lock op to complete and return the status. | |
1243 | + */ | |
1244 | +static int wait_for_ast(struct lock_info *li) | |
1245 | +{ | |
1246 | + /* Wait for the AST routine to complete */ | |
1247 | + set_task_state(current, TASK_INTERRUPTIBLE); | |
1248 | + while (!test_bit(LI_FLAG_COMPLETE, &li->li_flags)) | |
1249 | + schedule(); | |
1250 | + | |
1251 | + set_task_state(current, TASK_RUNNING); | |
1252 | + | |
1253 | + return li->li_lksb.sb_status; | |
1254 | +} | |
1255 | + | |
1256 | + | |
/*
 * open() handler of the control device.  There is no per-open state to
 * set up, so this always succeeds.
 */
static int dlm_ctl_open(struct inode *inode, struct file *file)
{
	return 0;
}
1262 | + | |
/*
 * release() handler of the control device.  Nothing was allocated at
 * open time, so there is nothing to undo; always returns 0.
 */
static int dlm_ctl_close(struct inode *inode, struct file *file)
{
	return 0;
}
1268 | + | |
1269 | +/* Open on lockspace device */ | |
1270 | +static int dlm_open(struct inode *inode, struct file *file) | |
1271 | +{ | |
1272 | + struct file_info *f; | |
1273 | + struct user_ls *lsinfo; | |
1274 | + | |
1275 | + lsinfo = find_lockspace(iminor(inode)); | |
1276 | + if (!lsinfo) | |
1277 | + return -ENOENT; | |
1278 | + | |
1279 | + f = kmalloc(sizeof(struct file_info), GFP_KERNEL); | |
1280 | + if (!f) | |
1281 | + return -ENOMEM; | |
1282 | + | |
1283 | + atomic_inc(&lsinfo->ls_refcnt); | |
1284 | + INIT_LIST_HEAD(&f->fi_lkb_list); | |
1285 | + INIT_LIST_HEAD(&f->fi_ast_list); | |
1286 | + spin_lock_init(&f->fi_ast_lock); | |
1287 | + spin_lock_init(&f->fi_lkb_lock); | |
1288 | + init_waitqueue_head(&f->fi_wait); | |
1289 | + f->fi_ls = lsinfo; | |
1290 | + atomic_set(&f->fi_refcnt, 1); | |
1291 | + set_bit(1, &f->fi_flags); | |
1292 | + | |
1293 | + file->private_data = f; | |
1294 | + | |
1295 | + return 0; | |
1296 | +} | |
1297 | + | |
1298 | +/* Check the user's version matches ours */ | |
1299 | +static int check_version(struct dlm_lock_params *params) | |
1300 | +{ | |
1301 | + if (params->version[0] != DLM_DEVICE_VERSION_MAJOR || | |
1302 | + (params->version[0] == DLM_DEVICE_VERSION_MAJOR && | |
1303 | + params->version[1] > DLM_DEVICE_VERSION_MINOR)) { | |
1304 | + | |
1305 | + log_print("version mismatch user (%d.%d.%d) kernel (%d.%d.%d)", | |
1306 | + params->version[0], | |
1307 | + params->version[1], | |
1308 | + params->version[2], | |
1309 | + DLM_DEVICE_VERSION_MAJOR, | |
1310 | + DLM_DEVICE_VERSION_MINOR, | |
1311 | + DLM_DEVICE_VERSION_PATCH); | |
1312 | + return -EINVAL; | |
1313 | + } | |
1314 | + return 0; | |
1315 | +} | |
1316 | + | |
1317 | +/* Close on lockspace device */ | |
1318 | +static int dlm_close(struct inode *inode, struct file *file) | |
1319 | +{ | |
1320 | + struct file_info *f = file->private_data; | |
1321 | + struct lock_info li; | |
b7b72b66 | 1322 | + struct lock_info *old_li, *safe; |
c1c6733f AM |
1323 | + sigset_t tmpsig; |
1324 | + sigset_t allsigs; | |
c1c6733f AM |
1325 | + struct user_ls *lsinfo; |
1326 | + DECLARE_WAITQUEUE(wq, current); | |
1327 | + | |
1328 | + lsinfo = find_lockspace(iminor(inode)); | |
1329 | + if (!lsinfo) | |
1330 | + return -ENOENT; | |
1331 | + | |
1332 | + /* Mark this closed so that ASTs will not be delivered any more */ | |
1333 | + clear_bit(1, &f->fi_flags); | |
1334 | + | |
1335 | + /* Block signals while we are doing this */ | |
1336 | + sigfillset(&allsigs); | |
1337 | + sigprocmask(SIG_BLOCK, &allsigs, &tmpsig); | |
1338 | + | |
1339 | + /* We use our own lock_info struct here, so that any | |
1340 | + * outstanding "real" ASTs will be delivered with the | |
1341 | + * corresponding "real" params, thus freeing the lock_info | |
1342 | + * that belongs the lock. This catches the corner case where | |
1343 | + * a lock is BUSY when we try to unlock it here | |
1344 | + */ | |
1345 | + memset(&li, 0, sizeof(li)); | |
1346 | + clear_bit(LI_FLAG_COMPLETE, &li.li_flags); | |
1347 | + init_waitqueue_head(&li.li_waitq); | |
1348 | + add_wait_queue(&li.li_waitq, &wq); | |
1349 | + | |
1350 | + /* | |
1351 | + * Free any outstanding locks, they are on the | |
1352 | + * list in LIFO order so there should be no problems | |
1353 | + * about unlocking parents before children. | |
1354 | + * Although we don't remove the lkbs from the list here | |
1355 | + * (what would be the point?), foreach_safe is needed | |
1356 | + * because the lkbs are freed during dlm_unlock operations | |
1357 | + */ | |
b7b72b66 | 1358 | + list_for_each_entry_safe(old_li, safe, &f->fi_lkb_list, li_ownerqueue) { |
c1c6733f AM |
1359 | + int status; |
1360 | + int lock_status; | |
1361 | + int flags = 0; | |
b7b72b66 | 1362 | + struct dlm_lkb *lkb; |
c1c6733f | 1363 | + |
b7b72b66 | 1364 | + lkb = dlm_get_lkb(f->fi_ls->ls_lockspace, old_li->li_lksb.sb_lkid); |
c1c6733f AM |
1365 | + |
1366 | + /* Don't unlock persistent locks */ | |
1367 | + if (lkb->lkb_flags & GDLM_LKFLG_PERSISTENT) { | |
b7b72b66 | 1368 | + list_del(&old_li->li_ownerqueue); |
c1c6733f | 1369 | + |
c783755a AM |
1370 | + /* Update master copy */ |
1371 | + if (lkb->lkb_resource->res_nodeid) { | |
1372 | + li.li_lksb.sb_lkid = lkb->lkb_id; | |
1373 | + status = dlm_lock(f->fi_ls->ls_lockspace, | |
1374 | + lkb->lkb_grmode, &li.li_lksb, | |
1375 | + DLM_LKF_CONVERT|DLM_LKF_ORPHAN, | |
1376 | + NULL, 0, 0, ast_routine, &li, | |
1377 | + NULL, NULL); | |
1378 | + if (status == 0) | |
1379 | + wait_for_ast(&li); | |
1380 | + } | |
1381 | + lkb->lkb_flags |= GDLM_LKFLG_ORPHAN; | |
1382 | + | |
c1c6733f AM |
1383 | + /* But tidy our references in it */ |
1384 | + kfree(old_li); | |
1385 | + lkb->lkb_astparam = (long)NULL; | |
1386 | + put_file_info(f); | |
c783755a | 1387 | + |
c1c6733f AM |
1388 | + continue; |
1389 | + } | |
1390 | + | |
1391 | + clear_bit(LI_FLAG_COMPLETE, &li.li_flags); | |
1392 | + | |
1393 | + /* If it's not granted then cancel the request. | |
1394 | + * If the lock was WAITING then it will be dropped, | |
1395 | + * if it was converting then it will be reverted to GRANTED, | |
1396 | + * then we will unlock it. | |
1397 | + */ | |
1398 | + lock_status = lkb->lkb_status; | |
1399 | + | |
1400 | + if (lock_status != GDLM_LKSTS_GRANTED) | |
1401 | + flags = DLM_LKF_CANCEL; | |
1402 | + | |
c783755a AM |
1403 | + if (lkb->lkb_grmode >= DLM_LOCK_PW) |
1404 | + flags |= DLM_LKF_IVVALBLK; | |
1405 | + | |
c1c6733f AM |
1406 | + status = dlm_unlock(f->fi_ls->ls_lockspace, lkb->lkb_id, flags, &li.li_lksb, &li); |
1407 | + | |
1408 | + /* Must wait for it to complete as the next lock could be its | |
1409 | + * parent */ | |
1410 | + if (status == 0) | |
1411 | + wait_for_ast(&li); | |
1412 | + | |
1413 | + /* If it was waiting for a conversion, it will | |
1414 | + now be granted so we can unlock it properly */ | |
1415 | + if (lock_status == GDLM_LKSTS_CONVERT) { | |
c783755a | 1416 | + flags &= ~DLM_LKF_CANCEL; |
c1c6733f | 1417 | + clear_bit(LI_FLAG_COMPLETE, &li.li_flags); |
c783755a | 1418 | + status = dlm_unlock(f->fi_ls->ls_lockspace, lkb->lkb_id, flags, &li.li_lksb, &li); |
c1c6733f AM |
1419 | + |
1420 | + if (status == 0) | |
1421 | + wait_for_ast(&li); | |
1422 | + } | |
1423 | + /* Unlock suceeded, free the lock_info struct. */ | |
1424 | + if (status == 0) { | |
1425 | + kfree(old_li); | |
1426 | + put_file_info(f); | |
1427 | + } | |
1428 | + } | |
1429 | + | |
1430 | + remove_wait_queue(&li.li_waitq, &wq); | |
1431 | + | |
1432 | + /* If this is the last reference, and the lockspace has been deleted | |
b7b72b66 | 1433 | + then free the struct */ |
c1c6733f | 1434 | + if (atomic_dec_and_test(&lsinfo->ls_refcnt) && !lsinfo->ls_lockspace) { |
b7b72b66 | 1435 | + kfree(lsinfo->ls_miscinfo.name); |
c1c6733f AM |
1436 | + kfree(lsinfo); |
1437 | + } | |
1438 | + | |
1439 | + /* Restore signals */ | |
1440 | + sigprocmask(SIG_SETMASK, &tmpsig, NULL); | |
1441 | + recalc_sigpending(); | |
1442 | + | |
1443 | + return 0; | |
1444 | +} | |
1445 | + | |
1446 | +/* | |
1447 | + * ioctls to create/remove lockspaces, and check how many | |
1448 | + * outstanding ASTs there are against a particular LS. | |
1449 | + */ | |
1450 | +static int dlm_ioctl(struct inode *inode, struct file *file, | |
1451 | + uint command, ulong u) | |
1452 | +{ | |
1453 | + struct file_info *fi = file->private_data; | |
1454 | + int status = -EINVAL; | |
1455 | + int count; | |
1456 | + struct list_head *tmp_list; | |
1457 | + | |
1458 | + switch (command) { | |
1459 | + | |
1460 | + /* Are there any ASTs for us to read? | |
1461 | + * Warning, this returns the number of messages (ASTs) | |
1462 | + * in the queue, NOT the number of bytes to read | |
1463 | + */ | |
1464 | + case FIONREAD: | |
1465 | + count = 0; | |
1466 | + spin_lock(&fi->fi_ast_lock); | |
1467 | + list_for_each(tmp_list, &fi->fi_ast_list) | |
1468 | + count++; | |
1469 | + spin_unlock(&fi->fi_ast_lock); | |
1470 | + status = put_user(count, (int *)u); | |
1471 | + break; | |
1472 | + | |
1473 | + default: | |
1474 | + return -ENOTTY; | |
1475 | + } | |
1476 | + | |
1477 | + return status; | |
1478 | +} | |
1479 | + | |
1480 | +/* | |
1481 | + * ioctls to create/remove lockspaces. | |
1482 | + */ | |
1483 | +static int dlm_ctl_ioctl(struct inode *inode, struct file *file, | |
1484 | + uint command, ulong u) | |
1485 | +{ | |
1486 | + int status = -EINVAL; | |
1487 | + char ls_name[MAX_LS_NAME_LEN]; | |
1488 | + struct user_ls *lsinfo; | |
1489 | + int force = 0; | |
1490 | + | |
1491 | + switch (command) { | |
1492 | + case DLM_CREATE_LOCKSPACE: | |
1493 | + if (!capable(CAP_SYS_ADMIN)) | |
1494 | + return -EPERM; | |
1495 | + | |
1496 | + if (strncpy_from_user(ls_name, (char*)u, MAX_LS_NAME_LEN) < 0) | |
1497 | + return -EFAULT; | |
1498 | + status = register_lockspace(ls_name, &lsinfo); | |
1499 | + | |
1500 | + /* If it succeeded then return the minor number */ | |
1501 | + if (status == 0) | |
1502 | + status = lsinfo->ls_miscinfo.minor; | |
1503 | + break; | |
1504 | + | |
1505 | + case DLM_FORCE_RELEASE_LOCKSPACE: | |
1506 | + force = 2; | |
1507 | + | |
1508 | + case DLM_RELEASE_LOCKSPACE: | |
1509 | + if (!capable(CAP_SYS_ADMIN)) | |
1510 | + return -EPERM; | |
1511 | + | |
b7b72b66 AM |
1512 | + down(&user_ls_lock); |
1513 | + lsinfo = __find_lockspace(u); | |
1514 | + if (!lsinfo) { | |
1515 | + up(&user_ls_lock); | |
c1c6733f | 1516 | + return -EINVAL; |
b7b72b66 AM |
1517 | + } |
1518 | + | |
c1c6733f | 1519 | + status = unregister_lockspace(lsinfo, force); |
b7b72b66 | 1520 | + up(&user_ls_lock); |
c1c6733f AM |
1521 | + break; |
1522 | + | |
1523 | + default: | |
1524 | + return -ENOTTY; | |
1525 | + } | |
1526 | + | |
1527 | + return status; | |
1528 | +} | |
1529 | + | |
1530 | +/* Deal with the messy stuff of copying a web of structs | |
1531 | + from kernel space to userspace */ | |
1532 | +static int copy_query_result(struct ast_info *ast) | |
1533 | +{ | |
1534 | + int status = -EFAULT; | |
1535 | + struct dlm_queryinfo qi; | |
1536 | + | |
1537 | + /* Get the pointers to userspace structs */ | |
1538 | + if (copy_from_user(&qi, ast->user_queryinfo, | |
1539 | + sizeof(struct dlm_queryinfo))) | |
1540 | + goto copy_out; | |
1541 | + | |
c1c6733f AM |
1542 | + if (put_user(ast->queryinfo->gqi_lockcount, |
1543 | + &ast->user_queryinfo->gqi_lockcount)) | |
1544 | + goto copy_out; | |
1545 | + | |
1546 | + if (qi.gqi_resinfo) { | |
1547 | + if (copy_to_user(qi.gqi_resinfo, ast->queryinfo->gqi_resinfo, | |
1548 | + sizeof(struct dlm_resinfo))) | |
1549 | + goto copy_out; | |
1550 | + } | |
1551 | + | |
1552 | + if (qi.gqi_lockinfo) { | |
1553 | + if (copy_to_user(qi.gqi_lockinfo, ast->queryinfo->gqi_lockinfo, | |
1554 | + sizeof(struct dlm_lockinfo) * ast->queryinfo->gqi_lockcount)) | |
1555 | + goto copy_out; | |
1556 | + } | |
1557 | + | |
1558 | + status = 0; | |
1559 | + | |
1560 | + if (ast->queryinfo->gqi_lockinfo) | |
1561 | + kfree(ast->queryinfo->gqi_lockinfo); | |
1562 | + | |
1563 | + if (ast->queryinfo->gqi_resinfo) | |
1564 | + kfree(ast->queryinfo->gqi_resinfo); | |
1565 | + | |
1566 | + kfree(ast->queryinfo); | |
1567 | + | |
1568 | + copy_out: | |
1569 | + return status; | |
1570 | +} | |
1571 | + | |
1572 | +/* Read call, might block if no ASTs are waiting. | |
1573 | + * It will only ever return one message at a time, regardless | |
1574 | + * of how many are pending. | |
1575 | + */ | |
1576 | +static ssize_t dlm_read(struct file *file, char __user *buffer, size_t count, loff_t *ppos) | |
1577 | +{ | |
1578 | + struct file_info *fi = file->private_data; | |
1579 | + struct ast_info *ast; | |
1580 | + int ret; | |
1581 | + DECLARE_WAITQUEUE(wait, current); | |
1582 | + | |
1583 | + if (count < sizeof(struct dlm_lock_result)) | |
1584 | + return -EINVAL; | |
1585 | + | |
1586 | + spin_lock(&fi->fi_ast_lock); | |
1587 | + if (list_empty(&fi->fi_ast_list)) { | |
1588 | + | |
1589 | + /* No waiting ASTs. | |
1590 | + * Return EOF if the lockspace been deleted. | |
1591 | + */ | |
1592 | + if (test_bit(1, &fi->fi_ls->ls_flags)) | |
1593 | + return 0; | |
1594 | + | |
1595 | + if (file->f_flags & O_NONBLOCK) { | |
1596 | + spin_unlock(&fi->fi_ast_lock); | |
1597 | + return -EAGAIN; | |
1598 | + } | |
1599 | + | |
1600 | + add_wait_queue(&fi->fi_wait, &wait); | |
1601 | + | |
1602 | + repeat: | |
1603 | + set_current_state(TASK_INTERRUPTIBLE); | |
1604 | + if (list_empty(&fi->fi_ast_list) && | |
1605 | + !signal_pending(current)) { | |
1606 | + | |
1607 | + spin_unlock(&fi->fi_ast_lock); | |
1608 | + schedule(); | |
1609 | + spin_lock(&fi->fi_ast_lock); | |
1610 | + goto repeat; | |
1611 | + } | |
1612 | + | |
1613 | + current->state = TASK_RUNNING; | |
1614 | + remove_wait_queue(&fi->fi_wait, &wait); | |
1615 | + | |
1616 | + if (signal_pending(current)) { | |
1617 | + spin_unlock(&fi->fi_ast_lock); | |
1618 | + return -ERESTARTSYS; | |
1619 | + } | |
1620 | + } | |
1621 | + | |
1622 | + ast = list_entry(fi->fi_ast_list.next, struct ast_info, list); | |
1623 | + list_del(&ast->list); | |
1624 | + spin_unlock(&fi->fi_ast_lock); | |
1625 | + | |
1626 | + ret = sizeof(struct dlm_lock_result); | |
1627 | + if (copy_to_user(buffer, &ast->result, sizeof(struct dlm_lock_result))) | |
1628 | + ret = -EFAULT; | |
1629 | + | |
b7b72b66 AM |
1630 | + if (ast->ast_reason == AST_COMP && |
1631 | + ast->result.cmd == DLM_USER_LOCK && ast->user_lvbptr) { | |
1632 | + if (copy_to_user(ast->user_lvbptr, ast->result.lksb.sb_lvbptr, DLM_LVB_LEN)) | |
1633 | + ret = -EFAULT; | |
1634 | + } | |
1635 | + | |
c1c6733f AM |
1636 | + /* If it was a query then copy the result block back here */ |
1637 | + if (ast->queryinfo) { | |
1638 | + int status = copy_query_result(ast); | |
1639 | + if (status) | |
1640 | + ret = status; | |
1641 | + } | |
1642 | + | |
1643 | + kfree(ast); | |
1644 | + return ret; | |
1645 | +} | |
1646 | +/* poll() handler: reports POLLIN and POLLRDNORM while fi_ast_list is non-empty, otherwise 0. */ | |
1647 | +static unsigned int dlm_poll(struct file *file, poll_table *wait) | |
1648 | +{ | |
1649 | + struct file_info *fi = file->private_data; | |
1650 | + /* Register on fi_wait first, then sample the list. */ | |
1651 | + poll_wait(file, &fi->fi_wait, wait); | |
1652 | + /* The list is inspected with fi_ast_lock held. */ | |
1653 | + spin_lock(&fi->fi_ast_lock); | |
1654 | + if (!list_empty(&fi->fi_ast_list)) { | |
1655 | + spin_unlock(&fi->fi_ast_lock); | |
1656 | + return POLLIN | POLLRDNORM; | |
1657 | + } | |
1658 | + /* Nothing queued: no events to report. */ | |
1659 | + spin_unlock(&fi->fi_ast_lock); | |
1660 | + return 0; | |
1661 | +} | |
1662 | +/* Handle DLM_USER_QUERY: copy in the caller's lksb and dlm_queryinfo, swap its user buffers for kernel ones, then call dlm_query(). Returns dlm_query()'s result, or -EINVAL/-ENOMEM/-EFAULT if setup fails (freeing everything allocated so far). */ | |
1663 | +static int do_user_query(struct file_info *fi, struct dlm_lock_params *kparams) | |
1664 | +{ | |
1665 | + struct lock_info *li; | |
1666 | + int status; | |
1667 | + | |
b7b72b66 AM |
1668 | + if (!kparams->castaddr) |
1669 | + return -EINVAL; | |
1670 | + | |
1671 | + if (!kparams->lksb) | |
1672 | + return -EINVAL; | |
1673 | + | |
c1c6733f AM |
1674 | + li = kmalloc(sizeof(struct lock_info), GFP_KERNEL); |
1675 | + if (!li) | |
1676 | + return -ENOMEM; | |
1677 | + | |
1678 | + /* The file_info reference is taken just before dlm_query(), so the error exits below have no reference to drop. */ | |
1679 | + li->li_user_lksb = kparams->lksb; | |
b7b72b66 | 1680 | + li->li_bastparam = kparams->bastparam; |
c1c6733f | 1681 | + li->li_bastaddr = kparams->bastaddr; |
b7b72b66 AM |
1682 | + li->li_castparam = kparams->castparam; |
1683 | + li->li_castaddr = kparams->castaddr; | |
c1c6733f AM |
1684 | + li->li_file = fi; |
1685 | + li->li_flags = 0; | |
1686 | + li->li_cmd = kparams->cmd; | |
1687 | + clear_bit(LI_FLAG_FIRSTLOCK, &li->li_flags); | |
1688 | + | |
1689 | + if (copy_from_user(&li->li_lksb, kparams->lksb, | |
1690 | + sizeof(struct dlm_lksb))) { | |
1691 | + kfree(li); | |
1692 | + return -EFAULT; | |
1693 | + } | |
1694 | + li->li_user_queryinfo = (struct dlm_queryinfo *)li->li_lksb.sb_lvbptr; | |
1695 | + | |
1696 | + /* Allocate query structs */ | |
1697 | + status = -ENOMEM; | |
1698 | + li->li_queryinfo = kmalloc(sizeof(struct dlm_queryinfo), GFP_KERNEL); | |
1699 | + if (!li->li_queryinfo) | |
1700 | + goto out1; | |
1701 | + | |
1702 | + /* Mainly to get gqi_lock buffer size */ | |
1703 | + if (copy_from_user(li->li_queryinfo, li->li_lksb.sb_lvbptr, | |
1704 | + sizeof(struct dlm_queryinfo))) { | |
1705 | + status = -EFAULT; | |
1706 | + goto out2; | |
1707 | + } | |
1708 | + | |
1709 | + /* Overwrite userspace pointers we just copied with kernel space ones */ | |
1710 | + if (li->li_queryinfo->gqi_resinfo) { | |
1711 | + li->li_queryinfo->gqi_resinfo = kmalloc(sizeof(struct dlm_resinfo), GFP_KERNEL); | |
1712 | + if (!li->li_queryinfo->gqi_resinfo) | |
1713 | + goto out2; | |
1714 | + } | |
1715 | + if (li->li_queryinfo->gqi_lockinfo) { | |
1716 | + /* NOTE(review): gqi_locksize comes straight from userspace and is not range-checked before this multiply - confirm a bound */ | |
1717 | + li->li_queryinfo->gqi_lockinfo = kmalloc(sizeof(struct dlm_lockinfo) * | |
1718 | + li->li_queryinfo->gqi_locksize, GFP_KERNEL); | |
1719 | + if (!li->li_queryinfo->gqi_lockinfo) | |
1720 | + goto out3; | |
1721 | + } | |
1722 | + | |
1723 | + li->li_lksb.sb_lvbptr = (char *)li->li_queryinfo; | |
1724 | + get_file_info(fi); | |
1725 | + return dlm_query(fi->fi_ls->ls_lockspace, &li->li_lksb, | |
1726 | + kparams->flags, /* query */ | |
1727 | + li->li_queryinfo, | |
1728 | + ast_routine, li); | |
1729 | + out3: /* gqi_resinfo is NULL or a kernel buffer by now, never the user pointer */ | |
1730 | + kfree(li->li_queryinfo->gqi_resinfo); | |
1731 | + out2: | |
1732 | + kfree(li->li_queryinfo); | |
1733 | + out1: | |
1734 | + kfree(li); | |
1735 | + return status; | |
1736 | +} | |
1737 | +/* Allocate a lock_info for fi and fill it from kparams (cmd and blocking-AST address/param); takes a file_info reference on success. Returns NULL if kmalloc fails. */ | |
c783755a AM |
1738 | +static struct lock_info *allocate_lockinfo(struct file_info *fi, struct dlm_lock_params *kparams) |
1739 | +{ | |
1740 | + struct lock_info *li; | |
1741 | + /* Only the fields assigned below are initialised; the rest of the kmalloc'd struct is left as allocated. */ | |
1742 | + li = kmalloc(sizeof(struct lock_info), GFP_KERNEL); | |
1743 | + if (li) { | |
1744 | + li->li_magic = LOCKINFO_MAGIC; | |
1745 | + li->li_file = fi; | |
1746 | + li->li_cmd = kparams->cmd; | |
1747 | + li->li_queryinfo = NULL; | |
1748 | + li->li_flags = 0; | |
1749 | + li->li_pend_bastparam = NULL; | |
1750 | + li->li_pend_bastaddr = NULL; | |
1751 | + li->li_lksb.sb_lvbptr = NULL; | |
1752 | + li->li_bastaddr = kparams->bastaddr; | |
1753 | + li->li_bastparam = kparams->bastparam; | |
1754 | + /* The new lock_info points at fi, so take a reference on it. */ | |
1755 | + get_file_info(fi); | |
1756 | + } | |
1757 | + return li; | |
1758 | +} | |
1759 | +/* Handle DLM_USER_LOCK: a new lock request, or with DLM_LKF_CONVERT a conversion of an existing lock. buffer is the user's write buffer, from which the lock name is read. Returns dlm_lock()'s status, or a negative errno if validation, allocation or a user copy fails. */ | |
c1c6733f AM |
1760 | +static int do_user_lock(struct file_info *fi, struct dlm_lock_params *kparams, |
1761 | + const char *buffer) | |
1762 | +{ | |
1763 | + struct lock_info *li; | |
1764 | + int status; | |
1765 | + char name[DLM_RESNAME_MAXLEN]; | |
b7b72b66 | 1766 | + void *lvbptr; |
c1c6733f AM |
1767 | + |
1768 | + /* | |
1769 | + * Validate things that we need to have correct. | |
1770 | + */ | |
b7b72b66 | 1771 | + if (!kparams->castaddr) |
c1c6733f AM |
1772 | + return -EINVAL; |
1773 | + | |
1774 | + if (!kparams->lksb) | |
1775 | + return -EINVAL; | |
1776 | + | |
b7b72b66 | 1777 | + if (!access_ok(VERIFY_WRITE, kparams->lksb, sizeof(struct dlm_lksb))) |
c1c6733f | 1778 | + return -EFAULT; |
c1c6733f | 1779 | + |
c783755a AM |
1780 | + /* Persistent child locks are not available yet */ |
1781 | + if ((kparams->flags & DLM_LKF_PERSISTENT) && kparams->parent) | |
1782 | + return -EINVAL; | |
1783 | + | |
1784 | + /* For conversions, the lock will already have a lock_info | |
c1c6733f AM |
1785 | + block squirrelled away in astparam */ |
1786 | + if (kparams->flags & DLM_LKF_CONVERT) { | |
b7b72b66 | 1787 | + struct dlm_lkb *lkb = dlm_get_lkb(fi->fi_ls->ls_lockspace, kparams->lkid); |
c1c6733f AM |
1788 | + if (!lkb) { |
1789 | + return -EINVAL; | |
1790 | + } | |
c1c6733f | 1791 | + |
b7b72b66 | 1792 | + li = (struct lock_info *)lkb->lkb_astparam; |
c783755a AM |
1793 | + |
1794 | + /* li may be NULL if the lock was PERSISTENT and the process went | |
1795 | + away, so we need to allocate a new one */ | |
1796 | + if (!li) { | |
1797 | + li = allocate_lockinfo(fi, kparams); | |
1798 | + if (li) { | |
1799 | + spin_lock(&fi->fi_lkb_lock); | |
1800 | + list_add(&li->li_ownerqueue, &fi->fi_lkb_list); | |
1801 | + spin_unlock(&fi->fi_lkb_lock); | |
1802 | + } | |
1803 | + else { | |
1804 | + return -ENOMEM; | |
1805 | + } | |
1806 | + } | |
1807 | + | |
1808 | + if (li->li_magic != LOCKINFO_MAGIC) | |
1809 | + return -EINVAL; | |
1810 | + | |
b7b72b66 AM |
1811 | + /* For conversions don't overwrite the current blocking AST |
1812 | + info so that: | |
1813 | + a) if a blocking AST fires before the conversion is queued | |
1814 | + it runs the current handler | |
1815 | + b) if the conversion is cancelled, the original blocking AST | |
1816 | + declaration is active | |
1817 | + The pend_ info is made active when the conversion | |
1818 | + completes. | |
1819 | + */ | |
1820 | + li->li_pend_bastaddr = kparams->bastaddr; | |
1821 | + li->li_pend_bastparam = kparams->bastparam; | |
c1c6733f AM |
1822 | + } |
1823 | + else { | |
c783755a | 1824 | + /* Get the lock name before allocating, so a bad name leaks nothing; namelen is user-supplied and must fit name[] */ |
c1c6733f AM |
1825 | + if ((unsigned int)kparams->namelen > DLM_RESNAME_MAXLEN) |
1826 | + return -EINVAL; | |
1827 | + if (copy_from_user(name, buffer + offsetof(struct dlm_lock_params, name), | |
b7b72b66 AM |
1828 | + kparams->namelen)) |
1829 | + return -EFAULT; | |
1830 | + li = allocate_lockinfo(fi, kparams); | |
1831 | + if (!li) | |
1832 | + return -ENOMEM; | |
c1c6733f AM |
1833 | + |
1834 | + /* semaphore to allow us to complete our work before | |
1835 | + the AST routine runs. In fact we only need (and use) this | |
1836 | + when the initial lock fails */ | |
1837 | + init_MUTEX_LOCKED(&li->li_firstlock); | |
1838 | + set_bit(LI_FLAG_FIRSTLOCK, &li->li_flags); | |
c1c6733f AM |
1839 | + } |
1840 | + | |
b7b72b66 AM |
1841 | + li->li_user_lksb = kparams->lksb; |
1842 | + li->li_castaddr = kparams->castaddr; | |
1843 | + li->li_castparam = kparams->castparam; | |
1844 | + | |
c1c6733f | 1845 | + /* Copy the user's LKSB into kernel space, |
b7b72b66 AM |
1846 | + needed for conversions & value block operations. |
1847 | + Save our kernel-space lvbptr first */ | |
1848 | + lvbptr = li->li_lksb.sb_lvbptr; | |
1849 | + if (copy_from_user(&li->li_lksb, kparams->lksb, sizeof(struct dlm_lksb))) { | |
1850 | + status = -EFAULT; | |
1851 | + goto out_err; | |
1852 | + } | |
1853 | + /* Store new userland LVBptr and restore kernel one */ | |
1854 | + li->li_user_lvbptr = li->li_lksb.sb_lvbptr; | |
1855 | + li->li_lksb.sb_lvbptr = lvbptr; | |
1856 | + | |
1857 | + /* Copy in the value block */ | |
1858 | + if (kparams->flags & DLM_LKF_VALBLK) { | |
1859 | + if (!li->li_lksb.sb_lvbptr) { | |
1860 | + li->li_lksb.sb_lvbptr = kmalloc(DLM_LVB_LEN, GFP_KERNEL); | |
1861 | + if (!li->li_lksb.sb_lvbptr) { | |
1862 | + status = -ENOMEM; | |
1863 | + goto out_err; | |
1864 | + } | |
1865 | + } | |
1866 | + /* Use the lvbptr value copied in above: kparams->lksb is a user pointer and must not be dereferenced directly */ | |
1867 | + if (copy_from_user(li->li_lksb.sb_lvbptr, li->li_user_lvbptr, | |
1868 | + DLM_LVB_LEN)) { | |
1869 | + status = -EFAULT; | |
1870 | + goto out_err; | |
1871 | + } | |
1872 | + } | |
1873 | + else { | |
1874 | + li->li_user_lvbptr = NULL; | |
1875 | + } | |
c1c6733f AM |
1876 | + |
1877 | + /* Lock it ... */ | |
1878 | + status = dlm_lock(fi->fi_ls->ls_lockspace, kparams->mode, &li->li_lksb, | |
1879 | + kparams->flags, name, kparams->namelen, | |
1880 | + kparams->parent, | |
1881 | + ast_routine, | |
1882 | + li, | |
b7b72b66 AM |
1883 | + (li->li_pend_bastaddr || li->li_bastaddr) ? |
1884 | + bast_routine : NULL, | |
c1c6733f AM |
1885 | + kparams->range.ra_end ? &kparams->range : NULL); |
1886 | + | |
1887 | + /* If it succeeded (this far) with a new lock then keep track of | |
1888 | + it on the file's lkb list */ | |
1889 | + if (!status && !(kparams->flags & DLM_LKF_CONVERT)) { | |
c1c6733f | 1890 | + |
b7b72b66 AM |
1891 | + spin_lock(&fi->fi_lkb_lock); |
1892 | + list_add(&li->li_ownerqueue, &fi->fi_lkb_list); | |
1893 | + spin_unlock(&fi->fi_lkb_lock); | |
1894 | + | |
c1c6733f | 1895 | + up(&li->li_firstlock); |
b7b72b66 AM |
1896 | + |
1897 | + /* Copy the lkid back to userspace in case they want to cancel. | |
1898 | + This address has already been tested so /should/ be OK, if not: | |
1899 | + tough - we've taken the lock! */ | |
1900 | + copy_to_user(&kparams->lksb->sb_lkid, | |
1901 | + &li->li_lksb.sb_lkid, | |
1902 | + sizeof(li->li_lksb.sb_lkid)); | |
c1c6733f AM |
1903 | + } |
1904 | + | |
1905 | + return status; | |
b7b72b66 AM |
1906 | + |
1907 | + out_err: | |
1908 | + if (test_bit(LI_FLAG_FIRSTLOCK, &li->li_flags)) { | |
1909 | + /* Only a lock_info created by this call is released; a conversion keeps its existing one */ | |
1910 | + release_lockinfo(li); | |
1911 | + } | |
1912 | + return status; | |
1913 | + | |
c1c6733f AM |
1914 | +} |
1915 | +/* Handle DLM_USER_UNLOCK (including DLM_LKF_CANCEL): look up the lock by kparams->lkid and pass it to dlm_unlock(). Returns dlm_unlock()'s status, -EINVAL for an unknown lkid or bad lock_info, or -ENOMEM. */ | |
1916 | +static int do_user_unlock(struct file_info *fi, struct dlm_lock_params *kparams) | |
1917 | +{ | |
1918 | + struct lock_info *li; | |
b7b72b66 | 1919 | + struct dlm_lkb *lkb; |
c1c6733f | 1920 | + int status; |
b7b72b66 | 1921 | + int convert_cancel = 0; |
c1c6733f AM |
1922 | + |
1923 | + lkb = dlm_get_lkb(fi->fi_ls->ls_lockspace, kparams->lkid); | |
1924 | + if (!lkb) { | |
1925 | + return -EINVAL; | |
1926 | + } | |
1927 | + | |
b7b72b66 AM |
1928 | + /* Cancelling a conversion doesn't remove the lock...*/ |
1929 | + if (kparams->flags & DLM_LKF_CANCEL && | |
1930 | + lkb->lkb_status == GDLM_LKSTS_CONVERT) { | |
1931 | + convert_cancel = 1; | |
1932 | + } | |
c1c6733f | 1933 | + |
b7b72b66 | 1934 | + li = (struct lock_info *)lkb->lkb_astparam; |
c783755a AM |
1935 | + if (!li) { |
1936 | + li = allocate_lockinfo(fi, kparams); | |
1937 | + if (!li) | |
1938 | + return -ENOMEM; | |
1939 | + spin_lock(&fi->fi_lkb_lock); | |
1940 | + list_add(&li->li_ownerqueue, &fi->fi_lkb_list); | |
1941 | + spin_unlock(&fi->fi_lkb_lock); | |
1942 | + } | |
1943 | + /* The allocation is checked before li is touched: list_add() on a NULL li would oops. */ | |
1944 | + if (li->li_magic != LOCKINFO_MAGIC) | |
1945 | + return -EINVAL; | |
1946 | + | |
c1c6733f | 1947 | + li->li_user_lksb = kparams->lksb; |
b7b72b66 | 1948 | + li->li_castparam = kparams->castparam; |
c1c6733f AM |
1949 | + li->li_cmd = kparams->cmd; |
1950 | + | |
b7b72b66 AM |
1951 | + /* dlm_unlock() passes a 0 for castaddr which means don't overwrite |
1952 | + the existing li_castaddr as that's the completion routine for | |
1953 | + unlocks. dlm_unlock_wait() specifies a new AST routine to be | |
1954 | + executed when the unlock completes. */ | |
1955 | + if (kparams->castaddr) | |
1956 | + li->li_castaddr = kparams->castaddr; | |
1957 | + | |
c1c6733f AM |
1958 | + /* Have to do it here cos the lkb may not exist after |
1959 | + * dlm_unlock() */ | |
b7b72b66 AM |
1960 | + if (!convert_cancel) { |
1961 | + spin_lock(&fi->fi_lkb_lock); | |
1962 | + list_del(&li->li_ownerqueue); | |
1963 | + spin_unlock(&fi->fi_lkb_lock); | |
1964 | + } | |
c1c6733f AM |
1965 | + |
1966 | + /* Use existing lksb & astparams */ | |
1967 | + status = dlm_unlock(fi->fi_ls->ls_lockspace, | |
1968 | + kparams->lkid, | |
b7b72b66 AM |
1969 | + kparams->flags, &li->li_lksb, li); |
1970 | + if (status && !convert_cancel) { | |
1971 | + /* It failed, put it back on the list */ | |
1972 | + spin_lock(&fi->fi_lkb_lock); | |
1973 | + list_add(&li->li_ownerqueue, &fi->fi_lkb_list); | |
1974 | + spin_unlock(&fi->fi_lkb_lock); | |
1975 | + } | |
c1c6733f AM |
1976 | + |
1977 | + return status; | |
1978 | +} | |
1979 | + | |
1980 | +/* Write call, submit a locking request. The buffer holds a struct dlm_lock_params; returns count on success, otherwise the negative status of the step that failed. */ | |
1981 | +static ssize_t dlm_write(struct file *file, const char __user *buffer, | |
1982 | + size_t count, loff_t *ppos) | |
1983 | +{ | |
1984 | + struct file_info *fi = file->private_data; | |
1985 | + struct dlm_lock_params kparams; | |
1986 | + sigset_t tmpsig; | |
1987 | + sigset_t allsigs; | |
1988 | + int status; | |
1989 | + | |
b7b72b66 | 1990 | + if (count < sizeof(kparams)-1) /* -1 because lock name is optional */ |
c1c6733f AM |
1991 | + return -EINVAL; |
1992 | + | |
1993 | + /* Has the lockspace been deleted */ | |
1994 | + if (test_bit(1, &fi->fi_ls->ls_flags)) | |
1995 | + return -ENOENT; | |
1996 | + /* NOTE(review): count may be sizeof(kparams)-1, yet sizeof(kparams) bytes are copied below - confirm this is intended */ | |
1997 | + /* Get the command info */ | |
1998 | + if (copy_from_user(&kparams, buffer, sizeof(kparams))) | |
1999 | + return -EFAULT; | |
2000 | + | |
2001 | + if (check_version(&kparams)) | |
2002 | + return -EINVAL; | |
2003 | + | |
2004 | + /* Block signals while we are doing this */ | |
2005 | + sigfillset(&allsigs); | |
2006 | + sigprocmask(SIG_BLOCK, &allsigs, &tmpsig); | |
2007 | + /* Dispatch on the command; unknown commands give -EINVAL */ | |
2008 | + switch (kparams.cmd) | |
2009 | + { | |
2010 | + case DLM_USER_LOCK: | |
2011 | + status = do_user_lock(fi, &kparams, buffer); | |
2012 | + break; | |
2013 | + | |
2014 | + case DLM_USER_UNLOCK: | |
2015 | + status = do_user_unlock(fi, &kparams); | |
2016 | + break; | |
2017 | + | |
2018 | + case DLM_USER_QUERY: | |
2019 | + status = do_user_query(fi, &kparams); | |
2020 | + break; | |
2021 | + | |
2022 | + default: | |
2023 | + status = -EINVAL; | |
2024 | + break; | |
2025 | + } | |
2026 | + /* Restore signals */ | |
2027 | + sigprocmask(SIG_SETMASK, &tmpsig, NULL); | |
2028 | + recalc_sigpending(); | |
2029 | + /* A zero status means the whole buffer was consumed */ | |
2030 | + if (status == 0) | |
2031 | + return count; | |
2032 | + else | |
2033 | + return status; | |
2034 | +} | |
2035 | + | |
b7b72b66 AM |
2036 | +/* Called when the cluster is shutdown uncleanly, all lockspaces |
2037 | + have been summarily removed. Takes no arguments: the (void) prototype matches the declaration in device.h */ | |
c1c6733f AM |
2038 | +void dlm_device_free_devices(void) |
2039 | +{ | |
2040 | + struct user_ls *tmp; | |
2041 | + struct user_ls *lsinfo; | |
2042 | + | |
b7b72b66 | 2043 | + down(&user_ls_lock); |
c1c6733f AM |
2044 | + list_for_each_entry_safe(lsinfo, tmp, &user_ls_list, ls_list) { |
2045 | + misc_deregister(&lsinfo->ls_miscinfo); | |
2046 | + | |
2047 | + /* Tidy up, but don't delete the lsinfo struct until | |
2048 | + all the users have closed their devices */ | |
2049 | + list_del(&lsinfo->ls_list); | |
c1c6733f | 2050 | + set_bit(1, &lsinfo->ls_flags); /* LS has been deleted */ |
b7b72b66 | 2051 | + lsinfo->ls_lockspace = NULL; |
c1c6733f | 2052 | + } |
b7b72b66 | 2053 | + up(&user_ls_lock); |
c1c6733f AM |
2054 | +} |
2055 | +/* File operations table wiring up the dlm_open/close/ioctl/read/write/poll handlers. */ | |
2056 | +static struct file_operations _dlm_fops = { | |
2057 | + .open = dlm_open, | |
2058 | + .release = dlm_close, | |
2059 | + .ioctl = dlm_ioctl, | |
2060 | + .read = dlm_read, | |
2061 | + .write = dlm_write, | |
2062 | + .poll = dlm_poll, | |
2063 | + .owner = THIS_MODULE, | |
2064 | +}; | |
2065 | +/* File operations for the control device registered in dlm_device_init(): open, release and ioctl only. */ | |
2066 | +static struct file_operations _dlm_ctl_fops = { | |
2067 | + .open = dlm_ctl_open, | |
2068 | + .release = dlm_ctl_close, | |
2069 | + .ioctl = dlm_ctl_ioctl, | |
2070 | + .owner = THIS_MODULE, | |
2071 | +}; | |
2072 | + | |
2073 | +/* | |
2074 | + * Create control device: initialise the user lockspace list and its lock, then register the "dlm-control" misc device with a dynamic minor. Returns 0, or the misc_register() error. | |
2075 | + */ | |
2076 | +int dlm_device_init(void) | |
2077 | +{ | |
2078 | + int r; | |
2079 | + | |
2080 | + INIT_LIST_HEAD(&user_ls_list); | |
b7b72b66 | 2081 | + init_MUTEX(&user_ls_lock); |
c1c6733f AM |
2082 | + |
2083 | + ctl_device.name = "dlm-control"; | |
2084 | + ctl_device.fops = &_dlm_ctl_fops; | |
2085 | + ctl_device.minor = MISC_DYNAMIC_MINOR; | |
2086 | + | |
2087 | + r = misc_register(&ctl_device); | |
2088 | + if (r) { | |
2089 | + log_print("misc_register failed for DLM control device"); | |
2090 | + return r; | |
2091 | + } | |
2092 | + | |
2093 | + return 0; | |
2094 | +} | |
2095 | +/* Unregister the control misc device set up by dlm_device_init(). */ | |
2096 | +void dlm_device_exit(void) | |
2097 | +{ | |
2098 | + misc_deregister(&ctl_device); | |
2099 | +} | |
2100 | + | |
2101 | +/* | |
2102 | + * Overrides for Emacs so that we follow Linus's tabbing style. | |
2103 | + * Emacs will notice this stuff at the end of the file and automatically | |
2104 | + * adjust the settings for this buffer only. This must remain at the end | |
2105 | + * of the file. | |
2106 | + * --------------------------------------------------------------------------- | |
2107 | + * Local variables: | |
2108 | + * c-file-style: "linux" | |
2109 | + * End: | |
2110 | + */ | |
2111 | diff -urN linux-orig/cluster/dlm/device.h linux-patched/cluster/dlm/device.h | |
2112 | --- linux-orig/cluster/dlm/device.h 1970-01-01 07:30:00.000000000 +0730 | |
bb1d8b11 | 2113 | +++ linux-patched/cluster/dlm/device.h 2004-11-03 11:31:56.000000000 +0800 |
c1c6733f AM |
2114 | @@ -0,0 +1,19 @@ |
2115 | +/****************************************************************************** | |
2116 | +******************************************************************************* | |
2117 | +** | |
2118 | +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | |
2119 | +** Copyright (C) 2004 Red Hat, Inc. All rights reserved. | |
2120 | +** | |
2121 | +** This copyrighted material is made available to anyone wishing to use, | |
2122 | +** modify, copy, or redistribute it subject to the terms and conditions | |
2123 | +** of the GNU General Public License v.2. | |
2124 | +** | |
2125 | +******************************************************************************* | |
2126 | +******************************************************************************/ | |
2127 | + | |
2128 | +#ifndef __DEVICE_DOT_H__ | |
2129 | +#define __DEVICE_DOT_H__ | |
2130 | + | |
2131 | +extern void dlm_device_free_devices(void); | |
2132 | + | |
2133 | +#endif /* __DEVICE_DOT_H__ */ | |
2134 | diff -urN linux-orig/cluster/dlm/dir.c linux-patched/cluster/dlm/dir.c | |
2135 | --- linux-orig/cluster/dlm/dir.c 1970-01-01 07:30:00.000000000 +0730 | |
bb1d8b11 AM |
2136 | +++ linux-patched/cluster/dlm/dir.c 2004-11-03 11:31:56.000000000 +0800 |
2137 | @@ -0,0 +1,471 @@ | |
c1c6733f AM |
2138 | +/****************************************************************************** |
2139 | +******************************************************************************* | |
2140 | +** | |
2141 | +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | |
2142 | +** Copyright (C) 2004 Red Hat, Inc. All rights reserved. | |
2143 | +** | |
2144 | +** This copyrighted material is made available to anyone wishing to use, | |
2145 | +** modify, copy, or redistribute it subject to the terms and conditions | |
2146 | +** of the GNU General Public License v.2. | |
2147 | +** | |
2148 | +******************************************************************************* | |
2149 | +******************************************************************************/ | |
2150 | + | |
2151 | +#include "dlm_internal.h" | |
2152 | +#include "nodes.h" | |
2153 | +#include "lockspace.h" | |
2154 | +#include "lowcomms.h" | |
2155 | +#include "reccomms.h" | |
2156 | +#include "rsb.h" | |
2157 | +#include "config.h" | |
2158 | +#include "memory.h" | |
2159 | +#include "recover.h" | |
2160 | +#include "util.h" | |
2161 | +/* Record header placed in front of each resource name in a RECOVERNAMES buffer; written by dlm_dir_rebuild_send() and decoded by resmov_in(). */ | |
b7b72b66 AM |
2162 | +struct resmov { |
2163 | + uint32_t rm_nodeid; /* big-endian in the buffer */ | |
2164 | + uint16_t rm_length; /* length of the name that follows, big-endian in the buffer; 0 marks end of block or end of list */ | |
2165 | + uint16_t rm_pad; | |
2166 | +}; | |
2167 | +/* printk the len bytes at b one character at a time, followed by a newline; b need not be NUL-terminated. */ | |
2168 | +void print_name(char *b, int len) | |
2169 | +{ | |
2170 | + int i; | |
2171 | + for (i = 0; i < len; i++) | |
2172 | + printk("%c", b[i]); | |
2173 | + printk("\n"); | |
2174 | +} | |
2175 | +/* Park de on ls_recover_list (under ls_recover_list_lock) so get_free_de() can reuse it. */ | |
2176 | +static void put_free_de(struct dlm_ls *ls, struct dlm_direntry *de) | |
2177 | +{ | |
2178 | + spin_lock(&ls->ls_recover_list_lock); | |
2179 | + list_add(&de->list, &ls->ls_recover_list); | |
2180 | + spin_unlock(&ls->ls_recover_list_lock); | |
2181 | +} | |
2182 | +/* Return a direntry for a name of len bytes: reuse one of exactly that length from ls_recover_list if present (master_nodeid and name cleared), otherwise return whatever allocate_direntry() gives. */ | |
2183 | +static struct dlm_direntry *get_free_de(struct dlm_ls *ls, int len) | |
2184 | +{ | |
2185 | + int found = FALSE; | |
2186 | + struct dlm_direntry *de; | |
2187 | + | |
2188 | + spin_lock(&ls->ls_recover_list_lock); | |
2189 | + list_for_each_entry(de, &ls->ls_recover_list, list) { | |
2190 | + if (de->length == len) { | |
2191 | + list_del(&de->list); | |
2192 | + de->master_nodeid = 0; | |
2193 | + memset(de->name, 0, len); | |
2194 | + found = TRUE; | |
2195 | + break; | |
2196 | + } | |
2197 | + } | |
2198 | + spin_unlock(&ls->ls_recover_list_lock); | |
2199 | + /* de is only meaningful here when found is set; otherwise the loop ran off the end of the list. */ | |
2200 | + if (!found) | |
2201 | + de = allocate_direntry(ls, len); | |
2202 | + return de; | |
2203 | +} | |
2204 | +/* Empty ls_recover_list, passing every entry to free_direntry(), all under ls_recover_list_lock. */ | |
c783755a | 2205 | +void clear_free_de(struct dlm_ls *ls) |
b7b72b66 AM |
2206 | +{ |
2207 | + struct dlm_direntry *de; | |
2208 | + | |
2209 | + spin_lock(&ls->ls_recover_list_lock); | |
2210 | + while (!list_empty(&ls->ls_recover_list)) { | |
2211 | + de = list_entry(ls->ls_recover_list.next, struct dlm_direntry, | |
2212 | + list); | |
2213 | + list_del(&de->list); | |
2214 | + free_direntry(de); | |
2215 | + } | |
2216 | + spin_unlock(&ls->ls_recover_list_lock); | |
2217 | +} | |
2218 | + | |
c1c6733f AM |
2219 | +/* |
2220 | + * We use the upper 16 bits of the hash value to select the directory node. | |
2221 | + * Low bits are used for distribution of rsb's among hash buckets on each node. | |
2222 | + * | |
c1c6733f | 2223 | + * To give the exact range wanted (0 to num_nodes-1), we apply a modulus of |
b7b72b66 AM |
2224 | + * num_nodes to the hash value. This value in the desired range is used as an |
2225 | + * offset into the sorted list of nodeid's to give the particular nodeid of the | |
2226 | + * directory node. | |
c1c6733f AM |
2227 | + */ |
2228 | +/* Returns the nodeid of the directory node for name[0..length). With a single node this is always our_nodeid(). */ | |
b7b72b66 | 2229 | +uint32_t name_to_directory_nodeid(struct dlm_ls *ls, char *name, int length) |
c1c6733f AM |
2230 | +{ |
2231 | + struct list_head *tmp; | |
b7b72b66 | 2232 | + struct dlm_csb *csb = NULL; |
c1c6733f AM |
2233 | + uint32_t hash, node, n = 0, nodeid; |
2234 | + | |
2235 | + if (ls->ls_num_nodes == 1) { | |
2236 | + nodeid = our_nodeid(); | |
2237 | + goto out; | |
2238 | + } | |
2239 | + | |
b7b72b66 AM |
2240 | + hash = dlm_hash(name, length); |
2241 | + node = (hash >> 16) % ls->ls_num_nodes; | |
c1c6733f | 2242 | + |
bb1d8b11 AM |
2243 | + if (ls->ls_node_array) { |
2244 | + nodeid = ls->ls_node_array[node]; | |
2245 | + goto out; | |
2246 | + } | |
2247 | + /* No node array: walk ls_nodes to the entry at position node. */ | |
c1c6733f AM |
2248 | + list_for_each(tmp, &ls->ls_nodes) { |
2249 | + if (n++ != node) | |
2250 | + continue; | |
b7b72b66 | 2251 | + csb = list_entry(tmp, struct dlm_csb, list); |
c1c6733f AM |
2252 | + break; |
2253 | + } | |
2254 | + /* csb stays NULL if ls_nodes has fewer than node+1 entries; the assert below catches that. */ | |
b7b72b66 AM |
2255 | + DLM_ASSERT(csb, printk("num_nodes=%u n=%u node=%u\n", |
2256 | + ls->ls_num_nodes, n, node);); | |
2257 | + nodeid = csb->node->nodeid; | |
bb1d8b11 | 2258 | + out: |
c1c6733f AM |
2259 | + return nodeid; |
2260 | +} | |
2261 | +/* Convenience wrapper: directory nodeid for rsb, computed from its lockspace, name and name length. */ | |
b7b72b66 | 2262 | +uint32_t get_directory_nodeid(struct dlm_rsb *rsb) |
c1c6733f AM |
2263 | +{ |
2264 | + return name_to_directory_nodeid(rsb->res_ls, rsb->res_name, | |
2265 | + rsb->res_length); | |
2266 | +} | |
2267 | +/* Map a name to its bucket index in ls_dirtbl: dlm_hash() masked with ls_dirtbl_size - 1. */ | |
b7b72b66 | 2268 | +static inline uint32_t dir_hash(struct dlm_ls *ls, char *name, int len) |
c1c6733f AM |
2269 | +{ |
2270 | + uint32_t val; | |
2271 | + /* NOTE(review): the mask presumably relies on ls_dirtbl_size being a power of two - confirm where the table is sized */ | |
b7b72b66 AM |
2272 | + val = dlm_hash(name, len); |
2273 | + val &= (ls->ls_dirtbl_size - 1); | |
c1c6733f AM |
2274 | + |
2275 | + return val; | |
2276 | +} | |
2277 | +/* Append de to the tail of its hash bucket in ls_dirtbl. No bucket lock is taken in this function. */ | |
b7b72b66 | 2278 | +static void add_entry_to_hash(struct dlm_ls *ls, struct dlm_direntry *de) |
c1c6733f | 2279 | +{ |
b7b72b66 | 2280 | + uint32_t bucket; |
c1c6733f | 2281 | + |
b7b72b66 AM |
2282 | + bucket = dir_hash(ls, de->name, de->length); |
2283 | + list_add_tail(&de->list, &ls->ls_dirtbl[bucket].list); | |
c1c6733f AM |
2284 | +} |
2285 | +/* Find the direntry in ls_dirtbl[bucket] whose length and name bytes match exactly; returns it, or NULL if none. No lock is taken in this function. */ | |
b7b72b66 AM |
2286 | +static struct dlm_direntry *search_bucket(struct dlm_ls *ls, char *name, |
2287 | + int namelen, uint32_t bucket) | |
c1c6733f | 2288 | +{ |
b7b72b66 | 2289 | + struct dlm_direntry *de; |
c1c6733f | 2290 | + |
b7b72b66 AM |
2291 | + list_for_each_entry(de, &ls->ls_dirtbl[bucket].list, list) { |
2292 | + if (de->length == namelen && !memcmp(name, de->name, namelen)) | |
c1c6733f AM |
2293 | + goto out; |
2294 | + } | |
b7b72b66 AM |
2295 | + de = NULL; |
2296 | + out: | |
2297 | + return de; | |
c1c6733f AM |
2298 | +} |
2299 | +/* Remove and free the directory entry for name, but only if it exists and its master_nodeid equals nodeid; otherwise log the mismatch and leave the directory unchanged. Runs under the bucket's write lock. */ | |
b7b72b66 | 2300 | +void dlm_dir_remove(struct dlm_ls *ls, uint32_t nodeid, char *name, int namelen) |
c1c6733f | 2301 | +{ |
b7b72b66 | 2302 | + struct dlm_direntry *de; |
c1c6733f AM |
2303 | + uint32_t bucket; |
2304 | + | |
b7b72b66 | 2305 | + bucket = dir_hash(ls, name, namelen); |
c1c6733f | 2306 | + |
b7b72b66 | 2307 | + write_lock(&ls->ls_dirtbl[bucket].lock); |
c1c6733f | 2308 | + |
b7b72b66 | 2309 | + de = search_bucket(ls, name, namelen, bucket); |
c1c6733f | 2310 | + |
b7b72b66 AM |
2311 | + if (!de) { |
2312 | + log_all(ls, "remove fr %u none", nodeid); | |
2313 | + print_name(name, namelen); | |
c1c6733f AM |
2314 | + goto out; |
2315 | + } | |
2316 | + | |
b7b72b66 AM |
2317 | + if (de->master_nodeid != nodeid) { |
2318 | + log_all(ls, "remove fr %u ID %u", nodeid, de->master_nodeid); | |
2319 | + print_name(name, namelen); | |
c1c6733f AM |
2320 | + goto out; |
2321 | + } | |
2322 | + | |
b7b72b66 AM |
2323 | + list_del(&de->list); |
2324 | + free_direntry(de); | |
2325 | + out: | |
2326 | + write_unlock(&ls->ls_dirtbl[bucket].lock); | |
c1c6733f AM |
2327 | +} |
2328 | +/* Empty every bucket of ls_dirtbl. Entries are not freed here: each is moved to ls_recover_list via put_free_de() for later reuse. */ | |
b7b72b66 | 2329 | +void dlm_dir_clear(struct dlm_ls *ls) |
c1c6733f AM |
2330 | +{ |
2331 | + struct list_head *head; | |
b7b72b66 | 2332 | + struct dlm_direntry *de; |
c1c6733f AM |
2333 | + int i; |
2334 | + | |
b7b72b66 AM |
2335 | + for (i = 0; i < ls->ls_dirtbl_size; i++) { |
2336 | + write_lock(&ls->ls_dirtbl[i].lock); | |
2337 | + head = &ls->ls_dirtbl[i].list; | |
c1c6733f | 2338 | + while (!list_empty(head)) { |
b7b72b66 AM |
2339 | + de = list_entry(head->next, struct dlm_direntry, list); |
2340 | + list_del(&de->list); | |
2341 | + put_free_de(ls, de); | |
c1c6733f | 2342 | + } |
b7b72b66 | 2343 | + write_unlock(&ls->ls_dirtbl[i].lock); |
c1c6733f AM |
2344 | + } |
2345 | +} | |
2346 | +/* Decode a struct resmov from the start of buf into rm, converting rm_nodeid and rm_length from big-endian to CPU order. rm_pad is not written. */ | |
b7b72b66 | 2347 | +static void resmov_in(struct resmov *rm, char *buf) |
c1c6733f | 2348 | +{ |
b7b72b66 | 2349 | + struct resmov tmp; |
c1c6733f | 2350 | + |
b7b72b66 | 2351 | + memcpy(&tmp, buf, sizeof(struct resmov)); |
c1c6733f AM |
2352 | + |
2353 | + rm->rm_nodeid = be32_to_cpu(tmp.rm_nodeid); | |
2354 | + rm->rm_length = be16_to_cpu(tmp.rm_length); | |
2355 | +} | |
2356 | +/* Rebuild this node's resource directory: clear it, then for each node on ls_nodes repeatedly send RECCOMM_RECOVERNAMES (carrying the last name received) and add every returned record to the hash. On success sets LSFL_RESDIR_VALID and returns 0; otherwise returns -ENOMEM or the error from dlm_recovery_stopped()/rcom_send_message(). */ | |
b7b72b66 | 2357 | +int dlm_dir_rebuild_local(struct dlm_ls *ls) |
c1c6733f | 2358 | +{ |
b7b72b66 AM |
2359 | + struct dlm_csb *csb; |
2360 | + struct dlm_direntry *de; | |
2361 | + struct dlm_rcom *rc; | |
2362 | + struct resmov mov, last_mov; | |
c1c6733f AM |
2363 | + char *b, *last_name; |
2364 | + int error = -ENOMEM, count = 0; | |
2365 | + | |
2366 | + log_all(ls, "rebuild resource directory"); | |
2367 | + | |
b7b72b66 | 2368 | + dlm_dir_clear(ls); |
c1c6733f AM |
2369 | + |
2370 | + rc = allocate_rcom_buffer(ls); | |
2371 | + if (!rc) | |
2372 | + goto out; | |
2373 | + | |
2374 | + last_name = (char *) kmalloc(DLM_RESNAME_MAXLEN, GFP_KERNEL); | |
2375 | + if (!last_name) | |
2376 | + goto free_rc; | |
2377 | + | |
b7b72b66 | 2378 | + list_for_each_entry(csb, &ls->ls_nodes, list) { |
c1c6733f AM |
2379 | + last_mov.rm_length = 0; |
2380 | + for (;;) { | |
b7b72b66 | 2381 | + error = dlm_recovery_stopped(ls); |
c1c6733f AM |
2382 | + if (error) |
2383 | + goto free_last; | |
2384 | + | |
2385 | + memcpy(rc->rc_buf, last_name, last_mov.rm_length); | |
2386 | + rc->rc_datalen = last_mov.rm_length; | |
2387 | + | |
b7b72b66 | 2388 | + error = rcom_send_message(ls, csb->node->nodeid, |
c1c6733f AM |
2389 | + RECCOMM_RECOVERNAMES, rc, 1); |
2390 | + if (error) | |
2391 | + goto free_last; | |
2392 | + | |
2393 | + schedule(); | |
2394 | + | |
2395 | + /* | |
2396 | + * pick each res out of buffer | |
2397 | + */ | |
2398 | + | |
2399 | + b = rc->rc_buf; | |
2400 | + | |
2401 | + for (;;) { | |
b7b72b66 AM |
2402 | + resmov_in(&mov, b); |
2403 | + b += sizeof(struct resmov); | |
c1c6733f AM |
2404 | + |
2405 | + /* Length of 0 with a non-zero nodeid marks the | |
2406 | + * end of the list */ | |
2407 | + if (!mov.rm_length && mov.rm_nodeid) | |
2408 | + goto done; | |
2409 | + | |
2410 | + /* This is just the end of the block */ | |
2411 | + if (!mov.rm_length) | |
2412 | + break; | |
2413 | + | |
b7b72b66 AM |
2414 | + DLM_ASSERT(mov.rm_nodeid == csb->node->nodeid,); |
2415 | + | |
c1c6733f | 2416 | + error = -ENOMEM; |
b7b72b66 AM |
2417 | + de = get_free_de(ls, mov.rm_length); |
2418 | + if (!de) | |
c1c6733f AM |
2419 | + goto free_last; |
2420 | + | |
b7b72b66 AM |
2421 | + de->master_nodeid = mov.rm_nodeid; |
2422 | + de->length = mov.rm_length; | |
2423 | + memcpy(de->name, b, mov.rm_length); | |
c1c6733f AM |
2424 | + b += mov.rm_length; |
2425 | + | |
b7b72b66 | 2426 | + add_entry_to_hash(ls, de); |
c1c6733f AM |
2427 | + count++; |
2428 | + /* Remember this name: it is sent back as the resume point in the next request to this node. */ | |
2429 | + last_mov = mov; | |
2430 | + memset(last_name, 0, DLM_RESNAME_MAXLEN); | |
b7b72b66 | 2431 | + memcpy(last_name, de->name, de->length); |
c1c6733f AM |
2432 | + } |
2433 | + } | |
2434 | + done: | |
2435 | + ; | |
2436 | + } | |
2437 | + | |
2438 | + set_bit(LSFL_RESDIR_VALID, &ls->ls_flags); | |
2439 | + error = 0; | |
2440 | + | |
2441 | + log_all(ls, "rebuilt %d resources", count); | |
2442 | + | |
2443 | + free_last: | |
2444 | + kfree(last_name); | |
2445 | + | |
2446 | + free_rc: | |
2447 | + free_rcom_buffer(rc); | |
2448 | + | |
2449 | + out: | |
b7b72b66 | 2450 | + clear_free_de(ls); |
c1c6733f AM |
2451 | + return error; |
2452 | +} | |
2453 | + | |
2454 | +/* | |
b7b72b66 | 2455 | + * The reply end of dlm_dir_rebuild_local/RECOVERNAMES. Collect and send as |
c1c6733f AM |
2456 | + * many resource names as can fit in the buffer. |
2457 | + */ | |
2458 | +/* inbuf/inlen name the rsb to resume after (inlen <= 1 means start from the head of the root list). Records are written to outbuf (capacity outlen); returns the number of bytes written. */ | |
b7b72b66 AM |
2459 | +int dlm_dir_rebuild_send(struct dlm_ls *ls, char *inbuf, int inlen, |
2460 | + char *outbuf, int outlen, uint32_t nodeid) | |
c1c6733f AM |
2461 | +{ |
2462 | + struct list_head *list; | |
b7b72b66 | 2463 | + struct dlm_rsb *start_rsb = NULL, *rsb; |
c1c6733f AM |
2464 | + int offset = 0, start_namelen, error; |
2465 | + char *start_name; | |
b7b72b66 | 2466 | + struct resmov tmp; |
c1c6733f AM |
2467 | + uint32_t dir_nodeid; |
2468 | + | |
2469 | + /* | |
2470 | + * Find the rsb where we left off (or start again) | |
2471 | + */ | |
2472 | + | |
2473 | + start_namelen = inlen; | |
2474 | + start_name = inbuf; | |
2475 | + /* NOTE(review): start_rsb is dereferenced further down after release_rsb(); presumably it stays on the root list - confirm */ | |
2476 | + if (start_namelen > 1) { | |
b7b72b66 AM |
2477 | + error = find_rsb(ls, NULL, start_name, start_namelen, 0, |
2478 | + &start_rsb); | |
2479 | + DLM_ASSERT(!error && start_rsb, printk("error %d\n", error);); | |
c1c6733f AM |
2480 | + release_rsb(start_rsb); |
2481 | + } | |
2482 | + | |
2483 | + /* | |
2484 | + * Send rsb names for rsb's we're master of and whose directory node | |
2485 | + * matches the requesting node. | |
2486 | + */ | |
2487 | + | |
b7b72b66 | 2488 | + down_read(&ls->ls_root_lock); |
c1c6733f AM |
2489 | + if (start_rsb) |
2490 | + list = start_rsb->res_rootlist.next; | |
2491 | + else | |
2492 | + list = ls->ls_rootres.next; | |
2493 | + | |
2494 | + for (offset = 0; list != &ls->ls_rootres; list = list->next) { | |
b7b72b66 | 2495 | + rsb = list_entry(list, struct dlm_rsb, res_rootlist); |
c1c6733f AM |
2496 | + if (rsb->res_nodeid) |
2497 | + continue; | |
2498 | + | |
2499 | + dir_nodeid = get_directory_nodeid(rsb); | |
2500 | + if (dir_nodeid != nodeid) | |
2501 | + continue; | |
2502 | + /* Keep room for this record plus one more header, so an end marker always fits. */ | |
b7b72b66 | 2503 | + if (offset + sizeof(struct resmov)*2 + rsb->res_length > outlen) { |
c1c6733f | 2504 | + /* Write end-of-block record */ |
b7b72b66 AM |
2505 | + memset(&tmp, 0, sizeof(struct resmov)); |
2506 | + memcpy(outbuf + offset, &tmp, sizeof(struct resmov)); | |
2507 | + offset += sizeof(struct resmov); | |
c1c6733f AM |
2508 | + goto out; |
2509 | + } | |
2510 | + | |
b7b72b66 | 2511 | + memset(&tmp, 0, sizeof(struct resmov)); |
c1c6733f AM |
2512 | + tmp.rm_nodeid = cpu_to_be32(our_nodeid()); |
2513 | + tmp.rm_length = cpu_to_be16(rsb->res_length); | |
2514 | + | |
b7b72b66 AM |
2515 | + memcpy(outbuf + offset, &tmp, sizeof(struct resmov)); |
2516 | + offset += sizeof(struct resmov); | |
c1c6733f AM |
2517 | + |
2518 | + memcpy(outbuf + offset, rsb->res_name, rsb->res_length); | |
2519 | + offset += rsb->res_length; | |
2520 | + } | |
2521 | + | |
2522 | + /* | |
2523 | + * If we've reached the end of the list (and there's room) write a | |
2524 | + * terminating record. | |
2525 | + */ | |
2526 | + | |
2527 | + if ((list == &ls->ls_rootres) && | |
b7b72b66 | 2528 | + (offset + sizeof(struct resmov) <= outlen)) { |
c1c6733f | 2529 | + |
b7b72b66 | 2530 | + memset(&tmp, 0, sizeof(struct resmov)); |
c1c6733f AM |
2531 | + /* This only needs to be non-zero */ |
2532 | + tmp.rm_nodeid = cpu_to_be32(1); | |
2533 | + /* and this must be zero */ | |
2534 | + tmp.rm_length = 0; | |
b7b72b66 AM |
2535 | + memcpy(outbuf + offset, &tmp, sizeof(struct resmov)); |
2536 | + offset += sizeof(struct resmov); | |
c1c6733f AM |
2537 | + } |
2538 | + | |
2539 | + out: | |
b7b72b66 | 2540 | + up_read(&ls->ls_root_lock); |
c1c6733f AM |
2541 | + return offset; |
2542 | +} | |
b7b72b66 AM |
2544 | +static int get_entry(struct dlm_ls *ls, uint32_t nodeid, char *name, |
2545 | + int namelen, uint32_t *r_nodeid) | |
c1c6733f | 2546 | +{ |
b7b72b66 | 2547 | + struct dlm_direntry *de, *tmp; |
c1c6733f AM |
2548 | + uint32_t bucket; |
2549 | + | |
b7b72b66 | 2550 | + bucket = dir_hash(ls, name, namelen); |
c1c6733f | 2551 | + |
b7b72b66 AM |
2552 | + write_lock(&ls->ls_dirtbl[bucket].lock); |
2553 | + de = search_bucket(ls, name, namelen, bucket); | |
2554 | + if (de) { | |
2555 | + *r_nodeid = de->master_nodeid; | |
2556 | + write_unlock(&ls->ls_dirtbl[bucket].lock); | |
2557 | + if (*r_nodeid == nodeid) | |
2558 | + return -EEXIST; | |
2559 | + return 0; | |
2560 | + } | |
c1c6733f | 2561 | + |
b7b72b66 | 2562 | + write_unlock(&ls->ls_dirtbl[bucket].lock); |
c1c6733f | 2563 | + |
b7b72b66 AM |
2564 | + de = allocate_direntry(ls, namelen); |
2565 | + if (!de) | |
c1c6733f AM |
2566 | + return -ENOMEM; |
2567 | + | |
b7b72b66 AM |
2568 | + de->master_nodeid = nodeid; |
2569 | + de->length = namelen; | |
2570 | + memcpy(de->name, name, namelen); | |
c1c6733f | 2571 | + |
b7b72b66 AM |
2572 | + write_lock(&ls->ls_dirtbl[bucket].lock); |
2573 | + tmp = search_bucket(ls, name, namelen, bucket); | |
c1c6733f | 2574 | + if (tmp) { |
b7b72b66 AM |
2575 | + free_direntry(de); |
2576 | + de = tmp; | |
2577 | + } else { | |
2578 | + list_add_tail(&de->list, &ls->ls_dirtbl[bucket].list); | |
c1c6733f | 2579 | + } |
b7b72b66 AM |
2580 | + *r_nodeid = de->master_nodeid; |
2581 | + write_unlock(&ls->ls_dirtbl[bucket].lock); | |
c1c6733f AM |
2582 | + return 0; |
2583 | +} | |
2584 | + | |
b7b72b66 AM |
2585 | +int dlm_dir_lookup(struct dlm_ls *ls, uint32_t nodeid, char *name, int namelen, |
2586 | + uint32_t *r_nodeid) | |
2587 | +{ | |
2588 | + return get_entry(ls, nodeid, name, namelen, r_nodeid); | |
2589 | +} | |
2590 | + | |
c1c6733f AM |
2591 | +/* |
2592 | + * The node with lowest id queries all nodes to determine when all are done. | |
2593 | + * All other nodes query the low nodeid for this. | |
2594 | + */ | |
2595 | + | |
b7b72b66 | 2596 | +int dlm_dir_rebuild_wait(struct dlm_ls *ls) |
c1c6733f AM |
2597 | +{ |
2598 | + int error; | |
2599 | + | |
2600 | + if (ls->ls_low_nodeid == our_nodeid()) { | |
b7b72b66 | 2601 | + error = dlm_wait_status_all(ls, RESDIR_VALID); |
c1c6733f AM |
2602 | + if (!error) |
2603 | + set_bit(LSFL_ALL_RESDIR_VALID, &ls->ls_flags); | |
2604 | + } else | |
b7b72b66 | 2605 | + error = dlm_wait_status_low(ls, RESDIR_ALL_VALID); |
c1c6733f AM |
2606 | + |
2607 | + return error; | |
2608 | +} | |
2609 | diff -urN linux-orig/cluster/dlm/dir.h linux-patched/cluster/dlm/dir.h | |
2610 | --- linux-orig/cluster/dlm/dir.h 1970-01-01 07:30:00.000000000 +0730 | |
bb1d8b11 | 2611 | +++ linux-patched/cluster/dlm/dir.h 2004-11-03 11:31:56.000000000 +0800 |
c783755a | 2612 | @@ -0,0 +1,33 @@ |
c1c6733f AM |
2613 | +/****************************************************************************** |
2614 | +******************************************************************************* | |
2615 | +** | |
2616 | +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | |
2617 | +** Copyright (C) 2004 Red Hat, Inc. All rights reserved. | |
2618 | +** | |
2619 | +** This copyrighted material is made available to anyone wishing to use, | |
2620 | +** modify, copy, or redistribute it subject to the terms and conditions | |
2621 | +** of the GNU General Public License v.2. | |
2622 | +** | |
2623 | +******************************************************************************* | |
2624 | +******************************************************************************/ | |
2625 | + | |
2626 | +#ifndef __DIR_DOT_H__ | |
2627 | +#define __DIR_DOT_H__ | |
2628 | + | |
b7b72b66 AM |
2629 | +void print_name(char *b, int len); |
2630 | +uint32_t name_to_directory_nodeid(struct dlm_ls *ls, char *name, int length); | |
2631 | +uint32_t get_directory_nodeid(struct dlm_rsb *rsb); | |
2632 | + | |
2633 | +int dlm_dir_lookup(struct dlm_ls *ls, uint32_t nodeid, char *name, int namelen, | |
2634 | + uint32_t *r_nodeid); | |
2635 | +void dlm_dir_remove(struct dlm_ls *ls, uint32_t nodeid, char *name, | |
2636 | + int namelen); | |
2637 | +int dlm_dir_rebuild_local(struct dlm_ls *ls); | |
2638 | +int dlm_dir_rebuild_send(struct dlm_ls *ls, char *inbuf, int inlen, | |
2639 | + char *outbuf, int outlen, uint32_t nodeid); | |
2640 | +int dlm_dir_rebuild_wait(struct dlm_ls * ls); | |
2641 | +void dlm_dir_clear(struct dlm_ls *ls); | |
2642 | +void dlm_dir_dump(struct dlm_ls *ls); | |
c783755a | 2643 | +void clear_free_de(struct dlm_ls *ls); |
c1c6733f AM |
2644 | + |
2645 | +#endif /* __DIR_DOT_H__ */ | |
2646 | diff -urN linux-orig/cluster/dlm/dlm_internal.h linux-patched/cluster/dlm/dlm_internal.h | |
2647 | --- linux-orig/cluster/dlm/dlm_internal.h 1970-01-01 07:30:00.000000000 +0730 | |
bb1d8b11 AM |
2648 | +++ linux-patched/cluster/dlm/dlm_internal.h 2004-11-03 11:31:56.000000000 +0800 |
2649 | @@ -0,0 +1,612 @@ | |
c1c6733f AM |
2650 | +/****************************************************************************** |
2651 | +******************************************************************************* | |
2652 | +** | |
2653 | +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | |
2654 | +** Copyright (C) 2004 Red Hat, Inc. All rights reserved. | |
2655 | +** | |
2656 | +** This copyrighted material is made available to anyone wishing to use, | |
2657 | +** modify, copy, or redistribute it subject to the terms and conditions | |
2658 | +** of the GNU General Public License v.2. | |
2659 | +** | |
2660 | +******************************************************************************* | |
2661 | +******************************************************************************/ | |
2662 | + | |
2663 | +#ifndef __DLM_INTERNAL_DOT_H__ | |
2664 | +#define __DLM_INTERNAL_DOT_H__ | |
2665 | + | |
2666 | +/* | |
2667 | + * This is the main header file to be included in each DLM source file. | |
2668 | + */ | |
2669 | + | |
2670 | +#define DLM_RELEASE_NAME "<CVS>" | |
2671 | + | |
2672 | +#include <linux/slab.h> | |
2673 | +#include <linux/sched.h> | |
2674 | +#include <asm/semaphore.h> | |
2675 | +#include <linux/types.h> | |
2676 | +#include <linux/spinlock.h> | |
2677 | +#include <linux/vmalloc.h> | |
2678 | +#include <asm/uaccess.h> | |
2679 | +#include <linux/list.h> | |
2680 | +#include <linux/errno.h> | |
2681 | +#include <linux/random.h> | |
b7b72b66 AM |
2682 | +#include <linux/delay.h> |
2683 | +#include <linux/interrupt.h> | |
2684 | +#include <linux/kthread.h> | |
c1c6733f AM |
2685 | + |
2686 | +#include <cluster/dlm.h> | |
2687 | +#include <cluster/dlm_device.h> | |
2688 | +#include <cluster/service.h> | |
2689 | + | |
2690 | +#ifndef TRUE | |
2691 | +#define TRUE (1) | |
2692 | +#endif | |
2693 | + | |
2694 | +#ifndef FALSE | |
2695 | +#define FALSE (0) | |
2696 | +#endif | |
2697 | + | |
2698 | +#if (BITS_PER_LONG == 64) | |
2699 | +#define PRIu64 "lu" | |
2700 | +#define PRId64 "ld" | |
2701 | +#define PRIo64 "lo" | |
2702 | +#define PRIx64 "lx" | |
2703 | +#define PRIX64 "lX" | |
2704 | +#define SCNu64 "lu" | |
2705 | +#define SCNd64 "ld" | |
2706 | +#define SCNo64 "lo" | |
2707 | +#define SCNx64 "lx" | |
2708 | +#define SCNX64 "lX" | |
2709 | +#else | |
2710 | +#define PRIu64 "Lu" | |
2711 | +#define PRId64 "Ld" | |
2712 | +#define PRIo64 "Lo" | |
2713 | +#define PRIx64 "Lx" | |
2714 | +#define PRIX64 "LX" | |
2715 | +#define SCNu64 "Lu" | |
2716 | +#define SCNd64 "Ld" | |
2717 | +#define SCNo64 "Lo" | |
2718 | +#define SCNx64 "Lx" | |
2719 | +#define SCNX64 "LX" | |
2720 | +#endif | |
2721 | + | |
2722 | +#define wchan_cond_sleep_intr(chan, sleep_cond) \ | |
2723 | +do \ | |
2724 | +{ \ | |
2725 | + DECLARE_WAITQUEUE(__wait_chan, current); \ | |
2726 | + current->state = TASK_INTERRUPTIBLE; \ | |
2727 | + add_wait_queue(&chan, &__wait_chan); \ | |
2728 | + if ((sleep_cond)) \ | |
2729 | + schedule(); \ | |
2730 | + remove_wait_queue(&chan, &__wait_chan); \ | |
2731 | + current->state = TASK_RUNNING; \ | |
2732 | +} \ | |
2733 | +while (0) | |
2734 | + | |
2735 | +static inline int check_timeout(unsigned long stamp, unsigned int seconds) | |
2736 | +{ | |
2737 | + return time_after(jiffies, stamp + seconds * HZ); | |
2738 | +} | |
2739 | + | |
2740 | + | |
2741 | +#define log_print(fmt, args...) printk("dlm: "fmt"\n", ##args) | |
2742 | + | |
2743 | +#define log_all(ls, fmt, args...) \ | |
2744 | + do { \ | |
2745 | + printk("dlm: %s: " fmt "\n", (ls)->ls_name, ##args); \ | |
2746 | + dlm_debug_log(ls, fmt, ##args); \ | |
2747 | + } while (0) | |
2748 | + | |
2749 | +#define log_error log_all | |
2750 | + | |
b7b72b66 AM |
2751 | +#if defined(DLM_DEBUG2) |
2752 | +int nibbler_printf(const char *fmt, ...); | |
2753 | +#define log_debug2(fmt, args...) nibbler_printf(fmt"\n", ##args) | |
2754 | +#else | |
2755 | +#define log_debug2(fmt, args...) | |
2756 | +#endif | |
c1c6733f AM |
2757 | + |
2758 | +#define DLM_DEBUG | |
2759 | +#if defined(DLM_DEBUG) | |
2760 | +#define log_debug(ls, fmt, args...) dlm_debug_log(ls, fmt, ##args) | |
2761 | +#else | |
2762 | +#define log_debug(ls, fmt, args...) | |
2763 | +#endif | |
2764 | + | |
2765 | +#if defined(DLM_DEBUG) && defined(DLM_DEBUG_ALL) | |
2766 | +#undef log_debug | |
2767 | +#define log_debug log_all | |
2768 | +#endif | |
2769 | + | |
2770 | + | |
b7b72b66 | 2771 | +#define DLM_ASSERT(x, do) \ |
c1c6733f AM |
2772 | +{ \ |
2773 | + if (!(x)) \ | |
2774 | + { \ | |
b7b72b66 | 2775 | + dlm_locks_dump(); \ |
c1c6733f AM |
2776 | + dlm_debug_dump(); \ |
2777 | + printk("\nDLM: Assertion failed on line %d of file %s\n" \ | |
2778 | + "DLM: assertion: \"%s\"\n" \ | |
2779 | + "DLM: time = %lu\n", \ | |
2780 | + __LINE__, __FILE__, #x, jiffies); \ | |
2781 | + {do} \ | |
2782 | + printk("\n"); \ | |
2783 | + BUG(); \ | |
2784 | + panic("DLM: Record message above and reboot.\n"); \ | |
2785 | + } \ | |
2786 | +} | |
2787 | + | |
2788 | + | |
b7b72b66 AM |
2789 | +struct dlm_ls; |
2790 | +struct dlm_lkb; | |
2791 | +struct dlm_rsb; | |
2792 | +struct dlm_csb; | |
2793 | +struct dlm_node; | |
2794 | +struct dlm_lkbtable; | |
2795 | +struct dlm_rsbtable; | |
2796 | +struct dlm_dirtable; | |
2797 | +struct dlm_direntry; | |
2798 | +struct dlm_recover; | |
2799 | +struct dlm_header; | |
2800 | +struct dlm_request; | |
2801 | +struct dlm_reply; | |
2802 | +struct dlm_rcom; | |
2803 | +struct dlm_query_request; | |
2804 | +struct dlm_query_reply; | |
c1c6733f | 2805 | + |
c1c6733f | 2806 | + |
b7b72b66 AM |
2807 | +struct dlm_direntry { |
2808 | + struct list_head list; | |
2809 | + uint32_t master_nodeid; | |
2810 | + uint16_t length; | |
2811 | + char name[1]; | |
c1c6733f AM |
2812 | +}; |
2813 | + | |
b7b72b66 AM |
2814 | +struct dlm_dirtable { |
2815 | + struct list_head list; | |
2816 | + rwlock_t lock; | |
c1c6733f AM |
2817 | +}; |
2818 | + | |
b7b72b66 AM |
2819 | +struct dlm_rsbtable { |
2820 | + struct list_head list; | |
2821 | + rwlock_t lock; | |
c1c6733f AM |
2822 | +}; |
2823 | + | |
b7b72b66 AM |
2824 | +struct dlm_lkbtable { |
2825 | + struct list_head list; | |
2826 | + rwlock_t lock; | |
2827 | + uint16_t counter; | |
c1c6733f AM |
2828 | +}; |
2829 | + | |
2830 | +/* | |
2831 | + * Cluster node (per node in cluster) | |
2832 | + */ | |
2833 | + | |
b7b72b66 AM |
2834 | +struct dlm_node { |
2835 | + struct list_head list; | |
2836 | + uint32_t nodeid; | |
c783755a | 2837 | + atomic_t refcount; /* num csb's referencing */ |
c1c6733f AM |
2838 | +}; |
2839 | + | |
2840 | +/* | |
2841 | + * Cluster System Block (per node in a ls) | |
2842 | + */ | |
2843 | + | |
b7b72b66 AM |
2844 | +struct dlm_csb { |
2845 | + struct list_head list; /* per-lockspace node list */ | |
2846 | + struct dlm_node * node; /* global node structure */ | |
2847 | + int gone_event; /* event id when node removed */ | |
c1c6733f AM |
2848 | +}; |
2849 | + | |
2850 | +/* | |
b7b72b66 | 2851 | + * Used to save and manage recovery state for a lockspace. |
c1c6733f AM |
2852 | + */ |
2853 | + | |
b7b72b66 AM |
2854 | +struct dlm_recover { |
2855 | + struct list_head list; | |
2856 | + uint32_t * nodeids; | |
2857 | + int node_count; | |
2858 | + int event_id; | |
c1c6733f AM |
2859 | +}; |
2860 | + | |
2861 | +/* | |
b7b72b66 | 2862 | + * Elements in the range array |
c1c6733f AM |
2863 | + */ |
2864 | + | |
b7b72b66 AM |
2865 | +#define GR_RANGE_START (0) |
2866 | +#define GR_RANGE_END (1) | |
2867 | +#define RQ_RANGE_START (2) | |
2868 | +#define RQ_RANGE_END (3) | |
c1c6733f | 2869 | + |
b7b72b66 AM |
2870 | +/* |
2871 | + * Lockspace structure | |
2872 | + */ | |
2873 | + | |
2874 | +#define LSFL_WORK (0) | |
2875 | +#define LSFL_LS_RUN (1) | |
2876 | +#define LSFL_LS_STOP (2) | |
2877 | +#define LSFL_LS_START (3) | |
2878 | +#define LSFL_LS_FINISH (4) | |
2879 | +#define LSFL_RECCOMM_WAIT (5) | |
2880 | +#define LSFL_RECCOMM_READY (6) | |
2881 | +#define LSFL_NOTIMERS (7) | |
2882 | +#define LSFL_FINISH_RECOVERY (8) | |
2883 | +#define LSFL_RESDIR_VALID (9) | |
2884 | +#define LSFL_ALL_RESDIR_VALID (10) | |
2885 | +#define LSFL_NODES_VALID (11) | |
2886 | +#define LSFL_ALL_NODES_VALID (12) | |
2887 | +#define LSFL_REQUEST_WARN (13) | |
c783755a | 2888 | +#define LSFL_RECOVERD_EXIT (14) |
b7b72b66 AM |
2889 | + |
2890 | +#define LSST_NONE (0) | |
2891 | +#define LSST_INIT (1) | |
2892 | +#define LSST_INIT_DONE (2) | |
2893 | +#define LSST_CLEAR (3) | |
2894 | +#define LSST_WAIT_START (4) | |
2895 | +#define LSST_RECONFIG_DONE (5) | |
2896 | + | |
2897 | +struct dlm_ls { | |
2898 | + struct list_head ls_list; /* list of lockspaces */ | |
2899 | + uint32_t ls_local_id; /* local unique lockspace ID */ | |
2900 | + uint32_t ls_global_id; /* global unique lockspace ID */ | |
2901 | + int ls_allocation; /* Memory allocation policy */ | |
2902 | + int ls_count; /* reference count */ | |
2903 | + unsigned long ls_flags; /* LSFL_ */ | |
2904 | + | |
2905 | + struct dlm_rsbtable * ls_rsbtbl; | |
2906 | + uint32_t ls_rsbtbl_size; | |
2907 | + | |
2908 | + struct dlm_lkbtable * ls_lkbtbl; | |
2909 | + uint32_t ls_lkbtbl_size; | |
2910 | + | |
2911 | + struct dlm_dirtable * ls_dirtbl; | |
2912 | + uint32_t ls_dirtbl_size; | |
2913 | + | |
bb1d8b11 | 2914 | + struct list_head ls_nodes; /* current nodes in ls */ |
b7b72b66 | 2915 | + struct list_head ls_nodes_gone; /* dead node list, recovery */ |
bb1d8b11 | 2916 | + uint32_t ls_num_nodes; /* number of nodes in ls */ |
b7b72b66 | 2917 | + uint32_t ls_low_nodeid; |
bb1d8b11 | 2918 | + uint32_t * ls_node_array; |
b7b72b66 AM |
2919 | + |
2920 | + struct rw_semaphore ls_unlock_sem; /* To prevent unlock on a | |
2921 | + parent lock racing with a | |
2922 | + new child lock */ | |
2923 | + | |
2924 | + struct list_head ls_deadlockq; /* List of locks in conversion | |
2925 | + ordered by duetime, for | |
2926 | + deadlock detection */ | |
2927 | + | |
2928 | + /* recovery related */ | |
2929 | + | |
2930 | + struct task_struct * ls_recoverd_task; | |
c783755a | 2931 | + struct semaphore ls_recoverd_lock; |
b7b72b66 AM |
2932 | + struct list_head ls_recover; /* dlm_recover structs */ |
2933 | + spinlock_t ls_recover_lock; | |
2934 | + int ls_last_stop; | |
2935 | + int ls_last_start; | |
2936 | + int ls_last_finish; | |
2937 | + int ls_state; /* recovery states */ | |
2938 | + | |
2939 | + struct rw_semaphore ls_in_recovery; /* block local requests */ | |
2940 | + struct list_head ls_requestqueue;/* queue remote requests */ | |
2941 | + struct semaphore ls_requestqueue_lock; | |
2942 | + | |
2943 | + struct dlm_rcom * ls_rcom; /* recovery comms */ | |
2944 | + uint32_t ls_rcom_msgid; | |
2945 | + struct semaphore ls_rcom_lock; | |
2946 | + | |
2947 | + struct list_head ls_recover_list; | |
2948 | + spinlock_t ls_recover_list_lock; | |
2949 | + int ls_recover_list_count; | |
2950 | + wait_queue_head_t ls_wait_general; | |
2951 | + | |
2952 | + struct list_head ls_rootres; /* root resources */ | |
2953 | + struct rw_semaphore ls_root_lock; /* protect rootres list */ | |
2954 | + | |
2955 | + struct list_head ls_rebuild_rootrsb_list; /* Root of lock trees | |
2956 | + we're deserialising */ | |
2957 | + int ls_namelen; | |
2958 | + char ls_name[1]; | |
2959 | +}; | |
c1c6733f | 2960 | + |
b7b72b66 AM |
2961 | +/* |
2962 | + * Resource block | |
2963 | + */ | |
c1c6733f | 2964 | + |
b7b72b66 AM |
2965 | +#define RESFL_NEW_MASTER (0) |
2966 | +#define RESFL_RECOVER_LIST (1) | |
2967 | +#define RESFL_MASTER (2) | |
c1c6733f | 2968 | + |
b7b72b66 AM |
2969 | +struct dlm_rsb { |
2970 | + struct list_head res_hashchain; | |
2971 | + uint32_t res_bucket; | |
c1c6733f | 2972 | + |
b7b72b66 | 2973 | + struct dlm_ls * res_ls; /* The owning lockspace */ |
c1c6733f | 2974 | + |
b7b72b66 | 2975 | + struct list_head res_rootlist; /* List of root rsb's */ |
c1c6733f | 2976 | + |
b7b72b66 AM |
2977 | + struct list_head res_subreslist; /* List of all sub-resources |
2978 | + for this root rsb */ | |
c1c6733f | 2979 | + |
b7b72b66 AM |
2980 | + uint8_t res_depth; /* Depth in resource tree */ |
2981 | + unsigned long res_flags; /* Flags, RESFL_ */ | |
c1c6733f | 2982 | + |
b7b72b66 AM |
2983 | + struct list_head res_grantqueue; |
2984 | + struct list_head res_convertqueue; | |
2985 | + struct list_head res_waitqueue; | |
c1c6733f | 2986 | + |
b7b72b66 | 2987 | + uint32_t res_nodeid; /* nodeid of master node */ |
c1c6733f | 2988 | + |
b7b72b66 AM |
2989 | + struct dlm_rsb * res_root; /* root rsb if a subresource */ |
2990 | + struct dlm_rsb * res_parent; /* parent rsb (if any) */ | |
c1c6733f | 2991 | + |
b7b72b66 AM |
2992 | + atomic_t res_ref; /* Number of lkb's */ |
2993 | + uint16_t res_remasterid; /* ID used during remaster */ | |
c1c6733f | 2994 | + |
b7b72b66 AM |
2995 | + struct list_head res_recover_list; /* General list for use |
2996 | + during recovery */ | |
2997 | + int res_recover_msgid; | |
2998 | + int res_newlkid_expect; | |
c1c6733f | 2999 | + |
b7b72b66 | 3000 | + struct rw_semaphore res_lock; |
c1c6733f | 3001 | + |
b7b72b66 | 3002 | + char * res_lvbptr; /* Lock value block */ |
c1c6733f | 3003 | + |
b7b72b66 AM |
3004 | + uint8_t res_length; |
3005 | + char res_name[1]; /* <res_length> bytes */ | |
c1c6733f AM |
3006 | +}; |
3007 | + | |
3008 | +/* | |
b7b72b66 AM |
3009 | + * Lock block. To avoid confusion, where flags mirror the public flags, they |
3010 | + * should have the same value. | |
3011 | + * | |
3012 | + * In general, DLM_LKF flags from dlm.h apply only to lkb_lockqueue_flags | |
3013 | + * and GDLM_LKFLG flags from dlm_internal.h apply only to lkb_flags. | |
3014 | + * The rr_flags field in the request struct is a copy of lkb_lockqueue_flags. | |
3015 | + * There is one dangerous exception: GDLM_LKFLG_RANGE is set in rr_flags | |
3016 | + * when sending a remote range lock request. This value is then copied into | |
3017 | + * the remote lkb_lockqueue_flags field. This means GDLM_LKFLG_RANGE must | |
3018 | + * not have the same value as any external DLM_LKF flag. | |
3019 | + */ | |
3020 | + | |
3021 | +#define GDLM_LKSTS_NEW (0) | |
3022 | +#define GDLM_LKSTS_WAITING (1) | |
3023 | +#define GDLM_LKSTS_GRANTED (2) | |
3024 | +#define GDLM_LKSTS_CONVERT (3) | |
3025 | + | |
3026 | +/* mirror external flags */ | |
3027 | +#define GDLM_LKFLG_VALBLK (0x00000008) | |
3028 | +#define GDLM_LKFLG_PERSISTENT (0x00000080) | |
3029 | +#define GDLM_LKFLG_NODLCKWT (0x00000100) | |
3030 | +#define GDLM_LKFLG_EXPEDITE (0x00000400) | |
c783755a AM |
3031 | +#define GDLM_LKFLG_ORPHAN (0x00004000) |
3032 | +/* external flags now go up to: (0x00004000) : DLM_LKF_ORPHAN */ | |
b7b72b66 AM |
3033 | + |
3034 | +/* internal-only flags */ | |
3035 | +#define GDLM_LKFLG_RANGE (0x00010000) | |
3036 | +#define GDLM_LKFLG_MSTCPY (0x00020000) | |
3037 | +#define GDLM_LKFLG_DELETED (0x00040000) | |
3038 | +#define GDLM_LKFLG_LQCONVERT (0x00080000) | |
3039 | +#define GDLM_LKFLG_LQRESEND (0x00100000) | |
3040 | +#define GDLM_LKFLG_DEMOTED (0x00200000) | |
3041 | +#define GDLM_LKFLG_RESENT (0x00400000) | |
3042 | +#define GDLM_LKFLG_NOREBUILD (0x00800000) | |
3043 | +#define GDLM_LKFLG_UNLOCKDONE (0x01000000) | |
3044 | + | |
3045 | +#define AST_COMP (1) | |
3046 | +#define AST_BAST (2) | |
3047 | +#define AST_DEL (4) | |
3048 | + | |
3049 | +struct dlm_lkb { | |
3050 | + uint32_t lkb_flags; | |
3051 | + uint16_t lkb_status; /* grant, wait, convert */ | |
3052 | + int8_t lkb_rqmode; /* requested lock mode */ | |
3053 | + int8_t lkb_grmode; /* granted lock mode */ | |
3054 | + uint32_t lkb_retstatus; /* status to return in lksb */ | |
3055 | + uint32_t lkb_id; /* our lock ID */ | |
3056 | + struct dlm_lksb * lkb_lksb; /* status block of caller */ | |
3057 | + struct list_head lkb_idtbl_list; /* lockidtbl */ | |
3058 | + struct list_head lkb_statequeue; /* rsb's g/c/w queue */ | |
3059 | + struct dlm_rsb * lkb_resource; | |
3060 | + struct dlm_lkb * lkb_parent; /* parent lock if any */ | |
3061 | + atomic_t lkb_childcnt; /* number of children */ | |
3062 | + | |
3063 | + struct list_head lkb_lockqueue; /* queue of locks waiting | |
3064 | + for remote reply */ | |
3065 | + int lkb_lockqueue_state; /* reason on lockqueue */ | |
3066 | + uint32_t lkb_lockqueue_flags; /* as passed into | |
3067 | + lock/unlock */ | |
3068 | + int lkb_ownpid; /* pid of lock owner */ | |
3069 | + unsigned long lkb_lockqueue_time; /* time lkb went on the | |
3070 | + lockqueue */ | |
3071 | + unsigned long lkb_duetime; /* for deadlock detection */ | |
3072 | + | |
3073 | + uint32_t lkb_remid; /* id on remote partner */ | |
3074 | + uint32_t lkb_nodeid; /* id of remote partner */ | |
b7b72b66 AM |
3075 | + void * lkb_astaddr; |
3076 | + void * lkb_bastaddr; | |
3077 | + long lkb_astparam; | |
3078 | + struct list_head lkb_astqueue; /* locks with asts to deliver */ | |
3079 | + uint16_t lkb_astflags; /* COMP, BAST, DEL */ | |
3080 | + uint8_t lkb_bastmode; /* requested mode */ | |
3081 | + uint8_t lkb_highbast; /* highest mode bast sent for */ | |
3082 | + | |
3083 | + struct dlm_request * lkb_request; | |
3084 | + | |
3085 | + struct list_head lkb_deadlockq; /* ls_deadlockq list */ | |
3086 | + | |
3087 | + char * lkb_lvbptr; /* points to lksb lvb on local | |
3088 | + lock, allocated lvb on | |
3089 | + remote lock */ | |
3090 | + uint64_t * lkb_range; /* Points to an array of 64 bit | |
3091 | + numbers that represent the | |
3092 | + requested and granted ranges | |
3093 | + of the lock. NULL implies | |
3094 | + 0-ffffffffffffffff */ | |
c1c6733f AM |
3095 | +}; |
3096 | + | |
3097 | +/* | |
3098 | + * Header part of the mid-level comms system. All packets start with | |
3099 | + * this header so we can identify them. The comms packet can | |
3100 | + * contain many of these structs but they are split into individual | |
3101 | + * work units before being passed to the lockqueue routines. | |
3102 | + * Below this are the structs that this is a header for. | |
3103 | + */ | |
3104 | + | |
b7b72b66 AM |
3105 | +struct dlm_header { |
3106 | + uint8_t rh_cmd; /* What we are */ | |
3107 | + uint8_t rh_flags; /* maybe just a pad */ | |
3108 | + uint16_t rh_length; /* Length of struct (so we can | |
3109 | + send many in 1 message) */ | |
3110 | + uint32_t rh_lkid; /* Lock ID tag: ie the local | |
3111 | + (requesting) lock ID */ | |
3112 | + uint32_t rh_lockspace; /* Lockspace ID */ | |
3113 | +} __attribute__((packed)); | |
c1c6733f AM |
3114 | + |
3115 | +/* | |
3116 | + * This is the struct used in a remote lock/unlock/convert request | |
3117 | + * The mid-level comms API should turn this into native byte order. | |
3118 | + * Most "normal" lock operations will use these two structs for | |
3119 | + * communications. Recovery operations use their own structs | |
3120 | + * but still with the struct dlm_header on the front. | |
3121 | + */ | |
3122 | + | |
b7b72b66 AM |
3123 | +struct dlm_request { |
3124 | + struct dlm_header rr_header; | |
3125 | + uint32_t rr_remlkid; /* Remote lock ID */ | |
3126 | + uint32_t rr_remparid; /* Parent's remote lock ID */ | |
3127 | + uint32_t rr_flags; /* Flags from lock/convert req*/ | |
3128 | + uint64_t rr_range_start; /* Yes, these are in the right | |
3129 | + place... */ | |
3130 | + uint64_t rr_range_end; | |
3131 | + uint32_t rr_status; /* Status to return if this is | |
3132 | + an AST request */ | |
3133 | + uint32_t rr_pid; /* Owner PID of lock */ | |
3134 | + uint8_t rr_rqmode; /* Requested lock mode */ | |
3135 | + uint8_t rr_asts; /* Whether the LKB has ASTs */ | |
3136 | + char rr_lvb[DLM_LVB_LEN]; | |
3137 | + char rr_name[1]; /* As long as needs be. Only | |
3138 | + used for directory lookups. | |
3139 | + The length of this can be | |
3140 | + worked out from the packet | |
3141 | + length */ | |
3142 | +} __attribute__((packed)); | |
c1c6733f AM |
3143 | + |
3144 | +/* | |
3145 | + * This is the struct returned by a remote lock/unlock/convert request | |
3146 | + * The mid-level comms API should turn this into native byte order. | |
3147 | + */ | |
3148 | + | |
b7b72b66 AM |
3149 | +struct dlm_reply { |
3150 | + struct dlm_header rl_header; | |
3151 | + uint32_t rl_lockstate; /* Whether request was | |
3152 | + queued/granted/waiting */ | |
3153 | + uint32_t rl_nodeid; /* nodeid of lock master */ | |
3154 | + uint32_t rl_status; /* Status to return to caller */ | |
3155 | + uint32_t rl_lkid; /* Remote lkid */ | |
3156 | + char rl_lvb[DLM_LVB_LEN]; | |
3157 | +} __attribute__((packed)); | |
c1c6733f AM |
3158 | + |
3159 | +/* | |
3160 | + * Recovery comms message | |
3161 | + */ | |
3162 | + | |
b7b72b66 AM |
3163 | +struct dlm_rcom { |
3164 | + struct dlm_header rc_header; /* 32 byte aligned */ | |
3165 | + uint32_t rc_msgid; | |
3166 | + uint16_t rc_datalen; | |
3167 | + uint8_t rc_expanded; | |
3168 | + uint8_t rc_subcmd; /* secondary command */ | |
3169 | + char rc_buf[1]; /* first byte of data goes here | |
3170 | + and extends beyond here for | |
3171 | + another datalen - 1 bytes. | |
3172 | + rh_length is set to sizeof | |
3173 | + dlm_rcom + datalen - 1 */ | |
3174 | +} __attribute__((packed)); | |
c1c6733f AM |
3175 | + |
3176 | + | |
3177 | +/* A remote query: GDLM_REMCMD_QUERY */ | |
c1c6733f | 3178 | + |
b7b72b66 AM |
3179 | +struct dlm_query_request { |
3180 | + struct dlm_header rq_header; | |
3181 | + uint32_t rq_mstlkid; /* LockID on master node */ | |
3182 | + uint32_t rq_query; /* query from the user */ | |
3183 | + uint32_t rq_maxlocks; /* max number of locks we can | |
3184 | + cope with */ | |
3185 | +} __attribute__((packed)); | |
c1c6733f AM |
3186 | + |
3187 | +/* First block of a reply query. cmd = GDLM_REMCMD_QUERY */ | |
3188 | +/* There may be subsequent blocks of | |
3189 | + lock info in GDLM_REMCMD_QUERYCONT messages which just have | |
3190 | + a normal header. The last of these will have rh_flags set to | |
3191 | + GDLM_REMFLAG_ENDQUERY | |
3192 | + */ | |
c1c6733f | 3193 | + |
b7b72b66 AM |
3194 | +struct dlm_query_reply { |
3195 | + struct dlm_header rq_header; | |
3196 | + uint32_t rq_numlocks; /* Number of locks in reply */ | |
3197 | + uint32_t rq_startlock; /* Which lock this block starts | |
3198 | + at (for multi-block replies) */ | |
3199 | + uint32_t rq_status; | |
c1c6733f | 3200 | + |
b7b72b66 AM |
3201 | + /* Resource information */ |
3202 | + uint32_t rq_grantcount; /* No. of nodes on grantqueue */ | |
3203 | + uint32_t rq_convcount; /* No. of nodes on convertq */ | |
3204 | + uint32_t rq_waitcount; /* No. of nodes on waitqueue */ | |
3205 | + char rq_valblk[DLM_LVB_LEN]; /* Master's LVB | |
3206 | + contents, if | |
3207 | + applicable */ | |
3208 | +} __attribute__((packed)); | |
c1c6733f AM |
3209 | + |
3210 | +/* | |
3211 | + * Lockqueue wait lock states | |
3212 | + */ | |
3213 | + | |
b7b72b66 AM |
3214 | +#define GDLM_LQSTATE_WAIT_RSB 1 |
3215 | +#define GDLM_LQSTATE_WAIT_CONVERT 2 | |
3216 | +#define GDLM_LQSTATE_WAIT_CONDGRANT 3 | |
3217 | +#define GDLM_LQSTATE_WAIT_UNLOCK 4 | |
c1c6733f AM |
3218 | + |
3219 | +/* Commands sent across the comms link */ | |
b7b72b66 AM |
3220 | +#define GDLM_REMCMD_LOOKUP 1 |
3221 | +#define GDLM_REMCMD_LOCKREQUEST 2 | |
3222 | +#define GDLM_REMCMD_UNLOCKREQUEST 3 | |
3223 | +#define GDLM_REMCMD_CONVREQUEST 4 | |
3224 | +#define GDLM_REMCMD_LOCKREPLY 5 | |
3225 | +#define GDLM_REMCMD_LOCKGRANT 6 | |
3226 | +#define GDLM_REMCMD_SENDBAST 7 | |
3227 | +#define GDLM_REMCMD_SENDCAST 8 | |
3228 | +#define GDLM_REMCMD_REM_RESDATA 9 | |
3229 | +#define GDLM_REMCMD_RECOVERMESSAGE 20 | |
3230 | +#define GDLM_REMCMD_RECOVERREPLY 21 | |
3231 | +#define GDLM_REMCMD_QUERY 30 | |
3232 | +#define GDLM_REMCMD_QUERYREPLY 31 | |
c1c6733f AM |
3233 | + |
3234 | +/* Set in rh_flags when this is the last block of | |
3235 | + query information. Note this could also be the first | |
3236 | + block */ | |
3237 | +#define GDLM_REMFLAG_ENDQUERY 1 | |
3238 | + | |
c783755a AM |
3239 | +#ifdef CONFIG_DLM_STATS |
3240 | +struct dlm_statinfo | |
3241 | +{ | |
3242 | + unsigned int cast; | |
3243 | + unsigned int bast; | |
3244 | + unsigned int lockops; | |
3245 | + unsigned int unlockops; | |
3246 | + unsigned int convertops; | |
3247 | + unsigned long lockqueue_time[5]; | |
3248 | + unsigned long lockqueue_locks[5]; | |
3249 | +}; | |
3250 | +extern struct dlm_statinfo dlm_stats; | |
3251 | +#endif | |
3252 | + | |
c1c6733f AM |
3253 | +#ifndef BUG_ON |
3254 | +#define BUG_ON(x) | |
3255 | +#endif | |
3256 | + | |
b7b72b66 | 3257 | +void dlm_debug_log(struct dlm_ls *ls, const char *fmt, ...); |
c1c6733f | 3258 | +void dlm_debug_dump(void); |
b7b72b66 | 3259 | +void dlm_locks_dump(void); |
c1c6733f AM |
3260 | + |
3261 | +#endif /* __DLM_INTERNAL_DOT_H__ */ | |
3262 | diff -urN linux-orig/cluster/dlm/lkb.c linux-patched/cluster/dlm/lkb.c | |
3263 | --- linux-orig/cluster/dlm/lkb.c 1970-01-01 07:30:00.000000000 +0730 | |
bb1d8b11 | 3264 | +++ linux-patched/cluster/dlm/lkb.c 2004-11-03 11:31:56.000000000 +0800 |
b7b72b66 | 3265 | @@ -0,0 +1,183 @@ |
c1c6733f AM |
3266 | +/****************************************************************************** |
3267 | +******************************************************************************* | |
3268 | +** | |
3269 | +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | |
3270 | +** Copyright (C) 2004 Red Hat, Inc. All rights reserved. | |
3271 | +** | |
3272 | +** This copyrighted material is made available to anyone wishing to use, | |
3273 | +** modify, copy, or redistribute it subject to the terms and conditions | |
3274 | +** of the GNU General Public License v.2. | |
3275 | +** | |
3276 | +******************************************************************************* | |
3277 | +******************************************************************************/ | |
3278 | + | |
3279 | +/* | |
3280 | + * lkb.c | |
3281 | + * | |
3282 | + * Allocate and free locks on the lock ID table. | |
3283 | + * | |
3284 | + * This is slightly naff but I don't really like the | |
3285 | + * VMS lockidtbl stuff as it uses a realloced array | |
3286 | + * to hold the locks in. I think this is slightly better | |
3287 | + * in some ways. | |
3288 | + * | |
3289 | + * Any better suggestions gratefully received. Patrick | |
3290 | + * | |
3291 | + */ | |
3292 | + | |
3293 | +#include "dlm_internal.h" | |
3294 | +#include "lockqueue.h" | |
3295 | +#include "lkb.h" | |
3296 | +#include "config.h" | |
3297 | +#include "rsb.h" | |
3298 | +#include "memory.h" | |
3299 | +#include "lockspace.h" | |
3300 | +#include "util.h" | |
3301 | + | |
3302 | +/* | |
3303 | + * Internal find lock by ID. Caller must hold ls_lkbtbl[bucket].lock. | |
3304 | + */ | |
3305 | + | |
b7b72b66 | 3306 | +static struct dlm_lkb *__find_lock_by_id(struct dlm_ls *ls, uint32_t lkid) |
c1c6733f | 3307 | +{ |
b7b72b66 AM |
3308 | + uint16_t bucket = lkid & 0xFFFF; |
3309 | + struct dlm_lkb *lkb; | |
c1c6733f | 3310 | + |
b7b72b66 | 3311 | + if (bucket >= ls->ls_lkbtbl_size) |
c1c6733f AM |
3312 | + goto out; |
3313 | + | |
b7b72b66 | 3314 | + list_for_each_entry(lkb, &ls->ls_lkbtbl[bucket].list, lkb_idtbl_list){ |
c1c6733f AM |
3315 | + if (lkb->lkb_id == lkid) |
3316 | + return lkb; | |
3317 | + } | |
b7b72b66 | 3318 | + out: |
c1c6733f AM |
3319 | + return NULL; |
3320 | +} | |
3321 | + | |
3322 | +/* | |
c1c6733f AM |
3323 | + * LKB lkid's are 32 bits and have two 16 bit parts. The bottom 16 bits are a |
3324 | + * random number between 0 and lockidtbl_size-1. This random number specifies | |
3325 | + * the "bucket" for the lkb in lockidtbl. The upper 16 bits are a sequentially | |
3326 | + * assigned per-bucket id. | |
3327 | + * | |
3328 | + * Because the 16 bit id's per bucket can roll over, a new lkid must be checked | |
3329 | + * against the lkid of all lkb's in the bucket to avoid duplication. | |
3330 | + * | |
3331 | + */ | |
3332 | + | |
b7b72b66 | 3333 | +struct dlm_lkb *create_lkb(struct dlm_ls *ls) |
c1c6733f | 3334 | +{ |
b7b72b66 | 3335 | + struct dlm_lkb *lkb; |
c1c6733f AM |
3336 | + uint32_t lkid; |
3337 | + uint16_t bucket; | |
3338 | + | |
3339 | + lkb = allocate_lkb(ls); | |
3340 | + if (!lkb) | |
3341 | + goto out; | |
3342 | + | |
b7b72b66 AM |
3343 | + retry: |
3344 | + get_random_bytes(&bucket, sizeof(bucket)); | |
3345 | + bucket &= (ls->ls_lkbtbl_size - 1); | |
c1c6733f | 3346 | + |
b7b72b66 | 3347 | + write_lock(&ls->ls_lkbtbl[bucket].lock); |
c1c6733f | 3348 | + |
b7b72b66 AM |
3349 | + lkid = bucket | (ls->ls_lkbtbl[bucket].counter++ << 16); |
3350 | + | |
3351 | + if (__find_lock_by_id(ls, lkid)) { | |
3352 | + write_unlock(&ls->ls_lkbtbl[bucket].lock); | |
3353 | + goto retry; | |
3354 | + } | |
3355 | + | |
3356 | + lkb->lkb_id = lkid; | |
3357 | + list_add(&lkb->lkb_idtbl_list, &ls->ls_lkbtbl[bucket].list); | |
3358 | + write_unlock(&ls->ls_lkbtbl[bucket].lock); | |
3359 | + out: | |
c1c6733f AM |
3360 | + return lkb; |
3361 | +} | |
3362 | + | |
3363 | +/* | |
3364 | + * Free LKB and remove it from the lockidtbl. | |
3365 | + * NB - this always frees the lkb whereas release_rsb doesn't free an | |
3366 | + * rsb unless its reference count is zero. | |
3367 | + */ | |
3368 | + | |
b7b72b66 | 3369 | +void release_lkb(struct dlm_ls *ls, struct dlm_lkb *lkb) |
c1c6733f | 3370 | +{ |
b7b72b66 AM |
3371 | + uint16_t bucket = lkb->lkb_id & 0xFFFF; |
3372 | + | |
c1c6733f AM |
3373 | + if (lkb->lkb_status) { |
3374 | + log_error(ls, "release lkb with status %u", lkb->lkb_status); | |
3375 | + print_lkb(lkb); | |
3376 | + return; | |
3377 | + } | |
3378 | + | |
3379 | + if (lkb->lkb_parent) | |
3380 | + atomic_dec(&lkb->lkb_parent->lkb_childcnt); | |
3381 | + | |
b7b72b66 | 3382 | + write_lock(&ls->ls_lkbtbl[bucket].lock); |
c1c6733f | 3383 | + list_del(&lkb->lkb_idtbl_list); |
b7b72b66 | 3384 | + write_unlock(&ls->ls_lkbtbl[bucket].lock); |
c1c6733f AM |
3385 | + |
3386 | + /* if this is not a master copy then lvbptr points into the user's | |
3387 | + * lksb, so don't free it */ | |
3388 | + if (lkb->lkb_lvbptr && lkb->lkb_flags & GDLM_LKFLG_MSTCPY) | |
3389 | + free_lvb(lkb->lkb_lvbptr); | |
3390 | + | |
3391 | + if (lkb->lkb_range) | |
3392 | + free_range(lkb->lkb_range); | |
3393 | + | |
3394 | + free_lkb(lkb); | |
3395 | +} | |
3396 | + | |
b7b72b66 | 3397 | +struct dlm_lkb *find_lock_by_id(struct dlm_ls *ls, uint32_t lkid) |
c1c6733f | 3398 | +{ |
b7b72b66 AM |
3399 | + struct dlm_lkb *lkb; |
3400 | + uint16_t bucket = lkid & 0xFFFF; | |
c1c6733f | 3401 | + |
b7b72b66 | 3402 | + read_lock(&ls->ls_lkbtbl[bucket].lock); |
c1c6733f | 3403 | + lkb = __find_lock_by_id(ls, lkid); |
b7b72b66 | 3404 | + read_unlock(&ls->ls_lkbtbl[bucket].lock); |
c1c6733f AM |
3405 | + |
3406 | + return lkb; | |
3407 | +} | |
3408 | + | |
b7b72b66 | 3409 | +struct dlm_lkb *dlm_get_lkb(void *lockspace, uint32_t lkid) |
c1c6733f | 3410 | +{ |
b7b72b66 AM |
3411 | + struct dlm_ls *ls = find_lockspace_by_local_id(lockspace); |
3412 | + struct dlm_lkb *lkb = find_lock_by_id(ls, lkid); | |
3413 | + put_lockspace(ls); | |
3414 | + return lkb; | |
c1c6733f AM |
3415 | +} |
3416 | + | |
3417 | +/* | |
3418 | + * Initialise the range parts of an LKB. | |
3419 | + */ | |
3420 | + | |
b7b72b66 | 3421 | +int lkb_set_range(struct dlm_ls *lspace, struct dlm_lkb *lkb, uint64_t start, uint64_t end) |
c1c6733f AM |
3422 | +{ |
3423 | + int ret = -ENOMEM; | |
3424 | + | |
3425 | + /* | |
3426 | + * if this wasn't already a range lock, make it one | |
3427 | + */ | |
3428 | + if (!lkb->lkb_range) { | |
3429 | + lkb->lkb_range = allocate_range(lspace); | |
3430 | + if (!lkb->lkb_range) | |
3431 | + goto out; | |
3432 | + | |
3433 | + /* | |
3434 | + * This is needed for conversions that contain ranges where the | |
3435 | + * original lock didn't but it's harmless for new locks too. | |
3436 | + */ | |
3437 | + lkb->lkb_range[GR_RANGE_START] = 0LL; | |
3438 | + lkb->lkb_range[GR_RANGE_END] = 0xffffffffffffffffULL; | |
3439 | + } | |
3440 | + | |
3441 | + lkb->lkb_range[RQ_RANGE_START] = start; | |
3442 | + lkb->lkb_range[RQ_RANGE_END] = end; | |
3443 | + | |
3444 | + ret = 0; | |
3445 | + | |
3446 | + out: | |
3447 | + return ret; | |
3448 | +} | |
3449 | diff -urN linux-orig/cluster/dlm/lkb.h linux-patched/cluster/dlm/lkb.h | |
3450 | --- linux-orig/cluster/dlm/lkb.h 1970-01-01 07:30:00.000000000 +0730 | |
bb1d8b11 | 3451 | +++ linux-patched/cluster/dlm/lkb.h 2004-11-03 11:31:56.000000000 +0800 |
b7b72b66 | 3452 | @@ -0,0 +1,23 @@ |
c1c6733f AM |
3453 | +/****************************************************************************** |
3454 | +******************************************************************************* | |
3455 | +** | |
3456 | +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | |
3457 | +** Copyright (C) 2004 Red Hat, Inc. All rights reserved. | |
3458 | +** | |
3459 | +** This copyrighted material is made available to anyone wishing to use, | |
3460 | +** modify, copy, or redistribute it subject to the terms and conditions | |
3461 | +** of the GNU General Public License v.2. | |
3462 | +** | |
3463 | +******************************************************************************* | |
3464 | +******************************************************************************/ | |
3465 | + | |
3466 | +#ifndef __LKB_DOT_H__ | |
3467 | +#define __LKB_DOT_H__ | |
3468 | + | |
b7b72b66 AM |
3469 | +struct dlm_lkb *find_lock_by_id(struct dlm_ls *ls, uint32_t lkid); |
3470 | +struct dlm_lkb *create_lkb(struct dlm_ls *ls); | |
3471 | +void release_lkb(struct dlm_ls *ls, struct dlm_lkb *lkb); | |
3472 | +struct dlm_lkb *dlm_get_lkb(void *ls, uint32_t lkid); | |
3473 | +int lkb_set_range(struct dlm_ls *lspace, struct dlm_lkb *lkb, uint64_t start, uint64_t end); | |
c1c6733f AM |
3474 | + |
3475 | +#endif /* __LKB_DOT_H__ */ | |
3476 | diff -urN linux-orig/cluster/dlm/locking.c linux-patched/cluster/dlm/locking.c | |
3477 | --- linux-orig/cluster/dlm/locking.c 1970-01-01 07:30:00.000000000 +0730 | |
bb1d8b11 | 3478 | +++ linux-patched/cluster/dlm/locking.c 2004-11-03 11:31:56.000000000 +0800 |
c783755a | 3479 | @@ -0,0 +1,1378 @@ |
c1c6733f AM |
3480 | +/****************************************************************************** |
3481 | +******************************************************************************* | |
3482 | +** | |
3483 | +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | |
3484 | +** Copyright (C) 2004 Red Hat, Inc. All rights reserved. | |
b7b72b66 | 3485 | +** |
c1c6733f AM |
3486 | +** This copyrighted material is made available to anyone wishing to use, |
3487 | +** modify, copy, or redistribute it subject to the terms and conditions | |
3488 | +** of the GNU General Public License v.2. | |
3489 | +** | |
3490 | +******************************************************************************* | |
3491 | +******************************************************************************/ | |
3492 | + | |
b7b72b66 | 3493 | +/* |
c1c6733f AM |
3494 | + * locking.c |
3495 | + * | |
3496 | + * This is where the main work of the DLM goes on | |
3497 | + * | |
3498 | + */ | |
3499 | + | |
3500 | +#include "dlm_internal.h" | |
3501 | +#include "lockqueue.h" | |
3502 | +#include "locking.h" | |
3503 | +#include "lockspace.h" | |
3504 | +#include "lkb.h" | |
3505 | +#include "nodes.h" | |
3506 | +#include "dir.h" | |
3507 | +#include "ast.h" | |
3508 | +#include "memory.h" | |
3509 | +#include "rsb.h" | |
b7b72b66 AM |
3510 | +#include "util.h" |
3511 | +#include "lowcomms.h" | |
3512 | + | |
3513 | +extern struct list_head lslist; | |
c1c6733f AM |
3514 | + |
3515 | +#define MAX(a, b) (((a) > (b)) ? (a) : (b)) | |
3516 | + | |
b7b72b66 | 3517 | +/* |
c1c6733f AM |
3518 | + * Lock compatibility matrix - thanks Steve | |
3519 | + * UN = Unlocked state. Not really a state, used as a flag | |
3520 | + * PD = Padding. Used to make the matrix a nice power of two in size | |
3521 | + * Other states are the same as the VMS DLM. | |
3522 | + * Usage: matrix[grmode+1][rqmode+1] (although m[rq+1][gr+1] is the same) | |
3523 | + */ | |
3524 | + | |
3525 | +#define modes_compat(gr, rq) \ | |
3526 | + __dlm_compat_matrix[(gr)->lkb_grmode + 1][(rq)->lkb_rqmode + 1] | |
3527 | + | |
3528 | +const int __dlm_compat_matrix[8][8] = { | |
3529 | + /* UN NL CR CW PR PW EX PD */ | |
3530 | + {1, 1, 1, 1, 1, 1, 1, 0}, /* UN */ | |
3531 | + {1, 1, 1, 1, 1, 1, 1, 0}, /* NL */ | |
3532 | + {1, 1, 1, 1, 1, 1, 0, 0}, /* CR */ | |
3533 | + {1, 1, 1, 1, 0, 0, 0, 0}, /* CW */ | |
3534 | + {1, 1, 1, 0, 1, 0, 0, 0}, /* PR */ | |
3535 | + {1, 1, 1, 0, 0, 0, 0, 0}, /* PW */ | |
3536 | + {1, 1, 0, 0, 0, 0, 0, 0}, /* EX */ | |
3537 | + {0, 0, 0, 0, 0, 0, 0, 0} /* PD */ | |
3538 | +}; | |
3539 | + | |
b7b72b66 | 3540 | +/* |
c1c6733f AM |
3541 | + * Compatibility matrix for conversions with QUECVT set. |
3542 | + * Granted mode is the row; requested mode is the column. | |
3543 | + * Usage: matrix[grmode+1][rqmode+1] | |
3544 | + */ | |
3545 | + | |
3546 | +const int __quecvt_compat_matrix[8][8] = { | |
3547 | + /* UN NL CR CW PR PW EX PD */ | |
3548 | + {0, 0, 0, 0, 0, 0, 0, 0}, /* UN */ | |
3549 | + {0, 0, 1, 1, 1, 1, 1, 0}, /* NL */ | |
3550 | + {0, 0, 0, 1, 1, 1, 1, 0}, /* CR */ | |
3551 | + {0, 0, 0, 0, 1, 1, 1, 0}, /* CW */ | |
3552 | + {0, 0, 0, 1, 0, 1, 1, 0}, /* PR */ | |
3553 | + {0, 0, 0, 0, 0, 0, 1, 0}, /* PW */ | |
3554 | + {0, 0, 0, 0, 0, 0, 0, 0}, /* EX */ | |
3555 | + {0, 0, 0, 0, 0, 0, 0, 0} /* PD */ | |
3556 | +}; | |
3557 | + | |
b7b72b66 | 3558 | +/* |
c1c6733f AM |
3559 | + * This defines the direction of transfer of LVB data. |
3560 | + * Granted mode is the row; requested mode is the column. | |
3561 | + * Usage: matrix[grmode+1][rqmode+1] | |
3562 | + * 1 = LVB is returned to the caller | |
3563 | + * 0 = LVB is written to the resource | |
3564 | + * -1 = nothing happens to the LVB | |
3565 | + */ | |
3566 | + | |
3567 | +const int __lvb_operations[8][8] = { | |
3568 | + /* UN NL CR CW PR PW EX PD*/ | |
3569 | + { -1, 1, 1, 1, 1, 1, 1, -1 }, /* UN */ | |
3570 | + { -1, 1, 1, 1, 1, 1, 1, 0 }, /* NL */ | |
3571 | + { -1, -1, 1, 1, 1, 1, 1, 0 }, /* CR */ | |
3572 | + { -1, -1, -1, 1, 1, 1, 1, 0 }, /* CW */ | |
3573 | + { -1, -1, -1, -1, 1, 1, 1, 0 }, /* PR */ | |
3574 | + { -1, 0, 0, 0, 0, 0, 1, 0 }, /* PW */ | |
3575 | + { -1, 0, 0, 0, 0, 0, 0, 0 }, /* EX */ | |
3576 | + { -1, 0, 0, 0, 0, 0, 0, 0 } /* PD */ | |
3577 | +}; | |
3578 | + | |
b7b72b66 AM |
3579 | +static void grant_lock(struct dlm_lkb *lkb, int send_remote); |
3580 | +static void send_blocking_asts(struct dlm_rsb *rsb, struct dlm_lkb *lkb); | |
3581 | +static void send_blocking_asts_all(struct dlm_rsb *rsb, struct dlm_lkb *lkb); | |
3582 | +static int convert_lock(struct dlm_ls *ls, int mode, struct dlm_lksb *lksb, | |
3583 | + uint32_t flags, void *ast, void *astarg, void *bast, | |
c1c6733f | 3584 | + struct dlm_range *range); |
b7b72b66 AM |
3585 | +static int dlm_lock_stage1(struct dlm_ls *ls, struct dlm_lkb *lkb, |
3586 | + uint32_t flags, char *name, int namelen); | |
c1c6733f AM |
3587 | + |
3588 | + | |
b7b72b66 | 3589 | +inline int dlm_modes_compat(int mode1, int mode2) |
c1c6733f | 3590 | +{ |
b7b72b66 AM |
3591 | + return __dlm_compat_matrix[mode1 + 1][mode2 + 1]; |
3592 | +} | |
3593 | + | |
3594 | +static inline int first_in_list(struct dlm_lkb *lkb, struct list_head *head) | |
3595 | +{ | |
3596 | + struct dlm_lkb *first = list_entry(head->next, struct dlm_lkb, lkb_statequeue); | |
c1c6733f AM |
3597 | + |
3598 | + if (lkb->lkb_id == first->lkb_id) | |
3599 | + return 1; | |
3600 | + | |
3601 | + return 0; | |
3602 | +} | |
3603 | + | |
b7b72b66 | 3604 | +/* |
c1c6733f AM |
3605 | + * Return 1 if the locks' ranges overlap |
3606 | + * If the lkb has no range then it is assumed to cover 0-ffffffff.ffffffff | |
3607 | + */ | |
3608 | + | |
b7b72b66 | 3609 | +static inline int ranges_overlap(struct dlm_lkb *lkb1, struct dlm_lkb *lkb2) |
c1c6733f AM |
3610 | +{ |
3611 | + if (!lkb1->lkb_range || !lkb2->lkb_range) | |
3612 | + return 1; | |
3613 | + | |
3614 | + if (lkb1->lkb_range[RQ_RANGE_END] < lkb2->lkb_range[GR_RANGE_START] || | |
3615 | + lkb1->lkb_range[RQ_RANGE_START] > lkb2->lkb_range[GR_RANGE_END]) | |
3616 | + return 0; | |
3617 | + | |
3618 | + return 1; | |
3619 | +} | |
3620 | + | |
3621 | +/* | |
c1c6733f AM |
3622 | + * "A conversion deadlock arises with a pair of lock requests in the converting |
3623 | + * queue for one resource. The granted mode of each lock blocks the requested | |
3624 | + * mode of the other lock." | |
3625 | + */ | |
3626 | + | |
b7b72b66 AM |
3627 | +static struct dlm_lkb *conversion_deadlock_detect(struct dlm_rsb *rsb, |
3628 | + struct dlm_lkb *lkb) | |
c1c6733f | 3629 | +{ |
b7b72b66 | 3630 | + struct dlm_lkb *this; |
c1c6733f AM |
3631 | + |
3632 | + list_for_each_entry(this, &rsb->res_convertqueue, lkb_statequeue) { | |
3633 | + if (this == lkb) | |
3634 | + continue; | |
3635 | + | |
3636 | + if (!ranges_overlap(lkb, this)) | |
3637 | + continue; | |
3638 | + | |
3639 | + if (!modes_compat(this, lkb) && !modes_compat(lkb, this)) | |
b7b72b66 | 3640 | + return this; |
c1c6733f | 3641 | + } |
b7b72b66 AM |
3642 | + |
3643 | + return NULL; | |
c1c6733f AM |
3644 | +} |
3645 | + | |
3646 | +/* | |
3647 | + * Check if the given lkb conflicts with another lkb on the queue. | |
3648 | + */ | |
3649 | + | |
b7b72b66 | 3650 | +static int queue_conflict(struct list_head *head, struct dlm_lkb *lkb) |
c1c6733f | 3651 | +{ |
b7b72b66 | 3652 | + struct dlm_lkb *this; |
c1c6733f AM |
3653 | + |
3654 | + list_for_each_entry(this, head, lkb_statequeue) { | |
3655 | + if (this == lkb) | |
3656 | + continue; | |
3657 | + if (ranges_overlap(lkb, this) && !modes_compat(this, lkb)) | |
3658 | + return TRUE; | |
3659 | + } | |
3660 | + return FALSE; | |
3661 | +} | |
3662 | + | |
3663 | +/* | |
b7b72b66 AM |
3664 | + * Return 1 if the lock can be granted, 0 otherwise. |
3665 | + * Also detect and resolve conversion deadlocks. | |
3666 | + * | |
3667 | + * lkb is the lock to be granted | |
3668 | + * | |
3669 | + * now is 1 if the function is being called in the context of the | |
3670 | + * immediate request, it is 0 if called later, after the lock has been | |
3671 | + * queued. | |
3672 | + * | |
3673 | + * References are from chapter 6 of "VAXcluster Principles" by Roy Davis | |
c1c6733f AM |
3674 | + */ |
3675 | + | |
b7b72b66 | 3676 | +static int can_be_granted(struct dlm_rsb *r, struct dlm_lkb *lkb, int now) |
c1c6733f | 3677 | +{ |
b7b72b66 | 3678 | + int8_t conv = (lkb->lkb_grmode != DLM_LOCK_IV); |
c1c6733f | 3679 | + |
b7b72b66 AM |
3680 | + /* |
3681 | + * 6-10: Version 5.4 introduced an option to address the phenomenon of | |
3682 | + * a new request for a NL mode lock being blocked. | |
3683 | + * | |
3684 | + * 6-11: If the optional EXPEDITE flag is used with the new NL mode | |
3685 | + * request, then it would be granted. In essence, the use of this flag | |
3686 | + * tells the Lock Manager to expedite this request by not considering | |
3687 | + * what may be in the CONVERTING or WAITING queues... As of this | |
3688 | + * writing, the EXPEDITE flag can be used only with new requests for NL | |
3689 | + * mode locks. This flag is not valid for conversion requests. | |
3690 | + * | |
3691 | + * A shortcut. Earlier checks return an error if EXPEDITE is used in a | |
3692 | + * conversion or used with a non-NL requested mode. We also know an | |
3693 | + * EXPEDITE request is always granted immediately, so now must always | |
3694 | + * be 1. The full condition to grant an expedite request: (now && | |
3695 | + * !conv && lkb->rqmode == DLM_LOCK_NL && (flags & EXPEDITE)) can | |
3696 | + * therefore be shortened to just checking the flag. | |
3697 | + */ | |
c1c6733f | 3698 | + |
b7b72b66 AM |
3699 | + if (lkb->lkb_lockqueue_flags & DLM_LKF_EXPEDITE) |
3700 | + return TRUE; | |
c1c6733f | 3701 | + |
b7b72b66 AM |
3702 | + /* |
3703 | + * A shortcut. Without this, !queue_conflict(grantqueue, lkb) would be | |
3704 | + * added to the remaining conditions. | |
3705 | + */ | |
c1c6733f | 3706 | + |
b7b72b66 AM |
3707 | + if (queue_conflict(&r->res_grantqueue, lkb)) |
3708 | + goto out; | |
3709 | + | |
3710 | + /* | |
3711 | + * 6-3: By default, a conversion request is immediately granted if the | |
3712 | + * requested mode is compatible with the modes of all other granted | |
3713 | + * locks | |
3714 | + */ | |
3715 | + | |
3716 | + if (queue_conflict(&r->res_convertqueue, lkb)) | |
3717 | + goto out; | |
3718 | + | |
3719 | + /* | |
3720 | + * 6-5: But the default algorithm for deciding whether to grant or | |
3721 | + * queue conversion requests does not by itself guarantee that such | |
3722 | + * requests are serviced on a "first come first serve" basis. This, in | |
3723 | + * turn, can lead to a phenomenon known as "indefinite postponement". | |
3724 | + * | |
3725 | + * 6-7: This issue is dealt with by using the optional QUECVT flag with | |
3726 | + * the system service employed to request a lock conversion. This flag | |
3727 | + * forces certain conversion requests to be queued, even if they are | |
3728 | + * compatible with the granted modes of other locks on the same | |
3729 | + * resource. Thus, the use of this flag results in conversion requests | |
3730 | + * being ordered on a "first come first serve" basis. | |
3731 | + */ | |
c1c6733f | 3732 | + |
b7b72b66 | 3733 | + if (now && conv && !(lkb->lkb_lockqueue_flags & DLM_LKF_QUECVT)) |
c1c6733f AM |
3734 | + return TRUE; |
3735 | + | |
b7b72b66 AM |
3736 | + /* |
3737 | + * When using range locks the NOORDER flag is set to avoid the standard | |
3738 | + * vms rules on grant order. | |
3739 | + */ | |
c1c6733f | 3740 | + |
b7b72b66 AM |
3741 | + if (lkb->lkb_lockqueue_flags & DLM_LKF_NOORDER) |
3742 | + return TRUE; | |
c1c6733f | 3743 | + |
b7b72b66 AM |
3744 | + /* |
3745 | + * 6-3: Once in that queue [CONVERTING], a conversion request cannot be | |
3746 | + * granted until all other conversion requests ahead of it are granted | |
3747 | + * and/or canceled. | |
3748 | + */ | |
c1c6733f | 3749 | + |
b7b72b66 AM |
3750 | + if (!now && conv && first_in_list(lkb, &r->res_convertqueue)) |
3751 | + return TRUE; | |
c1c6733f | 3752 | + |
b7b72b66 AM |
3753 | + /* |
3754 | + * 6-4: By default, a new request is immediately granted only if all | |
3755 | + * three of the following conditions are satisfied when the request is | |
3756 | + * issued: | |
3757 | + * - The queue of ungranted conversion requests for the resource is | |
3758 | + * empty. | |
3759 | + * - The queue of ungranted new requests for the resource is empty. | |
3760 | + * - The mode of the new request is compatible with the most | |
3761 | + * restrictive mode of all granted locks on the resource. | |
3762 | + */ | |
c1c6733f | 3763 | + |
b7b72b66 AM |
3764 | + if (now && !conv && list_empty(&r->res_convertqueue) && |
3765 | + list_empty(&r->res_waitqueue)) | |
3766 | + return TRUE; | |
c1c6733f | 3767 | + |
b7b72b66 AM |
3768 | + /* |
3769 | + * 6-4: Once a lock request is in the queue of ungranted new requests, | |
3770 | + * it cannot be granted until the queue of ungranted conversion | |
3771 | + * requests is empty, all ungranted new requests ahead of it are | |
3772 | + * granted and/or canceled, and it is compatible with the granted mode | |
3773 | + * of the most restrictive lock granted on the resource. | |
3774 | + */ | |
3775 | + | |
3776 | + if (!now && !conv && list_empty(&r->res_convertqueue) && | |
3777 | + first_in_list(lkb, &r->res_waitqueue)) | |
c1c6733f AM |
3778 | + return TRUE; |
3779 | + | |
b7b72b66 AM |
3780 | + out: |
3781 | + /* | |
3782 | + * The following, enabled by CONVDEADLK, departs from VMS. | |
3783 | + */ | |
3784 | + | |
3785 | + if (now && conv && (lkb->lkb_lockqueue_flags & DLM_LKF_CONVDEADLK) && | |
3786 | + conversion_deadlock_detect(r, lkb)) { | |
3787 | + lkb->lkb_grmode = DLM_LOCK_NL; | |
3788 | + lkb->lkb_flags |= GDLM_LKFLG_DEMOTED; | |
3789 | + } | |
3790 | + | |
c1c6733f AM |
3791 | + return FALSE; |
3792 | +} | |
3793 | + | |
3794 | +int dlm_lock(void *lockspace, | |
3795 | + uint32_t mode, | |
3796 | + struct dlm_lksb *lksb, | |
3797 | + uint32_t flags, | |
3798 | + void *name, | |
3799 | + unsigned int namelen, | |
3800 | + uint32_t parent, | |
3801 | + void (*ast) (void *astarg), | |
3802 | + void *astarg, | |
3803 | + void (*bast) (void *astarg, int mode), | |
3804 | + struct dlm_range *range) | |
3805 | +{ | |
b7b72b66 AM |
3806 | + struct dlm_ls *lspace; |
3807 | + struct dlm_lkb *lkb = NULL, *parent_lkb = NULL; | |
c1c6733f AM |
3808 | + int ret = -EINVAL; |
3809 | + | |
3810 | + lspace = find_lockspace_by_local_id(lockspace); | |
3811 | + if (!lspace) | |
b7b72b66 | 3812 | + return ret; |
c1c6733f AM |
3813 | + |
3814 | + if (mode < 0 || mode > DLM_LOCK_EX) | |
3815 | + goto out; | |
3816 | + | |
b7b72b66 | 3817 | + if (!(flags & DLM_LKF_CONVERT) && (namelen > DLM_RESNAME_MAXLEN)) |
c1c6733f AM |
3818 | + goto out; |
3819 | + | |
3820 | + if (flags & DLM_LKF_CANCEL) | |
3821 | + goto out; | |
3822 | + | |
3823 | + if (flags & DLM_LKF_QUECVT && !(flags & DLM_LKF_CONVERT)) | |
3824 | + goto out; | |
3825 | + | |
b7b72b66 AM |
3826 | + if (flags & DLM_LKF_CONVDEADLK && !(flags & DLM_LKF_CONVERT)) |
3827 | + goto out; | |
3828 | + | |
3829 | + if (flags & DLM_LKF_CONVDEADLK && flags & DLM_LKF_NOQUEUE) | |
3830 | + goto out; | |
3831 | + | |
3832 | + if (flags & DLM_LKF_EXPEDITE && flags & DLM_LKF_CONVERT) | |
c1c6733f AM |
3833 | + goto out; |
3834 | + | |
3835 | + if (flags & DLM_LKF_EXPEDITE && flags & DLM_LKF_QUECVT) | |
3836 | + goto out; | |
3837 | + | |
3838 | + if (flags & DLM_LKF_EXPEDITE && flags & DLM_LKF_NOQUEUE) | |
3839 | + goto out; | |
3840 | + | |
b7b72b66 | 3841 | + if (flags & DLM_LKF_EXPEDITE && (mode != DLM_LOCK_NL)) |
c1c6733f AM |
3842 | + goto out; |
3843 | + | |
b7b72b66 | 3844 | + if (!ast || !lksb) |
c1c6733f AM |
3845 | + goto out; |
3846 | + | |
3847 | + if ((flags & DLM_LKF_VALBLK) && !lksb->sb_lvbptr) | |
3848 | + goto out; | |
3849 | + | |
b7b72b66 | 3850 | + /* |
c1c6733f AM |
3851 | + * Take conversion path. |
3852 | + */ | |
3853 | + | |
3854 | + if (flags & DLM_LKF_CONVERT) { | |
3855 | + ret = convert_lock(lspace, mode, lksb, flags, ast, astarg, | |
3856 | + bast, range); | |
3857 | + goto out; | |
3858 | + } | |
3859 | + | |
c783755a AM |
3860 | +#ifdef CONFIG_DLM_STATS |
3861 | + dlm_stats.lockops++; | |
3862 | +#endif | |
b7b72b66 | 3863 | + /* |
c1c6733f AM |
3864 | + * Take new lock path. |
3865 | + */ | |
3866 | + | |
3867 | + if (parent) { | |
3868 | + down_read(&lspace->ls_unlock_sem); | |
3869 | + | |
3870 | + parent_lkb = find_lock_by_id(lspace, parent); | |
3871 | + | |
3872 | + if (!parent_lkb || | |
3873 | + parent_lkb->lkb_flags & GDLM_LKFLG_DELETED || | |
3874 | + parent_lkb->lkb_flags & GDLM_LKFLG_MSTCPY || | |
3875 | + parent_lkb->lkb_status != GDLM_LKSTS_GRANTED) { | |
3876 | + up_read(&lspace->ls_unlock_sem); | |
3877 | + goto out; | |
3878 | + } | |
3879 | + | |
3880 | + atomic_inc(&parent_lkb->lkb_childcnt); | |
3881 | + up_read(&lspace->ls_unlock_sem); | |
3882 | + } | |
3883 | + | |
3884 | + down_read(&lspace->ls_in_recovery); | |
3885 | + | |
3886 | + ret = -ENOMEM; | |
3887 | + | |
3888 | + lkb = create_lkb(lspace); | |
3889 | + if (!lkb) | |
3890 | + goto fail_dec; | |
3891 | + lkb->lkb_astaddr = ast; | |
3892 | + lkb->lkb_astparam = (long) astarg; | |
3893 | + lkb->lkb_bastaddr = bast; | |
3894 | + lkb->lkb_rqmode = mode; | |
3895 | + lkb->lkb_grmode = DLM_LOCK_IV; | |
b7b72b66 | 3896 | + lkb->lkb_nodeid = -1; |
c1c6733f AM |
3897 | + lkb->lkb_lksb = lksb; |
3898 | + lkb->lkb_parent = parent_lkb; | |
3899 | + lkb->lkb_lockqueue_flags = flags; | |
3900 | + lkb->lkb_lvbptr = lksb->sb_lvbptr; | |
3901 | + | |
b7b72b66 AM |
3902 | + if (!in_interrupt() && current) |
3903 | + lkb->lkb_ownpid = (int) current->pid; | |
3904 | + else | |
3905 | + lkb->lkb_ownpid = 0; | |
3906 | + | |
c1c6733f AM |
3907 | + if (range) { |
3908 | + if (range->ra_start > range->ra_end) { | |
3909 | + ret = -EINVAL; | |
3910 | + goto fail_free; | |
3911 | + } | |
3912 | + | |
3913 | + if (lkb_set_range(lspace, lkb, range->ra_start, range->ra_end)) | |
3914 | + goto fail_free; | |
3915 | + } | |
3916 | + | |
3917 | + /* Convert relevant flags to internal numbers */ | |
3918 | + if (flags & DLM_LKF_VALBLK) | |
3919 | + lkb->lkb_flags |= GDLM_LKFLG_VALBLK; | |
3920 | + if (flags & DLM_LKF_PERSISTENT) | |
3921 | + lkb->lkb_flags |= GDLM_LKFLG_PERSISTENT; | |
3922 | + if (flags & DLM_LKF_NODLCKWT) | |
3923 | + lkb->lkb_flags |= GDLM_LKFLG_NODLCKWT; | |
3924 | + | |
3925 | + lksb->sb_lkid = lkb->lkb_id; | |
3926 | + | |
3927 | + ret = dlm_lock_stage1(lspace, lkb, flags, name, namelen); | |
3928 | + if (ret) | |
3929 | + goto fail_free; | |
3930 | + | |
3931 | + up_read(&lspace->ls_in_recovery); | |
3932 | + | |
3933 | + wake_astd(); | |
3934 | + | |
b7b72b66 | 3935 | + put_lockspace(lspace); |
c1c6733f AM |
3936 | + return 0; |
3937 | + | |
3938 | + fail_free: | |
3939 | + release_lkb(lspace, lkb); | |
3940 | + goto fail_unlock; | |
3941 | + | |
3942 | + fail_dec: | |
3943 | + if (parent_lkb) | |
3944 | + atomic_dec(&parent_lkb->lkb_childcnt); | |
3945 | + | |
3946 | + fail_unlock: | |
3947 | + up_read(&lspace->ls_in_recovery); | |
3948 | + | |
3949 | + out: | |
b7b72b66 | 3950 | + put_lockspace(lspace); |
c1c6733f AM |
3951 | + return ret; |
3952 | +} | |
3953 | + | |
b7b72b66 AM |
3954 | +int dlm_lock_stage1(struct dlm_ls *ls, struct dlm_lkb *lkb, uint32_t flags, |
3955 | + char *name, int namelen) | |
c1c6733f | 3956 | +{ |
b7b72b66 AM |
3957 | + struct dlm_rsb *rsb, *parent_rsb = NULL; |
3958 | + struct dlm_lkb *parent_lkb = lkb->lkb_parent; | |
c1c6733f | 3959 | + uint32_t nodeid; |
b7b72b66 | 3960 | + int error, dir_error = 0; |
c1c6733f AM |
3961 | + |
3962 | + if (parent_lkb) | |
3963 | + parent_rsb = parent_lkb->lkb_resource; | |
3964 | + | |
b7b72b66 | 3965 | + error = find_rsb(ls, parent_rsb, name, namelen, CREATE, &rsb); |
c1c6733f | 3966 | + if (error) |
b7b72b66 | 3967 | + return error; |
c1c6733f | 3968 | + lkb->lkb_resource = rsb; |
b7b72b66 | 3969 | + down_write(&rsb->res_lock); |
c1c6733f | 3970 | + |
b7b72b66 AM |
3971 | + log_debug(ls, "(%d) rq %u %x \"%s\"", lkb->lkb_ownpid, lkb->lkb_rqmode, |
3972 | + lkb->lkb_id, rsb->res_name); | |
3973 | + /* | |
c1c6733f AM |
3974 | + * Next stage, do we need to find the master or can |
3975 | + * we get on with the real locking work ? | |
3976 | + */ | |
3977 | + | |
b7b72b66 | 3978 | + retry: |
c1c6733f AM |
3979 | + if (rsb->res_nodeid == -1) { |
3980 | + if (get_directory_nodeid(rsb) != our_nodeid()) { | |
b7b72b66 AM |
3981 | + remote_stage(lkb, GDLM_LQSTATE_WAIT_RSB); |
3982 | + up_write(&rsb->res_lock); | |
3983 | + return 0; | |
c1c6733f AM |
3984 | + } |
3985 | + | |
b7b72b66 AM |
3986 | + error = dlm_dir_lookup(ls, our_nodeid(), rsb->res_name, |
3987 | + rsb->res_length, &nodeid); | |
3988 | + if (error) { | |
3989 | + DLM_ASSERT(error == -EEXIST,); | |
b7b72b66 AM |
3990 | + msleep(500); |
3991 | + dir_error = error; | |
3992 | + goto retry; | |
3993 | + } | |
c1c6733f | 3994 | + |
b7b72b66 AM |
3995 | + if (nodeid == our_nodeid()) { |
3996 | + set_bit(RESFL_MASTER, &rsb->res_flags); | |
3997 | + rsb->res_nodeid = 0; | |
3998 | + } else { | |
3999 | + clear_bit(RESFL_MASTER, &rsb->res_flags); | |
4000 | + rsb->res_nodeid = nodeid; | |
4001 | + } | |
4002 | + | |
4003 | + if (dir_error) { | |
4004 | + log_all(ls, "dir lookup retry %x %u", lkb->lkb_id, | |
4005 | + nodeid); | |
4006 | + } | |
c1c6733f AM |
4007 | + } |
4008 | + | |
b7b72b66 AM |
4009 | + lkb->lkb_nodeid = rsb->res_nodeid; |
4010 | + up_write(&rsb->res_lock); | |
c1c6733f | 4011 | + |
b7b72b66 | 4012 | + error = dlm_lock_stage2(ls, lkb, rsb, flags); |
c1c6733f AM |
4013 | + |
4014 | + return error; | |
4015 | +} | |
4016 | + | |
b7b72b66 | 4017 | +/* |
c1c6733f AM |
4018 | + * Locking routine called after we have an RSB, either a copy of a remote one |
4019 | + * or a local one, or perhaps a shiny new one all of our very own | |
4020 | + */ | |
4021 | + | |
b7b72b66 AM |
4022 | +int dlm_lock_stage2(struct dlm_ls *ls, struct dlm_lkb *lkb, struct dlm_rsb *rsb, |
4023 | + uint32_t flags) | |
c1c6733f AM |
4024 | +{ |
4025 | + int error = 0; | |
4026 | + | |
b7b72b66 AM |
4027 | + DLM_ASSERT(rsb->res_nodeid != -1, print_lkb(lkb); print_rsb(rsb);); |
4028 | + | |
c1c6733f AM |
4029 | + if (rsb->res_nodeid) { |
4030 | + res_lkb_enqueue(rsb, lkb, GDLM_LKSTS_WAITING); | |
4031 | + error = remote_stage(lkb, GDLM_LQSTATE_WAIT_CONDGRANT); | |
4032 | + } else { | |
4033 | + dlm_lock_stage3(lkb); | |
4034 | + } | |
4035 | + | |
4036 | + return error; | |
4037 | +} | |
4038 | + | |
b7b72b66 | 4039 | +/* |
c1c6733f AM |
4040 | + * Called on an RSB's master node to do stage2 locking for a remote lock |
4041 | + * request. Returns a proper lkb with rsb ready for lock processing. | |
4042 | + * This is analogous to sections of dlm_lock() and dlm_lock_stage1(). | |
4043 | + */ | |
4044 | + | |
b7b72b66 AM |
4045 | +struct dlm_lkb *remote_stage2(int remote_nodeid, struct dlm_ls *ls, |
4046 | + struct dlm_request *freq) | |
c1c6733f | 4047 | +{ |
b7b72b66 AM |
4048 | + struct dlm_rsb *rsb = NULL, *parent_rsb = NULL; |
4049 | + struct dlm_lkb *lkb = NULL, *parent_lkb = NULL; | |
c1c6733f AM |
4050 | + int error, namelen; |
4051 | + | |
4052 | + if (freq->rr_remparid) { | |
4053 | + parent_lkb = find_lock_by_id(ls, freq->rr_remparid); | |
4054 | + if (!parent_lkb) | |
4055 | + goto fail; | |
4056 | + | |
4057 | + atomic_inc(&parent_lkb->lkb_childcnt); | |
4058 | + parent_rsb = parent_lkb->lkb_resource; | |
4059 | + } | |
4060 | + | |
b7b72b66 | 4061 | + /* |
c1c6733f AM |
4062 | + * A new MSTCPY lkb. Initialize lkb fields including the real lkid and |
4063 | + * node actually holding the (non-MSTCPY) lkb. AST addresses are just | |
4064 | + * flags in the master copy. | |
4065 | + */ | |
4066 | + | |
4067 | + lkb = create_lkb(ls); | |
4068 | + if (!lkb) | |
4069 | + goto fail_dec; | |
4070 | + lkb->lkb_grmode = DLM_LOCK_IV; | |
4071 | + lkb->lkb_rqmode = freq->rr_rqmode; | |
4072 | + lkb->lkb_parent = parent_lkb; | |
b7b72b66 AM |
4073 | + lkb->lkb_astaddr = (void *) (long) (freq->rr_asts & AST_COMP); |
4074 | + lkb->lkb_bastaddr = (void *) (long) (freq->rr_asts & AST_BAST); | |
c1c6733f AM |
4075 | + lkb->lkb_nodeid = remote_nodeid; |
4076 | + lkb->lkb_remid = freq->rr_header.rh_lkid; | |
4077 | + lkb->lkb_flags = GDLM_LKFLG_MSTCPY; | |
4078 | + lkb->lkb_lockqueue_flags = freq->rr_flags; | |
4079 | + | |
4080 | + if (lkb->lkb_lockqueue_flags & DLM_LKF_VALBLK) { | |
4081 | + lkb->lkb_flags |= GDLM_LKFLG_VALBLK; | |
4082 | + allocate_and_copy_lvb(ls, &lkb->lkb_lvbptr, freq->rr_lvb); | |
4083 | + if (!lkb->lkb_lvbptr) | |
4084 | + goto fail_free; | |
4085 | + } | |
4086 | + | |
4087 | + if (lkb->lkb_lockqueue_flags & GDLM_LKFLG_RANGE) { | |
4088 | + error = lkb_set_range(ls, lkb, freq->rr_range_start, | |
4089 | + freq->rr_range_end); | |
4090 | + if (error) | |
4091 | + goto fail_free; | |
4092 | + } | |
4093 | + | |
b7b72b66 | 4094 | + /* |
c1c6733f AM |
4095 | + * Get the RSB which this lock is for. Create a new RSB if this is a |
4096 | + * new lock on a new resource. We must be the master of any new rsb. | |
4097 | + */ | |
4098 | + | |
4099 | + namelen = freq->rr_header.rh_length - sizeof(*freq) + 1; | |
4100 | + | |
b7b72b66 | 4101 | + error = find_rsb(ls, parent_rsb, freq->rr_name, namelen, MASTER, &rsb); |
c1c6733f AM |
4102 | + if (error) |
4103 | + goto fail_free; | |
4104 | + | |
b7b72b66 AM |
4105 | + if (!rsb) { |
4106 | + log_debug(ls, "send einval to %u", remote_nodeid); | |
4107 | + /* print_name(freq->rr_name, namelen); */ | |
4108 | + lkb->lkb_retstatus = -EINVAL; | |
4109 | + goto out; | |
4110 | + } | |
4111 | + | |
c1c6733f | 4112 | + lkb->lkb_resource = rsb; |
c1c6733f | 4113 | + |
b7b72b66 AM |
4114 | + log_debug(ls, "(%d) rq %u from %u %x \"%s\"", |
4115 | + lkb->lkb_ownpid, lkb->lkb_rqmode, remote_nodeid, | |
4116 | + lkb->lkb_id, rsb->res_name); | |
c1c6733f | 4117 | + |
b7b72b66 AM |
4118 | + out: |
4119 | + return lkb; | |
c1c6733f AM |
4120 | + |
4121 | + fail_free: | |
4122 | + /* release_lkb handles parent */ | |
4123 | + release_lkb(ls, lkb); | |
4124 | + parent_lkb = NULL; | |
4125 | + | |
4126 | + fail_dec: | |
4127 | + if (parent_lkb) | |
4128 | + atomic_dec(&parent_lkb->lkb_childcnt); | |
4129 | + fail: | |
4130 | + return NULL; | |
4131 | +} | |
4132 | + | |
b7b72b66 | 4133 | +/* |
c1c6733f AM |
4134 | + * The final bit of lock request processing on the master node. Here the lock |
4135 | + * is granted and the completion ast is queued, or the lock is put on the | |
4136 | + * waitqueue and blocking asts are sent. | |
4137 | + */ | |
4138 | + | |
b7b72b66 | 4139 | +void dlm_lock_stage3(struct dlm_lkb *lkb) |
c1c6733f | 4140 | +{ |
b7b72b66 | 4141 | + struct dlm_rsb *rsb = lkb->lkb_resource; |
c1c6733f | 4142 | + |
b7b72b66 | 4143 | + /* |
c1c6733f AM |
4144 | + * This is a locally mastered lock on a resource that already exists, |
4145 | + * see if it can be granted or if it must wait. When this function is | |
4146 | + * called for a remote lock request (process_cluster_request, | |
4147 | + * REMCMD_LOCKREQUEST), the result from grant_lock is returned to the | |
4148 | + * requesting node at the end of process_cluster_request, not at the | |
4149 | + * end of grant_lock. | |
4150 | + */ | |
4151 | + | |
4152 | + down_write(&rsb->res_lock); | |
4153 | + | |
b7b72b66 | 4154 | + if (can_be_granted(rsb, lkb, TRUE)) { |
c1c6733f AM |
4155 | + grant_lock(lkb, 0); |
4156 | + goto out; | |
4157 | + } | |
4158 | + | |
b7b72b66 | 4159 | + /* |
c1c6733f AM |
4160 | + * This request is not a conversion, so the lkb didn't exist other than |
4161 | + * for this request and should be freed after EAGAIN is returned in the | |
4162 | + * ast. | |
4163 | + */ | |
4164 | + | |
4165 | + if (lkb->lkb_lockqueue_flags & DLM_LKF_NOQUEUE) { | |
c1c6733f | 4166 | + lkb->lkb_retstatus = -EAGAIN; |
c1c6733f AM |
4167 | + if (lkb->lkb_lockqueue_flags & DLM_LKF_NOQUEUEBAST) |
4168 | + send_blocking_asts_all(rsb, lkb); | |
b7b72b66 | 4169 | + queue_ast(lkb, AST_COMP | AST_DEL, 0); |
c1c6733f AM |
4170 | + goto out; |
4171 | + } | |
4172 | + | |
b7b72b66 | 4173 | + /* |
c1c6733f AM |
4174 | + * The requested lkb must wait. Because the rsb of the requested lkb |
4175 | + * is mastered here, send blocking asts for the lkbs blocking the | |
4176 | + * request. | |
4177 | + */ | |
4178 | + | |
b7b72b66 AM |
4179 | + log_debug2("w %x %d %x %d,%d %d %s", lkb->lkb_id, lkb->lkb_nodeid, |
4180 | + lkb->lkb_remid, lkb->lkb_grmode, lkb->lkb_rqmode, | |
4181 | + lkb->lkb_status, rsb->res_name); | |
4182 | + | |
c1c6733f AM |
4183 | + lkb->lkb_retstatus = 0; |
4184 | + lkb_enqueue(rsb, lkb, GDLM_LKSTS_WAITING); | |
4185 | + | |
4186 | + send_blocking_asts(rsb, lkb); | |
4187 | + | |
4188 | + out: | |
4189 | + up_write(&rsb->res_lock); | |
4190 | +} | |
4191 | + | |
4192 | +int dlm_unlock(void *lockspace, | |
4193 | + uint32_t lkid, | |
4194 | + uint32_t flags, | |
4195 | + struct dlm_lksb *lksb, | |
4196 | + void *astarg) | |
4197 | +{ | |
b7b72b66 AM |
4198 | + struct dlm_ls *ls = find_lockspace_by_local_id(lockspace); |
4199 | + struct dlm_lkb *lkb; | |
4200 | + struct dlm_rsb *rsb; | |
c1c6733f AM |
4201 | + int ret = -EINVAL; |
4202 | + | |
b7b72b66 AM |
4203 | + if (!ls) { |
4204 | + log_print("dlm_unlock: lkid %x lockspace not found", lkid); | |
4205 | + return ret; | |
4206 | + } | |
c1c6733f AM |
4207 | + |
4208 | + lkb = find_lock_by_id(ls, lkid); | |
b7b72b66 AM |
4209 | + if (!lkb) { |
4210 | + log_debug(ls, "unlock %x no id", lkid); | |
c1c6733f | 4211 | + goto out; |
b7b72b66 | 4212 | + } |
c1c6733f AM |
4213 | + |
4214 | + /* Can't dequeue a master copy (a remote node's mastered lock) */ | |
b7b72b66 AM |
4215 | + if (lkb->lkb_flags & GDLM_LKFLG_MSTCPY) { |
4216 | + log_debug(ls, "(%d) unlock %x lkb_flags %x", | |
4217 | + lkb->lkb_ownpid, lkid, lkb->lkb_flags); | |
c1c6733f | 4218 | + goto out; |
b7b72b66 | 4219 | + } |
c1c6733f AM |
4220 | + |
4221 | + /* Already waiting for a remote lock operation */ | |
4222 | + if (lkb->lkb_lockqueue_state) { | |
b7b72b66 AM |
4223 | + log_debug(ls, "(%d) unlock %x lq%d", |
4224 | + lkb->lkb_ownpid, lkid, lkb->lkb_lockqueue_state); | |
c1c6733f AM |
4225 | + ret = -EBUSY; |
4226 | + goto out; | |
4227 | + } | |
4228 | + | |
c783755a AM |
4229 | +#ifdef CONFIG_DLM_STATS |
4230 | + dlm_stats.unlockops++; | |
4231 | +#endif | |
c1c6733f AM |
4232 | + /* Can only cancel WAITING or CONVERTing locks. |
4233 | + * This is just a quick check - it is also checked in unlock_stage2() | |
4234 | + * (which may be on the master) under the semaphore. | |
4235 | + */ | |
4236 | + if ((flags & DLM_LKF_CANCEL) && | |
b7b72b66 AM |
4237 | + (lkb->lkb_status == GDLM_LKSTS_GRANTED)) { |
4238 | + log_debug(ls, "(%d) unlock %x %x %d", | |
4239 | + lkb->lkb_ownpid, lkid, flags, lkb->lkb_status); | |
c1c6733f | 4240 | + goto out; |
b7b72b66 | 4241 | + } |
c1c6733f AM |
4242 | + |
4243 | + /* "Normal" unlocks must operate on a granted lock */ | |
4244 | + if (!(flags & DLM_LKF_CANCEL) && | |
b7b72b66 AM |
4245 | + (lkb->lkb_status != GDLM_LKSTS_GRANTED)) { |
4246 | + log_debug(ls, "(%d) unlock %x %x %d", | |
4247 | + lkb->lkb_ownpid, lkid, flags, lkb->lkb_status); | |
c1c6733f | 4248 | + goto out; |
b7b72b66 | 4249 | + } |
c1c6733f | 4250 | + |
b7b72b66 AM |
4251 | + if (lkb->lkb_flags & GDLM_LKFLG_DELETED) { |
4252 | + log_debug(ls, "(%d) unlock deleted %x %x %d", | |
4253 | + lkb->lkb_ownpid, lkid, flags, lkb->lkb_status); | |
4254 | + goto out; | |
4255 | + } | |
c1c6733f | 4256 | + |
b7b72b66 | 4257 | + down_write(&ls->ls_unlock_sem); |
c1c6733f AM |
4258 | + /* Can't dequeue a lock with sublocks */ |
4259 | + if (atomic_read(&lkb->lkb_childcnt)) { | |
4260 | + up_write(&ls->ls_unlock_sem); | |
4261 | + ret = -ENOTEMPTY; | |
4262 | + goto out; | |
4263 | + } | |
c1c6733f AM |
4264 | + /* Mark it as deleted so we can't use it as a parent in dlm_lock() */ |
4265 | + if (!(flags & DLM_LKF_CANCEL)) | |
4266 | + lkb->lkb_flags |= GDLM_LKFLG_DELETED; | |
4267 | + up_write(&ls->ls_unlock_sem); | |
4268 | + | |
b7b72b66 AM |
4269 | + down_read(&ls->ls_in_recovery); |
4270 | + rsb = find_rsb_to_unlock(ls, lkb); | |
4271 | + | |
4272 | + log_debug(ls, "(%d) un %x %x %d %d \"%s\"", | |
4273 | + lkb->lkb_ownpid, | |
4274 | + lkb->lkb_id, | |
4275 | + lkb->lkb_flags, | |
4276 | + lkb->lkb_nodeid, | |
4277 | + rsb->res_nodeid, | |
4278 | + rsb->res_name); | |
4279 | + | |
c1c6733f AM |
4280 | + /* Save any new params */ |
4281 | + if (lksb) | |
4282 | + lkb->lkb_lksb = lksb; | |
c783755a | 4283 | + lkb->lkb_astparam = (long) astarg; |
c1c6733f AM |
4284 | + lkb->lkb_lockqueue_flags = flags; |
4285 | + | |
b7b72b66 | 4286 | + if (lkb->lkb_nodeid) |
c1c6733f AM |
4287 | + ret = remote_stage(lkb, GDLM_LQSTATE_WAIT_UNLOCK); |
4288 | + else | |
b7b72b66 | 4289 | + ret = dlm_unlock_stage2(lkb, rsb, flags); |
c1c6733f AM |
4290 | + up_read(&ls->ls_in_recovery); |
4291 | + | |
4292 | + wake_astd(); | |
4293 | + | |
4294 | + out: | |
b7b72b66 | 4295 | + put_lockspace(ls); |
c1c6733f AM |
4296 | + return ret; |
4297 | +} | |
4298 | + | |
b7b72b66 | 4299 | +int dlm_unlock_stage2(struct dlm_lkb *lkb, struct dlm_rsb *rsb, uint32_t flags) |
c1c6733f | 4300 | +{ |
c1c6733f | 4301 | + int remote = lkb->lkb_flags & GDLM_LKFLG_MSTCPY; |
b7b72b66 | 4302 | + int old_status; |
c1c6733f AM |
4303 | + |
4304 | + down_write(&rsb->res_lock); | |
4305 | + | |
4306 | + /* Can only cancel WAITING or CONVERTing locks */ | |
4307 | + if ((flags & DLM_LKF_CANCEL) && | |
4308 | + (lkb->lkb_status == GDLM_LKSTS_GRANTED)) { | |
4309 | + lkb->lkb_retstatus = -EINVAL; | |
b7b72b66 | 4310 | + queue_ast(lkb, AST_COMP, 0); |
c1c6733f AM |
4311 | + goto out; |
4312 | + } | |
4313 | + | |
b7b72b66 AM |
4314 | + log_debug2("u %x %d %x %d,%d %d %s", lkb->lkb_id, lkb->lkb_nodeid, |
4315 | + lkb->lkb_remid, lkb->lkb_grmode, lkb->lkb_rqmode, | |
4316 | + lkb->lkb_status, rsb->res_name); | |
c1c6733f | 4317 | + |
b7b72b66 | 4318 | + old_status = lkb_dequeue(lkb); |
c1c6733f | 4319 | + |
b7b72b66 | 4320 | + /* |
c1c6733f AM |
4321 | + * Cancelling a conversion |
4322 | + */ | |
4323 | + | |
4324 | + if ((old_status == GDLM_LKSTS_CONVERT) && (flags & DLM_LKF_CANCEL)) { | |
4325 | + /* VMS semantics say we should send blocking ASTs again here */ | |
4326 | + send_blocking_asts(rsb, lkb); | |
4327 | + | |
4328 | + /* Remove from deadlock detection */ | |
4329 | + if (lkb->lkb_duetime) | |
4330 | + remove_from_deadlockqueue(lkb); | |
4331 | + | |
4332 | + /* Stick it back on the granted queue */ | |
4333 | + lkb_enqueue(rsb, lkb, GDLM_LKSTS_GRANTED); | |
4334 | + lkb->lkb_rqmode = lkb->lkb_grmode; | |
4335 | + | |
4336 | + /* Was it blocking any other locks? */ | |
4337 | + if (first_in_list(lkb, &rsb->res_convertqueue)) | |
4338 | + grant_pending_locks(rsb); | |
4339 | + | |
4340 | + lkb->lkb_retstatus = -DLM_ECANCEL; | |
b7b72b66 | 4341 | + queue_ast(lkb, AST_COMP, 0); |
c1c6733f AM |
4342 | + goto out; |
4343 | + } | |
4344 | + | |
b7b72b66 AM |
4345 | + /* |
4346 | + * If it was granted, grant any converting or waiting locks | |
4347 | + * and save or clear lvb | |
c1c6733f AM |
4348 | + */ |
4349 | + | |
b7b72b66 AM |
4350 | + if (old_status == GDLM_LKSTS_GRANTED) { |
4351 | + if (rsb->res_lvbptr && (lkb->lkb_grmode >= DLM_LOCK_PW)) { | |
4352 | + if ((flags & DLM_LKF_VALBLK) && lkb->lkb_lvbptr) | |
4353 | + memcpy(rsb->res_lvbptr, lkb->lkb_lvbptr, | |
4354 | + DLM_LVB_LEN); | |
4355 | + if (flags & DLM_LKF_IVVALBLK) | |
4356 | + memset(rsb->res_lvbptr, 0, DLM_LVB_LEN); | |
4357 | + } | |
c1c6733f | 4358 | + |
b7b72b66 AM |
4359 | + grant_pending_locks(rsb); |
4360 | + } else | |
4361 | + DLM_ASSERT(0, print_lkb(lkb); print_rsb(rsb);); | |
c1c6733f | 4362 | + |
b7b72b66 | 4363 | + lkb->lkb_retstatus = flags & DLM_LKF_CANCEL ? -DLM_ECANCEL:-DLM_EUNLOCK; |
c1c6733f | 4364 | + |
b7b72b66 AM |
4365 | + if (!remote) { |
4366 | + queue_ast(lkb, AST_COMP | AST_DEL, 0); | |
4367 | + } else { | |
c1c6733f AM |
4368 | + up_write(&rsb->res_lock); |
4369 | + release_lkb(rsb->res_ls, lkb); | |
4370 | + release_rsb(rsb); | |
4371 | + goto out2; | |
4372 | + } | |
4373 | + | |
b7b72b66 | 4374 | + out: |
c1c6733f | 4375 | + up_write(&rsb->res_lock); |
b7b72b66 | 4376 | + out2: |
c1c6733f AM |
4377 | + wake_astd(); |
4378 | + return 0; | |
4379 | +} | |
4380 | + | |
b7b72b66 | 4381 | +/* |
c1c6733f AM |
4382 | + * Lock conversion |
4383 | + */ | |
4384 | + | |
b7b72b66 AM |
4385 | +static int convert_lock(struct dlm_ls *ls, int mode, struct dlm_lksb *lksb, |
4386 | + uint32_t flags, void *ast, void *astarg, void *bast, | |
c1c6733f AM |
4387 | + struct dlm_range *range) |
4388 | +{ | |
b7b72b66 AM |
4389 | + struct dlm_lkb *lkb; |
4390 | + struct dlm_rsb *rsb; | |
c1c6733f AM |
4391 | + int ret = -EINVAL; |
4392 | + | |
4393 | + lkb = find_lock_by_id(ls, lksb->sb_lkid); | |
4394 | + if (!lkb) { | |
4395 | + goto out; | |
4396 | + } | |
4397 | + | |
4398 | + if (lkb->lkb_status != GDLM_LKSTS_GRANTED) { | |
4399 | + ret = -EBUSY; | |
4400 | + goto out; | |
4401 | + } | |
4402 | + | |
4403 | + if (lkb->lkb_flags & GDLM_LKFLG_MSTCPY) { | |
4404 | + goto out; | |
4405 | + } | |
4406 | + | |
4407 | + if ((flags & DLM_LKF_QUECVT) && | |
4408 | + !__quecvt_compat_matrix[lkb->lkb_grmode + 1][mode + 1]) { | |
4409 | + goto out; | |
4410 | + } | |
4411 | + | |
4412 | + if (!lksb->sb_lvbptr && (flags & DLM_LKF_VALBLK)) { | |
b7b72b66 | 4413 | + goto out; |
c1c6733f AM |
4414 | + } |
4415 | + | |
c783755a AM |
4416 | +#ifdef CONFIG_DLM_STATS |
4417 | + dlm_stats.convertops++; | |
4418 | +#endif | |
c1c6733f AM |
4419 | + /* Set up the ranges as appropriate */ |
4420 | + if (range) { | |
4421 | + if (range->ra_start > range->ra_end) | |
4422 | + goto out; | |
4423 | + | |
4424 | + if (lkb_set_range(ls, lkb, range->ra_start, range->ra_end)) { | |
4425 | + ret = -ENOMEM; | |
4426 | + goto out; | |
4427 | + } | |
4428 | + } | |
4429 | + | |
4430 | + rsb = lkb->lkb_resource; | |
b7b72b66 AM |
4431 | + down_read(&ls->ls_in_recovery); |
4432 | + | |
4433 | + log_debug(ls, "(%d) cv %u %x \"%s\"", lkb->lkb_ownpid, mode, | |
4434 | + lkb->lkb_id, rsb->res_name); | |
c1c6733f AM |
4435 | + |
4436 | + lkb->lkb_flags &= ~GDLM_LKFLG_VALBLK; | |
4437 | + lkb->lkb_flags &= ~GDLM_LKFLG_DEMOTED; | |
4438 | + | |
4439 | + if (flags & DLM_LKF_NODLCKWT) | |
4440 | + lkb->lkb_flags |= GDLM_LKFLG_NODLCKWT; | |
b7b72b66 AM |
4441 | + lkb->lkb_astaddr = ast; |
4442 | + lkb->lkb_astparam = (long) astarg; | |
4443 | + lkb->lkb_bastaddr = bast; | |
c1c6733f AM |
4444 | + lkb->lkb_rqmode = mode; |
4445 | + lkb->lkb_lockqueue_flags = flags; | |
4446 | + lkb->lkb_flags |= (flags & DLM_LKF_VALBLK) ? GDLM_LKFLG_VALBLK : 0; | |
4447 | + lkb->lkb_lvbptr = lksb->sb_lvbptr; | |
4448 | + | |
4449 | + if (rsb->res_nodeid) { | |
4450 | + res_lkb_swqueue(rsb, lkb, GDLM_LKSTS_CONVERT); | |
4451 | + ret = remote_stage(lkb, GDLM_LQSTATE_WAIT_CONVERT); | |
4452 | + } else { | |
4453 | + ret = dlm_convert_stage2(lkb, FALSE); | |
4454 | + } | |
4455 | + | |
b7b72b66 | 4456 | + up_read(&ls->ls_in_recovery); |
c1c6733f AM |
4457 | + |
4458 | + wake_astd(); | |
4459 | + | |
4460 | + out: | |
4461 | + return ret; | |
4462 | +} | |
4463 | + | |
b7b72b66 | 4464 | +/* |
c1c6733f AM |
4465 | + * For local conversion requests on locally mastered locks this is called |
4466 | + * directly from dlm_lock/convert_lock. This function is also called for | |
4467 | + * remote conversion requests of MSTCPY locks (from process_cluster_request). | |
4468 | + */ | |
4469 | + | |
b7b72b66 | 4470 | +int dlm_convert_stage2(struct dlm_lkb *lkb, int do_ast) |
c1c6733f | 4471 | +{ |
b7b72b66 | 4472 | + struct dlm_rsb *rsb = lkb->lkb_resource; |
c1c6733f AM |
4473 | + int ret = 0; |
4474 | + | |
4475 | + down_write(&rsb->res_lock); | |
4476 | + | |
b7b72b66 | 4477 | + if (can_be_granted(rsb, lkb, TRUE)) { |
c1c6733f AM |
4478 | + grant_lock(lkb, 0); |
4479 | + grant_pending_locks(rsb); | |
4480 | + goto out; | |
4481 | + } | |
4482 | + | |
c1c6733f | 4483 | + if (lkb->lkb_lockqueue_flags & DLM_LKF_NOQUEUE) { |
c1c6733f AM |
4484 | + ret = lkb->lkb_retstatus = -EAGAIN; |
4485 | + if (do_ast) | |
b7b72b66 | 4486 | + queue_ast(lkb, AST_COMP, 0); |
c1c6733f AM |
4487 | + if (lkb->lkb_lockqueue_flags & DLM_LKF_NOQUEUEBAST) |
4488 | + send_blocking_asts_all(rsb, lkb); | |
4489 | + goto out; | |
4490 | + } | |
4491 | + | |
b7b72b66 AM |
4492 | + log_debug2("c %x %d %x %d,%d %d %s", lkb->lkb_id, lkb->lkb_nodeid, |
4493 | + lkb->lkb_remid, lkb->lkb_grmode, lkb->lkb_rqmode, | |
4494 | + lkb->lkb_status, rsb->res_name); | |
c1c6733f AM |
4495 | + |
4496 | + lkb->lkb_retstatus = 0; | |
b7b72b66 | 4497 | + lkb_swqueue(rsb, lkb, GDLM_LKSTS_CONVERT); |
c1c6733f | 4498 | + |
b7b72b66 AM |
4499 | + /* |
4500 | + * The granted mode may have been reduced to NL by conversion deadlock | |
4501 | + * avoidance in can_be_granted(). If so, try to grant other locks. | |
c1c6733f AM |
4502 | + */ |
4503 | + | |
b7b72b66 AM |
4504 | + if (lkb->lkb_flags & GDLM_LKFLG_DEMOTED) |
4505 | + grant_pending_locks(rsb); | |
4506 | + | |
c1c6733f AM |
4507 | + send_blocking_asts(rsb, lkb); |
4508 | + | |
4509 | + if (!(lkb->lkb_flags & GDLM_LKFLG_NODLCKWT)) | |
4510 | + add_to_deadlockqueue(lkb); | |
4511 | + | |
4512 | + out: | |
4513 | + up_write(&rsb->res_lock); | |
4514 | + return ret; | |
4515 | +} | |
4516 | + | |
b7b72b66 | 4517 | +/* |
c1c6733f AM |
4518 | + * Remove lkb from any queue it's on, add it to the granted queue, and queue a |
4519 | + * completion ast. rsb res_lock must be held in write when this is called. | |
4520 | + */ | |
4521 | + | |
b7b72b66 | 4522 | +static void grant_lock(struct dlm_lkb *lkb, int send_remote) |
c1c6733f | 4523 | +{ |
b7b72b66 | 4524 | + struct dlm_rsb *rsb = lkb->lkb_resource; |
c1c6733f AM |
4525 | + |
4526 | + if (lkb->lkb_duetime) | |
4527 | + remove_from_deadlockqueue(lkb); | |
4528 | + | |
4529 | + if (lkb->lkb_flags & GDLM_LKFLG_VALBLK) { | |
4530 | + int b; | |
b7b72b66 | 4531 | + DLM_ASSERT(lkb->lkb_lvbptr,); |
c1c6733f AM |
4532 | + |
4533 | + if (!rsb->res_lvbptr) | |
4534 | + rsb->res_lvbptr = allocate_lvb(rsb->res_ls); | |
4535 | + | |
4536 | + b = __lvb_operations[lkb->lkb_grmode + 1][lkb->lkb_rqmode + 1]; | |
4537 | + if (b) | |
4538 | + memcpy(lkb->lkb_lvbptr, rsb->res_lvbptr, DLM_LVB_LEN); | |
4539 | + else | |
4540 | + memcpy(rsb->res_lvbptr, lkb->lkb_lvbptr, DLM_LVB_LEN); | |
4541 | + } | |
4542 | + | |
4543 | + if (lkb->lkb_range) { | |
4544 | + lkb->lkb_range[GR_RANGE_START] = lkb->lkb_range[RQ_RANGE_START]; | |
4545 | + lkb->lkb_range[GR_RANGE_END] = lkb->lkb_range[RQ_RANGE_END]; | |
4546 | + } | |
4547 | + | |
b7b72b66 AM |
4548 | + log_debug2("g %x %d %x %d,%d %d %s", lkb->lkb_id, lkb->lkb_nodeid, |
4549 | + lkb->lkb_remid, lkb->lkb_grmode, lkb->lkb_rqmode, | |
4550 | + lkb->lkb_status, rsb->res_name); | |
c1c6733f | 4551 | + |
b7b72b66 AM |
4552 | + if (lkb->lkb_grmode != lkb->lkb_rqmode) { |
4553 | + lkb->lkb_grmode = lkb->lkb_rqmode; | |
4554 | + lkb_swqueue(rsb, lkb, GDLM_LKSTS_GRANTED); | |
4555 | + } | |
4556 | + lkb->lkb_rqmode = DLM_LOCK_IV; | |
c1c6733f AM |
4557 | + lkb->lkb_highbast = 0; |
4558 | + lkb->lkb_retstatus = 0; | |
b7b72b66 | 4559 | + queue_ast(lkb, AST_COMP, 0); |
c1c6733f | 4560 | + |
b7b72b66 | 4561 | + /* |
c1c6733f AM |
4562 | + * A remote conversion request has been granted, either immediately |
4563 | + * upon being requested or after waiting a bit. In the former case, | |
4564 | + * reply_and_grant() is called. In the latter case send_remote is 1 and | |
4565 | + * remote_grant() is called. | |
4566 | + * | |
4567 | + * The "send_remote" flag is set only for locks which are granted "out | |
4568 | + * of band" - ie by another lock being converted or unlocked. | |
4569 | + * | |
4570 | + * The former case occurs when this lkb is granted right away as part | |
4571 | + * of processing the initial request. In that case, we send a single | |
4572 | + * message in reply_and_grant which combines the request reply with the | |
4573 | + * grant message. | |
4574 | + */ | |
4575 | + | |
4576 | + if ((lkb->lkb_flags & GDLM_LKFLG_MSTCPY) && lkb->lkb_nodeid) { | |
4577 | + if (send_remote) | |
4578 | + remote_grant(lkb); | |
4579 | + else if (lkb->lkb_request) | |
4580 | + reply_and_grant(lkb); | |
4581 | + } | |
4582 | + | |
4583 | +} | |
4584 | + | |
b7b72b66 | 4585 | +static void send_bast_queue(struct list_head *head, struct dlm_lkb *lkb) |
c1c6733f | 4586 | +{ |
b7b72b66 | 4587 | + struct dlm_lkb *gr; |
c1c6733f AM |
4588 | + |
4589 | + list_for_each_entry(gr, head, lkb_statequeue) { | |
4590 | + if (gr->lkb_bastaddr && | |
4591 | + gr->lkb_highbast < lkb->lkb_rqmode && | |
4592 | + ranges_overlap(lkb, gr) && !modes_compat(gr, lkb)) { | |
b7b72b66 | 4593 | + queue_ast(gr, AST_BAST, lkb->lkb_rqmode); |
c1c6733f AM |
4594 | + gr->lkb_highbast = lkb->lkb_rqmode; |
4595 | + } | |
4596 | + } | |
4597 | +} | |
4598 | + | |
b7b72b66 | 4599 | +/* |
c1c6733f AM |
4600 | + * Notify granted locks if they are blocking a newly forced-to-wait lock. |
4601 | + */ | |
4602 | + | |
b7b72b66 | 4603 | +static void send_blocking_asts(struct dlm_rsb *rsb, struct dlm_lkb *lkb) |
c1c6733f AM |
4604 | +{ |
4605 | + send_bast_queue(&rsb->res_grantqueue, lkb); | |
4606 | + /* check if the following improves performance */ | |
4607 | + /* send_bast_queue(&rsb->res_convertqueue, lkb); */ | |
4608 | +} | |
4609 | + | |
b7b72b66 | 4610 | +static void send_blocking_asts_all(struct dlm_rsb *rsb, struct dlm_lkb *lkb) |
c1c6733f AM |
4611 | +{ |
4612 | + send_bast_queue(&rsb->res_grantqueue, lkb); | |
4613 | + send_bast_queue(&rsb->res_convertqueue, lkb); | |
4614 | +} | |
4615 | + | |
b7b72b66 | 4616 | +/* |
c1c6733f AM |
4617 | + * Called when a lock has been dequeued. Look for any locks to grant that are |
4618 | + * waiting for conversion or waiting to be granted. | |
4619 | + * The rsb res_lock must be held in write when this function is called. | |
4620 | + */ | |
4621 | + | |
b7b72b66 | 4622 | +int grant_pending_locks(struct dlm_rsb *r) |
c1c6733f | 4623 | +{ |
b7b72b66 | 4624 | + struct dlm_lkb *lkb, *s; |
c1c6733f AM |
4625 | + int8_t high = DLM_LOCK_IV; |
4626 | + | |
b7b72b66 AM |
4627 | + list_for_each_entry_safe(lkb, s, &r->res_convertqueue, lkb_statequeue) { |
4628 | + if (can_be_granted(r, lkb, FALSE)) | |
c1c6733f AM |
4629 | + grant_lock(lkb, 1); |
4630 | + else | |
4631 | + high = MAX(lkb->lkb_rqmode, high); | |
4632 | + } | |
4633 | + | |
b7b72b66 AM |
4634 | + list_for_each_entry_safe(lkb, s, &r->res_waitqueue, lkb_statequeue) { |
4635 | + if (lkb->lkb_lockqueue_state) | |
4636 | + continue; | |
c1c6733f | 4637 | + |
b7b72b66 | 4638 | + if (can_be_granted(r, lkb, FALSE)) |
c1c6733f AM |
4639 | + grant_lock(lkb, 1); |
4640 | + else | |
4641 | + high = MAX(lkb->lkb_rqmode, high); | |
4642 | + } | |
4643 | + | |
b7b72b66 | 4644 | + /* |
c1c6733f AM |
4645 | + * If there are locks left on the wait/convert queue then send blocking |
4646 | + * ASTs to granted locks that are blocking | |
4647 | + * | |
4648 | + * FIXME: This might generate some spurious blocking ASTs for range | |
4649 | + * locks. | |
4650 | + */ | |
4651 | + | |
4652 | + if (high > DLM_LOCK_IV) { | |
b7b72b66 AM |
4653 | + list_for_each_entry_safe(lkb, s, &r->res_grantqueue, |
4654 | + lkb_statequeue) { | |
4655 | + if (lkb->lkb_bastaddr && (lkb->lkb_highbast < high) && | |
c1c6733f | 4656 | + !__dlm_compat_matrix[lkb->lkb_grmode+1][high+1]) { |
b7b72b66 | 4657 | + queue_ast(lkb, AST_BAST, high); |
c1c6733f AM |
4658 | + lkb->lkb_highbast = high; |
4659 | + } | |
4660 | + } | |
4661 | + } | |
4662 | + | |
4663 | + return 0; | |
4664 | +} | |
4665 | + | |
b7b72b66 | 4666 | +/* |
c1c6733f AM |
4667 | + * Called to cancel a locking operation that failed due to some internal |
4668 | + * reason. | |
4669 | + * | |
4670 | + * Waiting locks will be removed, converting locks will be reverted to their | |
4671 | + * granted status, unlocks will be left where they are. | |
4672 | + * | |
4673 | + * A completion AST will be delivered to the caller. | |
4674 | + */ | |
4675 | + | |
b7b72b66 | 4676 | +int cancel_lockop(struct dlm_lkb *lkb, int status) |
c1c6733f AM |
4677 | +{ |
4678 | + int state = lkb->lkb_lockqueue_state; | |
b7b72b66 | 4679 | + uint16_t astflags = AST_COMP; |
c1c6733f AM |
4680 | + |
4681 | + lkb->lkb_lockqueue_state = 0; | |
4682 | + | |
4683 | + switch (state) { | |
4684 | + case GDLM_LQSTATE_WAIT_RSB: | |
b7b72b66 | 4685 | + astflags |= AST_DEL; |
c1c6733f AM |
4686 | + break; |
4687 | + | |
4688 | + case GDLM_LQSTATE_WAIT_CONDGRANT: | |
4689 | + res_lkb_dequeue(lkb); | |
b7b72b66 | 4690 | + astflags |= AST_DEL; |
c1c6733f AM |
4691 | + break; |
4692 | + | |
4693 | + case GDLM_LQSTATE_WAIT_CONVERT: | |
4694 | + res_lkb_swqueue(lkb->lkb_resource, lkb, GDLM_LKSTS_GRANTED); | |
4695 | + | |
4696 | + /* Remove from deadlock detection */ | |
4697 | + if (lkb->lkb_duetime) { | |
4698 | + remove_from_deadlockqueue(lkb); | |
4699 | + } | |
4700 | + break; | |
4701 | + | |
4702 | + case GDLM_LQSTATE_WAIT_UNLOCK: | |
4703 | + /* We can leave this. I think.... */ | |
4704 | + break; | |
4705 | + } | |
4706 | + | |
4707 | + lkb->lkb_retstatus = status; | |
b7b72b66 | 4708 | + queue_ast(lkb, astflags, 0); |
c1c6733f AM |
4709 | + |
4710 | + return 0; | |
4711 | +} | |
4712 | + | |
b7b72b66 | 4713 | +/* |
c1c6733f AM |
4714 | + * Check for conversion deadlock. If a deadlock was found |
4715 | + * return lkb to kill, else return NULL | |
4716 | + */ | |
4717 | + | |
b7b72b66 | 4718 | +struct dlm_lkb *conversion_deadlock_check(struct dlm_lkb *lkb) |
c1c6733f | 4719 | +{ |
b7b72b66 | 4720 | + struct dlm_rsb *rsb = lkb->lkb_resource; |
c1c6733f AM |
4721 | + struct list_head *entry; |
4722 | + | |
b7b72b66 | 4723 | + DLM_ASSERT(lkb->lkb_status == GDLM_LKSTS_CONVERT,); |
c1c6733f AM |
4724 | + |
4725 | + /* Work our way up to the head of the queue looking for locks that | |
4726 | + * conflict with us */ | |
4727 | + | |
4728 | + down_read(&rsb->res_lock); | |
4729 | + | |
4730 | + entry = lkb->lkb_statequeue.prev; | |
4731 | + while (entry != &rsb->res_convertqueue) { | |
b7b72b66 | 4732 | + struct dlm_lkb *lkb2 = list_entry(entry, struct dlm_lkb, lkb_statequeue); |
c1c6733f AM |
4733 | + |
4734 | + if (ranges_overlap(lkb, lkb2) && !modes_compat(lkb2, lkb)) { | |
4735 | + up_read(&rsb->res_lock); | |
4736 | + return lkb; | |
4737 | + } | |
4738 | + entry = entry->prev; | |
4739 | + } | |
4740 | + up_read(&rsb->res_lock); | |
4741 | + | |
4742 | + return 0; | |
4743 | +} | |
4744 | + | |
b7b72b66 | 4745 | +/* |
c1c6733f AM |
4746 | + * Conversion operation was cancelled by us (not the user). |
4747 | + * ret contains the return code to pass onto the user | |
4748 | + */ | |
4749 | + | |
b7b72b66 | 4750 | +void cancel_conversion(struct dlm_lkb *lkb, int ret) |
c1c6733f | 4751 | +{ |
b7b72b66 | 4752 | + struct dlm_rsb *rsb = lkb->lkb_resource; |
c1c6733f AM |
4753 | + |
4754 | + /* Stick it back on the granted queue */ | |
4755 | + res_lkb_swqueue(rsb, lkb, GDLM_LKSTS_GRANTED); | |
4756 | + lkb->lkb_rqmode = lkb->lkb_grmode; | |
4757 | + | |
4758 | + remove_from_deadlockqueue(lkb); | |
4759 | + | |
4760 | + lkb->lkb_retstatus = ret; | |
b7b72b66 | 4761 | + queue_ast(lkb, AST_COMP, 0); |
c1c6733f AM |
4762 | + wake_astd(); |
4763 | +} | |
4764 | + | |
b7b72b66 | 4765 | +/* |
c1c6733f AM |
4766 | + * As new master of the rsb for this lkb, we need to handle these requests |
4767 | + * removed from the lockqueue and originating from local processes: | |
4768 | + * GDLM_LQSTATE_WAIT_RSB, GDLM_LQSTATE_WAIT_CONDGRANT, | |
4769 | + * GDLM_LQSTATE_WAIT_UNLOCK, GDLM_LQSTATE_WAIT_CONVERT. | |
4770 | + */ | |
4771 | + | |
b7b72b66 | 4772 | +void process_remastered_lkb(struct dlm_ls *ls, struct dlm_lkb *lkb, int state) |
c1c6733f | 4773 | +{ |
b7b72b66 AM |
4774 | + struct dlm_rsb *rsb; |
4775 | + | |
c1c6733f AM |
4776 | + switch (state) { |
4777 | + case GDLM_LQSTATE_WAIT_RSB: | |
4778 | + dlm_lock_stage1(lkb->lkb_resource->res_ls, lkb, | |
4779 | + lkb->lkb_lockqueue_flags, | |
4780 | + lkb->lkb_resource->res_name, | |
4781 | + lkb->lkb_resource->res_length); | |
4782 | + break; | |
4783 | + | |
4784 | + case GDLM_LQSTATE_WAIT_CONDGRANT: | |
4785 | + res_lkb_dequeue(lkb); | |
4786 | + dlm_lock_stage3(lkb); | |
4787 | + break; | |
4788 | + | |
4789 | + case GDLM_LQSTATE_WAIT_UNLOCK: | |
b7b72b66 AM |
4790 | + rsb = find_rsb_to_unlock(ls, lkb); |
4791 | + dlm_unlock_stage2(lkb, rsb, lkb->lkb_lockqueue_flags); | |
c1c6733f AM |
4792 | + break; |
4793 | + | |
4794 | + case GDLM_LQSTATE_WAIT_CONVERT: | |
4795 | + dlm_convert_stage2(lkb, TRUE); | |
4796 | + break; | |
4797 | + | |
4798 | + default: | |
b7b72b66 AM |
4799 | + DLM_ASSERT(0,); |
4800 | + } | |
4801 | +} | |
4802 | + | |
4803 | +static void dump_queue(struct list_head *head, char *qname) | |
4804 | +{ | |
4805 | + struct dlm_lkb *lkb; | |
4806 | + | |
4807 | + list_for_each_entry(lkb, head, lkb_statequeue) { | |
4808 | + printk("%s %08x gr %d rq %d flg %x sts %u node %u remid %x " | |
4809 | + "lq %d,%x\n", | |
4810 | + qname, | |
4811 | + lkb->lkb_id, | |
4812 | + lkb->lkb_grmode, | |
4813 | + lkb->lkb_rqmode, | |
4814 | + lkb->lkb_flags, | |
4815 | + lkb->lkb_status, | |
4816 | + lkb->lkb_nodeid, | |
4817 | + lkb->lkb_remid, | |
4818 | + lkb->lkb_lockqueue_state, | |
4819 | + lkb->lkb_lockqueue_flags); | |
4820 | + } | |
4821 | +} | |
4822 | + | |
4823 | +static void dump_rsb(struct dlm_rsb *rsb) | |
4824 | +{ | |
4825 | + printk("name \"%s\" flags %lx nodeid %d ref %u\n", | |
4826 | + rsb->res_name, rsb->res_flags, rsb->res_nodeid, | |
4827 | + atomic_read(&rsb->res_ref)); | |
4828 | + | |
4829 | + if (!list_empty(&rsb->res_grantqueue)) | |
4830 | + dump_queue(&rsb->res_grantqueue, "G"); | |
4831 | + | |
4832 | + if (!list_empty(&rsb->res_convertqueue)) | |
4833 | + dump_queue(&rsb->res_convertqueue, "C"); | |
4834 | + | |
4835 | + if (!list_empty(&rsb->res_waitqueue)) | |
4836 | + dump_queue(&rsb->res_waitqueue, "W"); | |
4837 | +} | |
4838 | + | |
4839 | +void dlm_locks_dump(void) | |
4840 | +{ | |
4841 | + struct dlm_ls *ls; | |
4842 | + struct dlm_rsb *rsb; | |
4843 | + struct list_head *head; | |
4844 | + int i; | |
4845 | + | |
4846 | + lowcomms_stop_accept(); | |
4847 | + | |
4848 | + list_for_each_entry(ls, &lslist, ls_list) { | |
4849 | + down_write(&ls->ls_in_recovery); | |
4850 | + for (i = 0; i < ls->ls_rsbtbl_size; i++) { | |
4851 | + head = &ls->ls_rsbtbl[i].list; | |
4852 | + list_for_each_entry(rsb, head, res_hashchain) | |
4853 | + dump_rsb(rsb); | |
4854 | + } | |
c1c6733f AM |
4855 | + } |
4856 | +} | |
b7b72b66 | 4857 | + |
c1c6733f AM |
4858 | diff -urN linux-orig/cluster/dlm/locking.h linux-patched/cluster/dlm/locking.h |
4859 | --- linux-orig/cluster/dlm/locking.h 1970-01-01 07:30:00.000000000 +0730 | |
bb1d8b11 | 4860 | +++ linux-patched/cluster/dlm/locking.h 2004-11-03 11:31:56.000000000 +0800 |
c1c6733f AM |
4861 | @@ -0,0 +1,33 @@ |
4862 | +/****************************************************************************** | |
4863 | +******************************************************************************* | |
4864 | +** | |
4865 | +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | |
4866 | +** Copyright (C) 2004 Red Hat, Inc. All rights reserved. | |
4867 | +** | |
4868 | +** This copyrighted material is made available to anyone wishing to use, | |
4869 | +** modify, copy, or redistribute it subject to the terms and conditions | |
4870 | +** of the GNU General Public License v.2. | |
4871 | +** | |
4872 | +******************************************************************************* | |
4873 | +******************************************************************************/ | |
4874 | + | |
4875 | +#ifndef __LOCKING_DOT_H__ | |
4876 | +#define __LOCKING_DOT_H__ | |
4877 | + | |
b7b72b66 AM |
4878 | +int dlm_modes_compat(int mode1, int mode2); |
4879 | +void process_remastered_lkb(struct dlm_ls *ls, struct dlm_lkb *lkb, int state); | |
4880 | +void dlm_lock_stage3(struct dlm_lkb *lkb); | |
4881 | +int dlm_convert_stage2(struct dlm_lkb *lkb, int do_ast); | |
4882 | +int dlm_unlock_stage2(struct dlm_lkb *lkb, struct dlm_rsb *rsb, uint32_t flags); | |
4883 | +int dlm_lock_stage2(struct dlm_ls *lspace, struct dlm_lkb *lkb, struct dlm_rsb *rsb, uint32_t flags); | |
4884 | +struct dlm_rsb *create_rsb(struct dlm_ls *lspace, struct dlm_lkb *lkb, char *name, int namelen); | |
4885 | +int free_rsb_if_unused(struct dlm_rsb *rsb); | |
4886 | +struct dlm_lkb *remote_stage2(int remote_csid, struct dlm_ls *lspace, | |
4887 | + struct dlm_request *freq); | |
4888 | +int cancel_lockop(struct dlm_lkb *lkb, int status); | |
4889 | +int dlm_remove_lock(struct dlm_lkb *lkb, uint32_t flags); | |
4890 | +int grant_pending_locks(struct dlm_rsb *rsb); | |
4891 | +void cancel_conversion(struct dlm_lkb *lkb, int ret); | |
4892 | +struct dlm_lkb *conversion_deadlock_check(struct dlm_lkb *lkb); | |
c1c6733f AM |
4893 | + |
4894 | +#endif /* __LOCKING_DOT_H__ */ | |
4895 | diff -urN linux-orig/cluster/dlm/lockqueue.c linux-patched/cluster/dlm/lockqueue.c | |
4896 | --- linux-orig/cluster/dlm/lockqueue.c 1970-01-01 07:30:00.000000000 +0730 | |
bb1d8b11 | 4897 | +++ linux-patched/cluster/dlm/lockqueue.c 2004-11-03 11:31:56.000000000 +0800 |
b7b72b66 | 4898 | @@ -0,0 +1,1159 @@ |
c1c6733f AM |
4899 | +/****************************************************************************** |
4900 | +******************************************************************************* | |
4901 | +** | |
4902 | +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | |
4903 | +** Copyright (C) 2004 Red Hat, Inc. All rights reserved. | |
4904 | +** | |
4905 | +** This copyrighted material is made available to anyone wishing to use, | |
4906 | +** modify, copy, or redistribute it subject to the terms and conditions | |
4907 | +** of the GNU General Public License v.2. | |
4908 | +** | |
4909 | +******************************************************************************* | |
4910 | +******************************************************************************/ | |
4911 | + | |
4912 | +/* | |
4913 | + * lockqueue.c | |
4914 | + * | |
4915 | + * This controls the lock queue, which is where locks | |
4916 | + * come when they need to wait for a remote operation | |
4917 | + * to complete. | |
4918 | + * | |
4919 | + * This could also be thought of as the "high-level" comms | |
4920 | + * layer. | |
4921 | + * | |
4922 | + */ | |
4923 | + | |
4924 | +#include "dlm_internal.h" | |
4925 | +#include "lockqueue.h" | |
4926 | +#include "dir.h" | |
4927 | +#include "locking.h" | |
4928 | +#include "lkb.h" | |
4929 | +#include "lowcomms.h" | |
4930 | +#include "midcomms.h" | |
4931 | +#include "reccomms.h" | |
4932 | +#include "nodes.h" | |
4933 | +#include "lockspace.h" | |
4934 | +#include "ast.h" | |
4935 | +#include "memory.h" | |
4936 | +#include "rsb.h" | |
4937 | +#include "queries.h" | |
b7b72b66 | 4938 | +#include "util.h" |
c1c6733f | 4939 | + |
b7b72b66 AM |
4940 | +static void add_reply_lvb(struct dlm_lkb * lkb, struct dlm_reply *reply); |
4941 | +static void add_request_lvb(struct dlm_lkb * lkb, struct dlm_request *req); | |
c1c6733f AM |
4942 | + |
4943 | +/* | |
4944 | + * format of an entry on the request queue | |
4945 | + */ | |
4946 | +struct rq_entry { | |
4947 | + struct list_head rqe_list; | |
4948 | + uint32_t rqe_nodeid; | |
4949 | + char rqe_request[1]; | |
4950 | +}; | |
4951 | + | |
4952 | +/* | |
4953 | + * Add a new request (if appropriate) to the request queue and send the remote | |
4954 | + * request out. - runs in the context of the locking caller | |
4955 | + * | |
4956 | + * Recovery of a remote_stage request if the remote end fails while the lkb | |
4957 | + * is still on the lockqueue: | |
4958 | + * | |
4959 | + * o lkbs on the lockqueue are flagged with GDLM_LKFLG_LQRESEND in | |
4960 | + * lockqueue_lkb_mark() at the start of recovery. | |
4961 | + * | |
4962 | + * o Some lkb's will be rebuilt on new master rsb's during recovery. | |
4963 | + * (depends on the type of request, see below). | |
4964 | + * | |
4965 | + * o At the end of recovery, resend_cluster_requests() looks at these | |
4966 | + * LQRESEND lkb's and either: | |
4967 | + * | |
4968 | + * i) resends the request to the new master for the rsb where the | |
4969 | + * request is processed as usual. The lkb remains on the lockqueue until | |
4970 | + * the new master replies and we run process_lockqueue_reply(). | |
4971 | + * | |
4972 | + * ii) if we've become the rsb master, remove the lkb from the lockqueue | |
4973 | + * and process the request locally via process_remastered_lkb(). | |
4974 | + * | |
4975 | + * GDLM_LQSTATE_WAIT_RSB (1) - these lockqueue lkb's are not on any rsb queue | |
4976 | + * and the request should be resent if the dest node has failed. | |
4977 | + * | |
4978 | + * GDLM_LQSTATE_WAIT_CONDGRANT (3) - this lockqueue lkb is on a local rsb's | |
4979 | + * wait queue. Don't rebuild this lkb on a new master rsb (the NOREBUILD flag | |
4980 | + * makes send_lkb_queue() skip it). Resend this request to the new master. | |
4981 | + * | |
4982 | + * GDLM_LQSTATE_WAIT_UNLOCK (4) - this lkb is on a local rsb's queue. It will | |
4983 | + * be rebuilt on the rsb on the new master (restbl_lkb_send/send_lkb_queue). | |
4984 | + * Resend this request to the new master. | |
4985 | + * | |
4986 | + * GDLM_LQSTATE_WAIT_CONVERT (2) - this lkb is on a local rsb convert queue. | |
4987 | + * It will be rebuilt on the new master rsb's granted queue. Resend this | |
4988 | + * request to the new master. | |
4989 | + */ | |
4990 | + | |
b7b72b66 | 4991 | +int remote_stage(struct dlm_lkb *lkb, int state) |
c1c6733f AM |
4992 | +{ |
4993 | + int error; | |
4994 | + | |
4995 | + lkb->lkb_lockqueue_state = state; | |
4996 | + add_to_lockqueue(lkb); | |
4997 | + | |
4998 | + error = send_cluster_request(lkb, state); | |
4999 | + if (error < 0) { | |
b7b72b66 AM |
5000 | + log_error(lkb->lkb_resource->res_ls, "remote_stage error %d %x", |
5001 | + error, lkb->lkb_id); | |
c1c6733f AM |
5002 | + /* Leave on lockqueue, it will be resent to correct node during |
5003 | + * recovery. */ | |
c1c6733f AM |
5004 | + } |
5005 | + return 0; | |
5006 | +} | |
5007 | + | |
5008 | +/* | |
5009 | + * Requests received while the lockspace is in recovery get added to the | |
5010 | + * request queue and processed when recovery is complete. | |
5011 | + */ | |
5012 | + | |
b7b72b66 | 5013 | +void add_to_requestqueue(struct dlm_ls *ls, int nodeid, struct dlm_header *hd) |
c1c6733f AM |
5014 | +{ |
5015 | + struct rq_entry *entry; | |
b7b72b66 AM |
5016 | + int length = hd->rh_length; |
5017 | + | |
5018 | + if (test_bit(LSFL_REQUEST_WARN, &ls->ls_flags)) | |
5019 | + log_error(ls, "request during recovery from %u", nodeid); | |
c1c6733f AM |
5020 | + |
5021 | + if (in_nodes_gone(ls, nodeid)) | |
5022 | + return; | |
5023 | + | |
5024 | + entry = kmalloc(sizeof(struct rq_entry) + length, GFP_KERNEL); | |
5025 | + if (!entry) { | |
5026 | + // TODO something better | |
5027 | + printk("dlm: add_to_requestqueue: out of memory\n"); | |
5028 | + return; | |
5029 | + } | |
5030 | + | |
b7b72b66 | 5031 | + log_debug(ls, "add_to_requestq cmd %d fr %d", hd->rh_cmd, nodeid); |
c1c6733f | 5032 | + entry->rqe_nodeid = nodeid; |
b7b72b66 AM |
5033 | + memcpy(entry->rqe_request, hd, length); |
5034 | + | |
5035 | + down(&ls->ls_requestqueue_lock); | |
c1c6733f | 5036 | + list_add_tail(&entry->rqe_list, &ls->ls_requestqueue); |
b7b72b66 | 5037 | + up(&ls->ls_requestqueue_lock); |
c1c6733f AM |
5038 | +} |
5039 | + | |
b7b72b66 | 5040 | +int process_requestqueue(struct dlm_ls *ls) |
c1c6733f AM |
5041 | +{ |
5042 | + int error = 0, count = 0; | |
b7b72b66 AM |
5043 | + struct rq_entry *entry; |
5044 | + struct dlm_header *hd; | |
c1c6733f AM |
5045 | + |
5046 | + log_all(ls, "process held requests"); | |
5047 | + | |
b7b72b66 | 5048 | + down(&ls->ls_requestqueue_lock); |
c1c6733f | 5049 | + |
b7b72b66 AM |
5050 | + for (;;) { |
5051 | + if (list_empty(&ls->ls_requestqueue)) { | |
5052 | + up(&ls->ls_requestqueue_lock); | |
5053 | + error = 0; | |
c1c6733f AM |
5054 | + break; |
5055 | + } | |
5056 | + | |
b7b72b66 AM |
5057 | + entry = list_entry(ls->ls_requestqueue.next, struct rq_entry, |
5058 | + rqe_list); | |
5059 | + up(&ls->ls_requestqueue_lock); | |
5060 | + hd = (struct dlm_header *) entry->rqe_request; | |
5061 | + | |
5062 | + log_debug(ls, "process_requestq cmd %d fr %u", hd->rh_cmd, | |
5063 | + entry->rqe_nodeid); | |
5064 | + | |
5065 | + error = process_cluster_request(entry->rqe_nodeid, hd, TRUE); | |
c1c6733f | 5066 | + if (error == -EINTR) { |
b7b72b66 AM |
5067 | + /* entry is left on requestqueue */ |
5068 | + log_debug(ls, "process_requestqueue abort eintr"); | |
c1c6733f AM |
5069 | + break; |
5070 | + } | |
5071 | + | |
b7b72b66 | 5072 | + down(&ls->ls_requestqueue_lock); |
c1c6733f AM |
5073 | + list_del(&entry->rqe_list); |
5074 | + kfree(entry); | |
5075 | + count++; | |
b7b72b66 AM |
5076 | + |
5077 | + if (!test_bit(LSFL_LS_RUN, &ls->ls_flags)) { | |
5078 | + log_debug(ls, "process_requestqueue abort ls_run"); | |
5079 | + up(&ls->ls_requestqueue_lock); | |
5080 | + error = -EINTR; | |
5081 | + break; | |
5082 | + } | |
c1c6733f AM |
5083 | + } |
5084 | + | |
5085 | + log_all(ls, "processed %d requests", count); | |
5086 | + return error; | |
5087 | +} | |
5088 | + | |
b7b72b66 | 5089 | +void wait_requestqueue(struct dlm_ls *ls) |
c1c6733f | 5090 | +{ |
b7b72b66 AM |
5091 | + for (;;) { |
5092 | + down(&ls->ls_requestqueue_lock); | |
5093 | + if (list_empty(&ls->ls_requestqueue)) | |
5094 | + break; | |
5095 | + if (!test_bit(LSFL_LS_RUN, &ls->ls_flags)) | |
5096 | + break; | |
5097 | + up(&ls->ls_requestqueue_lock); | |
c1c6733f | 5098 | + schedule(); |
b7b72b66 AM |
5099 | + } |
5100 | + up(&ls->ls_requestqueue_lock); | |
c1c6733f AM |
5101 | +} |
5102 | + | |
5103 | +/* | |
5104 | + * Resdir requests (lookup or remove) and replies from before recovery are | |
5105 | + * invalid since the resdir was rebuilt. Clear them. Requests from nodes now | |
5106 | + * gone are also invalid. | |
5107 | + */ | |
5108 | + | |
b7b72b66 | 5109 | +void purge_requestqueue(struct dlm_ls *ls) |
c1c6733f AM |
5110 | +{ |
5111 | + int count = 0; | |
5112 | + struct rq_entry *entry, *safe; | |
b7b72b66 AM |
5113 | + struct dlm_header *hd; |
5114 | + struct dlm_lkb *lkb; | |
c1c6733f AM |
5115 | + |
5116 | + log_all(ls, "purge requests"); | |
5117 | + | |
b7b72b66 AM |
5118 | + down(&ls->ls_requestqueue_lock); |
5119 | + | |
c1c6733f | 5120 | + list_for_each_entry_safe(entry, safe, &ls->ls_requestqueue, rqe_list) { |
b7b72b66 | 5121 | + hd = (struct dlm_header *) entry->rqe_request; |
c1c6733f | 5122 | + |
b7b72b66 AM |
5123 | + if (hd->rh_cmd == GDLM_REMCMD_REM_RESDATA || |
5124 | + hd->rh_cmd == GDLM_REMCMD_LOOKUP || | |
c1c6733f AM |
5125 | + in_nodes_gone(ls, entry->rqe_nodeid)) { |
5126 | + | |
5127 | + list_del(&entry->rqe_list); | |
5128 | + kfree(entry); | |
5129 | + count++; | |
5130 | + | |
b7b72b66 | 5131 | + } else if (hd->rh_cmd == GDLM_REMCMD_LOCKREPLY) { |
c1c6733f AM |
5132 | + |
5133 | + /* | |
5134 | + * Replies to resdir lookups are invalid and must be | |
5135 | + * purged. The lookup requests are marked in | |
5136 | + * lockqueue_lkb_mark and will be resent in | |
5137 | + * resend_cluster_requests. The only way to check if | |
5138 | + * this is a lookup reply is to look at the | |
5139 | + * lockqueue_state of the lkb. | |
5140 | + */ | |
5141 | + | |
b7b72b66 AM |
5142 | + lkb = find_lock_by_id(ls, hd->rh_lkid); |
5143 | + DLM_ASSERT(lkb,); | |
c1c6733f AM |
5144 | + if (lkb->lkb_lockqueue_state == GDLM_LQSTATE_WAIT_RSB) { |
5145 | + list_del(&entry->rqe_list); | |
5146 | + kfree(entry); | |
5147 | + count++; | |
5148 | + } | |
5149 | + } | |
5150 | + } | |
b7b72b66 | 5151 | + up(&ls->ls_requestqueue_lock); |
c1c6733f AM |
5152 | + |
5153 | + log_all(ls, "purged %d requests", count); | |
5154 | +} | |
5155 | + | |
5156 | +/* | |
5157 | + * Check if there's a reply for the given lkid in the requestqueue. | |
5158 | + */ | |
5159 | + | |
b7b72b66 | 5160 | +int reply_in_requestqueue(struct dlm_ls *ls, int lkid) |
c1c6733f AM |
5161 | +{ |
5162 | + int rv = FALSE; | |
b7b72b66 AM |
5163 | + struct rq_entry *entry; |
5164 | + struct dlm_header *hd; | |
c1c6733f | 5165 | + |
b7b72b66 | 5166 | + down(&ls->ls_requestqueue_lock); |
c1c6733f | 5167 | + |
b7b72b66 AM |
5168 | + list_for_each_entry(entry, &ls->ls_requestqueue, rqe_list) { |
5169 | + hd = (struct dlm_header *) entry->rqe_request; | |
5170 | + if (hd->rh_cmd == GDLM_REMCMD_LOCKREPLY && hd->rh_lkid == lkid){ | |
5171 | + log_debug(ls, "reply_in_requestq cmd %d fr %d id %x", | |
5172 | + hd->rh_cmd, entry->rqe_nodeid, lkid); | |
c1c6733f AM |
5173 | + rv = TRUE; |
5174 | + break; | |
5175 | + } | |
5176 | + } | |
b7b72b66 | 5177 | + up(&ls->ls_requestqueue_lock); |
c1c6733f AM |
5178 | + |
5179 | + return rv; | |
5180 | +} | |
5181 | + | |
b7b72b66 | 5182 | +void allocate_and_copy_lvb(struct dlm_ls *ls, char **lvbptr, char *src) |
c1c6733f AM |
5183 | +{ |
5184 | + if (!*lvbptr) | |
5185 | + *lvbptr = allocate_lvb(ls); | |
5186 | + if (*lvbptr) | |
5187 | + memcpy(*lvbptr, src, DLM_LVB_LEN); | |
5188 | +} | |
5189 | + | |
5190 | +/* | |
5191 | + * Process a lockqueue LKB after it has had its remote processing complete and | |
b7b72b66 AM |
5192 | + * been pulled from the lockqueue. Runs in the context of the DLM recvd thread |
5193 | + * on the machine that requested the lock. | |
c1c6733f AM |
5194 | + */ |
5195 | + | |
b7b72b66 AM |
5196 | +static void process_lockqueue_reply(struct dlm_lkb *lkb, |
5197 | + struct dlm_reply *reply, | |
5198 | + uint32_t nodeid) | |
c1c6733f | 5199 | +{ |
b7b72b66 AM |
5200 | + struct dlm_rsb *rsb = lkb->lkb_resource; |
5201 | + struct dlm_ls *ls = rsb->res_ls; | |
5202 | + int oldstate, state = lkb->lkb_lockqueue_state; | |
c1c6733f | 5203 | + |
c1c6733f AM |
5204 | + if (state) |
5205 | + remove_from_lockqueue(lkb); | |
5206 | + | |
5207 | + switch (state) { | |
5208 | + case GDLM_LQSTATE_WAIT_RSB: | |
5209 | + | |
b7b72b66 AM |
5210 | + if (reply->rl_status) { |
5211 | + DLM_ASSERT(reply->rl_status == -EEXIST,); | |
b7b72b66 AM |
5212 | + if (rsb->res_nodeid == -1) { |
5213 | + msleep(500); | |
5214 | + remote_stage(lkb, GDLM_LQSTATE_WAIT_RSB); | |
5215 | + break; | |
5216 | + } | |
5217 | + } else { | |
5218 | + if (reply->rl_nodeid == our_nodeid()) { | |
5219 | + set_bit(RESFL_MASTER, &rsb->res_flags); | |
5220 | + rsb->res_nodeid = 0; | |
5221 | + } else { | |
5222 | + clear_bit(RESFL_MASTER, &rsb->res_flags); | |
5223 | + rsb->res_nodeid = reply->rl_nodeid; | |
5224 | + } | |
5225 | + } | |
c1c6733f | 5226 | + |
b7b72b66 AM |
5227 | + log_debug(ls, "(%d) lu rep %x fr %u %u", lkb->lkb_ownpid, |
5228 | + lkb->lkb_id, nodeid, | |
5229 | + rsb->res_nodeid); | |
c1c6733f | 5230 | + |
b7b72b66 AM |
5231 | + lkb->lkb_nodeid = rsb->res_nodeid; |
5232 | + dlm_lock_stage2(ls, lkb, rsb, lkb->lkb_lockqueue_flags); | |
c1c6733f AM |
5233 | + break; |
5234 | + | |
5235 | + case GDLM_LQSTATE_WAIT_CONVERT: | |
5236 | + case GDLM_LQSTATE_WAIT_CONDGRANT: | |
5237 | + | |
5238 | + /* | |
b7b72b66 AM |
5239 | + * the destination wasn't the master |
5240 | + * this implies the request was a CONDGRANT | |
5241 | + */ | |
5242 | + | |
5243 | + if (reply->rl_status == -EINVAL) { | |
5244 | + int master_nodeid; | |
5245 | + | |
5246 | + DLM_ASSERT(state == GDLM_LQSTATE_WAIT_CONDGRANT, ); | |
5247 | + | |
5248 | + log_debug(ls, "(%d) req reply einval %x fr %d r %d %s", | |
5249 | + lkb->lkb_ownpid, lkb->lkb_id, nodeid, | |
5250 | + rsb->res_nodeid, rsb->res_name); | |
5251 | + | |
5252 | + lkb_dequeue(lkb); | |
5253 | + | |
5254 | + if (rsb->res_nodeid == lkb->lkb_nodeid || rsb->res_nodeid == -1){ | |
5255 | + /* | |
5256 | + * We need to re-lookup the master and resend our | |
5257 | + * request to it. | |
5258 | + */ | |
5259 | + | |
5260 | + lkb->lkb_nodeid = -1; | |
5261 | + rsb->res_nodeid = -1; | |
5262 | + | |
5263 | + if (get_directory_nodeid(rsb) != our_nodeid()) | |
5264 | + remote_stage(lkb, GDLM_LQSTATE_WAIT_RSB); | |
5265 | + else { | |
5266 | + int error = dlm_dir_lookup(ls, our_nodeid(), | |
5267 | + rsb->res_name, | |
5268 | + rsb->res_length, | |
5269 | + &master_nodeid); | |
5270 | + if (error == -EEXIST) { | |
5271 | + /* don't expect this will happen */ | |
5272 | + log_all(ls, "EEXIST %x", lkb->lkb_id); | |
5273 | + print_lkb(lkb); | |
5274 | + print_rsb(rsb); | |
5275 | + } | |
5276 | + | |
5277 | + if (master_nodeid == our_nodeid()) { | |
5278 | + set_bit(RESFL_MASTER, &rsb->res_flags); | |
5279 | + master_nodeid = 0; | |
5280 | + } else | |
5281 | + clear_bit(RESFL_MASTER,&rsb->res_flags); | |
5282 | + | |
5283 | + rsb->res_nodeid = master_nodeid; | |
5284 | + lkb->lkb_nodeid = master_nodeid; | |
5285 | + | |
5286 | + dlm_lock_stage2(ls, lkb, rsb, | |
5287 | + lkb->lkb_lockqueue_flags); | |
5288 | + } | |
5289 | + } else { | |
5290 | + /* | |
5291 | + * Another request on this rsb has since found | |
5292 | + * the master, we'll use that one although it too | |
5293 | + * may be invalid requiring us to retry again. | |
5294 | + */ | |
5295 | + | |
5296 | + lkb->lkb_nodeid = rsb->res_nodeid; | |
5297 | + dlm_lock_stage2(ls, lkb, rsb, | |
5298 | + lkb->lkb_lockqueue_flags); | |
5299 | + } | |
5300 | + | |
5301 | + break; | |
5302 | + } | |
5303 | + | |
5304 | + | |
5305 | + /* | |
c1c6733f AM |
5306 | + * After a remote lock/conversion/grant request we put the lock |
5307 | + * on the right queue and send an AST if appropriate. Any lock | |
5308 | + * shuffling (eg newly granted locks because this one was | |
5309 | + * converted downwards) will be dealt with in separate messages | |
5310 | + * (which may be in the same network message) | |
5311 | + */ | |
5312 | + | |
5313 | + if (!lkb->lkb_remid) | |
5314 | + lkb->lkb_remid = reply->rl_lkid; | |
5315 | + | |
5316 | + /* | |
5317 | + * The remote request failed (we assume because of NOQUEUE). | |
5318 | + * If this is a new request (non-conv) the lkb was created just | |
5319 | + * for it so the lkb should be freed. If this was a | |
5320 | + * conversion, the lkb already existed so we should put it back | |
5321 | + * on the grant queue. | |
5322 | + */ | |
5323 | + | |
5324 | + if (reply->rl_status != 0) { | |
b7b72b66 | 5325 | + DLM_ASSERT(reply->rl_status == -EAGAIN,); |
c1c6733f AM |
5326 | + |
5327 | + if (state == GDLM_LQSTATE_WAIT_CONDGRANT) { | |
5328 | + res_lkb_dequeue(lkb); | |
b7b72b66 AM |
5329 | + lkb->lkb_retstatus = reply->rl_status; |
5330 | + queue_ast(lkb, AST_COMP | AST_DEL, 0); | |
5331 | + } else { | |
c1c6733f | 5332 | + res_lkb_swqueue(rsb, lkb, GDLM_LKSTS_GRANTED); |
b7b72b66 AM |
5333 | + lkb->lkb_retstatus = reply->rl_status; |
5334 | + queue_ast(lkb, AST_COMP, 0); | |
5335 | + } | |
c1c6733f AM |
5336 | + break; |
5337 | + } | |
5338 | + | |
5339 | + /* | |
5340 | + * The remote request was successful in granting the request or | |
5341 | + * queuing it to be granted later. Add the lkb to the | |
5342 | + * appropriate rsb queue. | |
5343 | + */ | |
5344 | + | |
5345 | + switch (reply->rl_lockstate) { | |
5346 | + case GDLM_LKSTS_GRANTED: | |
5347 | + | |
5348 | + /* Compact version of grant_lock(). */ | |
5349 | + | |
5350 | + down_write(&rsb->res_lock); | |
5351 | + if (lkb->lkb_flags & GDLM_LKFLG_VALBLK) | |
5352 | + memcpy(lkb->lkb_lvbptr, reply->rl_lvb, | |
5353 | + DLM_LVB_LEN); | |
5354 | + | |
5355 | + lkb->lkb_grmode = lkb->lkb_rqmode; | |
5356 | + lkb->lkb_rqmode = DLM_LOCK_IV; | |
5357 | + lkb_swqueue(rsb, lkb, GDLM_LKSTS_GRANTED); | |
5358 | + | |
5359 | + if (lkb->lkb_range) { | |
5360 | + lkb->lkb_range[GR_RANGE_START] = | |
5361 | + lkb->lkb_range[RQ_RANGE_START]; | |
5362 | + lkb->lkb_range[GR_RANGE_END] = | |
5363 | + lkb->lkb_range[RQ_RANGE_END]; | |
5364 | + } | |
5365 | + up_write(&rsb->res_lock); | |
5366 | + | |
5367 | + lkb->lkb_retstatus = 0; | |
b7b72b66 | 5368 | + queue_ast(lkb, AST_COMP, 0); |
c1c6733f AM |
5369 | + break; |
5370 | + | |
5371 | + case GDLM_LKSTS_WAITING: | |
5372 | + | |
5373 | + if (lkb->lkb_status != GDLM_LKSTS_GRANTED) | |
5374 | + res_lkb_swqueue(rsb, lkb, GDLM_LKSTS_WAITING); | |
5375 | + else | |
5376 | + log_error(ls, "wait reply for granted %x %u", | |
5377 | + lkb->lkb_id, lkb->lkb_nodeid); | |
5378 | + break; | |
5379 | + | |
5380 | + case GDLM_LKSTS_CONVERT: | |
5381 | + | |
5382 | + if (lkb->lkb_status != GDLM_LKSTS_GRANTED) | |
5383 | + res_lkb_swqueue(rsb, lkb, GDLM_LKSTS_CONVERT); | |
5384 | + else | |
5385 | + log_error(ls, "convert reply for granted %x %u", | |
5386 | + lkb->lkb_id, lkb->lkb_nodeid); | |
5387 | + break; | |
5388 | + | |
5389 | + default: | |
5390 | + log_error(ls, "process_lockqueue_reply state %d", | |
5391 | + reply->rl_lockstate); | |
5392 | + } | |
5393 | + | |
5394 | + break; | |
5395 | + | |
5396 | + case GDLM_LQSTATE_WAIT_UNLOCK: | |
5397 | + | |
5398 | + /* | |
5399 | + * Unlocks should never fail. Update local lock info. This | |
5400 | + * always sends completion AST with status in lksb | |
5401 | + */ | |
5402 | + | |
b7b72b66 | 5403 | + DLM_ASSERT(reply->rl_status == 0,); |
c1c6733f AM |
5404 | + oldstate = res_lkb_dequeue(lkb); |
5405 | + | |
5406 | + /* Differentiate between unlocks and conversion cancellations */ | |
b7b72b66 AM |
5407 | + if (lkb->lkb_lockqueue_flags & DLM_LKF_CANCEL) { |
5408 | + if (oldstate == GDLM_LKSTS_CONVERT) { | |
5409 | + res_lkb_enqueue(lkb->lkb_resource, lkb, | |
5410 | + GDLM_LKSTS_GRANTED); | |
5411 | + lkb->lkb_retstatus = -DLM_ECANCEL; | |
5412 | + queue_ast(lkb, AST_COMP, 0); | |
5413 | + } else | |
5414 | + log_error(ls, "cancel state %d", oldstate); | |
c1c6733f | 5415 | + } else { |
b7b72b66 AM |
5416 | + DLM_ASSERT(oldstate == GDLM_LKSTS_GRANTED, |
5417 | + print_lkb(lkb);); | |
5418 | + | |
c1c6733f | 5419 | + lkb->lkb_retstatus = -DLM_EUNLOCK; |
b7b72b66 | 5420 | + queue_ast(lkb, AST_COMP | AST_DEL, 0); |
c1c6733f | 5421 | + } |
c1c6733f AM |
5422 | + break; |
5423 | + | |
5424 | + default: | |
5425 | + log_error(ls, "process_lockqueue_reply id %x state %d", | |
5426 | + lkb->lkb_id, state); | |
5427 | + } | |
5428 | +} | |
5429 | + | |
5430 | +/* | |
5431 | + * Tell a remote node to grant a lock. This happens when we are the master | |
5432 | + * copy for a lock that is actually held on a remote node. The remote end is | |
5433 | + * also responsible for sending the completion AST. | |
5434 | + */ | |
5435 | + | |
b7b72b66 | 5436 | +void remote_grant(struct dlm_lkb *lkb) |
c1c6733f AM |
5437 | +{ |
5438 | + struct writequeue_entry *e; | |
b7b72b66 | 5439 | + struct dlm_request *req; |
c1c6733f AM |
5440 | + |
5441 | + // TODO Error handling | |
5442 | + e = lowcomms_get_buffer(lkb->lkb_nodeid, | |
b7b72b66 | 5443 | + sizeof(struct dlm_request), |
c1c6733f AM |
5444 | + lkb->lkb_resource->res_ls->ls_allocation, |
5445 | + (char **) &req); | |
5446 | + if (!e) | |
5447 | + return; | |
5448 | + | |
5449 | + req->rr_header.rh_cmd = GDLM_REMCMD_LOCKGRANT; | |
b7b72b66 | 5450 | + req->rr_header.rh_length = sizeof(struct dlm_request); |
c1c6733f AM |
5451 | + req->rr_header.rh_flags = 0; |
5452 | + req->rr_header.rh_lkid = lkb->lkb_id; | |
5453 | + req->rr_header.rh_lockspace = lkb->lkb_resource->res_ls->ls_global_id; | |
5454 | + req->rr_remlkid = lkb->lkb_remid; | |
5455 | + req->rr_flags = 0; | |
5456 | + | |
5457 | + if (lkb->lkb_flags & GDLM_LKFLG_DEMOTED) { | |
5458 | + /* This is a confusing non-standard use of rr_flags which is | |
5459 | + * usually used to pass lockqueue_flags. */ | |
5460 | + req->rr_flags |= GDLM_LKFLG_DEMOTED; | |
5461 | + } | |
5462 | + | |
5463 | + add_request_lvb(lkb, req); | |
5464 | + midcomms_send_buffer(&req->rr_header, e); | |
5465 | +} | |
5466 | + | |
b7b72b66 | 5467 | +void reply_and_grant(struct dlm_lkb *lkb) |
c1c6733f | 5468 | +{ |
b7b72b66 AM |
5469 | + struct dlm_request *req = lkb->lkb_request; |
5470 | + struct dlm_reply *reply; | |
c1c6733f AM |
5471 | + struct writequeue_entry *e; |
5472 | + | |
5473 | + // TODO Error handling | |
5474 | + e = lowcomms_get_buffer(lkb->lkb_nodeid, | |
b7b72b66 | 5475 | + sizeof(struct dlm_reply), |
c1c6733f AM |
5476 | + lkb->lkb_resource->res_ls->ls_allocation, |
5477 | + (char **) &reply); | |
5478 | + if (!e) | |
5479 | + return; | |
5480 | + | |
5481 | + reply->rl_header.rh_cmd = GDLM_REMCMD_LOCKREPLY; | |
5482 | + reply->rl_header.rh_flags = 0; | |
b7b72b66 | 5483 | + reply->rl_header.rh_length = sizeof(struct dlm_reply); |
c1c6733f AM |
5484 | + reply->rl_header.rh_lkid = req->rr_header.rh_lkid; |
5485 | + reply->rl_header.rh_lockspace = req->rr_header.rh_lockspace; | |
5486 | + | |
5487 | + reply->rl_status = lkb->lkb_retstatus; | |
5488 | + reply->rl_lockstate = lkb->lkb_status; | |
5489 | + reply->rl_lkid = lkb->lkb_id; | |
5490 | + | |
b7b72b66 | 5491 | + DLM_ASSERT(!(lkb->lkb_flags & GDLM_LKFLG_DEMOTED),); |
c1c6733f AM |
5492 | + |
5493 | + lkb->lkb_request = NULL; | |
5494 | + | |
5495 | + add_reply_lvb(lkb, reply); | |
5496 | + midcomms_send_buffer(&reply->rl_header, e); | |
5497 | +} | |
5498 | + | |
5499 | +/* | |
5500 | + * Request removal of a dead entry in the resource directory | |
5501 | + */ | |
5502 | + | |
b7b72b66 AM |
5503 | +void remote_remove_direntry(struct dlm_ls *ls, int nodeid, char *name, |
5504 | + int namelen) | |
c1c6733f AM |
5505 | +{ |
5506 | + struct writequeue_entry *e; | |
b7b72b66 | 5507 | + struct dlm_request *req; |
c1c6733f AM |
5508 | + |
5509 | + if (!test_bit(LSFL_LS_RUN, &ls->ls_flags)) { | |
b7b72b66 | 5510 | + struct dlm_rcom *rc = allocate_rcom_buffer(ls); |
c1c6733f AM |
5511 | + |
5512 | + memcpy(rc->rc_buf, name, namelen); | |
5513 | + rc->rc_datalen = namelen; | |
5514 | + | |
5515 | + rcom_send_message(ls, nodeid, RECCOMM_REMRESDATA, rc, 0); | |
5516 | + | |
5517 | + free_rcom_buffer(rc); | |
5518 | + return; | |
5519 | + } | |
5520 | + // TODO Error handling | |
5521 | + e = lowcomms_get_buffer(nodeid, | |
b7b72b66 | 5522 | + sizeof(struct dlm_request) + namelen - 1, |
c1c6733f AM |
5523 | + ls->ls_allocation, (char **) &req); |
5524 | + if (!e) | |
5525 | + return; | |
5526 | + | |
b7b72b66 | 5527 | + memset(req, 0, sizeof(struct dlm_request) + namelen - 1); |
c1c6733f AM |
5528 | + req->rr_header.rh_cmd = GDLM_REMCMD_REM_RESDATA; |
5529 | + req->rr_header.rh_length = | |
b7b72b66 | 5530 | + sizeof(struct dlm_request) + namelen - 1; |
c1c6733f AM |
5531 | + req->rr_header.rh_flags = 0; |
5532 | + req->rr_header.rh_lkid = 0; | |
5533 | + req->rr_header.rh_lockspace = ls->ls_global_id; | |
5534 | + req->rr_remlkid = 0; | |
c1c6733f AM |
5535 | + memcpy(req->rr_name, name, namelen); |
5536 | + | |
5537 | + midcomms_send_buffer(&req->rr_header, e); | |
5538 | +} | |
5539 | + | |
5540 | +/* | |
5541 | + * Send remote cluster request to directory or master node before the request | |
5542 | + * is put on the lock queue. Runs in the context of the locking caller. | |
5543 | + */ | |
5544 | + | |
b7b72b66 | 5545 | +int send_cluster_request(struct dlm_lkb *lkb, int state) |
c1c6733f AM |
5546 | +{ |
5547 | + uint32_t target_nodeid; | |
b7b72b66 AM |
5548 | + struct dlm_rsb *rsb = lkb->lkb_resource; |
5549 | + struct dlm_ls *ls = rsb->res_ls; | |
5550 | + struct dlm_request *req; | |
c1c6733f AM |
5551 | + struct writequeue_entry *e; |
5552 | + | |
c1c6733f AM |
5553 | + if (state == GDLM_LQSTATE_WAIT_RSB) |
5554 | + target_nodeid = get_directory_nodeid(rsb); | |
b7b72b66 AM |
5555 | + else |
5556 | + target_nodeid = lkb->lkb_nodeid; | |
c1c6733f | 5557 | + |
b7b72b66 AM |
5558 | + /* during recovery it's valid for target_nodeid to equal our own; |
5559 | + resend_cluster_requests does this to get requests back on track */ | |
5560 | + | |
5561 | + DLM_ASSERT(target_nodeid && target_nodeid != -1, | |
5562 | + print_lkb(lkb); | |
5563 | + print_rsb(rsb); | |
5564 | + printk("target_nodeid %u\n", target_nodeid);); | |
c1c6733f AM |
5565 | + |
5566 | + if (!test_bit(LSFL_LS_RUN, &ls->ls_flags)) { | |
5567 | + /* this may happen when called by resend_cluster_requests */ | |
5568 | + log_error(ls, "send_cluster_request to %u state %d recovery", | |
5569 | + target_nodeid, state); | |
5570 | + } | |
5571 | + | |
5572 | + e = lowcomms_get_buffer(target_nodeid, | |
b7b72b66 | 5573 | + sizeof(struct dlm_request) + |
c1c6733f AM |
5574 | + rsb->res_length - 1, ls->ls_allocation, |
5575 | + (char **) &req); | |
5576 | + if (!e) | |
5577 | + return -ENOBUFS; | |
b7b72b66 | 5578 | + memset(req, 0, sizeof(struct dlm_request) + rsb->res_length - 1); |
c1c6733f AM |
5579 | + |
5580 | + /* Common stuff, some are just defaults */ | |
5581 | + | |
5582 | + if (lkb->lkb_bastaddr) | |
b7b72b66 | 5583 | + req->rr_asts = AST_BAST; |
c1c6733f | 5584 | + if (lkb->lkb_astaddr) |
b7b72b66 | 5585 | + req->rr_asts |= AST_COMP; |
c1c6733f AM |
5586 | + if (lkb->lkb_parent) |
5587 | + req->rr_remparid = lkb->lkb_parent->lkb_remid; | |
5588 | + | |
5589 | + req->rr_flags = lkb->lkb_lockqueue_flags; | |
5590 | + req->rr_rqmode = lkb->lkb_rqmode; | |
5591 | + req->rr_remlkid = lkb->lkb_remid; | |
b7b72b66 | 5592 | + req->rr_pid = lkb->lkb_ownpid; |
c1c6733f | 5593 | + req->rr_header.rh_length = |
b7b72b66 | 5594 | + sizeof(struct dlm_request) + rsb->res_length - 1; |
c1c6733f AM |
5595 | + req->rr_header.rh_flags = 0; |
5596 | + req->rr_header.rh_lkid = lkb->lkb_id; | |
5597 | + req->rr_header.rh_lockspace = ls->ls_global_id; | |
5598 | + | |
5599 | + switch (state) { | |
5600 | + | |
5601 | + case GDLM_LQSTATE_WAIT_RSB: | |
5602 | + | |
b7b72b66 AM |
5603 | + DLM_ASSERT(!lkb->lkb_parent, |
5604 | + print_lkb(lkb); | |
5605 | + print_rsb(rsb);); | |
5606 | + | |
5607 | + log_debug(ls, "(%d) send lu %x to %u", | |
5608 | + lkb->lkb_ownpid, lkb->lkb_id, target_nodeid); | |
c1c6733f AM |
5609 | + |
5610 | + req->rr_header.rh_cmd = GDLM_REMCMD_LOOKUP; | |
5611 | + memcpy(req->rr_name, rsb->res_name, rsb->res_length); | |
5612 | + break; | |
5613 | + | |
5614 | + case GDLM_LQSTATE_WAIT_CONVERT: | |
5615 | + | |
b7b72b66 AM |
5616 | + DLM_ASSERT(lkb->lkb_nodeid == rsb->res_nodeid, |
5617 | + print_lkb(lkb); | |
5618 | + print_rsb(rsb);); | |
5619 | + | |
5620 | + log_debug(ls, "(%d) send cv %x to %u", | |
5621 | + lkb->lkb_ownpid, lkb->lkb_id, target_nodeid); | |
5622 | + | |
c1c6733f AM |
5623 | + req->rr_header.rh_cmd = GDLM_REMCMD_CONVREQUEST; |
5624 | + if (lkb->lkb_range) { | |
5625 | + req->rr_flags |= GDLM_LKFLG_RANGE; | |
5626 | + req->rr_range_start = lkb->lkb_range[RQ_RANGE_START]; | |
5627 | + req->rr_range_end = lkb->lkb_range[RQ_RANGE_END]; | |
5628 | + } | |
5629 | + break; | |
5630 | + | |
5631 | + case GDLM_LQSTATE_WAIT_CONDGRANT: | |
5632 | + | |
b7b72b66 AM |
5633 | + DLM_ASSERT(lkb->lkb_nodeid == rsb->res_nodeid, |
5634 | + print_lkb(lkb); | |
5635 | + print_rsb(rsb);); | |
5636 | + | |
5637 | + log_debug(ls, "(%d) send rq %x to %u", | |
5638 | + lkb->lkb_ownpid, lkb->lkb_id, target_nodeid); | |
5639 | + | |
c1c6733f | 5640 | + req->rr_header.rh_cmd = GDLM_REMCMD_LOCKREQUEST; |
c1c6733f AM |
5641 | + memcpy(req->rr_name, rsb->res_name, rsb->res_length); |
5642 | + if (lkb->lkb_range) { | |
5643 | + req->rr_flags |= GDLM_LKFLG_RANGE; | |
5644 | + req->rr_range_start = lkb->lkb_range[RQ_RANGE_START]; | |
5645 | + req->rr_range_end = lkb->lkb_range[RQ_RANGE_END]; | |
5646 | + } | |
5647 | + break; | |
5648 | + | |
5649 | + case GDLM_LQSTATE_WAIT_UNLOCK: | |
5650 | + | |
b7b72b66 AM |
5651 | + log_debug(ls, "(%d) send un %x to %u", |
5652 | + lkb->lkb_ownpid, lkb->lkb_id, target_nodeid); | |
5653 | + | |
c1c6733f AM |
5654 | + req->rr_header.rh_cmd = GDLM_REMCMD_UNLOCKREQUEST; |
5655 | + break; | |
5656 | + | |
5657 | + default: | |
b7b72b66 | 5658 | + DLM_ASSERT(0, printk("Unknown cluster request\n");); |
c1c6733f AM |
5659 | + } |
5660 | + | |
5661 | + add_request_lvb(lkb, req); | |
5662 | + midcomms_send_buffer(&req->rr_header, e); | |
5663 | + | |
5664 | + return 0; | |
5665 | +} | |
5666 | + | |
5667 | +/* | |
5668 | + * We got a request from another cluster node, process it and return an info | |
5669 | + * structure with the lock state/LVB etc as required. Executes in the DLM's | |
5670 | + * recvd thread. | |
5671 | + */ | |
5672 | + | |
b7b72b66 | 5673 | +int process_cluster_request(int nodeid, struct dlm_header *req, int recovery) |
c1c6733f | 5674 | +{ |
b7b72b66 AM |
5675 | + struct dlm_ls *lspace; |
5676 | + struct dlm_lkb *lkb = NULL; | |
5677 | + struct dlm_rsb *rsb; | |
c1c6733f | 5678 | + int send_reply = 0, status = 0, namelen; |
b7b72b66 AM |
5679 | + struct dlm_request *freq = (struct dlm_request *) req; |
5680 | + struct dlm_reply *rp = (struct dlm_reply *) req; | |
5681 | + struct dlm_reply reply; | |
c1c6733f AM |
5682 | + |
5683 | + lspace = find_lockspace_by_global_id(req->rh_lockspace); | |
5684 | + | |
5685 | + if (!lspace) { | |
5686 | + log_print("process_cluster_request invalid lockspace %x " | |
5687 | + "from %d req %u", req->rh_lockspace, nodeid, | |
5688 | + req->rh_cmd); | |
b7b72b66 | 5689 | + return -EINVAL; |
c1c6733f AM |
5690 | + } |
5691 | + | |
5692 | + /* wait for recoverd to drain requestqueue */ | |
5693 | + if (!recovery) | |
5694 | + wait_requestqueue(lspace); | |
5695 | + | |
5696 | + /* | |
5697 | + * If we're in recovery then queue the request for later. Otherwise, | |
5698 | + * we still need to get the "in_recovery" lock to make sure the | |
5699 | + * recovery itself doesn't start until we are done. | |
5700 | + */ | |
5701 | + retry: | |
5702 | + if (!test_bit(LSFL_LS_RUN, &lspace->ls_flags)) { | |
b7b72b66 AM |
5703 | + if (!recovery) |
5704 | + add_to_requestqueue(lspace, nodeid, req); | |
c1c6733f AM |
5705 | + status = -EINTR; |
5706 | + goto out; | |
5707 | + } | |
5708 | + if (!down_read_trylock(&lspace->ls_in_recovery)) { | |
5709 | + schedule(); | |
5710 | + goto retry; | |
5711 | + } | |
5712 | + | |
5713 | + | |
5714 | + /* | |
5715 | + * Process the request. | |
5716 | + */ | |
5717 | + | |
5718 | + switch (req->rh_cmd) { | |
5719 | + | |
5720 | + case GDLM_REMCMD_LOOKUP: | |
5721 | + { | |
b7b72b66 | 5722 | + uint32_t dir_nodeid, r_nodeid; |
c1c6733f | 5723 | + int status; |
c1c6733f AM |
5724 | + |
5725 | + namelen = freq->rr_header.rh_length - sizeof(*freq) + 1; | |
5726 | + | |
5727 | + dir_nodeid = name_to_directory_nodeid(lspace, | |
5728 | + freq->rr_name, | |
5729 | + namelen); | |
5730 | + if (dir_nodeid != our_nodeid()) | |
5731 | + log_debug(lspace, "ignoring directory lookup"); | |
5732 | + | |
b7b72b66 AM |
5733 | + status = dlm_dir_lookup(lspace, nodeid, freq->rr_name, |
5734 | + namelen, &r_nodeid); | |
c1c6733f AM |
5735 | + reply.rl_status = status; |
5736 | + reply.rl_lockstate = 0; | |
b7b72b66 | 5737 | + reply.rl_nodeid = r_nodeid; |
c1c6733f AM |
5738 | + } |
5739 | + send_reply = 1; | |
5740 | + break; | |
5741 | + | |
5742 | + case GDLM_REMCMD_REM_RESDATA: | |
5743 | + | |
5744 | + namelen = freq->rr_header.rh_length - sizeof(*freq) + 1; | |
b7b72b66 | 5745 | + dlm_dir_remove(lspace, nodeid, freq->rr_name, namelen); |
c1c6733f AM |
5746 | + break; |
5747 | + | |
5748 | + case GDLM_REMCMD_LOCKREQUEST: | |
5749 | + | |
5750 | + lkb = remote_stage2(nodeid, lspace, freq); | |
5751 | + if (lkb) { | |
5752 | + lkb->lkb_request = freq; | |
b7b72b66 AM |
5753 | + lkb->lkb_ownpid = freq->rr_pid; |
5754 | + if (lkb->lkb_retstatus != -EINVAL) | |
5755 | + dlm_lock_stage3(lkb); | |
c1c6733f AM |
5756 | + |
5757 | + /* | |
5758 | + * If the request was granted in lock_stage3, then a | |
5759 | + * reply message was already sent in combination with | |
5760 | + * the grant message and lkb_request is NULL. | |
5761 | + */ | |
5762 | + | |
5763 | + if (lkb->lkb_request) { | |
5764 | + lkb->lkb_request = NULL; | |
5765 | + send_reply = 1; | |
5766 | + reply.rl_status = lkb->lkb_retstatus; | |
5767 | + reply.rl_lockstate = lkb->lkb_status; | |
5768 | + reply.rl_lkid = lkb->lkb_id; | |
5769 | + | |
5770 | + /* | |
5771 | + * If the request could not be granted and the | |
5772 | + * user won't wait, then free up the LKB | |
5773 | + */ | |
5774 | + | |
b7b72b66 | 5775 | + if (lkb->lkb_retstatus == -EAGAIN) { |
c1c6733f AM |
5776 | + rsb = lkb->lkb_resource; |
5777 | + release_lkb(lspace, lkb); | |
5778 | + release_rsb(rsb); | |
5779 | + lkb = NULL; | |
5780 | + } | |
b7b72b66 AM |
5781 | + else if (lkb->lkb_retstatus == -EINVAL) { |
5782 | + release_lkb(lspace, lkb); | |
5783 | + lkb = NULL; | |
5784 | + } | |
c1c6733f AM |
5785 | + } |
5786 | + } else { | |
5787 | + reply.rl_status = -ENOMEM; | |
5788 | + send_reply = 1; | |
5789 | + } | |
5790 | + break; | |
5791 | + | |
5792 | + case GDLM_REMCMD_CONVREQUEST: | |
5793 | + | |
5794 | + lkb = find_lock_by_id(lspace, freq->rr_remlkid); | |
5795 | + | |
c783755a | 5796 | + |
b7b72b66 AM |
5797 | + DLM_ASSERT(lkb, |
5798 | + print_request(freq); | |
5799 | + printk("nodeid %u\n", nodeid);); | |
5800 | + | |
5801 | + rsb = lkb->lkb_resource; | |
c1c6733f | 5802 | + |
b7b72b66 AM |
5803 | + DLM_ASSERT(rsb, |
5804 | + print_lkb(lkb); | |
5805 | + print_request(freq); | |
5806 | + printk("nodeid %u\n", nodeid);); | |
5807 | + | |
5808 | + DLM_ASSERT(!rsb->res_nodeid, | |
5809 | + print_lkb(lkb); | |
5810 | + print_rsb(rsb); | |
5811 | + print_request(freq); | |
5812 | + printk("nodeid %u\n", nodeid);); | |
5813 | + | |
5814 | + DLM_ASSERT(lkb->lkb_flags & GDLM_LKFLG_MSTCPY, | |
5815 | + print_lkb(lkb); | |
5816 | + print_rsb(rsb); | |
5817 | + print_request(freq); | |
5818 | + printk("nodeid %u\n", nodeid);); | |
5819 | + | |
5820 | + DLM_ASSERT(lkb->lkb_status == GDLM_LKSTS_GRANTED, | |
5821 | + print_lkb(lkb); | |
5822 | + print_rsb(rsb); | |
5823 | + print_request(freq); | |
5824 | + printk("nodeid %u\n", nodeid);); | |
c1c6733f | 5825 | + |
c783755a AM |
5826 | + /* Update orphan lock status */ |
5827 | + if (freq->rr_flags & DLM_LKF_ORPHAN) { | |
5828 | + lkb->lkb_flags |= GDLM_LKFLG_ORPHAN; | |
5829 | + } | |
5830 | + | |
c1c6733f AM |
5831 | + lkb->lkb_rqmode = freq->rr_rqmode; |
5832 | + lkb->lkb_lockqueue_flags = freq->rr_flags; | |
5833 | + lkb->lkb_request = freq; | |
5834 | + lkb->lkb_flags &= ~GDLM_LKFLG_DEMOTED; | |
5835 | + | |
b7b72b66 AM |
5836 | + if (lkb->lkb_flags & GDLM_LKFLG_VALBLK || |
5837 | + freq->rr_flags & DLM_LKF_VALBLK) { | |
c1c6733f AM |
5838 | + lkb->lkb_flags |= GDLM_LKFLG_VALBLK; |
5839 | + allocate_and_copy_lvb(lspace, &lkb->lkb_lvbptr, | |
5840 | + freq->rr_lvb); | |
5841 | + } | |
5842 | + | |
5843 | + if (freq->rr_flags & GDLM_LKFLG_RANGE) { | |
5844 | + if (lkb_set_range(lspace, lkb, freq->rr_range_start, | |
5845 | + freq->rr_range_end)) { | |
5846 | + reply.rl_status = -ENOMEM; | |
5847 | + send_reply = 1; | |
5848 | + goto out; | |
5849 | + } | |
5850 | + } | |
5851 | + | |
b7b72b66 AM |
5852 | + log_debug(lspace, "(%d) cv %u from %u %x \"%s\"", |
5853 | + lkb->lkb_ownpid, lkb->lkb_rqmode, nodeid, | |
5854 | + lkb->lkb_id, rsb->res_name); | |
5855 | + | |
c1c6733f AM |
5856 | + dlm_convert_stage2(lkb, FALSE); |
5857 | + | |
5858 | + /* | |
5859 | + * If the conv request was granted in stage2, then a reply | |
5860 | + * message was already sent in combination with the grant | |
5861 | + * message. | |
5862 | + */ | |
5863 | + | |
5864 | + if (lkb->lkb_request) { | |
5865 | + lkb->lkb_request = NULL; | |
5866 | + send_reply = 1; | |
5867 | + reply.rl_status = lkb->lkb_retstatus; | |
5868 | + reply.rl_lockstate = lkb->lkb_status; | |
5869 | + reply.rl_lkid = lkb->lkb_id; | |
5870 | + } | |
5871 | + break; | |
5872 | + | |
5873 | + case GDLM_REMCMD_LOCKREPLY: | |
5874 | + | |
b7b72b66 | 5875 | + lkb = find_lock_by_id(lspace, req->rh_lkid); |
c1c6733f | 5876 | + |
b7b72b66 AM |
5877 | + DLM_ASSERT(lkb, |
5878 | + print_reply(rp); | |
5879 | + printk("nodeid %u\n", nodeid);); | |
c1c6733f | 5880 | + |
b7b72b66 AM |
5881 | + DLM_ASSERT(!(lkb->lkb_flags & GDLM_LKFLG_MSTCPY), |
5882 | + print_lkb(lkb); | |
5883 | + print_reply(rp); | |
5884 | + printk("nodeid %u\n", nodeid);); | |
5885 | + | |
5886 | + process_lockqueue_reply(lkb, rp, nodeid); | |
c1c6733f AM |
5887 | + break; |
5888 | + | |
5889 | + case GDLM_REMCMD_LOCKGRANT: | |
5890 | + | |
5891 | + /* | |
5892 | + * Remote lock has been granted asynchronously. Do a compact | |
5893 | + * version of what grant_lock() does. | |
5894 | + */ | |
5895 | + | |
5896 | + lkb = find_lock_by_id(lspace, freq->rr_remlkid); | |
5897 | + | |
b7b72b66 AM |
5898 | + DLM_ASSERT(lkb, |
5899 | + print_request(freq); | |
5900 | + printk("nodeid %u\n", nodeid);); | |
c1c6733f AM |
5901 | + |
5902 | + rsb = lkb->lkb_resource; | |
5903 | + | |
b7b72b66 AM |
5904 | + DLM_ASSERT(rsb, |
5905 | + print_lkb(lkb); | |
5906 | + print_request(freq); | |
5907 | + printk("nodeid %u\n", nodeid);); | |
5908 | + | |
5909 | + DLM_ASSERT(rsb->res_nodeid, | |
5910 | + print_lkb(lkb); | |
5911 | + print_rsb(rsb); | |
5912 | + print_request(freq); | |
5913 | + printk("nodeid %u\n", nodeid);); | |
5914 | + | |
5915 | + DLM_ASSERT(!(lkb->lkb_flags & GDLM_LKFLG_MSTCPY), | |
5916 | + print_lkb(lkb); | |
5917 | + print_rsb(rsb); | |
5918 | + print_request(freq); | |
5919 | + printk("nodeid %u\n", nodeid);); | |
5920 | + | |
5921 | + if (lkb->lkb_lockqueue_state) { | |
5922 | + log_debug(rsb->res_ls, "grant lock on lockqueue %d", | |
5923 | + lkb->lkb_lockqueue_state); | |
5924 | + | |
5925 | + /* Don't grant locks that are waiting for an unlock */ | |
5926 | + if (lkb->lkb_lockqueue_state == GDLM_LQSTATE_WAIT_UNLOCK) | |
5927 | + return 0; | |
5928 | + | |
5929 | + print_lkb(lkb); | |
5930 | + print_request(freq); | |
b7b72b66 AM |
5931 | + remove_from_lockqueue(lkb); |
5932 | + if (!lkb->lkb_remid) | |
5933 | + lkb->lkb_remid = req->rh_lkid; | |
5934 | + } | |
c1c6733f AM |
5935 | + |
5936 | + down_write(&rsb->res_lock); | |
5937 | + | |
5938 | + if (lkb->lkb_flags & GDLM_LKFLG_VALBLK) | |
b7b72b66 | 5939 | + allocate_and_copy_lvb(lspace, &lkb->lkb_lvbptr, freq->rr_lvb); |
c1c6733f AM |
5940 | + |
5941 | + lkb->lkb_grmode = lkb->lkb_rqmode; | |
5942 | + lkb->lkb_rqmode = DLM_LOCK_IV; | |
5943 | + | |
5944 | + if (lkb->lkb_range) { | |
5945 | + lkb->lkb_range[GR_RANGE_START] = | |
5946 | + lkb->lkb_range[RQ_RANGE_START]; | |
5947 | + lkb->lkb_range[GR_RANGE_END] = | |
5948 | + lkb->lkb_range[RQ_RANGE_END]; | |
5949 | + } | |
5950 | + | |
5951 | + lkb_swqueue(rsb, lkb, GDLM_LKSTS_GRANTED); | |
5952 | + up_write(&rsb->res_lock); | |
5953 | + | |
5954 | + if (freq->rr_flags & GDLM_LKFLG_DEMOTED) | |
5955 | + lkb->lkb_flags |= GDLM_LKFLG_DEMOTED; | |
5956 | + | |
5957 | + lkb->lkb_retstatus = 0; | |
b7b72b66 | 5958 | + queue_ast(lkb, AST_COMP, 0); |
c1c6733f AM |
5959 | + break; |
5960 | + | |
5961 | + case GDLM_REMCMD_SENDBAST: | |
5962 | + | |
5963 | + lkb = find_lock_by_id(lspace, freq->rr_remlkid); | |
5964 | + | |
b7b72b66 AM |
5965 | + DLM_ASSERT(lkb, |
5966 | + print_request(freq); | |
5967 | + printk("nodeid %u\n", nodeid);); | |
c1c6733f AM |
5968 | + |
5969 | + if (lkb->lkb_status == GDLM_LKSTS_GRANTED) | |
b7b72b66 | 5970 | + queue_ast(lkb, AST_BAST, freq->rr_rqmode); |
c1c6733f AM |
5971 | + break; |
5972 | + | |
5973 | + case GDLM_REMCMD_SENDCAST: | |
5974 | + | |
5975 | + /* This is only used for some error completion ASTs */ | |
5976 | + | |
5977 | + lkb = find_lock_by_id(lspace, freq->rr_remlkid); | |
5978 | + | |
b7b72b66 AM |
5979 | + DLM_ASSERT(lkb, |
5980 | + print_request(freq); | |
5981 | + printk("nodeid %u\n", nodeid);); | |
c1c6733f AM |
5982 | + |
5983 | + /* Return the lock to granted status */ | |
5984 | + res_lkb_swqueue(lkb->lkb_resource, lkb, GDLM_LKSTS_GRANTED); | |
c1c6733f | 5985 | + lkb->lkb_retstatus = freq->rr_status; |
b7b72b66 | 5986 | + queue_ast(lkb, AST_COMP, 0); |
c1c6733f AM |
5987 | + break; |
5988 | + | |
5989 | + case GDLM_REMCMD_UNLOCKREQUEST: | |
5990 | + | |
5991 | + lkb = find_lock_by_id(lspace, freq->rr_remlkid); | |
5992 | + | |
b7b72b66 AM |
5993 | + DLM_ASSERT(lkb, |
5994 | + print_request(freq); | |
5995 | + printk("nodeid %u\n", nodeid);); | |
5996 | + | |
5997 | + DLM_ASSERT(lkb->lkb_flags & GDLM_LKFLG_MSTCPY, | |
5998 | + print_lkb(lkb); | |
5999 | + print_request(freq); | |
6000 | + printk("nodeid %u\n", nodeid);); | |
6001 | + | |
6002 | + DLM_ASSERT(lkb->lkb_nodeid == nodeid, | |
6003 | + print_lkb(lkb); | |
6004 | + print_request(freq); | |
6005 | + printk("nodeid %u\n", nodeid);); | |
6006 | + | |
6007 | + rsb = find_rsb_to_unlock(lspace, lkb); | |
c1c6733f | 6008 | + |
b7b72b66 AM |
6009 | + log_debug(lspace, "(%d) un from %u %x \"%s\"", lkb->lkb_ownpid, |
6010 | + nodeid, lkb->lkb_id, rsb->res_name); | |
6011 | + | |
6012 | + reply.rl_status = dlm_unlock_stage2(lkb, rsb, freq->rr_flags); | |
c1c6733f AM |
6013 | + send_reply = 1; |
6014 | + break; | |
6015 | + | |
6016 | + case GDLM_REMCMD_QUERY: | |
6017 | + remote_query(nodeid, lspace, req); | |
6018 | + break; | |
6019 | + | |
6020 | + case GDLM_REMCMD_QUERYREPLY: | |
6021 | + remote_query_reply(nodeid, lspace, req); | |
6022 | + break; | |
6023 | + | |
6024 | + default: | |
6025 | + log_error(lspace, "process_cluster_request cmd %d",req->rh_cmd); | |
6026 | + } | |
6027 | + | |
6028 | + up_read(&lspace->ls_in_recovery); | |
6029 | + | |
6030 | + out: | |
6031 | + if (send_reply) { | |
6032 | + reply.rl_header.rh_cmd = GDLM_REMCMD_LOCKREPLY; | |
6033 | + reply.rl_header.rh_flags = 0; | |
6034 | + reply.rl_header.rh_length = sizeof(reply); | |
6035 | + reply.rl_header.rh_lkid = freq->rr_header.rh_lkid; | |
6036 | + reply.rl_header.rh_lockspace = freq->rr_header.rh_lockspace; | |
6037 | + | |
6038 | + status = midcomms_send_message(nodeid, &reply.rl_header, | |
6039 | + GFP_KERNEL); | |
6040 | + } | |
6041 | + | |
6042 | + wake_astd(); | |
b7b72b66 | 6043 | + put_lockspace(lspace); |
c1c6733f AM |
6044 | + return status; |
6045 | +} | |
6046 | + | |
b7b72b66 | 6047 | +static void add_reply_lvb(struct dlm_lkb *lkb, struct dlm_reply *reply) |
c1c6733f AM |
6048 | +{ |
6049 | + if (lkb->lkb_flags & GDLM_LKFLG_VALBLK) | |
6050 | + memcpy(reply->rl_lvb, lkb->lkb_lvbptr, DLM_LVB_LEN); | |
6051 | +} | |
6052 | + | |
b7b72b66 | 6053 | +static void add_request_lvb(struct dlm_lkb *lkb, struct dlm_request *req) |
c1c6733f AM |
6054 | +{ |
6055 | + if (lkb->lkb_flags & GDLM_LKFLG_VALBLK) | |
6056 | + memcpy(req->rr_lvb, lkb->lkb_lvbptr, DLM_LVB_LEN); | |
6057 | +} | |
6058 | diff -urN linux-orig/cluster/dlm/lockqueue.h linux-patched/cluster/dlm/lockqueue.h | |
6059 | --- linux-orig/cluster/dlm/lockqueue.h 1970-01-01 07:30:00.000000000 +0730 | |
bb1d8b11 | 6060 | +++ linux-patched/cluster/dlm/lockqueue.h 2004-11-03 11:31:56.000000000 +0800 |
c1c6733f AM |
6061 | @@ -0,0 +1,29 @@ |
6062 | +/****************************************************************************** | |
6063 | +******************************************************************************* | |
6064 | +** | |
6065 | +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | |
6066 | +** Copyright (C) 2004 Red Hat, Inc. All rights reserved. | |
6067 | +** | |
6068 | +** This copyrighted material is made available to anyone wishing to use, | |
6069 | +** modify, copy, or redistribute it subject to the terms and conditions | |
6070 | +** of the GNU General Public License v.2. | |
6071 | +** | |
6072 | +******************************************************************************* | |
6073 | +******************************************************************************/ | |
6074 | + | |
#ifndef __LOCKQUEUE_DOT_H__
#define __LOCKQUEUE_DOT_H__

void remote_grant(struct dlm_lkb * lkb);
void reply_and_grant(struct dlm_lkb * lkb);
int remote_stage(struct dlm_lkb * lkb, int state);

/*
 * Handle one message received from node 'nodeid'.  The parameter is named
 * as in the definition in lockqueue.c.
 */
int process_cluster_request(int nodeid, struct dlm_header *req, int recovery);

int send_cluster_request(struct dlm_lkb * lkb, int state);
void purge_requestqueue(struct dlm_ls * ls);
int process_requestqueue(struct dlm_ls * ls);
int reply_in_requestqueue(struct dlm_ls * ls, int lkid);
void remote_remove_direntry(struct dlm_ls * ls, int nodeid, char *name,
			    int namelen);
void allocate_and_copy_lvb(struct dlm_ls * ls, char **lvbptr, char *src);

#endif				/* __LOCKQUEUE_DOT_H__ */
6091 | diff -urN linux-orig/cluster/dlm/lockspace.c linux-patched/cluster/dlm/lockspace.c | |
6092 | --- linux-orig/cluster/dlm/lockspace.c 1970-01-01 07:30:00.000000000 +0730 | |
bb1d8b11 AM |
6093 | +++ linux-patched/cluster/dlm/lockspace.c 2004-11-03 11:31:56.000000000 +0800 |
6094 | @@ -0,0 +1,715 @@ | |
c1c6733f AM |
6095 | +/****************************************************************************** |
6096 | +******************************************************************************* | |
6097 | +** | |
6098 | +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | |
6099 | +** Copyright (C) 2004 Red Hat, Inc. All rights reserved. | |
6100 | +** | |
6101 | +** This copyrighted material is made available to anyone wishing to use, | |
6102 | +** modify, copy, or redistribute it subject to the terms and conditions | |
6103 | +** of the GNU General Public License v.2. | |
6104 | +** | |
6105 | +******************************************************************************* | |
6106 | +******************************************************************************/ | |
6107 | + | |
6108 | +#include <linux/module.h> | |
6109 | + | |
6110 | +#include "dlm_internal.h" | |
6111 | +#include "recoverd.h" | |
6112 | +#include "ast.h" | |
6113 | +#include "lkb.h" | |
6114 | +#include "nodes.h" | |
6115 | +#include "dir.h" | |
6116 | +#include "lowcomms.h" | |
6117 | +#include "config.h" | |
6118 | +#include "memory.h" | |
6119 | +#include "lockspace.h" | |
6120 | +#include "device.h" | |
6121 | + | |
/* Values held in dlmstate. */
#define GDST_NONE (0)
#define GDST_RUNNING (1)

/* GDST_RUNNING once init_internal() has started the global threads */
static int dlmstate;
/* raised by init_internal(), lowered by dlm_release() */
static int dlmcount;
/* held by dlm_init(), dlm_release() and dlm_new_lockspace() */
static struct semaphore dlmstate_lock;
/* all lockspaces, linked through ls_list */
struct list_head lslist;
/* taken around every walk of lslist and every change of ls_count */
spinlock_t lslist_lock;
/* handed to kcl_register_service() by new_lockspace() */
struct kcl_service_ops ls_ops;

static int new_lockspace(char *name, int namelen, void **lockspace, int flags);
6133 | + | |
6134 | + | |
6135 | +void dlm_lockspace_init(void) | |
6136 | +{ | |
b7b72b66 AM |
6137 | + dlmstate = GDST_NONE; |
6138 | + dlmcount = 0; | |
6139 | + init_MUTEX(&dlmstate_lock); | |
c1c6733f AM |
6140 | + INIT_LIST_HEAD(&lslist); |
6141 | + spin_lock_init(&lslist_lock); | |
6142 | +} | |
6143 | + | |
b7b72b66 | 6144 | +struct dlm_ls *find_lockspace_by_name(char *name, int namelen) |
c1c6733f | 6145 | +{ |
b7b72b66 | 6146 | + struct dlm_ls *ls; |
c1c6733f AM |
6147 | + |
6148 | + spin_lock(&lslist_lock); | |
6149 | + | |
6150 | + list_for_each_entry(ls, &lslist, ls_list) { | |
b7b72b66 AM |
6151 | + if (ls->ls_namelen == namelen && |
6152 | + memcmp(ls->ls_name, name, namelen) == 0) | |
c1c6733f AM |
6153 | + goto out; |
6154 | + } | |
6155 | + ls = NULL; | |
6156 | + out: | |
6157 | + spin_unlock(&lslist_lock); | |
6158 | + return ls; | |
6159 | +} | |
6160 | + | |
b7b72b66 | 6161 | +struct dlm_ls *find_lockspace_by_global_id(uint32_t id) |
c1c6733f | 6162 | +{ |
b7b72b66 | 6163 | + struct dlm_ls *ls; |
c1c6733f AM |
6164 | + |
6165 | + spin_lock(&lslist_lock); | |
6166 | + | |
6167 | + list_for_each_entry(ls, &lslist, ls_list) { | |
b7b72b66 AM |
6168 | + if (ls->ls_global_id == id) { |
6169 | + ls->ls_count++; | |
c1c6733f | 6170 | + goto out; |
b7b72b66 | 6171 | + } |
c1c6733f AM |
6172 | + } |
6173 | + ls = NULL; | |
6174 | + out: | |
6175 | + spin_unlock(&lslist_lock); | |
6176 | + return ls; | |
6177 | +} | |
6178 | + | |
b7b72b66 | 6179 | +struct dlm_ls *find_lockspace_by_local_id(void *id) |
c1c6733f | 6180 | +{ |
b7b72b66 | 6181 | + struct dlm_ls *ls; |
c1c6733f AM |
6182 | + |
6183 | + spin_lock(&lslist_lock); | |
6184 | + | |
6185 | + list_for_each_entry(ls, &lslist, ls_list) { | |
b7b72b66 AM |
6186 | + if (ls->ls_local_id == (uint32_t)(long)id) { |
6187 | + ls->ls_count++; | |
c1c6733f | 6188 | + goto out; |
b7b72b66 | 6189 | + } |
c1c6733f AM |
6190 | + } |
6191 | + ls = NULL; | |
6192 | + out: | |
6193 | + spin_unlock(&lslist_lock); | |
6194 | + return ls; | |
6195 | +} | |
6196 | + | |
b7b72b66 AM |
6197 | +/* must be called with lslist_lock held */ |
6198 | +void hold_lockspace(struct dlm_ls *ls) | |
6199 | +{ | |
6200 | + ls->ls_count++; | |
6201 | +} | |
6202 | + | |
6203 | +void put_lockspace(struct dlm_ls *ls) | |
6204 | +{ | |
6205 | + spin_lock(&lslist_lock); | |
6206 | + ls->ls_count--; | |
6207 | + spin_unlock(&lslist_lock); | |
6208 | +} | |
6209 | + | |
6210 | +static void remove_lockspace(struct dlm_ls *ls) | |
6211 | +{ | |
6212 | + for (;;) { | |
6213 | + spin_lock(&lslist_lock); | |
6214 | + if (ls->ls_count == 0) { | |
6215 | + list_del(&ls->ls_list); | |
6216 | + spin_unlock(&lslist_lock); | |
6217 | + return; | |
6218 | + } | |
6219 | + spin_unlock(&lslist_lock); | |
6220 | + set_current_state(TASK_INTERRUPTIBLE); | |
6221 | + schedule_timeout(HZ); | |
6222 | + } | |
6223 | +} | |
6224 | + | |
c1c6733f AM |
/*
 * Start the general threads, which are not lockspace-specific and work for
 * all dlm lockspaces.  Returns 0 on success.  If lowcomms cannot be started
 * the ast thread is stopped again before the error is returned.
 */

static int threads_start(void)
{
	int error;

	/* Thread which process lock requests for all ls's */
	error = astd_start();
	if (error) {
		log_print("cannot start ast thread %d", error);
		return error;
	}

	/* Thread for sending/receiving messages for all ls's */
	error = lowcomms_start();
	if (error) {
		log_print("cannot start lowcomms %d", error);
		astd_stop();
		return error;
	}

	return 0;
}
6256 | + | |
/*
 * Stop the general threads, in the reverse of the order in which
 * threads_start() starts them.
 */
static void threads_stop(void)
{
	lowcomms_stop();
	astd_stop();
}
6262 | + | |
6263 | +static int init_internal(void) | |
6264 | +{ | |
6265 | + int error = 0; | |
6266 | + | |
b7b72b66 AM |
6267 | + if (dlmstate == GDST_RUNNING) |
6268 | + dlmcount++; | |
c1c6733f AM |
6269 | + else { |
6270 | + error = threads_start(); | |
6271 | + if (error) | |
6272 | + goto out; | |
6273 | + | |
b7b72b66 AM |
6274 | + dlmstate = GDST_RUNNING; |
6275 | + dlmcount = 1; | |
c1c6733f AM |
6276 | + } |
6277 | + | |
6278 | + out: | |
6279 | + return error; | |
6280 | +} | |
6281 | + | |
c1c6733f | 6282 | +/* |
b7b72b66 | 6283 | + * Called after dlm module is loaded and before any lockspaces are created. |
c1c6733f AM |
6284 | + * Starts and initializes global threads and structures. These global entities |
6285 | + * are shared by and independent of all lockspaces. | |
6286 | + * | |
b7b72b66 | 6287 | + * There should be a dlm-specific user command which a person can run which |
c1c6733f AM |
6288 | + * calls this function. If a user hasn't run that command and something |
6289 | + * creates a new lockspace, this is called first. | |
6290 | + * | |
6291 | + * This also starts the default lockspace. | |
6292 | + */ | |
6293 | + | |
6294 | +int dlm_init(void) | |
6295 | +{ | |
6296 | + int error; | |
6297 | + | |
b7b72b66 | 6298 | + down(&dlmstate_lock); |
c1c6733f | 6299 | + error = init_internal(); |
b7b72b66 | 6300 | + up(&dlmstate_lock); |
c1c6733f AM |
6301 | + |
6302 | + return error; | |
6303 | +} | |
6304 | + | |
6305 | +int dlm_release(void) | |
6306 | +{ | |
6307 | + int error = 0; | |
6308 | + | |
b7b72b66 | 6309 | + down(&dlmstate_lock); |
c1c6733f | 6310 | + |
b7b72b66 | 6311 | + if (dlmstate == GDST_NONE) |
c1c6733f AM |
6312 | + goto out; |
6313 | + | |
b7b72b66 AM |
6314 | + if (dlmcount) |
6315 | + dlmcount--; | |
c1c6733f | 6316 | + |
b7b72b66 | 6317 | + if (dlmcount) |
c1c6733f AM |
6318 | + goto out; |
6319 | + | |
6320 | + spin_lock(&lslist_lock); | |
6321 | + if (!list_empty(&lslist)) { | |
6322 | + spin_unlock(&lslist_lock); | |
6323 | + log_print("cannot stop threads, lockspaces still exist"); | |
6324 | + goto out; | |
6325 | + } | |
6326 | + spin_unlock(&lslist_lock); | |
6327 | + | |
6328 | + threads_stop(); | |
b7b72b66 | 6329 | + dlmstate = GDST_NONE; |
c1c6733f AM |
6330 | + |
6331 | + out: | |
b7b72b66 | 6332 | + up(&dlmstate_lock); |
c1c6733f AM |
6333 | + |
6334 | + return error; | |
6335 | +} | |
6336 | + | |
b7b72b66 | 6337 | +struct dlm_ls *allocate_ls(int namelen) |
c1c6733f | 6338 | +{ |
b7b72b66 | 6339 | + struct dlm_ls *ls; |
c1c6733f | 6340 | + |
b7b72b66 | 6341 | + ls = kmalloc(sizeof(struct dlm_ls) + namelen, GFP_KERNEL); |
c1c6733f | 6342 | + if (ls) |
b7b72b66 | 6343 | + memset(ls, 0, sizeof(struct dlm_ls) + namelen); |
c1c6733f AM |
6344 | + |
6345 | + return ls; | |
6346 | +} | |
6347 | + | |
c1c6733f AM |
6348 | +static int new_lockspace(char *name, int namelen, void **lockspace, int flags) |
6349 | +{ | |
b7b72b66 AM |
6350 | + struct dlm_ls *ls; |
6351 | + int i, size, error = -ENOMEM; | |
c1c6733f AM |
6352 | + uint32_t local_id = 0; |
6353 | + | |
6354 | + if (!try_module_get(THIS_MODULE)) | |
6355 | + return -EINVAL; | |
6356 | + | |
6357 | + if (namelen > MAX_SERVICE_NAME_LEN) | |
6358 | + return -EINVAL; | |
6359 | + | |
b7b72b66 AM |
6360 | + ls = find_lockspace_by_name(name, namelen); |
6361 | + if (ls) { | |
6362 | + *lockspace = (void *)(long) ls->ls_local_id; | |
c1c6733f AM |
6363 | + return -EEXIST; |
6364 | + } | |
6365 | + | |
6366 | + /* | |
6367 | + * Initialize ls fields | |
6368 | + */ | |
6369 | + | |
6370 | + ls = allocate_ls(namelen); | |
6371 | + if (!ls) | |
6372 | + goto out; | |
6373 | + | |
6374 | + memcpy(ls->ls_name, name, namelen); | |
6375 | + ls->ls_namelen = namelen; | |
6376 | + | |
6377 | + ls->ls_allocation = GFP_KERNEL; | |
b7b72b66 AM |
6378 | + ls->ls_count = 0; |
6379 | + ls->ls_flags = 0; | |
6380 | + | |
6381 | + size = dlm_config.rsbtbl_size; | |
6382 | + ls->ls_rsbtbl_size = size; | |
c1c6733f | 6383 | + |
b7b72b66 AM |
6384 | + ls->ls_rsbtbl = kmalloc(sizeof(struct dlm_rsbtable) * size, GFP_KERNEL); |
6385 | + if (!ls->ls_rsbtbl) | |
c1c6733f | 6386 | + goto out_lsfree; |
b7b72b66 AM |
6387 | + for (i = 0; i < size; i++) { |
6388 | + INIT_LIST_HEAD(&ls->ls_rsbtbl[i].list); | |
6389 | + rwlock_init(&ls->ls_rsbtbl[i].lock); | |
6390 | + } | |
6391 | + | |
6392 | + size = dlm_config.lkbtbl_size; | |
6393 | + ls->ls_lkbtbl_size = size; | |
c1c6733f | 6394 | + |
b7b72b66 AM |
6395 | + ls->ls_lkbtbl = kmalloc(sizeof(struct dlm_lkbtable) * size, GFP_KERNEL); |
6396 | + if (!ls->ls_lkbtbl) | |
6397 | + goto out_rsbfree; | |
6398 | + for (i = 0; i < size; i++) { | |
6399 | + INIT_LIST_HEAD(&ls->ls_lkbtbl[i].list); | |
6400 | + rwlock_init(&ls->ls_lkbtbl[i].lock); | |
6401 | + ls->ls_lkbtbl[i].counter = 1; | |
6402 | + } | |
c1c6733f | 6403 | + |
b7b72b66 AM |
6404 | + size = dlm_config.dirtbl_size; |
6405 | + ls->ls_dirtbl_size = size; | |
c1c6733f | 6406 | + |
b7b72b66 AM |
6407 | + ls->ls_dirtbl = kmalloc(sizeof(struct dlm_dirtable) * size, GFP_KERNEL); |
6408 | + if (!ls->ls_dirtbl) | |
6409 | + goto out_lkbfree; | |
6410 | + for (i = 0; i < size; i++) { | |
6411 | + INIT_LIST_HEAD(&ls->ls_dirtbl[i].list); | |
6412 | + rwlock_init(&ls->ls_dirtbl[i].lock); | |
6413 | + } | |
c1c6733f AM |
6414 | + |
6415 | + INIT_LIST_HEAD(&ls->ls_nodes); | |
c1c6733f | 6416 | + INIT_LIST_HEAD(&ls->ls_nodes_gone); |
b7b72b66 | 6417 | + ls->ls_num_nodes = 0; |
bb1d8b11 | 6418 | + ls->ls_node_array = NULL; |
c783755a AM |
6419 | + ls->ls_recoverd_task = NULL; |
6420 | + init_MUTEX(&ls->ls_recoverd_lock); | |
c1c6733f AM |
6421 | + INIT_LIST_HEAD(&ls->ls_recover); |
6422 | + spin_lock_init(&ls->ls_recover_lock); | |
6423 | + INIT_LIST_HEAD(&ls->ls_recover_list); | |
6424 | + ls->ls_recover_list_count = 0; | |
6425 | + spin_lock_init(&ls->ls_recover_list_lock); | |
6426 | + init_waitqueue_head(&ls->ls_wait_general); | |
b7b72b66 | 6427 | + INIT_LIST_HEAD(&ls->ls_rootres); |
c1c6733f AM |
6428 | + INIT_LIST_HEAD(&ls->ls_requestqueue); |
6429 | + INIT_LIST_HEAD(&ls->ls_rebuild_rootrsb_list); | |
6430 | + ls->ls_last_stop = 0; | |
6431 | + ls->ls_last_start = 0; | |
6432 | + ls->ls_last_finish = 0; | |
6433 | + ls->ls_rcom_msgid = 0; | |
b7b72b66 | 6434 | + init_MUTEX(&ls->ls_requestqueue_lock); |
c1c6733f | 6435 | + init_MUTEX(&ls->ls_rcom_lock); |
c1c6733f | 6436 | + init_rwsem(&ls->ls_unlock_sem); |
b7b72b66 AM |
6437 | + init_rwsem(&ls->ls_root_lock); |
6438 | + init_rwsem(&ls->ls_in_recovery); | |
c1c6733f | 6439 | + |
b7b72b66 | 6440 | + down_write(&ls->ls_in_recovery); |
c1c6733f AM |
6441 | + |
6442 | + if (flags & DLM_LSF_NOTIMERS) | |
6443 | + set_bit(LSFL_NOTIMERS, &ls->ls_flags); | |
6444 | + | |
b7b72b66 | 6445 | + |
c1c6733f AM |
6446 | + /* |
6447 | + * Connect this lockspace with the cluster manager | |
6448 | + */ | |
6449 | + | |
6450 | + error = kcl_register_service(name, namelen, SERVICE_LEVEL_GDLM, | |
6451 | + &ls_ops, TRUE, (void *) ls, &local_id); | |
6452 | + if (error) | |
b7b72b66 | 6453 | + goto out_recoverd; |
c1c6733f AM |
6454 | + |
6455 | + ls->ls_state = LSST_INIT; | |
6456 | + ls->ls_local_id = local_id; | |
6457 | + | |
6458 | + spin_lock(&lslist_lock); | |
6459 | + list_add(&ls->ls_list, &lslist); | |
6460 | + spin_unlock(&lslist_lock); | |
6461 | + | |
6462 | + error = kcl_join_service(local_id); | |
6463 | + if (error) { | |
6464 | + log_error(ls, "service manager join error %d", error); | |
6465 | + goto out_reg; | |
6466 | + } | |
6467 | + | |
6468 | + /* The ls isn't actually running until it receives a start() from CMAN. | |
b7b72b66 | 6469 | + Neither does it have a global ls id until started. */ |
c1c6733f AM |
6470 | + |
6471 | + /* Return the local ID as the lockspace handle. I've left this | |
6472 | + cast to a void* as it allows us to replace it with pretty much | |
6473 | + anything at a future date without breaking clients. But returning | |
6474 | + the address of the lockspace is a bad idea as it could get | |
6475 | + forcibly removed, leaving client with a dangling pointer */ | |
c1c6733f | 6476 | + |
b7b72b66 | 6477 | + *lockspace = (void *)(long) local_id; |
c1c6733f AM |
6478 | + return 0; |
6479 | + | |
b7b72b66 | 6480 | + out_reg: |
c1c6733f | 6481 | + kcl_unregister_service(ls->ls_local_id); |
b7b72b66 | 6482 | + out_recoverd: |
c783755a | 6483 | + dlm_recoverd_stop(ls); |
b7b72b66 AM |
6484 | + kfree(ls->ls_dirtbl); |
6485 | + out_lkbfree: | |
6486 | + kfree(ls->ls_lkbtbl); | |
6487 | + out_rsbfree: | |
6488 | + kfree(ls->ls_rsbtbl); | |
6489 | + out_lsfree: | |
6490 | + kfree(ls); | |
6491 | + out: | |
c1c6733f AM |
6492 | + return error; |
6493 | +} | |
6494 | + | |
6495 | +/* | |
6496 | + * Called by a system like GFS which wants independent lock spaces. | |
6497 | + */ | |
6498 | + | |
6499 | +int dlm_new_lockspace(char *name, int namelen, void **lockspace, int flags) | |
6500 | +{ | |
6501 | + int error = -ENOSYS; | |
6502 | + | |
b7b72b66 | 6503 | + down(&dlmstate_lock); |
c1c6733f AM |
6504 | + error = init_internal(); |
6505 | + if (error) | |
6506 | + goto out; | |
6507 | + | |
6508 | + error = new_lockspace(name, namelen, lockspace, flags); | |
b7b72b66 AM |
6509 | + out: |
6510 | + up(&dlmstate_lock); | |
c1c6733f AM |
6511 | + return error; |
6512 | +} | |
6513 | + | |
6514 | +/* Return 1 if the lockspace still has active remote locks, | |
6515 | + * 2 if the lockspace still has active local locks. | |
6516 | + */ | |
b7b72b66 AM |
6517 | +static int lockspace_busy(struct dlm_ls *ls) |
6518 | +{ | |
6519 | + int i, lkb_found = 0; | |
6520 | + struct dlm_lkb *lkb; | |
6521 | + | |
6522 | + /* NOTE: We check the lockidtbl here rather than the resource table. | |
6523 | + This is because there may be LKBs queued as ASTs that have been | |
6524 | + unlinked from their RSBs and are pending deletion once the AST has | |
6525 | + been delivered */ | |
6526 | + | |
6527 | + for (i = 0; i < ls->ls_lkbtbl_size; i++) { | |
6528 | + read_lock(&ls->ls_lkbtbl[i].lock); | |
6529 | + if (!list_empty(&ls->ls_lkbtbl[i].list)) { | |
6530 | + lkb_found = 1; | |
6531 | + list_for_each_entry(lkb, &ls->ls_lkbtbl[i].list, | |
6532 | + lkb_idtbl_list) { | |
6533 | + if (!lkb->lkb_nodeid) { | |
6534 | + read_unlock(&ls->ls_lkbtbl[i].lock); | |
6535 | + return 2; | |
6536 | + } | |
6537 | + } | |
c1c6733f | 6538 | + } |
b7b72b66 | 6539 | + read_unlock(&ls->ls_lkbtbl[i].lock); |
c1c6733f | 6540 | + } |
b7b72b66 | 6541 | + return lkb_found; |
c1c6733f AM |
6542 | +} |
6543 | + | |
b7b72b66 | 6544 | +static int release_lockspace(struct dlm_ls *ls, int force) |
c1c6733f | 6545 | +{ |
b7b72b66 AM |
6546 | + struct dlm_lkb *lkb; |
6547 | + struct dlm_rsb *rsb; | |
6548 | + struct dlm_recover *rv; | |
c1c6733f AM |
6549 | + struct list_head *head; |
6550 | + int i; | |
6551 | + int busy = lockspace_busy(ls); | |
6552 | + | |
6553 | + /* Don't destroy a busy lockspace */ | |
6554 | + if (busy > force) | |
6555 | + return -EBUSY; | |
6556 | + | |
6557 | + if (force < 3) { | |
6558 | + kcl_leave_service(ls->ls_local_id); | |
6559 | + kcl_unregister_service(ls->ls_local_id); | |
6560 | + } | |
6561 | + | |
c783755a | 6562 | + dlm_recoverd_stop(ls); |
b7b72b66 AM |
6563 | + |
6564 | + remove_lockspace(ls); | |
c1c6733f AM |
6565 | + |
6566 | + /* | |
b7b72b66 | 6567 | + * Free direntry structs. |
c1c6733f AM |
6568 | + */ |
6569 | + | |
b7b72b66 AM |
6570 | + dlm_dir_clear(ls); |
6571 | + kfree(ls->ls_dirtbl); | |
c1c6733f AM |
6572 | + |
6573 | + /* | |
b7b72b66 | 6574 | + * Free all lkb's on lkbtbl[] lists. |
c1c6733f AM |
6575 | + */ |
6576 | + | |
b7b72b66 AM |
6577 | + for (i = 0; i < ls->ls_lkbtbl_size; i++) { |
6578 | + head = &ls->ls_lkbtbl[i].list; | |
c1c6733f | 6579 | + while (!list_empty(head)) { |
b7b72b66 AM |
6580 | + lkb = list_entry(head->next, struct dlm_lkb, |
6581 | + lkb_idtbl_list); | |
c1c6733f AM |
6582 | + list_del(&lkb->lkb_idtbl_list); |
6583 | + | |
6584 | + if (lkb->lkb_lockqueue_state) | |
6585 | + remove_from_lockqueue(lkb); | |
6586 | + | |
b7b72b66 | 6587 | + if (lkb->lkb_astflags & (AST_COMP | AST_BAST)) |
c1c6733f AM |
6588 | + list_del(&lkb->lkb_astqueue); |
6589 | + | |
b7b72b66 | 6590 | + if (lkb->lkb_lvbptr && lkb->lkb_flags & GDLM_LKFLG_MSTCPY) |
c1c6733f AM |
6591 | + free_lvb(lkb->lkb_lvbptr); |
6592 | + | |
6593 | + free_lkb(lkb); | |
6594 | + } | |
6595 | + } | |
6596 | + | |
b7b72b66 | 6597 | + kfree(ls->ls_lkbtbl); |
c1c6733f AM |
6598 | + |
6599 | + /* | |
b7b72b66 | 6600 | + * Free all rsb's on rsbtbl[] lists |
c1c6733f AM |
6601 | + */ |
6602 | + | |
b7b72b66 AM |
6603 | + for (i = 0; i < ls->ls_rsbtbl_size; i++) { |
6604 | + head = &ls->ls_rsbtbl[i].list; | |
c1c6733f | 6605 | + while (!list_empty(head)) { |
b7b72b66 AM |
6606 | + rsb = list_entry(head->next, struct dlm_rsb, |
6607 | + res_hashchain); | |
c1c6733f AM |
6608 | + list_del(&rsb->res_hashchain); |
6609 | + | |
6610 | + if (rsb->res_lvbptr) | |
6611 | + free_lvb(rsb->res_lvbptr); | |
6612 | + | |
6613 | + free_rsb(rsb); | |
6614 | + } | |
6615 | + } | |
6616 | + | |
b7b72b66 | 6617 | + kfree(ls->ls_rsbtbl); |
c1c6733f AM |
6618 | + |
6619 | + /* | |
6620 | + * Free structures on any other lists | |
6621 | + */ | |
6622 | + | |
6623 | + head = &ls->ls_recover; | |
6624 | + while (!list_empty(head)) { | |
b7b72b66 AM |
6625 | + rv = list_entry(head->next, struct dlm_recover, list); |
6626 | + list_del(&rv->list); | |
6627 | + kfree(rv); | |
c1c6733f AM |
6628 | + } |
6629 | + | |
c783755a | 6630 | + clear_free_de(ls); |
c1c6733f | 6631 | + |
c783755a AM |
6632 | + ls_nodes_clear(ls); |
6633 | + ls_nodes_gone_clear(ls); | |
bb1d8b11 AM |
6634 | + if (ls->ls_node_array) |
6635 | + kfree(ls->ls_node_array); | |
c1c6733f | 6636 | + |
b7b72b66 | 6637 | + kfree(ls); |
c1c6733f | 6638 | + dlm_release(); |
c1c6733f AM |
6639 | + module_put(THIS_MODULE); |
6640 | + return 0; | |
6641 | +} | |
6642 | + | |
6643 | + | |
6644 | +/* | |
6645 | + * Called when a system has released all its locks and is not going to use the | |
6646 | + * lockspace any longer. We blindly free everything we're managing for this | |
6647 | + * lockspace. Remaining nodes will go through the recovery process as if we'd | |
6648 | + * died. The lockspace must continue to function as usual, participating in | |
6649 | + * recoveries, until kcl_leave_service returns. | |
6650 | + * | |
6651 | + * Force has 4 possible values: | |
6652 | + * 0 - don't destroy lockspace if it has any LKBs | |
6653 | + * 1 - destroy lockspace if it has remote LKBs but not if it has local LKBs | |
6654 | + * 2 - destroy lockspace regardless of LKBs | |
6655 | + * 3 - destroy lockspace as part of a forced shutdown | |
6656 | + */ | |
6657 | + | |
6658 | +int dlm_release_lockspace(void *lockspace, int force) | |
6659 | +{ | |
b7b72b66 | 6660 | + struct dlm_ls *ls; |
c1c6733f AM |
6661 | + |
6662 | + ls = find_lockspace_by_local_id(lockspace); | |
6663 | + if (!ls) | |
b7b72b66 AM |
6664 | + return -EINVAL; |
6665 | + put_lockspace(ls); | |
c1c6733f AM |
6666 | + return release_lockspace(ls, force); |
6667 | +} | |
6668 | + | |
6669 | + | |
6670 | +/* Called when the cluster is being shut down dirtily */ | |
6671 | +void dlm_emergency_shutdown() | |
6672 | +{ | |
b7b72b66 AM |
6673 | + struct dlm_ls *ls; |
6674 | + struct dlm_ls *tmp; | |
c1c6733f AM |
6675 | + |
6676 | + /* Shut lowcomms down to prevent any socket activity */ | |
6677 | + lowcomms_stop_accept(); | |
6678 | + | |
6679 | + /* Delete the devices that belong to the userland | |
6680 | + lockspaces to be deleted. */ | |
6681 | + dlm_device_free_devices(); | |
6682 | + | |
6683 | + /* Now try to clean the lockspaces */ | |
6684 | + spin_lock(&lslist_lock); | |
6685 | + | |
6686 | + list_for_each_entry_safe(ls, tmp, &lslist, ls_list) { | |
6687 | + spin_unlock(&lslist_lock); | |
6688 | + release_lockspace(ls, 3); | |
6689 | + spin_lock(&lslist_lock); | |
6690 | + } | |
6691 | + | |
6692 | + spin_unlock(&lslist_lock); | |
6693 | +} | |
6694 | + | |
b7b72b66 | 6695 | +struct dlm_recover *allocate_dlm_recover(void) |
c1c6733f | 6696 | +{ |
b7b72b66 | 6697 | + struct dlm_recover *rv; |
c1c6733f | 6698 | + |
b7b72b66 AM |
6699 | + rv = kmalloc(sizeof(struct dlm_recover), GFP_KERNEL); |
6700 | + if (rv) | |
6701 | + memset(rv, 0, sizeof(struct dlm_recover)); | |
6702 | + return rv; | |
c1c6733f AM |
6703 | +} |
6704 | + | |
6705 | +/* | |
6706 | + * Called by CMAN on a specific ls. "stop" means set flag which while set | |
6707 | + * causes all new requests to ls to be queued and not submitted until flag is | |
6708 | + * cleared. stop on a ls also needs to cancel any prior starts on the ls. | |
6709 | + * The recoverd thread carries out any work called for by this event. | |
6710 | + */ | |
6711 | + | |
6712 | +static int dlm_ls_stop(void *servicedata) | |
6713 | +{ | |
b7b72b66 | 6714 | + struct dlm_ls *ls = (struct dlm_ls *) servicedata; |
c1c6733f AM |
6715 | + int new; |
6716 | + | |
6717 | + spin_lock(&ls->ls_recover_lock); | |
6718 | + ls->ls_last_stop = ls->ls_last_start; | |
6719 | + set_bit(LSFL_LS_STOP, &ls->ls_flags); | |
6720 | + new = test_and_clear_bit(LSFL_LS_RUN, &ls->ls_flags); | |
6721 | + spin_unlock(&ls->ls_recover_lock); | |
6722 | + | |
6723 | + /* | |
6724 | + * This in_recovery lock does two things: | |
6725 | + * | |
6726 | + * 1) Keeps this function from returning until all threads are out | |
6727 | + * of locking routines and locking is truly stopped. | |
6728 | + * 2) Keeps any new requests from being processed until it's unlocked | |
6729 | + * when recovery is complete. | |
6730 | + */ | |
6731 | + | |
6732 | + if (new) | |
6733 | + down_write(&ls->ls_in_recovery); | |
6734 | + | |
6735 | + clear_bit(LSFL_RESDIR_VALID, &ls->ls_flags); | |
6736 | + clear_bit(LSFL_ALL_RESDIR_VALID, &ls->ls_flags); | |
6737 | + clear_bit(LSFL_NODES_VALID, &ls->ls_flags); | |
6738 | + clear_bit(LSFL_ALL_NODES_VALID, &ls->ls_flags); | |
6739 | + | |
b7b72b66 | 6740 | + dlm_recoverd_kick(ls); |
c1c6733f AM |
6741 | + |
6742 | + return 0; | |
6743 | +} | |
6744 | + | |
6745 | +/* | |
6746 | + * Called by CMAN on a specific ls. "start" means enable the lockspace to do | |
6747 | + * request processing which first requires that the recovery procedure be | |
6748 | + * stepped through with all nodes sharing the lockspace (nodeids). The first | |
6749 | + * start on the ls after it's created is a special case and requires some extra | |
6750 | + * work like figuring out our own local nodeid. We can't do all this in the | |
6751 | + * calling CMAN context, so we must pass this work off to the recoverd thread | |
b7b72b66 | 6752 | + * which was created in dlm_init(). The recoverd thread carries out any work |
c1c6733f AM |
6753 | + * called for by this event. |
6754 | + */ | |
6755 | + | |
6756 | +static int dlm_ls_start(void *servicedata, uint32_t *nodeids, int count, | |
6757 | + int event_id, int type) | |
6758 | +{ | |
b7b72b66 AM |
6759 | + struct dlm_ls *ls = (struct dlm_ls *) servicedata; |
6760 | + struct dlm_recover *rv; | |
c1c6733f AM |
6761 | + int error = -ENOMEM; |
6762 | + | |
b7b72b66 AM |
6763 | + rv = allocate_dlm_recover(); |
6764 | + if (!rv) | |
c1c6733f AM |
6765 | + goto out; |
6766 | + | |
b7b72b66 AM |
6767 | + rv->nodeids = nodeids; |
6768 | + rv->node_count = count; | |
6769 | + rv->event_id = event_id; | |
c1c6733f AM |
6770 | + |
6771 | + spin_lock(&ls->ls_recover_lock); | |
b7b72b66 AM |
6772 | + if (ls->ls_last_start == event_id) |
6773 | + log_all(ls, "repeated start %d stop %d finish %d", | |
6774 | + event_id, ls->ls_last_stop, ls->ls_last_finish); | |
c1c6733f | 6775 | + ls->ls_last_start = event_id; |
b7b72b66 | 6776 | + list_add_tail(&rv->list, &ls->ls_recover); |
c1c6733f AM |
6777 | + set_bit(LSFL_LS_START, &ls->ls_flags); |
6778 | + spin_unlock(&ls->ls_recover_lock); | |
6779 | + | |
b7b72b66 | 6780 | + dlm_recoverd_kick(ls); |
c1c6733f AM |
6781 | + error = 0; |
6782 | + | |
6783 | + out: | |
6784 | + return error; | |
6785 | +} | |
6786 | + | |
6787 | +/* | |
6788 | + * Called by CMAN on a specific ls. "finish" means that all nodes which | |
6789 | + * received a "start" have completed the start and called kcl_start_done. | |
6790 | + * The recoverd thread carries out any work called for by this event. | |
6791 | + */ | |
6792 | + | |
6793 | +static void dlm_ls_finish(void *servicedata, int event_id) | |
6794 | +{ | |
b7b72b66 | 6795 | + struct dlm_ls *ls = (struct dlm_ls *) servicedata; |
c1c6733f AM |
6796 | + |
6797 | + spin_lock(&ls->ls_recover_lock); | |
6798 | + ls->ls_last_finish = event_id; | |
6799 | + set_bit(LSFL_LS_FINISH, &ls->ls_flags); | |
6800 | + spin_unlock(&ls->ls_recover_lock); | |
6801 | + | |
b7b72b66 | 6802 | + dlm_recoverd_kick(ls); |
c1c6733f AM |
6803 | +} |
6804 | + | |
6805 | +struct kcl_service_ops ls_ops = { | |
6806 | + .stop = dlm_ls_stop, | |
6807 | + .start = dlm_ls_start, | |
6808 | + .finish = dlm_ls_finish | |
6809 | +}; | |
6810 | diff -urN linux-orig/cluster/dlm/lockspace.h linux-patched/cluster/dlm/lockspace.h | |
6811 | --- linux-orig/cluster/dlm/lockspace.h 1970-01-01 07:30:00.000000000 +0730 | |
bb1d8b11 | 6812 | +++ linux-patched/cluster/dlm/lockspace.h 2004-11-03 11:31:56.000000000 +0800 |
c1c6733f AM |
6813 | @@ -0,0 +1,29 @@ |
6814 | +/****************************************************************************** | |
6815 | +******************************************************************************* | |
6816 | +** | |
6817 | +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | |
6818 | +** Copyright (C) 2004 Red Hat, Inc. All rights reserved. | |
6819 | +** | |
6820 | +** This copyrighted material is made available to anyone wishing to use, | |
6821 | +** modify, copy, or redistribute it subject to the terms and conditions | |
6822 | +** of the GNU General Public License v.2. | |
6823 | +** | |
6824 | +******************************************************************************* | |
6825 | +******************************************************************************/ | |
6826 | + | |
6827 | +#ifndef __LOCKSPACE_DOT_H__ | |
6828 | +#define __LOCKSPACE_DOT_H__ | |
6829 | + | |
6830 | +void dlm_lockspace_init(void); | |
6831 | +int dlm_init(void); | |
6832 | +int dlm_release(void); | |
6833 | +int dlm_new_lockspace(char *name, int namelen, void **ls, int flags); | |
6834 | +int dlm_release_lockspace(void *ls, int force); | |
c1c6733f | 6835 | +void dlm_emergency_shutdown(void); |
b7b72b66 AM |
6836 | +struct dlm_ls *find_lockspace_by_global_id(uint32_t id); |
6837 | +struct dlm_ls *find_lockspace_by_local_id(void *id); | |
6838 | +struct dlm_ls *find_lockspace_by_name(char *name, int namelen); | |
6839 | +void hold_lockspace(struct dlm_ls *ls); | |
6840 | +void put_lockspace(struct dlm_ls *ls); | |
c1c6733f AM |
6841 | + |
6842 | +#endif /* __LOCKSPACE_DOT_H__ */ | |
6843 | diff -urN linux-orig/cluster/dlm/lowcomms.c linux-patched/cluster/dlm/lowcomms.c | |
6844 | --- linux-orig/cluster/dlm/lowcomms.c 1970-01-01 07:30:00.000000000 +0730 | |
bb1d8b11 AM |
6845 | +++ linux-patched/cluster/dlm/lowcomms.c 2004-11-03 11:31:56.000000000 +0800 |
6846 | @@ -0,0 +1,1415 @@ | |
c1c6733f AM |
6847 | +/****************************************************************************** |
6848 | +******************************************************************************* | |
6849 | +** | |
6850 | +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | |
6851 | +** Copyright (C) 2004 Red Hat, Inc. All rights reserved. | |
6852 | +** | |
6853 | +** This copyrighted material is made available to anyone wishing to use, | |
6854 | +** modify, copy, or redistribute it subject to the terms and conditions | |
6855 | +** of the GNU General Public License v.2. | |
6856 | +** | |
6857 | +******************************************************************************* | |
6858 | +******************************************************************************/ | |
6859 | + | |
6860 | +/* | |
6861 | + * lowcomms.c | |
6862 | + * | |
6863 | + * This is the "low-level" comms layer. | |
6864 | + * | |
6865 | + * It is responsible for sending/receiving messages | |
6866 | + * from other nodes in the cluster. | |
6867 | + * | |
6868 | + * Cluster nodes are referred to by their nodeids. nodeids are | |
6869 | + * simply 32 bit numbers to the locking module - if they need to | |
6870 | + * be expanded for the cluster infrastructure then that is its | |
6871 | + * responsibility. It is this layer's | |
6872 | + * responsibility to resolve these into IP address or | |
6873 | + * whatever it needs for inter-node communication. | |
6874 | + * | |
6875 | + * The comms level is two kernel threads that deal mainly with | |
6876 | + * the receiving of messages from other nodes and passing them | |
6877 | + * up to the mid-level comms layer (which understands the | |
6878 | + * message format) for execution by the locking core, and | |
6879 | + * a send thread which does all the setting up of connections | |
6880 | + * to remote nodes and the sending of data. Threads are not allowed | |
6881 | + * to send their own data because it may cause them to wait in times | |
6882 | + * of high load. Also, this way, the sending thread can collect together | |
6883 | + * messages bound for one node and send them in one block. | |
6884 | + * | |
6885 | + * I don't see any problem with the recv thread executing the locking | |
6886 | + * code on behalf of remote processes as the locking code is | |
6887 | + * short, efficient and never waits. | |
6888 | + * | |
6889 | + */ | |
6890 | + | |
6891 | + | |
6892 | +#include <asm/ioctls.h> | |
6893 | +#include <net/sock.h> | |
6894 | +#include <net/tcp.h> | |
6895 | +#include <linux/pagemap.h> | |
6896 | +#include <cluster/cnxman.h> | |
6897 | + | |
6898 | +#include "dlm_internal.h" | |
6899 | +#include "lowcomms.h" | |
6900 | +#include "midcomms.h" | |
6901 | +#include "config.h" | |
6902 | + | |
6903 | +struct cbuf { | |
6904 | + unsigned base; | |
6905 | + unsigned len; | |
6906 | + unsigned mask; | |
6907 | +}; | |
6908 | + | |
6909 | +#define CBUF_INIT(cb, size) do { (cb)->base = (cb)->len = 0; (cb)->mask = ((size)-1); } while(0) | |
6910 | +#define CBUF_ADD(cb, n) do { (cb)->len += n; } while(0) | |
6911 | +#define CBUF_EMPTY(cb) ((cb)->len == 0) | |
6912 | +#define CBUF_MAY_ADD(cb, n) (((cb)->len + (n)) < ((cb)->mask + 1)) | |
6913 | +#define CBUF_EAT(cb, n) do { (cb)->len -= (n); \ | |
6914 | + (cb)->base += (n); (cb)->base &= (cb)->mask; } while(0) | |
6915 | +#define CBUF_DATA(cb) (((cb)->base + (cb)->len) & (cb)->mask) | |
6916 | + | |
6917 | +struct connection { | |
6918 | + struct socket *sock; /* NULL if not connected */ | |
6919 | + uint32_t nodeid; /* So we know who we are in the list */ | |
6920 | + struct rw_semaphore sock_sem; /* Stop connect races */ | |
6921 | + struct list_head read_list; /* On this list when ready for reading */ | |
6922 | + struct list_head write_list; /* On this list when ready for writing */ | |
6923 | + struct list_head state_list; /* On this list when ready to connect */ | |
6924 | + unsigned long flags; /* bit 1,2 = We are on the read/write lists */ | |
6925 | +#define CF_READ_PENDING 1 | |
6926 | +#define CF_WRITE_PENDING 2 | |
6927 | +#define CF_CONNECT_PENDING 3 | |
c783755a | 6928 | +#define CF_IS_OTHERCON 4 |
c1c6733f AM |
6929 | + struct list_head writequeue; /* List of outgoing writequeue_entries */ |
6930 | + struct list_head listenlist; /* List of allocated listening sockets */ | |
6931 | + spinlock_t writequeue_lock; | |
6932 | + int (*rx_action) (struct connection *); /* What to do when active */ | |
6933 | + struct page *rx_page; | |
6934 | + struct cbuf cb; | |
6935 | + int retries; | |
b7b72b66 | 6936 | + atomic_t waiting_requests; |
c1c6733f | 6937 | +#define MAX_CONNECT_RETRIES 3 |
c783755a | 6938 | + struct connection *othercon; |
c1c6733f AM |
6939 | +}; |
6940 | +#define sock2con(x) ((struct connection *)(x)->sk_user_data) | |
c1c6733f AM |
6941 | + |
6942 | +/* An entry waiting to be sent */ | |
6943 | +struct writequeue_entry { | |
6944 | + struct list_head list; | |
6945 | + struct page *page; | |
6946 | + int offset; | |
6947 | + int len; | |
6948 | + int end; | |
6949 | + int users; | |
6950 | + struct connection *con; | |
6951 | +}; | |
6952 | + | |
6953 | +/* "Template" structure for IPv4 and IPv6 used to fill | |
6954 | + * in the missing bits when converting between cman (which knows | |
6955 | + * nothing about sockaddr structs) and real life where we actually | |
6956 | + * have to connect to these addresses. Also one of these structs | |
6957 | + * will hold the cached "us" address. | |
6958 | + * | |
6959 | + * It's an in6 sockaddr just so there's enough space for anything | |
6960 | + * we're likely to see here. | |
6961 | + */ | |
6962 | +static struct sockaddr_in6 local_addr; | |
6963 | + | |
6964 | +/* Manage daemons */ | |
c783755a AM |
6965 | +static struct task_struct *recv_task; |
6966 | +static struct task_struct *send_task; | |
c1c6733f AM |
6967 | + |
6968 | +static wait_queue_t lowcomms_send_waitq_head; | |
6969 | +static wait_queue_head_t lowcomms_send_waitq; | |
c1c6733f AM |
6970 | +static wait_queue_t lowcomms_recv_waitq_head; |
6971 | +static wait_queue_head_t lowcomms_recv_waitq; | |
6972 | + | |
c783755a AM |
6973 | +/* An array of pointers to connections, indexed by NODEID */ |
6974 | +static struct connection **connections; | |
6975 | +static struct rw_semaphore connections_lock; | |
6976 | +static kmem_cache_t *con_cache; | |
6977 | +static int conn_array_size; | |
6978 | +static atomic_t accepting; | |
6979 | + | |
c1c6733f AM |
6980 | +/* List of sockets that have reads pending */ |
6981 | +static struct list_head read_sockets; | |
6982 | +static spinlock_t read_sockets_lock; | |
6983 | + | |
6984 | +/* List of sockets which have writes pending */ | |
6985 | +static struct list_head write_sockets; | |
6986 | +static spinlock_t write_sockets_lock; | |
6987 | + | |
6988 | +/* List of sockets which have connects pending */ | |
6989 | +static struct list_head state_sockets; | |
6990 | +static spinlock_t state_sockets_lock; | |
6991 | + | |
6992 | +/* List of allocated listen sockets */ | |
6993 | +static struct list_head listen_sockets; | |
6994 | + | |
6995 | +static int lowcomms_ipaddr_from_nodeid(int nodeid, struct sockaddr *retaddr); | |
6996 | +static int lowcomms_nodeid_from_ipaddr(struct sockaddr *addr, int addr_len); | |
6997 | + | |
6998 | + | |
c783755a AM |
6999 | +static struct connection *nodeid2con(int nodeid, int allocation) |
7000 | +{ | |
7001 | + struct connection *con = NULL; | |
7002 | + | |
7003 | + down_read(&connections_lock); | |
7004 | + if (nodeid >= conn_array_size) { | |
7005 | + int new_size = nodeid + dlm_config.conn_increment; | |
7006 | + struct connection **new_conns; | |
7007 | + | |
7008 | + new_conns = kmalloc(sizeof(struct connection *) * | |
7009 | + new_size, allocation); | |
7010 | + if (!new_conns) | |
7011 | + goto finish; | |
7012 | + | |
7013 | + up_read(&connections_lock); | |
7014 | + /* The worst that can happen here (I think), is that | |
7015 | + we get two consecutive reallocations */ | |
7016 | + down_write(&connections_lock); | |
7017 | + | |
7018 | + memset(new_conns, 0, sizeof(struct connection *) * new_size); | |
7019 | + memcpy(new_conns, connections, sizeof(struct connection *) * conn_array_size); | |
7020 | + conn_array_size = new_size; | |
7021 | + kfree(connections); | |
7022 | + connections = new_conns; | |
7023 | + | |
7024 | + up_write(&connections_lock); | |
7025 | + down_read(&connections_lock); | |
7026 | + } | |
7027 | + | |
7028 | + con = connections[nodeid]; | |
7029 | + if (con == NULL && allocation) { | |
7030 | + con = kmem_cache_alloc(con_cache, allocation); | |
7031 | + if (!con) | |
7032 | + goto finish; | |
7033 | + | |
7034 | + memset(con, 0, sizeof(*con)); | |
7035 | + con->nodeid = nodeid; | |
7036 | + init_rwsem(&con->sock_sem); | |
7037 | + INIT_LIST_HEAD(&con->writequeue); | |
7038 | + spin_lock_init(&con->writequeue_lock); | |
7039 | + | |
7040 | + connections[nodeid] = con; | |
7041 | + } | |
7042 | + | |
7043 | + finish: | |
7044 | + up_read(&connections_lock); | |
7045 | + return con; | |
7046 | +} | |
7047 | + | |
c1c6733f AM |
7048 | +/* Data available on socket or listen socket received a connect */ |
7049 | +static void lowcomms_data_ready(struct sock *sk, int count_unused) | |
7050 | +{ | |
7051 | + struct connection *con = sock2con(sk); | |
7052 | + | |
b7b72b66 | 7053 | + atomic_inc(&con->waiting_requests); |
c1c6733f AM |
7054 | + if (test_and_set_bit(CF_READ_PENDING, &con->flags)) |
7055 | + return; | |
7056 | + | |
7057 | + spin_lock_bh(&read_sockets_lock); | |
7058 | + list_add_tail(&con->read_list, &read_sockets); | |
7059 | + spin_unlock_bh(&read_sockets_lock); | |
7060 | + | |
7061 | + wake_up_interruptible(&lowcomms_recv_waitq); | |
7062 | +} | |
7063 | + | |
7064 | +static void lowcomms_write_space(struct sock *sk) | |
7065 | +{ | |
7066 | + struct connection *con = sock2con(sk); | |
7067 | + | |
7068 | + if (test_and_set_bit(CF_WRITE_PENDING, &con->flags)) | |
7069 | + return; | |
7070 | + | |
7071 | + spin_lock_bh(&write_sockets_lock); | |
7072 | + list_add_tail(&con->write_list, &write_sockets); | |
7073 | + spin_unlock_bh(&write_sockets_lock); | |
7074 | + | |
7075 | + wake_up_interruptible(&lowcomms_send_waitq); | |
7076 | +} | |
7077 | + | |
7078 | +static inline void lowcomms_connect_sock(struct connection *con) | |
7079 | +{ | |
7080 | + if (test_and_set_bit(CF_CONNECT_PENDING, &con->flags)) | |
7081 | + return; | |
7082 | + if (!atomic_read(&accepting)) | |
7083 | + return; | |
7084 | + | |
7085 | + spin_lock_bh(&state_sockets_lock); | |
7086 | + list_add_tail(&con->state_list, &state_sockets); | |
7087 | + spin_unlock_bh(&state_sockets_lock); | |
7088 | + | |
7089 | + wake_up_interruptible(&lowcomms_send_waitq); | |
7090 | +} | |
7091 | + | |
7092 | +static void lowcomms_state_change(struct sock *sk) | |
7093 | +{ | |
7094 | +/* struct connection *con = sock2con(sk); */ | |
7095 | + | |
7096 | + switch (sk->sk_state) { | |
7097 | + case TCP_ESTABLISHED: | |
7098 | + lowcomms_write_space(sk); | |
7099 | + break; | |
7100 | + | |
7101 | + case TCP_FIN_WAIT1: | |
7102 | + case TCP_FIN_WAIT2: | |
7103 | + case TCP_TIME_WAIT: | |
7104 | + case TCP_CLOSE: | |
7105 | + case TCP_CLOSE_WAIT: | |
7106 | + case TCP_LAST_ACK: | |
7107 | + case TCP_CLOSING: | |
7108 | + /* FIXME: I think this causes more trouble than it solves. | |
7109 | + lowcomms will reconnect anyway when there is something to | |
7110 | + send. This just attempts reconnection if a node goes down! | |
7111 | + */ | |
7112 | + /* lowcomms_connect_sock(con); */ | |
7113 | + break; | |
7114 | + | |
7115 | + default: | |
7116 | + printk("dlm: lowcomms_state_change: state=%d\n", sk->sk_state); | |
7117 | + break; | |
7118 | + } | |
7119 | +} | |
7120 | + | |
7121 | +/* Make a socket active */ | |
7122 | +static int add_sock(struct socket *sock, struct connection *con) | |
7123 | +{ | |
7124 | + con->sock = sock; | |
7125 | + | |
7126 | + /* Install a data_ready callback */ | |
7127 | + con->sock->sk->sk_data_ready = lowcomms_data_ready; | |
7128 | + con->sock->sk->sk_write_space = lowcomms_write_space; | |
7129 | + con->sock->sk->sk_state_change = lowcomms_state_change; | |
7130 | + | |
7131 | + return 0; | |
7132 | +} | |
7133 | + | |
7134 | +/* Add the port number to an IP6 or 4 sockaddr and return the address | |
7135 | + length */ | |
7136 | +static void make_sockaddr(struct sockaddr_in6 *saddr, uint16_t port, | |
7137 | + int *addr_len) | |
7138 | +{ | |
7139 | + saddr->sin6_family = local_addr.sin6_family; | |
7140 | + if (local_addr.sin6_family == AF_INET) { | |
b7b72b66 AM |
7141 | + struct sockaddr_in *in4_addr = (struct sockaddr_in *)saddr; |
7142 | + in4_addr->sin_port = cpu_to_be16(port); | |
7143 | + *addr_len = sizeof(struct sockaddr_in); | |
c1c6733f AM |
7144 | + } |
7145 | + else { | |
b7b72b66 AM |
7146 | + saddr->sin6_port = cpu_to_be16(port); |
7147 | + *addr_len = sizeof(struct sockaddr_in6); | |
c1c6733f AM |
7148 | + } |
7149 | +} | |
7150 | + | |
7151 | +/* Close a remote connection and tidy up */ | |
c783755a | 7152 | +static void close_connection(struct connection *con, int and_other) |
c1c6733f | 7153 | +{ |
c1c6733f AM |
7154 | + down_write(&con->sock_sem); |
7155 | + | |
7156 | + if (con->sock) { | |
7157 | + sock_release(con->sock); | |
7158 | + con->sock = NULL; | |
c783755a AM |
7159 | + if (con->othercon && and_other) { |
7160 | + /* Argh! recursion in kernel code! | |
7161 | + Actually, this isn't a list so it | |
7162 | + will only re-enter once. | |
7163 | + */ | |
7164 | + close_connection(con->othercon, TRUE); | |
c1c6733f AM |
7165 | + } |
7166 | + } | |
7167 | + if (con->rx_page) { | |
7168 | + __free_page(con->rx_page); | |
7169 | + con->rx_page = NULL; | |
7170 | + } | |
7171 | + up_write(&con->sock_sem); | |
7172 | +} | |
7173 | + | |
7174 | +/* Data received from remote end */ | |
7175 | +static int receive_from_sock(struct connection *con) | |
7176 | +{ | |
7177 | + int ret = 0; | |
7178 | + struct msghdr msg; | |
7179 | + struct iovec iov[2]; | |
7180 | + mm_segment_t fs; | |
7181 | + unsigned len; | |
7182 | + int r; | |
7183 | + int call_again_soon = 0; | |
7184 | + | |
7185 | + down_read(&con->sock_sem); | |
7186 | + | |
7187 | + if (con->sock == NULL) | |
7188 | + goto out; | |
7189 | + if (con->rx_page == NULL) { | |
7190 | + /* | |
7191 | + * This doesn't need to be atomic, but I think it should | |
7192 | + * improve performance if it is. | |
7193 | + */ | |
7194 | + con->rx_page = alloc_page(GFP_ATOMIC); | |
7195 | + if (con->rx_page == NULL) | |
7196 | + goto out_resched; | |
7197 | + CBUF_INIT(&con->cb, PAGE_CACHE_SIZE); | |
7198 | + } | |
b7b72b66 | 7199 | + |
c1c6733f | 7200 | + /* |
b7b72b66 AM |
7201 | + * To avoid doing too many short reads, we will reschedule for |
7202 | + * another time if there are less than 20 bytes left in the buffer. | |
c1c6733f | 7203 | + */ |
b7b72b66 | 7204 | + if (!CBUF_MAY_ADD(&con->cb, 20)) |
c1c6733f AM |
7205 | + goto out_resched; |
7206 | + | |
7207 | + msg.msg_control = NULL; | |
7208 | + msg.msg_controllen = 0; | |
7209 | + msg.msg_iovlen = 1; | |
7210 | + msg.msg_iov = iov; | |
7211 | + msg.msg_name = NULL; | |
7212 | + msg.msg_namelen = 0; | |
7213 | + msg.msg_flags = 0; | |
7214 | + | |
7215 | + /* | |
7216 | + * iov[0] is the bit of the circular buffer between the current end | |
7217 | + * point (cb.base + cb.len) and the end of the buffer. | |
7218 | + */ | |
7219 | + iov[0].iov_len = con->cb.base - CBUF_DATA(&con->cb); | |
7220 | + iov[0].iov_base = page_address(con->rx_page) + CBUF_DATA(&con->cb); | |
7221 | + iov[1].iov_len = 0; | |
7222 | + | |
7223 | + /* | |
7224 | + * iov[1] is the bit of the circular buffer between the start of the | |
7225 | + * buffer and the start of the currently used section (cb.base) | |
7226 | + */ | |
7227 | + if (CBUF_DATA(&con->cb) >= con->cb.base) { | |
7228 | + iov[0].iov_len = PAGE_CACHE_SIZE - CBUF_DATA(&con->cb); | |
7229 | + iov[1].iov_len = con->cb.base; | |
7230 | + iov[1].iov_base = page_address(con->rx_page); | |
7231 | + msg.msg_iovlen = 2; | |
7232 | + } | |
7233 | + len = iov[0].iov_len + iov[1].iov_len; | |
7234 | + | |
7235 | + fs = get_fs(); | |
7236 | + set_fs(get_ds()); | |
7237 | + r = ret = sock_recvmsg(con->sock, &msg, len, | |
7238 | + MSG_DONTWAIT | MSG_NOSIGNAL); | |
7239 | + set_fs(fs); | |
7240 | + | |
7241 | + if (ret <= 0) | |
7242 | + goto out_close; | |
7243 | + if (ret == len) | |
7244 | + call_again_soon = 1; | |
7245 | + CBUF_ADD(&con->cb, ret); | |
7246 | + ret = midcomms_process_incoming_buffer(con->nodeid, | |
7247 | + page_address(con->rx_page), | |
7248 | + con->cb.base, con->cb.len, | |
7249 | + PAGE_CACHE_SIZE); | |
7250 | + if (ret == -EBADMSG) { | |
7251 | + printk(KERN_INFO "dlm: lowcomms: addr=%p, base=%u, len=%u, " | |
7252 | + "iov_len=%u, iov_base[0]=%p, read=%d\n", | |
7253 | + page_address(con->rx_page), con->cb.base, con->cb.len, | |
7254 | + len, iov[0].iov_base, r); | |
7255 | + } | |
7256 | + if (ret < 0) | |
7257 | + goto out_close; | |
7258 | + CBUF_EAT(&con->cb, ret); | |
7259 | + | |
7260 | + if (CBUF_EMPTY(&con->cb) && !call_again_soon) { | |
7261 | + __free_page(con->rx_page); | |
7262 | + con->rx_page = NULL; | |
7263 | + } | |
b7b72b66 | 7264 | + |
c1c6733f AM |
7265 | + out: |
7266 | + if (call_again_soon) | |
7267 | + goto out_resched; | |
7268 | + up_read(&con->sock_sem); | |
7269 | + ret = 0; | |
7270 | + goto out_ret; | |
7271 | + | |
7272 | + out_resched: | |
7273 | + lowcomms_data_ready(con->sock->sk, 0); | |
7274 | + up_read(&con->sock_sem); | |
7275 | + ret = 0; | |
7276 | + goto out_ret; | |
7277 | + | |
7278 | + out_close: | |
7279 | + up_read(&con->sock_sem); | |
c783755a AM |
7280 | + if (ret != -EAGAIN && !test_bit(CF_IS_OTHERCON, &con->flags)) { |
7281 | + close_connection(con, FALSE); | |
c1c6733f AM |
7282 | + lowcomms_connect_sock(con); |
7283 | + } | |
7284 | + | |
7285 | + out_ret: | |
7286 | + return ret; | |
7287 | +} | |
7288 | + | |
7289 | +/* Listening socket is busy, accept a connection */ | |
7290 | +static int accept_from_sock(struct connection *con) | |
7291 | +{ | |
7292 | + int result; | |
7293 | + struct sockaddr_in6 peeraddr; | |
7294 | + struct socket *newsock; | |
7295 | + int len; | |
7296 | + int nodeid; | |
7297 | + struct connection *newcon; | |
7298 | + | |
7299 | + memset(&peeraddr, 0, sizeof(peeraddr)); | |
7300 | + newsock = sock_alloc(); | |
7301 | + if (!newsock) | |
7302 | + return -ENOMEM; | |
7303 | + | |
7304 | + down_read(&con->sock_sem); | |
7305 | + | |
7306 | + result = -ENOTCONN; | |
7307 | + if (con->sock == NULL) | |
7308 | + goto accept_err; | |
7309 | + | |
7310 | + newsock->type = con->sock->type; | |
7311 | + newsock->ops = con->sock->ops; | |
7312 | + | |
7313 | + result = con->sock->ops->accept(con->sock, newsock, O_NONBLOCK); | |
7314 | + if (result < 0) | |
7315 | + goto accept_err; | |
7316 | + | |
7317 | + /* Get the connected socket's peer */ | |
7318 | + if (newsock->ops->getname(newsock, (struct sockaddr *)&peeraddr, | |
7319 | + &len, 2)) { | |
7320 | + result = -ECONNABORTED; | |
7321 | + goto accept_err; | |
7322 | + } | |
7323 | + | |
7324 | + /* Get the new node's NODEID */ | |
7325 | + nodeid = lowcomms_nodeid_from_ipaddr((struct sockaddr *)&peeraddr, len); | |
7326 | + if (nodeid == 0) { | |
7327 | + printk("dlm: connect from non cluster node\n"); | |
7328 | + sock_release(newsock); | |
7329 | + up_read(&con->sock_sem); | |
7330 | + return -1; | |
7331 | + } | |
7332 | + | |
7333 | + log_print("got connection from %d", nodeid); | |
7334 | + | |
7335 | + /* Check to see if we already have a connection to this node. This | |
7336 | + * could happen if the two nodes initiate a connection at roughly | |
7337 | + * the same time and the connections cross on the wire. | |
7338 | + * TEMPORARY FIX: | |
c783755a | 7339 | + * In this case we store the incoming one in "othercon" |
c1c6733f | 7340 | + */ |
c783755a AM |
7341 | + newcon = nodeid2con(nodeid, GFP_KERNEL); |
7342 | + if (!newcon) { | |
7343 | + result = -ENOMEM; | |
7344 | + goto accept_err; | |
7345 | + } | |
c1c6733f AM |
7346 | + down_write(&newcon->sock_sem); |
7347 | + if (newcon->sock) { | |
c783755a | 7348 | + struct connection *othercon = newcon->othercon; |
c1c6733f | 7349 | + |
c1c6733f | 7350 | + if (!othercon) { |
c783755a AM |
7351 | + othercon = kmem_cache_alloc(con_cache, GFP_KERNEL); |
7352 | + if (!othercon) { | |
7353 | + printk("dlm: failed to allocate incoming socket\n"); | |
7354 | + up_write(&newcon->sock_sem); | |
7355 | + result = -ENOMEM; | |
7356 | + goto accept_err; | |
7357 | + } | |
7358 | + memset(othercon, 0, sizeof(*othercon)); | |
7359 | + othercon->nodeid = nodeid; | |
7360 | + othercon->rx_action = receive_from_sock; | |
7361 | + init_rwsem(&othercon->sock_sem); | |
7362 | + set_bit(CF_IS_OTHERCON, &othercon->flags); | |
7363 | + newcon->othercon = othercon; | |
7364 | + } | |
c1c6733f | 7365 | + othercon->sock = newsock; |
c1c6733f | 7366 | + newsock->sk->sk_user_data = othercon; |
b7b72b66 AM |
7367 | + add_sock(newsock, othercon); |
7368 | + } | |
7369 | + else { | |
7370 | + newsock->sk->sk_user_data = newcon; | |
7371 | + newcon->rx_action = receive_from_sock; | |
7372 | + add_sock(newsock, newcon); | |
c1c6733f | 7373 | + |
c1c6733f AM |
7374 | + } |
7375 | + | |
c1c6733f AM |
7376 | + up_write(&newcon->sock_sem); |
7377 | + | |
7378 | + /* | |
7379 | + * Add it to the active queue in case we got data | |
7380 | + * between processing the accept and adding the socket | |
7381 | + * to the read_sockets list | |
7382 | + */ | |
7383 | + lowcomms_data_ready(newsock->sk, 0); | |
c1c6733f AM |
7384 | + up_read(&con->sock_sem); |
7385 | + | |
c1c6733f AM |
7386 | + return 0; |
7387 | + | |
7388 | + accept_err: | |
7389 | + up_read(&con->sock_sem); | |
7390 | + sock_release(newsock); | |
7391 | + | |
b7b72b66 AM |
7392 | + if (result != -EAGAIN) |
7393 | + printk("dlm: error accepting connection from node: %d\n", result); | |
c1c6733f AM |
7394 | + return result; |
7395 | +} | |
7396 | + | |
7397 | +/* Connect a new socket to its peer. Returns 0 if the connect was started or a retry was queued, -errno once we give up */ | |
7398 | +static int connect_to_sock(struct connection *con) | |
7399 | +{ | |
7400 | + int result = -EHOSTUNREACH; | |
7401 | + struct sockaddr_in6 saddr; | |
7402 | + int addr_len; | |
7403 | + struct socket *sock; | |
7404 | + | |
7405 | + if (con->nodeid == 0) { | |
7406 | + log_print("attempt to connect sock 0 foiled"); | |
7407 | + return 0; | |
7408 | + } | |
7409 | + | |
7410 | + down_write(&con->sock_sem); | |
7411 | + if (con->retries++ > MAX_CONNECT_RETRIES) | |
7412 | + goto out; | |
7413 | + | |
7414 | + // FIXME not sure this should happen, let alone like this. | |
7415 | + if (con->sock) { | |
7416 | + sock_release(con->sock); | |
7417 | + con->sock = NULL; | |
7418 | + } | |
7419 | + | |
7420 | + /* Create a socket to communicate with */ | |
7421 | + result = sock_create_kern(local_addr.sin6_family, SOCK_STREAM, IPPROTO_TCP, &sock); | |
7422 | + if (result < 0) | |
7423 | + goto out_err; | |
7424 | + | |
b7b72b66 | 7425 | + memset(&saddr, 0, sizeof(saddr)); |
c1c6733f AM |
7426 | + if (lowcomms_ipaddr_from_nodeid(con->nodeid, (struct sockaddr *)&saddr) < 0) | |
7427 | + goto out_err; | |
7428 | + | |
7429 | + sock->sk->sk_user_data = con; | |
7430 | + con->rx_action = receive_from_sock; | |
7431 | + | |
7432 | + make_sockaddr(&saddr, dlm_config.tcp_port, &addr_len); | |
7433 | + | |
7434 | + add_sock(sock, con); | |
c783755a AM |
7435 | + | |
7436 | + log_print("connecting to %d", con->nodeid); | |
c1c6733f AM |
7437 | + result = | |
7438 | + sock->ops->connect(sock, (struct sockaddr *) &saddr, addr_len, | |
7439 | + O_NONBLOCK); | |
7440 | + if (result == -EINPROGRESS) | |
7441 | + result = 0; | |
7442 | + if (result != 0) | |
7443 | + goto out_err; | |
7444 | + | |
7445 | + out: | |
7446 | + up_write(&con->sock_sem); | |
7447 | + /* | |
7448 | + * Returning an error here means we've given up trying to connect to | |
7449 | + * a remote node, otherwise we return 0 and reschedule the connection | |
7450 | + * attempt | |
7451 | + */ | |
7452 | + return result; | |
7453 | + | |
7454 | + out_err: | |
7455 | + if (con->sock) { | |
7456 | + sock_release(con->sock); | |
7457 | + con->sock = NULL; | |
7458 | + } | |
7459 | + /* | |
7460 | + * Some errors are fatal and this list might need adjusting. For other | |
7461 | + * errors we try again until the max number of retries is reached. | |
7462 | + */ | |
7463 | + if (result != -EHOSTUNREACH && result != -ENETUNREACH && | |
7464 | + result != -ENETDOWN && result != -EINVAL | |
7465 | + && result != -EPROTONOSUPPORT) { | |
7466 | + lowcomms_connect_sock(con); | |
7467 | + result = 0; | |
7468 | + } | |
7469 | + goto out; | |
7470 | +} | |
7471 | + | |
7472 | +static struct socket *create_listen_sock(struct connection *con, char *addr, int addr_len) | |
7473 | +{ | |
7474 | + struct socket *sock = NULL; | |
7475 | + mm_segment_t fs; | |
7476 | + int result = 0; | |
7477 | + int one = 1; | |
7478 | + struct sockaddr_in6 *saddr = (struct sockaddr_in6 *)addr; | |
7479 | + | |
7480 | + /* Create a socket to communicate with */ | |
7481 | + result = sock_create_kern(local_addr.sin6_family, SOCK_STREAM, IPPROTO_TCP, &sock); | |
7482 | + if (result < 0) { | |
7483 | + printk("dlm: Can't create listening comms socket\n"); | |
7484 | + goto create_out; | |
7485 | + } | |
7486 | + | |
7487 | + fs = get_fs(); | |
7488 | + set_fs(get_ds()); | |
7489 | + result = sock_setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, (char *)&one, sizeof(one)); | |
7490 | + set_fs(fs); | |
7491 | + if (result < 0) { | |
7492 | + printk("dlm: Failed to set SO_REUSEADDR on socket: result=%d\n",result); | |
7493 | + } | |
7494 | + sock->sk->sk_user_data = con; | |
7495 | + con->rx_action = accept_from_sock; | |
7496 | + con->sock = sock; | |
7497 | + | |
7498 | + /* Bind to our port */ | |
7499 | + make_sockaddr(saddr, dlm_config.tcp_port, &addr_len); | |
7500 | + result = sock->ops->bind(sock, (struct sockaddr *) saddr, addr_len); | |
7501 | + if (result < 0) { | |
7502 | + printk("dlm: Can't bind to port %d\n", dlm_config.tcp_port); | |
7503 | + sock_release(sock); | |
7504 | + sock = NULL; | |
7505 | + goto create_out; | |
7506 | + } | |
7507 | + | |
7508 | + fs = get_fs(); | |
7509 | + set_fs(get_ds()); | |
7510 | + | |
7511 | + result = sock_setsockopt(sock, SOL_SOCKET, SO_KEEPALIVE, (char *)&one, sizeof(one)); | |
7512 | + set_fs(fs); | |
7513 | + if (result < 0) { | |
7514 | + printk("dlm: Set keepalive failed: %d\n", result); | |
7515 | + } | |
7516 | + | |
7517 | + result = sock->ops->listen(sock, 5); | |
7518 | + if (result < 0) { | |
7519 | + printk("dlm: Can't listen on port %d\n", dlm_config.tcp_port); | |
7520 | + sock_release(sock); | |
7521 | + sock = NULL; | |
7522 | + goto create_out; | |
7523 | + } | |
7524 | + | |
7525 | + create_out: | |
7526 | + return sock; | |
7527 | +} | |
7528 | + | |
7529 | + | |
7530 | +/* Listen on all interfaces */ | |
7531 | +static int listen_for_all(void) | |
7532 | +{ | |
7533 | + int result = 0; | |
7534 | + int nodeid; | |
7535 | + struct socket *sock = NULL; | |
7536 | + struct list_head *addr_list; | |
c783755a | 7537 | + struct connection *con = nodeid2con(0, GFP_KERNEL); |
b7b72b66 | 7538 | + struct connection *temp; |
c1c6733f AM |
7539 | + struct cluster_node_addr *node_addr; |
7540 | + char local_addr[sizeof(struct sockaddr_in6)]; | |
7541 | + | |
7542 | + /* This will also fill in local_addr */ | |
7543 | + nodeid = lowcomms_our_nodeid(); | |
7544 | + | |
7545 | + addr_list = kcl_get_node_addresses(nodeid); | |
7546 | + if (!addr_list) { | |
7547 | + printk("dlm: cannot initialise comms layer\n"); | |
7548 | + result = -ENOTCONN; | |
7549 | + goto create_out; | |
7550 | + } | |
7551 | + | |
7552 | + list_for_each_entry(node_addr, addr_list, list) { | |
7553 | + | |
7554 | + if (!con) { | |
c783755a | 7555 | + con = kmem_cache_alloc(con_cache, GFP_KERNEL); |
c1c6733f AM |
7556 | + if (!con) { |
7557 | + printk("dlm: failed to allocate listen socket\n"); | |
b7b72b66 AM |
7558 | + result = -ENOMEM; |
7559 | + goto create_free; | |
c1c6733f AM |
7560 | + } |
7561 | + memset(con, 0, sizeof(*con)); | |
7562 | + init_rwsem(&con->sock_sem); | |
7563 | + spin_lock_init(&con->writequeue_lock); | |
7564 | + INIT_LIST_HEAD(&con->writequeue); | |
c783755a | 7565 | + set_bit(CF_IS_OTHERCON, &con->flags); |
c1c6733f AM |
7566 | + } |
7567 | + | |
7568 | + memcpy(local_addr, node_addr->addr, node_addr->addr_len); | |
7569 | + sock = create_listen_sock(con, local_addr, | |
7570 | + node_addr->addr_len); | |
7571 | + if (sock) { | |
7572 | + add_sock(sock, con); | |
b7b72b66 AM |
7573 | + |
7574 | + /* Keep a list of dynamically allocated listening sockets | |
7575 | + so we can free them at shutdown */ | |
c783755a | 7576 | + if (test_bit(CF_IS_OTHERCON, &con->flags)) { |
b7b72b66 AM |
7577 | + list_add_tail(&con->listenlist, &listen_sockets); |
7578 | + } | |
c1c6733f AM |
7579 | + } |
7580 | + else { | |
b7b72b66 | 7581 | + result = -EADDRINUSE; |
c783755a | 7582 | + kmem_cache_free(con_cache, con); |
b7b72b66 | 7583 | + goto create_free; |
c1c6733f AM |
7584 | + } |
7585 | + | |
c1c6733f AM |
7586 | + con = NULL; |
7587 | + } | |
7588 | + | |
7589 | + create_out: | |
7590 | + return result; | |
b7b72b66 AM |
7591 | + |
7592 | + create_free: | |
7593 | + /* Free up any dynamically allocated listening sockets */ | |
7594 | + list_for_each_entry_safe(con, temp, &listen_sockets, listenlist) { | |
7595 | + sock_release(con->sock); | |
c783755a | 7596 | + kmem_cache_free(con_cache, con); |
b7b72b66 AM |
7597 | + } |
7598 | + return result; | |
c1c6733f AM |
7599 | +} |
7600 | + | |
7601 | + | |
7602 | + | |
7603 | +static struct writequeue_entry *new_writequeue_entry(struct connection *con, | |
7604 | + int allocation) | |
7605 | +{ | |
7606 | + struct writequeue_entry *entry; | |
7607 | + | |
7608 | + entry = kmalloc(sizeof(struct writequeue_entry), allocation); | |
7609 | + if (!entry) | |
7610 | + return NULL; | |
7611 | + | |
7612 | + entry->page = alloc_page(allocation); | |
7613 | + if (!entry->page) { | |
7614 | + kfree(entry); | |
7615 | + return NULL; | |
7616 | + } | |
7617 | + | |
7618 | + entry->offset = 0; | |
7619 | + entry->len = 0; | |
7620 | + entry->end = 0; | |
7621 | + entry->users = 0; | |
7622 | + entry->con = con; | |
7623 | + | |
7624 | + return entry; | |
7625 | +} | |
7626 | + | |
7627 | +struct writequeue_entry *lowcomms_get_buffer(int nodeid, int len, | |
7628 | + int allocation, char **ppc) | |
7629 | +{ | |
c783755a | 7630 | + struct connection *con = nodeid2con(nodeid, allocation); |
c1c6733f AM |
7631 | + struct writequeue_entry *e; |
7632 | + int offset = 0; | |
7633 | + int users = 0; | |
7634 | + | |
c783755a AM |
7635 | + if (!con) |
7636 | + return NULL; | |
7637 | + | |
c1c6733f AM |
7638 | + if (!atomic_read(&accepting)) |
7639 | + return NULL; | |
7640 | + | |
7641 | + spin_lock(&con->writequeue_lock); | |
7642 | + e = list_entry(con->writequeue.prev, struct writequeue_entry, list); | |
7643 | + if (((struct list_head *) e == &con->writequeue) || | |
7644 | + (PAGE_CACHE_SIZE - e->end < len)) { | |
7645 | + e = NULL; | |
7646 | + } else { | |
7647 | + offset = e->end; | |
7648 | + e->end += len; | |
7649 | + users = e->users++; | |
7650 | + } | |
7651 | + spin_unlock(&con->writequeue_lock); | |
7652 | + | |
7653 | + if (e) { | |
7654 | + got_one: | |
7655 | + if (users == 0) | |
7656 | + kmap(e->page); | |
7657 | + *ppc = page_address(e->page) + offset; | |
7658 | + return e; | |
7659 | + } | |
7660 | + | |
7661 | + e = new_writequeue_entry(con, allocation); | |
7662 | + if (e) { | |
7663 | + spin_lock(&con->writequeue_lock); | |
7664 | + offset = e->end; | |
7665 | + e->end += len; | |
7666 | + users = e->users++; | |
7667 | + list_add_tail(&e->list, &con->writequeue); | |
7668 | + spin_unlock(&con->writequeue_lock); | |
c1c6733f AM |
7669 | + goto got_one; |
7670 | + } | |
7671 | + return NULL; | |
7672 | +} | |
7673 | + | |
7674 | +void lowcomms_commit_buffer(struct writequeue_entry *e) | |
7675 | +{ | |
7676 | + struct connection *con = e->con; | |
7677 | + int users; | |
7678 | + | |
7679 | + if (!atomic_read(&accepting)) | |
7680 | + return; | |
7681 | + | |
7682 | + spin_lock(&con->writequeue_lock); | |
7683 | + users = --e->users; | |
7684 | + if (users) | |
7685 | + goto out; | |
7686 | + e->len = e->end - e->offset; | |
7687 | + kunmap(e->page); | |
7688 | + spin_unlock(&con->writequeue_lock); | |
7689 | + | |
7690 | + if (test_and_set_bit(CF_WRITE_PENDING, &con->flags) == 0) { | |
7691 | + spin_lock_bh(&write_sockets_lock); | |
7692 | + list_add_tail(&con->write_list, &write_sockets); | |
7693 | + spin_unlock_bh(&write_sockets_lock); | |
7694 | + | |
7695 | + wake_up_interruptible(&lowcomms_send_waitq); | |
7696 | + } | |
7697 | + return; | |
7698 | + | |
7699 | + out: | |
7700 | + spin_unlock(&con->writequeue_lock); | |
7701 | + return; | |
7702 | +} | |
7703 | + | |
7704 | +static void free_entry(struct writequeue_entry *e) | |
7705 | +{ | |
7706 | + __free_page(e->page); | |
7707 | + kfree(e); | |
c1c6733f AM |
7708 | +} |
7709 | + | |
7710 | +/* Send a message */ | |
7711 | +static int send_to_sock(struct connection *con) | |
7712 | +{ | |
7713 | + int ret = 0; | |
7714 | + ssize_t(*sendpage) (struct socket *, struct page *, int, size_t, int); | |
7715 | + const int msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL; | |
7716 | + struct writequeue_entry *e; | |
7717 | + int len, offset; | |
7718 | + | |
7719 | + down_read(&con->sock_sem); | |
7720 | + if (con->sock == NULL) | |
7721 | + goto out_connect; | |
7722 | + | |
7723 | + sendpage = con->sock->ops->sendpage; | |
7724 | + | |
7725 | + spin_lock(&con->writequeue_lock); | |
7726 | + for (;;) { | |
7727 | + e = list_entry(con->writequeue.next, struct writequeue_entry, | |
7728 | + list); | |
7729 | + if ((struct list_head *) e == &con->writequeue) | |
7730 | + break; | |
7731 | + | |
7732 | + len = e->len; | |
7733 | + offset = e->offset; | |
7734 | + BUG_ON(len == 0 && e->users == 0); | |
7735 | + spin_unlock(&con->writequeue_lock); | |
7736 | + | |
7737 | + ret = 0; | |
7738 | + if (len) { | |
7739 | + ret = sendpage(con->sock, e->page, offset, len, | |
7740 | + msg_flags); | |
7741 | + if (ret == -EAGAIN || ret == 0) | |
7742 | + goto out; | |
7743 | + if (ret <= 0) | |
7744 | + goto send_error; | |
7745 | + } | |
7746 | + | |
7747 | + spin_lock(&con->writequeue_lock); | |
7748 | + e->offset += ret; | |
7749 | + e->len -= ret; | |
7750 | + | |
7751 | + if (e->len == 0 && e->users == 0) { | |
7752 | + list_del(&e->list); | |
7753 | + free_entry(e); | |
7754 | + continue; | |
7755 | + } | |
7756 | + } | |
7757 | + spin_unlock(&con->writequeue_lock); | |
7758 | + out: | |
7759 | + up_read(&con->sock_sem); | |
7760 | + return ret; | |
7761 | + | |
7762 | + send_error: | |
7763 | + up_read(&con->sock_sem); | |
c783755a | 7764 | + close_connection(con, FALSE); |
c1c6733f AM |
7765 | + lowcomms_connect_sock(con); |
7766 | + return ret; | |
7767 | + | |
7768 | + out_connect: | |
7769 | + up_read(&con->sock_sem); | |
7770 | + lowcomms_connect_sock(con); | |
7771 | + return 0; | |
7772 | +} | |
7773 | + | |
c783755a AM |
7774 | +static void clean_one_writequeue(struct connection *con) |
7775 | +{ | |
7776 | + struct list_head *list; | |
7777 | + struct list_head *temp; | |
7778 | + | |
7779 | + spin_lock(&con->writequeue_lock); | |
7780 | + list_for_each_safe(list, temp, &con->writequeue) { | |
7781 | + struct writequeue_entry *e = | |
7782 | + list_entry(list, struct writequeue_entry, list); | |
7783 | + list_del(&e->list); | |
7784 | + free_entry(e); | |
7785 | + } | |
7786 | + spin_unlock(&con->writequeue_lock); | |
7787 | +} | |
7788 | + | |
7789 | +/* Called from recovery when it knows that a node has | |
c1c6733f AM |
7790 | + left the cluster */ |
7791 | +int lowcomms_close(int nodeid) | |
7792 | +{ | |
7793 | + struct connection *con; | |
7794 | + | |
7795 | + if (!connections) | |
7796 | + goto out; | |
7797 | + | |
c783755a AM |
7798 | + log_print("closing connection to node %d", nodeid); |
7799 | + con = nodeid2con(nodeid, 0); | |
7800 | + if (con) { | |
7801 | + close_connection(con, TRUE); | |
7802 | + clean_one_writequeue(con); | |
7803 | + atomic_set(&con->waiting_requests, 0); | |
c1c6733f | 7804 | + } |
c783755a | 7805 | + return 0; |
c1c6733f AM |
7806 | + |
7807 | + out: | |
7808 | + return -1; | |
7809 | +} | |
7810 | + | |
7811 | +/* API send message call, may queue the request */ | |
7812 | +/* N.B. This is the old interface - use the new one for new calls */ | |
7813 | +int lowcomms_send_message(int nodeid, char *buf, int len, int allocation) | |
7814 | +{ | |
7815 | + struct writequeue_entry *e; | |
7816 | + char *b; | |
7817 | + | |
c1c6733f AM |
7818 | + e = lowcomms_get_buffer(nodeid, len, allocation, &b); |
7819 | + if (e) { | |
7820 | + memcpy(b, buf, len); | |
7821 | + lowcomms_commit_buffer(e); | |
7822 | + return 0; | |
7823 | + } | |
7824 | + return -ENOBUFS; | |
7825 | +} | |
7826 | + | |
7827 | +/* Look for activity on active sockets */ | |
7828 | +static void process_sockets(void) | |
7829 | +{ | |
7830 | + struct list_head *list; | |
7831 | + struct list_head *temp; | |
7832 | + | |
7833 | + spin_lock_bh(&read_sockets_lock); | |
7834 | + list_for_each_safe(list, temp, &read_sockets) { | |
7835 | + struct connection *con = | |
7836 | + list_entry(list, struct connection, read_list); | |
7837 | + list_del(&con->read_list); | |
7838 | + clear_bit(CF_READ_PENDING, &con->flags); | |
7839 | + | |
7840 | + spin_unlock_bh(&read_sockets_lock); | |
7841 | + | |
c783755a | 7842 | + /* This can reach zero if we are processing requests |
b7b72b66 AM |
7843 | + * as they come in. |
7844 | + */ | |
7845 | + if (atomic_read(&con->waiting_requests) == 0) { | |
7846 | + spin_lock_bh(&read_sockets_lock); | |
7847 | + continue; | |
7848 | + } | |
7849 | + | |
7850 | + do { | |
7851 | + con->rx_action(con); | |
c783755a AM |
7852 | + } while (!atomic_dec_and_test(&con->waiting_requests) && |
7853 | + !kthread_should_stop()); | |
c1c6733f AM |
7854 | + |
7855 | + /* Don't starve out everyone else */ | |
7856 | + schedule(); | |
7857 | + spin_lock_bh(&read_sockets_lock); | |
7858 | + } | |
7859 | + spin_unlock_bh(&read_sockets_lock); | |
7860 | +} | |
7861 | + | |
7862 | +/* Try to send any messages that are pending | |
7863 | + */ | |
7864 | +static void process_output_queue(void) | |
7865 | +{ | |
7866 | + struct list_head *list; | |
7867 | + struct list_head *temp; | |
7868 | + int ret; | |
7869 | + | |
7870 | + spin_lock_bh(&write_sockets_lock); | |
7871 | + list_for_each_safe(list, temp, &write_sockets) { | |
7872 | + struct connection *con = | |
7873 | + list_entry(list, struct connection, write_list); | |
7874 | + list_del(&con->write_list); | |
7875 | + clear_bit(CF_WRITE_PENDING, &con->flags); | |
7876 | + | |
7877 | + spin_unlock_bh(&write_sockets_lock); | |
7878 | + | |
7879 | + ret = send_to_sock(con); | |
7880 | + if (ret < 0) { | |
7881 | + } | |
7882 | + spin_lock_bh(&write_sockets_lock); | |
7883 | + } | |
7884 | + spin_unlock_bh(&write_sockets_lock); | |
7885 | +} | |
7886 | + | |
7887 | +static void process_state_queue(void) | |
7888 | +{ | |
7889 | + struct list_head *list; | |
7890 | + struct list_head *temp; | |
7891 | + int ret; | |
7892 | + | |
7893 | + spin_lock_bh(&state_sockets_lock); | |
7894 | + list_for_each_safe(list, temp, &state_sockets) { | |
7895 | + struct connection *con = | |
7896 | + list_entry(list, struct connection, state_list); | |
7897 | + list_del(&con->state_list); | |
7898 | + clear_bit(CF_CONNECT_PENDING, &con->flags); | |
7899 | + spin_unlock_bh(&state_sockets_lock); | |
7900 | + | |
7901 | + ret = connect_to_sock(con); | |
7902 | + if (ret < 0) { | |
7903 | + } | |
7904 | + spin_lock_bh(&state_sockets_lock); | |
7905 | + } | |
7906 | + spin_unlock_bh(&state_sockets_lock); | |
7907 | +} | |
7908 | + | |
c783755a | 7909 | + |
c1c6733f AM |
7910 | +/* Discard all entries on the write queues */ |
7911 | +static void clean_writequeues(void) | |
7912 | +{ | |
c1c6733f AM |
7913 | + int nodeid; |
7914 | + | |
c783755a AM |
7915 | + for (nodeid = 1; nodeid < conn_array_size; nodeid++) { |
7916 | + struct connection *con = nodeid2con(nodeid, 0); | |
c1c6733f | 7917 | + |
c783755a AM |
7918 | + if (con) |
7919 | + clean_one_writequeue(con); | |
c1c6733f AM |
7920 | + } |
7921 | +} | |
7922 | + | |
7923 | +static int read_list_empty(void) | |
7924 | +{ | |
7925 | + int status; | |
7926 | + | |
7927 | + spin_lock_bh(&read_sockets_lock); | |
7928 | + status = list_empty(&read_sockets); | |
7929 | + spin_unlock_bh(&read_sockets_lock); | |
7930 | + | |
7931 | + return status; | |
7932 | +} | |
7933 | + | |
7934 | +/* DLM Transport comms receive daemon */ | |
7935 | +static int dlm_recvd(void *data) | |
7936 | +{ | |
c1c6733f AM |
7937 | + init_waitqueue_head(&lowcomms_recv_waitq); |
7938 | + init_waitqueue_entry(&lowcomms_recv_waitq_head, current); | |
7939 | + add_wait_queue(&lowcomms_recv_waitq, &lowcomms_recv_waitq_head); | |
7940 | + | |
c783755a AM |
7941 | + while (!kthread_should_stop()) { |
7942 | + set_current_state(TASK_INTERRUPTIBLE); | |
c1c6733f AM |
7943 | + if (read_list_empty()) |
7944 | + schedule(); | |
c783755a | 7945 | + set_current_state(TASK_RUNNING); |
c1c6733f AM |
7946 | + |
7947 | + process_sockets(); | |
7948 | + } | |
7949 | + | |
c1c6733f AM |
7950 | + return 0; |
7951 | +} | |
7952 | + | |
7953 | +static int write_and_state_lists_empty(void) | |
7954 | +{ | |
7955 | + int status; | |
7956 | + | |
7957 | + spin_lock_bh(&write_sockets_lock); | |
7958 | + status = list_empty(&write_sockets); | |
7959 | + spin_unlock_bh(&write_sockets_lock); | |
7960 | + | |
7961 | + spin_lock_bh(&state_sockets_lock); | |
7962 | + if (list_empty(&state_sockets) == 0) | |
7963 | + status = 0; | |
7964 | + spin_unlock_bh(&state_sockets_lock); | |
7965 | + | |
7966 | + return status; | |
7967 | +} | |
7968 | + | |
7969 | +/* DLM Transport send daemon */ | |
7970 | +static int dlm_sendd(void *data) | |
7971 | +{ | |
c1c6733f AM |
7972 | + init_waitqueue_head(&lowcomms_send_waitq); |
7973 | + init_waitqueue_entry(&lowcomms_send_waitq_head, current); | |
7974 | + add_wait_queue(&lowcomms_send_waitq, &lowcomms_send_waitq_head); | |
7975 | + | |
c783755a AM |
7976 | + while (!kthread_should_stop()) { |
7977 | + set_current_state(TASK_INTERRUPTIBLE); | |
c1c6733f AM |
7978 | + if (write_and_state_lists_empty()) |
7979 | + schedule(); | |
c783755a | 7980 | + set_current_state(TASK_RUNNING); |
c1c6733f AM |
7981 | + |
7982 | + process_state_queue(); | |
7983 | + process_output_queue(); | |
7984 | + } | |
7985 | + | |
c1c6733f AM |
7986 | + return 0; |
7987 | +} | |
7988 | + | |
7989 | +static void daemons_stop(void) | |
7990 | +{ | |
c783755a AM |
7991 | + kthread_stop(recv_task); |
7992 | + kthread_stop(send_task); | |
c1c6733f AM |
7993 | +} |
7994 | +/* Start the receive and send daemons; returns 0 or the negative errno carried by the failed kthread_run() */ | |
7995 | +static int daemons_start(void) | |
7996 | +{ | |
c783755a | 7997 | + struct task_struct *p; |
c1c6733f AM |
7998 | + int error; | |
7999 | + | |
d3b4771f | 8000 | + p = kthread_run(dlm_recvd, NULL, 0, "dlm_recvd"); |
c783755a AM |
8001 | + error = IS_ERR(p) ? PTR_ERR(p) : 0; /* real errno, not the 0/1 of IS_ERR() */ | |
8002 | + if (error) { | |
8003 | + log_print("can't start dlm_recvd %d", error); | |
8004 | + return error; | |
c1c6733f | 8005 | + } |
c783755a | 8006 | + recv_task = p; |
c1c6733f | 8007 | + |
d3b4771f | 8008 | + p = kthread_run(dlm_sendd, NULL, 0, "dlm_sendd"); |
c783755a AM |
8009 | + error = IS_ERR(p) ? PTR_ERR(p) : 0; /* real errno, not the 0/1 of IS_ERR() */ | |
8010 | + if (error) { | |
8011 | + log_print("can't start dlm_sendd %d", error); | |
8012 | + kthread_stop(recv_task); | |
8013 | + return error; | |
c1c6733f | 8014 | + } |
c783755a | 8015 | + send_task = p; |
c1c6733f | 8016 | + |
c783755a | 8017 | + return 0; |
c1c6733f AM |
8018 | +} | |
8019 | + | |
8020 | +/* | |
8021 | + * Return the largest buffer size we can cope with. | |
8022 | + */ | |
8023 | +int lowcomms_max_buffer_size(void) | |
8024 | +{ | |
8025 | + return PAGE_CACHE_SIZE; | |
8026 | +} | |
8027 | + | |
8028 | +void lowcomms_stop(void) | |
8029 | +{ | |
8030 | + int i; | |
8031 | + struct connection *temp; | |
8032 | + struct connection *lcon; | |
8033 | + | |
8034 | + atomic_set(&accepting, 0); | |
8035 | + | |
8036 | + /* Set all the activity flags to prevent any | |
8037 | + socket activity. | |
8038 | + */ | |
8039 | + for (i = 0; i < conn_array_size; i++) { | |
c783755a AM |
8040 | + if (connections[i]) |
8041 | + connections[i]->flags = 0x7; | |
c1c6733f AM |
8042 | + } |
8043 | + daemons_stop(); | |
8044 | + clean_writequeues(); | |
8045 | + | |
8046 | + for (i = 0; i < conn_array_size; i++) { | |
c783755a AM |
8047 | + if (connections[i]) { |
8048 | + close_connection(connections[i], TRUE); | |
8049 | + if (connections[i]->othercon) | |
8050 | + kmem_cache_free(con_cache, connections[i]->othercon); | |
8051 | + kmem_cache_free(con_cache, connections[i]); | |
8052 | + } | |
c1c6733f AM |
8053 | + } |
8054 | + | |
8055 | + kfree(connections); | |
8056 | + connections = NULL; | |
8057 | + | |
8058 | + /* Free up any dynamically allocated listening sockets */ | |
8059 | + list_for_each_entry_safe(lcon, temp, &listen_sockets, listenlist) { | |
8060 | + sock_release(lcon->sock); | |
c783755a | 8061 | + kmem_cache_free(con_cache, lcon); |
c1c6733f AM |
8062 | + } |
8063 | + | |
c783755a | 8064 | + kmem_cache_destroy(con_cache); |
c1c6733f AM |
8065 | + kcl_releaseref_cluster(); |
8066 | +} | |
8067 | + | |
8068 | +/* This is quite likely to sleep... */ | |
8069 | +int lowcomms_start(void) | |
8070 | +{ | |
8071 | + int error = 0; | |
bb1d8b11 AM |
8072 | + struct connection *temp; |
8073 | + struct connection *lcon; | |
c1c6733f AM |
8074 | + |
8075 | + INIT_LIST_HEAD(&read_sockets); | |
8076 | + INIT_LIST_HEAD(&write_sockets); | |
8077 | + INIT_LIST_HEAD(&state_sockets); | |
8078 | + INIT_LIST_HEAD(&listen_sockets); | |
8079 | + | |
8080 | + spin_lock_init(&read_sockets_lock); | |
8081 | + spin_lock_init(&write_sockets_lock); | |
8082 | + spin_lock_init(&state_sockets_lock); | |
c783755a | 8083 | + init_rwsem(&connections_lock); |
c1c6733f AM |
8084 | + |
8085 | + error = -ENOTCONN; | |
8086 | + if (kcl_addref_cluster()) | |
8087 | + goto out; | |
8088 | + | |
8089 | + /* | |
8090 | + * Temporarily initialise the waitq head so that lowcomms_send_message | |
8091 | + * doesn't crash if it gets called before the thread is fully | |
8092 | + * initialised | |
8093 | + */ | |
8094 | + init_waitqueue_head(&lowcomms_send_waitq); | |
8095 | + | |
8096 | + error = -ENOMEM; | |
c783755a AM |
8097 | + connections = kmalloc(sizeof(struct connection *) * |
8098 | + dlm_config.conn_increment, GFP_KERNEL); | |
c1c6733f AM |
8099 | + if (!connections) |
8100 | + goto out; | |
8101 | + | |
8102 | + memset(connections, 0, | |
c783755a AM |
8103 | + sizeof(struct connection *) * dlm_config.conn_increment); |
8104 | + | |
8105 | + conn_array_size = dlm_config.conn_increment; | |
8106 | + | |
8107 | + con_cache = kmem_cache_create("dlm_conn", sizeof(struct connection), | |
8108 | + __alignof__(struct connection), 0, NULL, NULL); | |
8109 | + if (!con_cache) | |
8110 | + goto fail_free_conn; | |
8111 | + | |
c1c6733f AM |
8112 | + |
8113 | + /* Start listening */ | |
8114 | + error = listen_for_all(); | |
8115 | + if (error) | |
bb1d8b11 | 8116 | + goto fail_unlisten; |
c1c6733f AM |
8117 | + |
8118 | + error = daemons_start(); | |
8119 | + if (error) | |
bb1d8b11 | 8120 | + goto fail_unlisten; |
c1c6733f AM |
8121 | + |
8122 | + atomic_set(&accepting, 1); | |
8123 | + | |
8124 | + return 0; | |
8125 | + | |
bb1d8b11 AM |
8126 | + fail_unlisten: |
8127 | + close_connection(connections[0], 0); | |
8128 | + kmem_cache_free(con_cache, connections[0]); | |
8129 | + list_for_each_entry_safe(lcon, temp, &listen_sockets, listenlist) { | |
8130 | + sock_release(lcon->sock); | |
8131 | + kmem_cache_free(con_cache, lcon); | |
8132 | + } | |
8133 | + | |
c783755a AM |
8134 | + kmem_cache_destroy(con_cache); |
8135 | + | |
c1c6733f | 8136 | + fail_free_conn: |
c783755a | 8137 | + kcl_releaseref_cluster(); |
c1c6733f AM |
8138 | + kfree(connections); |
8139 | + | |
8140 | + out: | |
8141 | + return error; | |
8142 | +} | |
8143 | + | |
8144 | +/* Don't accept any more outgoing work: clears the 'accepting' flag that lowcomms_get_buffer() and lowcomms_commit_buffer() test */ | |
8145 | +void lowcomms_stop_accept(void) | |
8146 | +{ | |
8147 | + atomic_set(&accepting, 0); | |
8148 | +} | |
8149 | + | |
8150 | +/* Cluster Manager interface functions for translating | |
8151 | + between nodeids and IP addresses | |
8152 | +*/ | |
8153 | + | |
8154 | +/* Return the IP address of a node given its NODEID. Only the address field of retaddr is written; returns 0, or -1 if the node has no address list */ | |
8155 | +static int lowcomms_ipaddr_from_nodeid(int nodeid, struct sockaddr *retaddr) | |
8156 | +{ | |
8157 | + struct list_head *addrs; | |
8158 | + struct cluster_node_addr *node_addr; | |
8159 | + struct cluster_node_addr *current_addr = NULL; | |
8160 | + struct sockaddr_in6 *saddr; | |
8161 | + int interface; | |
8162 | + int i; | |
8163 | + | |
8164 | + addrs = kcl_get_node_addresses(nodeid); | |
8165 | + if (!addrs) | |
8166 | + return -1; | |
8167 | + | |
8168 | + interface = kcl_get_current_interface(); | |
8169 | + | |
8170 | + /* Look for address number <interface> */ | |
8171 | + i=0; /* i/f numbers start at 1 */ | |
8172 | + list_for_each_entry(node_addr, addrs, list) { | |
8173 | + if (interface == ++i) { | |
8174 | + current_addr = node_addr; | |
8175 | + break; | |
8176 | + } | |
8177 | + } | |
8178 | + | |
8179 | + /* If that failed then just use the first one (list_entry, so this does not depend on 'list' being the first member) */ | |
8180 | + if (!current_addr) | |
8181 | + current_addr = list_entry(addrs->next, struct cluster_node_addr, list); | |
8182 | + | |
8183 | + saddr = (struct sockaddr_in6 *)current_addr->addr; | |
8184 | + | |
8185 | + /* Extract the IP address */ | |
b7b72b66 | 8186 | + if (local_addr.sin6_family == AF_INET) { |
c1c6733f AM |
8187 | + struct sockaddr_in *in4 = (struct sockaddr_in *)saddr; | |
8188 | + struct sockaddr_in *ret4 = (struct sockaddr_in *)retaddr; | |
8189 | + ret4->sin_addr.s_addr = in4->sin_addr.s_addr; | |
8190 | + } | |
8191 | + else { | |
8192 | + struct sockaddr_in6 *ret6 = (struct sockaddr_in6 *)retaddr; | |
8193 | + memcpy(&ret6->sin6_addr, &saddr->sin6_addr, sizeof(saddr->sin6_addr)); | |
8194 | + } | |
8195 | + | |
8196 | + return 0; | |
8197 | +} | |
8198 | + | |
8199 | +/* Return the NODEID for a node given its sockaddr, or 0 if kcl_get_node_by_addr() fails */ | |
8200 | +static int lowcomms_nodeid_from_ipaddr(struct sockaddr *addr, int addr_len) | |
8201 | +{ | |
8202 | + struct kcl_cluster_node node; | |
8203 | + struct sockaddr_in6 ipv6_addr; | |
8204 | + struct sockaddr_in ipv4_addr; | |
8205 | + /* Copy the local_addr template bounded by the destination size, not by addr_len */ | |
b7b72b66 | 8206 | + if (local_addr.sin6_family == AF_INET) { |
c1c6733f AM |
8207 | + struct sockaddr_in *in4 = (struct sockaddr_in *)addr; |
8208 | + memcpy(&ipv4_addr, &local_addr, sizeof(ipv4_addr)); | |
8209 | + memcpy(&ipv4_addr.sin_addr, &in4->sin_addr, sizeof(ipv4_addr.sin_addr)); | |
8210 | + | |
8211 | + addr = (struct sockaddr *)&ipv4_addr; | |
8212 | + } | |
8213 | + else { | |
8214 | + struct sockaddr_in6 *in6 = (struct sockaddr_in6 *)addr; | |
8215 | + memcpy(&ipv6_addr, &local_addr, sizeof(ipv6_addr)); | |
8216 | + memcpy(&ipv6_addr.sin6_addr, &in6->sin6_addr, sizeof(ipv6_addr.sin6_addr)); | |
8217 | + | |
8218 | + addr = (struct sockaddr *)&ipv6_addr; | |
8219 | + } | |
8220 | + | |
8221 | + if (kcl_get_node_by_addr((char *)addr, addr_len, &node) == 0) | |
8222 | + return node.node_id; | |
8223 | + else | |
8224 | + return 0; | |
8225 | +} | |
8226 | + | |
8227 | +int lowcomms_our_nodeid(void) | |
8228 | +{ | |
8229 | + struct kcl_cluster_node node; | |
8230 | + struct list_head *addrs; | |
8231 | + struct cluster_node_addr *first_addr; | |
8232 | + static int our_nodeid = 0; | |
8233 | + /* Return this node's id (0 on failure); the first success also fills in local_addr */ | |
8234 | + if (our_nodeid) | |
8235 | + return our_nodeid; | |
8236 | + | |
8237 | + if (kcl_get_node_by_nodeid(0, &node) == -1) | |
8238 | + return 0; | |
8239 | + | |
8240 | + /* Fill in the "template" structure before caching the nodeid */ | |
8241 | + addrs = kcl_get_node_addresses(node.node_id); | |
8242 | + if (!addrs) | |
8243 | + return 0; | |
8244 | + | |
8245 | + first_addr = (struct cluster_node_addr *) addrs->next; | |
8246 | + memcpy(&local_addr, &first_addr->addr, first_addr->addr_len); | |
8247 | + | |
8248 | + /* Only cache once local_addr is filled in, so a failed call is retried */ | |
8249 | + our_nodeid = node.node_id; | |
8250 | + return our_nodeid; | |
8251 | +} | |
8252 | +/* | |
8253 | + * Overrides for Emacs so that we follow Linus's tabbing style. | |
8254 | + * Emacs will notice this stuff at the end of the file and automatically | |
8255 | + * adjust the settings for this buffer only. This must remain at the end | |
8256 | + * of the file. | |
8257 | + * --------------------------------------------------------------------------- | |
8258 | + * Local variables: | |
8259 | + * c-file-style: "linux" | |
8260 | + * End: | |
8261 | + */ | |
8262 | diff -urN linux-orig/cluster/dlm/lowcomms.h linux-patched/cluster/dlm/lowcomms.h | |
8263 | --- linux-orig/cluster/dlm/lowcomms.h 1970-01-01 07:30:00.000000000 +0730 | |
bb1d8b11 | 8264 | +++ linux-patched/cluster/dlm/lowcomms.h 2004-11-03 11:31:56.000000000 +0800 |
c1c6733f AM |
8265 | @@ -0,0 +1,34 @@ |
8266 | +/****************************************************************************** | |
8267 | +******************************************************************************* | |
8268 | +** | |
8269 | +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | |
8270 | +** Copyright (C) 2004 Red Hat, Inc. All rights reserved. | |
8271 | +** | |
8272 | +** This copyrighted material is made available to anyone wishing to use, | |
8273 | +** modify, copy, or redistribute it subject to the terms and conditions | |
8274 | +** of the GNU General Public License v.2. | |
8275 | +** | |
8276 | +******************************************************************************* | |
8277 | +******************************************************************************/ | |
8278 | + | |
8279 | +#ifndef __LOWCOMMS_DOT_H__ | |
8280 | +#define __LOWCOMMS_DOT_H__ | |
8281 | + | |
8282 | +/* The old interface */ | |
8283 | +int lowcomms_send_message(int csid, char *buf, int len, int allocation); | |
8284 | + | |
8285 | +/* The new interface */ | |
8286 | +struct writequeue_entry; | |
8287 | +extern struct writequeue_entry *lowcomms_get_buffer(int nodeid, int len, | |
8288 | + int allocation, char **ppc); | |
8289 | +extern void lowcomms_commit_buffer(struct writequeue_entry *e); | |
8290 | + | |
8291 | +int lowcomms_start(void); | |
8292 | +void lowcomms_stop(void); | |
8293 | +void lowcomms_stop_accept(void); | |
8294 | +int lowcomms_close(int nodeid); | |
8295 | +int lowcomms_max_buffer_size(void); | |
8296 | + | |
8297 | +int lowcomms_our_nodeid(void); | |
8298 | + | |
8299 | +#endif /* __LOWCOMMS_DOT_H__ */ | |
8300 | diff -urN linux-orig/cluster/dlm/main.c linux-patched/cluster/dlm/main.c | |
8301 | --- linux-orig/cluster/dlm/main.c 1970-01-01 07:30:00.000000000 +0730 | |
bb1d8b11 | 8302 | +++ linux-patched/cluster/dlm/main.c 2004-11-03 11:31:56.000000000 +0800 |
c783755a | 8303 | @@ -0,0 +1,93 @@ |
c1c6733f AM |
8304 | +/****************************************************************************** |
8305 | +******************************************************************************* | |
8306 | +** | |
8307 | +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | |
8308 | +** Copyright (C) 2004 Red Hat, Inc. All rights reserved. | |
8309 | +** | |
8310 | +** This copyrighted material is made available to anyone wishing to use, | |
8311 | +** modify, copy, or redistribute it subject to the terms and conditions | |
8312 | +** of the GNU General Public License v.2. | |
8313 | +** | |
8314 | +******************************************************************************* | |
8315 | +******************************************************************************/ | |
8316 | + | |
8317 | +#define EXPORT_SYMTAB | |
8318 | + | |
8319 | +#include <linux/init.h> | |
8320 | +#include <linux/proc_fs.h> | |
8321 | +#include <linux/ctype.h> | |
c1c6733f AM |
8322 | +#include <linux/module.h> |
8323 | +#include <net/sock.h> | |
8324 | + | |
8325 | +#include <cluster/cnxman.h> | |
8326 | + | |
8327 | +#include "dlm_internal.h" | |
8328 | +#include "lockspace.h" | |
c1c6733f AM |
8329 | +#include "ast.h" |
8330 | +#include "lkb.h" | |
8331 | +#include "nodes.h" | |
8332 | +#include "locking.h" | |
8333 | +#include "config.h" | |
8334 | +#include "memory.h" | |
8335 | +#include "recover.h" | |
8336 | +#include "lowcomms.h" | |
8337 | + | |
8338 | +int dlm_device_init(void); | |
8339 | +void dlm_device_exit(void); | |
8340 | +void dlm_proc_init(void); | |
8341 | +void dlm_proc_exit(void); | |
8342 | + | |
8343 | + | |
8344 | +/* Cluster manager callback; only the LEAVING reason is acted on and arg is unused. | |
8345 | + N.B. this is independent of lockspace-specific event callbacks from SM */ | |
8346 | + | |
8347 | +static void cman_callback(kcl_callback_reason reason, long arg) | |
8348 | +{ | |
c1c6733f AM |
8349 | + /* This is unconditional, so do what we can to tidy up */ | |
8350 | + if (reason == LEAVING) { | |
8351 | + dlm_emergency_shutdown(); | |
8352 | + } | |
8353 | +} | |
8354 | + | |
8355 | +int __init init_dlm(void) | |
8356 | +{ | |
8357 | + dlm_proc_init(); | |
8358 | + dlm_lockspace_init(); | |
c1c6733f AM |
8359 | + dlm_nodes_init(); |
8360 | + dlm_device_init(); | |
8361 | + dlm_memory_init(); | |
8362 | + dlm_config_init(); | |
8363 | + | |
8364 | + kcl_add_callback(cman_callback); | |
8365 | + | |
8366 | + printk("DLM %s (built %s %s) installed\n", | |
8367 | + DLM_RELEASE_NAME, __DATE__, __TIME__); | |
8368 | + | |
8369 | + return 0; | |
8370 | +} | |
8371 | + | |
8372 | +void __exit exit_dlm(void) | |
8373 | +{ | |
8374 | + kcl_remove_callback(cman_callback); | |
8375 | + | |
8376 | + dlm_device_exit(); | |
8377 | + dlm_memory_exit(); | |
8378 | + dlm_config_exit(); | |
8379 | + dlm_proc_exit(); | |
8380 | +} | |
8381 | + | |
8382 | +MODULE_DESCRIPTION("Distributed Lock Manager " DLM_RELEASE_NAME); | |
8383 | +MODULE_AUTHOR("Red Hat, Inc."); | |
8384 | +MODULE_LICENSE("GPL"); | |
8385 | + | |
8386 | +module_init(init_dlm); | |
8387 | +module_exit(exit_dlm); | |
8388 | + | |
8389 | +EXPORT_SYMBOL(dlm_init); | |
8390 | +EXPORT_SYMBOL(dlm_release); | |
8391 | +EXPORT_SYMBOL(dlm_new_lockspace); | |
8392 | +EXPORT_SYMBOL(dlm_release_lockspace); | |
8393 | +EXPORT_SYMBOL(dlm_lock); | |
8394 | +EXPORT_SYMBOL(dlm_unlock); | |
b7b72b66 AM |
8395 | +EXPORT_SYMBOL(dlm_debug_dump); |
8396 | +EXPORT_SYMBOL(dlm_locks_dump); | |
c1c6733f AM |
8397 | diff -urN linux-orig/cluster/dlm/memory.c linux-patched/cluster/dlm/memory.c |
8398 | --- linux-orig/cluster/dlm/memory.c 1970-01-01 07:30:00.000000000 +0730 | |
bb1d8b11 | 8399 | +++ linux-patched/cluster/dlm/memory.c 2004-11-03 11:31:56.000000000 +0800 |
c1c6733f AM |
8400 | @@ -0,0 +1,238 @@ |
8401 | +/****************************************************************************** | |
8402 | +******************************************************************************* | |
8403 | +** | |
8404 | +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | |
8405 | +** Copyright (C) 2004 Red Hat, Inc. All rights reserved. | |
8406 | +** | |
8407 | +** This copyrighted material is made available to anyone wishing to use, | |
8408 | +** modify, copy, or redistribute it subject to the terms and conditions | |
8409 | +** of the GNU General Public License v.2. | |
8410 | +** | |
8411 | +******************************************************************************* | |
8412 | +******************************************************************************/ | |
8413 | + | |
8414 | +/* memory.c | |
8415 | + * | |
8416 | + * memory allocation routines | |
8417 | + * | |
8418 | + */ | |
8419 | + | |
8420 | +#include "dlm_internal.h" | |
8421 | +#include "memory.h" | |
8422 | +#include "config.h" | |
8423 | + | |
8424 | +/* as the man says...Shouldn't this be in a header file somewhere? */ | |
8425 | +#define BYTES_PER_WORD sizeof(void *) | |
8426 | + | |
8427 | +static kmem_cache_t *rsb_cache_small; | |
8428 | +static kmem_cache_t *rsb_cache_large; | |
8429 | +static kmem_cache_t *lkb_cache; | |
8430 | +static kmem_cache_t *lvb_cache; | |
8431 | +static kmem_cache_t *resdir_cache_large; | |
8432 | +static kmem_cache_t *resdir_cache_small; | |
8433 | + | |
b7b72b66 | 8434 | +/* The thresholds above which we allocate large RSBs/direntry rather than small |
c1c6733f AM |
8435 | + * ones. This must make the resultant structure end on a word boundary */ |
8436 | +#define LARGE_RSB_NAME 28 | |
8437 | +#define LARGE_RES_NAME 28 | |
8438 | + | |
8439 | +int dlm_memory_init() | |
8440 | +{ | |
8441 | + int ret = -ENOMEM; | |
8442 | + | |
8443 | + /* Create each slab cache in turn; returns 0, or -ENOMEM after destroying those already made */ | |
8444 | + rsb_cache_small = | |
8445 | + kmem_cache_create("dlm_rsb(small)", | |
b7b72b66 AM |
8446 | + (sizeof(struct dlm_rsb) + LARGE_RSB_NAME + BYTES_PER_WORD-1) & ~(BYTES_PER_WORD-1), |
8447 | + __alignof__(struct dlm_rsb), 0, NULL, NULL); | |
c1c6733f AM |
8448 | + if (!rsb_cache_small) |
8449 | + goto out; | |
8450 | + | |
8451 | + rsb_cache_large = | |
8452 | + kmem_cache_create("dlm_rsb(large)", | |
b7b72b66 AM |
8453 | + sizeof(struct dlm_rsb) + DLM_RESNAME_MAXLEN, |
8454 | + __alignof__(struct dlm_rsb), 0, NULL, NULL); | |
c1c6733f AM |
8455 | + if (!rsb_cache_large) |
8456 | + goto out_free_rsbs; | |
8457 | + | |
b7b72b66 AM |
8458 | + lkb_cache = kmem_cache_create("dlm_lkb", sizeof(struct dlm_lkb), |
8459 | + __alignof__(struct dlm_lkb), 0, NULL, NULL); | |
c1c6733f AM |
8460 | + if (!lkb_cache) |
8461 | + goto out_free_rsbl; | |
8462 | + | |
8463 | + resdir_cache_large = | |
8464 | + kmem_cache_create("dlm_resdir(l)", | |
b7b72b66 AM |
8465 | + sizeof(struct dlm_direntry) + DLM_RESNAME_MAXLEN, |
8466 | + __alignof__(struct dlm_direntry), 0, NULL, NULL); | |
c1c6733f AM |
8467 | + if (!resdir_cache_large) |
8468 | + goto out_free_lkb; | |
8469 | + | |
8470 | + resdir_cache_small = | |
8471 | + kmem_cache_create("dlm_resdir(s)", | |
b7b72b66 AM |
8472 | + (sizeof(struct dlm_direntry) + LARGE_RES_NAME + BYTES_PER_WORD-1) & ~(BYTES_PER_WORD-1), |
8473 | + __alignof__(struct dlm_direntry), 0, NULL, NULL); | |
c1c6733f AM |
8474 | + if (!resdir_cache_small) |
8475 | + goto out_free_resl; | |
8476 | + | |
8477 | + /* LVB cache also holds ranges, so should be 64bit aligned */ | |
8478 | + lvb_cache = kmem_cache_create("dlm_lvb/range", DLM_LVB_LEN, | |
8479 | + __alignof__(uint64_t), 0, NULL, NULL); | |
8480 | + if (!lvb_cache) | |
8481 | + goto out_free_ress; | |
8482 | + | |
8483 | + ret = 0; | |
8484 | + goto out; | |
8485 | + | |
8486 | + out_free_ress: | |
8487 | + kmem_cache_destroy(resdir_cache_small); | |
8488 | + | |
8489 | + out_free_resl: | |
8490 | + kmem_cache_destroy(resdir_cache_large); | |
8491 | + | |
8492 | + out_free_lkb: | |
8493 | + kmem_cache_destroy(lkb_cache); | |
8494 | + | |
8495 | + out_free_rsbl: | |
8496 | + kmem_cache_destroy(rsb_cache_large); | |
8497 | + | |
8498 | + out_free_rsbs: | |
8499 | + kmem_cache_destroy(rsb_cache_small); | |
8500 | + | |
8501 | + out: | |
8502 | + return ret; | |
8503 | +} | |
8504 | + | |
8505 | +void dlm_memory_exit() | |
8506 | +{ | |
8507 | + kmem_cache_destroy(rsb_cache_large); | |
8508 | + kmem_cache_destroy(rsb_cache_small); | |
8509 | + kmem_cache_destroy(lkb_cache); | |
8510 | + kmem_cache_destroy(resdir_cache_small); | |
8511 | + kmem_cache_destroy(resdir_cache_large); | |
8512 | + kmem_cache_destroy(lvb_cache); | |
8513 | +} | |
8514 | + | |
b7b72b66 | 8515 | +struct dlm_rsb *allocate_rsb(struct dlm_ls *ls, int namelen) |
c1c6733f | 8516 | +{ |
b7b72b66 | 8517 | + struct dlm_rsb *r; |
c1c6733f | 8518 | + |
b7b72b66 | 8519 | + DLM_ASSERT(namelen <= DLM_RESNAME_MAXLEN,); |
c1c6733f AM |
8520 | + |
8521 | + if (namelen >= LARGE_RSB_NAME) | |
8522 | + r = kmem_cache_alloc(rsb_cache_large, ls->ls_allocation); | |
8523 | + else | |
8524 | + r = kmem_cache_alloc(rsb_cache_small, ls->ls_allocation); | |
8525 | + | |
8526 | + if (r) | |
b7b72b66 | 8527 | + memset(r, 0, sizeof(struct dlm_rsb) + namelen); |
c1c6733f AM |
8528 | + |
8529 | + return r; | |
8530 | +} | |
8531 | + | |
b7b72b66 | 8532 | +void free_rsb(struct dlm_rsb *r) |
c1c6733f AM |
8533 | +{ |
8534 | + int length = r->res_length; | |
8535 | + | |
8536 | +#ifdef POISON | |
b7b72b66 | 8537 | + memset(r, 0x55, sizeof(struct dlm_rsb) + r->res_length); |
c1c6733f AM |
8538 | +#endif |
8539 | + | |
8540 | + if (length >= LARGE_RSB_NAME) | |
8541 | + kmem_cache_free(rsb_cache_large, r); | |
8542 | + else | |
8543 | + kmem_cache_free(rsb_cache_small, r); | |
8544 | +} | |
8545 | + | |
b7b72b66 | 8546 | +struct dlm_lkb *allocate_lkb(struct dlm_ls *ls) |
c1c6733f | 8547 | +{ |
b7b72b66 | 8548 | + struct dlm_lkb *l; |
c1c6733f AM |
8549 | + |
8550 | + l = kmem_cache_alloc(lkb_cache, ls->ls_allocation); | |
8551 | + if (l) | |
b7b72b66 | 8552 | + memset(l, 0, sizeof(struct dlm_lkb)); |
c1c6733f AM |
8553 | + |
8554 | + return l; | |
8555 | +} | |
8556 | + | |
b7b72b66 | 8557 | +void free_lkb(struct dlm_lkb *l) |
c1c6733f AM |
8558 | +{ |
8559 | +#ifdef POISON | |
b7b72b66 | 8560 | + memset(l, 0xAA, sizeof(struct dlm_lkb)); |
c1c6733f AM |
8561 | +#endif |
8562 | + kmem_cache_free(lkb_cache, l); | |
8563 | +} | |
8564 | + | |
b7b72b66 | 8565 | +struct dlm_direntry *allocate_direntry(struct dlm_ls *ls, int namelen) |
c1c6733f | 8566 | +{ |
b7b72b66 | 8567 | + struct dlm_direntry *rd; |
c1c6733f | 8568 | + |
b7b72b66 | 8569 | + DLM_ASSERT(namelen <= DLM_RESNAME_MAXLEN,); |
c1c6733f AM |
8570 | + |
8571 | + if (namelen >= LARGE_RES_NAME) | |
8572 | + rd = kmem_cache_alloc(resdir_cache_large, ls->ls_allocation); | |
8573 | + else | |
8574 | + rd = kmem_cache_alloc(resdir_cache_small, ls->ls_allocation); | |
8575 | + | |
8576 | + if (rd) | |
b7b72b66 | 8577 | + memset(rd, 0, sizeof(struct dlm_direntry)); |
c1c6733f AM |
8578 | + |
8579 | + return rd; | |
8580 | +} | |
8581 | + | |
b7b72b66 | 8582 | +void free_direntry(struct dlm_direntry *de) |
c1c6733f | 8583 | +{ |
b7b72b66 AM |
8584 | + if (de->length >= LARGE_RES_NAME) |
8585 | + kmem_cache_free(resdir_cache_large, de); | |
c1c6733f | 8586 | + else |
b7b72b66 | 8587 | + kmem_cache_free(resdir_cache_small, de); |
c1c6733f AM |
8588 | +} |
8589 | + | |
b7b72b66 | 8590 | +char *allocate_lvb(struct dlm_ls *ls) |
c1c6733f AM |
8591 | +{ |
8592 | + char *l; | |
8593 | + | |
8594 | + l = kmem_cache_alloc(lvb_cache, ls->ls_allocation); | |
8595 | + if (l) | |
8596 | + memset(l, 0, DLM_LVB_LEN); | |
8597 | + | |
8598 | + return l; | |
8599 | +} | |
8600 | + | |
8601 | +void free_lvb(char *l) | |
8602 | +{ | |
8603 | + kmem_cache_free(lvb_cache, l); | |
8604 | +} | |
8605 | + | |
8606 | +/* Ranges are allocated from the LVB cache as they are the same size (4x64 | |
8607 | + * bits) */ | |
b7b72b66 | 8608 | +uint64_t *allocate_range(struct dlm_ls * ls) |
c1c6733f AM |
8609 | +{ |
8610 | + uint64_t *l; | |
8611 | + | |
8612 | + l = kmem_cache_alloc(lvb_cache, ls->ls_allocation); | |
8613 | + if (l) | |
8614 | + memset(l, 0, DLM_LVB_LEN); | |
8615 | + | |
8616 | + return l; | |
8617 | +} | |
8618 | + | |
8619 | +void free_range(uint64_t *l) | |
8620 | +{ | |
8621 | + kmem_cache_free(lvb_cache, l); | |
8622 | +} | |
8623 | + | |
b7b72b66 | 8624 | +struct dlm_rcom *allocate_rcom_buffer(struct dlm_ls *ls) |
c1c6733f | 8625 | +{ |
b7b72b66 | 8626 | + struct dlm_rcom *rc; |
c1c6733f AM |
8627 | + |
8628 | + rc = kmalloc(dlm_config.buffer_size, ls->ls_allocation); | |
8629 | + if (rc) | |
8630 | + memset(rc, 0, dlm_config.buffer_size); | |
8631 | + | |
8632 | + return rc; | |
8633 | +} | |
8634 | + | |
b7b72b66 | 8635 | +void free_rcom_buffer(struct dlm_rcom *rc) |
c1c6733f AM |
8636 | +{ |
8637 | + kfree(rc); | |
8638 | +} | |
8639 | diff -urN linux-orig/cluster/dlm/memory.h linux-patched/cluster/dlm/memory.h | |
8640 | --- linux-orig/cluster/dlm/memory.h 1970-01-01 07:30:00.000000000 +0730 | |
bb1d8b11 | 8641 | +++ linux-patched/cluster/dlm/memory.h 2004-11-03 11:31:56.000000000 +0800 |
c1c6733f AM |
8642 | @@ -0,0 +1,32 @@ |
8643 | +/****************************************************************************** | |
8644 | +******************************************************************************* | |
8645 | +** | |
8646 | +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | |
8647 | +** Copyright (C) 2004 Red Hat, Inc. All rights reserved. | |
8648 | +** | |
8649 | +** This copyrighted material is made available to anyone wishing to use, | |
8650 | +** modify, copy, or redistribute it subject to the terms and conditions | |
8651 | +** of the GNU General Public License v.2. | |
8652 | +** | |
8653 | +******************************************************************************* | |
8654 | +******************************************************************************/ | |
8655 | + | |
8656 | +#ifndef __MEMORY_DOT_H__ | |
8657 | +#define __MEMORY_DOT_H__ | |
8658 | + | |
8659 | +int dlm_memory_init(void); | |
8660 | +void dlm_memory_exit(void); | |
b7b72b66 AM |
8661 | +struct dlm_rsb *allocate_rsb(struct dlm_ls *ls, int namelen); |
8662 | +void free_rsb(struct dlm_rsb *r); | |
8663 | +struct dlm_lkb *allocate_lkb(struct dlm_ls *ls); | |
8664 | +void free_lkb(struct dlm_lkb *l); | |
8665 | +struct dlm_direntry *allocate_direntry(struct dlm_ls *ls, int namelen); | |
8666 | +void free_direntry(struct dlm_direntry *de); | |
8667 | +char *allocate_lvb(struct dlm_ls *ls); | |
c1c6733f | 8668 | +void free_lvb(char *l); |
b7b72b66 AM |
8669 | +struct dlm_rcom *allocate_rcom_buffer(struct dlm_ls *ls); |
8670 | +void free_rcom_buffer(struct dlm_rcom *rc); | |
8671 | +uint64_t *allocate_range(struct dlm_ls *ls); | |
8672 | +void free_range(uint64_t *l); | |
c1c6733f AM |
8673 | + |
8674 | +#endif /* __MEMORY_DOT_H__ */ | |
8675 | diff -urN linux-orig/cluster/dlm/midcomms.c linux-patched/cluster/dlm/midcomms.c | |
8676 | --- linux-orig/cluster/dlm/midcomms.c 1970-01-01 07:30:00.000000000 +0730 | |
bb1d8b11 | 8677 | +++ linux-patched/cluster/dlm/midcomms.c 2004-11-03 11:31:56.000000000 +0800 |
c783755a | 8678 | @@ -0,0 +1,355 @@ |
c1c6733f AM |
8679 | +/****************************************************************************** |
8680 | +******************************************************************************* | |
8681 | +** | |
8682 | +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | |
8683 | +** Copyright (C) 2004 Red Hat, Inc. All rights reserved. | |
8684 | +** | |
8685 | +** This copyrighted material is made available to anyone wishing to use, | |
8686 | +** modify, copy, or redistribute it subject to the terms and conditions | |
8687 | +** of the GNU General Public License v.2. | |
8688 | +** | |
8689 | +******************************************************************************* | |
8690 | +******************************************************************************/ | |
8691 | + | |
8692 | +/* | |
8693 | + * midcomms.c | |
8694 | + * | |
8695 | + * This is the appallingly named "mid-level" comms layer. | |
8696 | + * | |
8697 | + * Its purpose is to take packets from the "real" comms layer, | |
8698 | + * split them up into messages and pass them to the interested | |
8699 | + * part of the locking mechanism. | |
8700 | + * | |
8701 | + * It also takes messages from the locking layer, formats them | |
8702 | + * into packets and sends them to the comms layer. | |
8703 | + * | |
8704 | + * It knows the format of the mid-level messages used and nodeids | |
8705 | + * but it does not know how to resolve a nodeid into an IP address | |
8706 | + * or any of the comms channel details | |
8707 | + * | |
8708 | + */ | |
8709 | + | |
8710 | +#include "dlm_internal.h" | |
8711 | +#include "lowcomms.h" | |
8712 | +#include "midcomms.h" | |
8713 | +#include "lockqueue.h" | |
8714 | +#include "nodes.h" | |
8715 | +#include "reccomms.h" | |
8716 | +#include "config.h" | |
8717 | + | |
8718 | +/* Byteorder routines */ | |
8719 | + | |
8720 | +static void host_to_network(void *msg) | |
8721 | +{ | |
b7b72b66 AM |
8722 | + struct dlm_header *head = msg; |
8723 | + struct dlm_request *req = msg; | |
8724 | + struct dlm_reply *rep = msg; | |
8725 | + struct dlm_query_request *qreq = msg; | |
8726 | + struct dlm_query_reply *qrep= msg; | |
8727 | + struct dlm_rcom *rc = msg; | |
c1c6733f AM |
8728 | + |
8729 | + /* Force into network byte order */ | |
8730 | + | |
8731 | + /* | |
8732 | + * Do the common header first | |
8733 | + */ | |
8734 | + | |
8735 | + head->rh_length = cpu_to_le16(head->rh_length); | |
8736 | + head->rh_lockspace = cpu_to_le32(head->rh_lockspace); | |
8737 | + /* Leave the lkid alone as it is transparent at the remote end */ | |
8738 | + | |
8739 | + /* | |
8740 | + * Do the fields in the remlockrequest or remlockreply structs | |
8741 | + */ | |
8742 | + | |
8743 | + switch (req->rr_header.rh_cmd) { | |
8744 | + | |
8745 | + case GDLM_REMCMD_LOCKREQUEST: | |
8746 | + case GDLM_REMCMD_CONVREQUEST: | |
8747 | + req->rr_range_start = cpu_to_le64(req->rr_range_start); | |
8748 | + req->rr_range_end = cpu_to_le64(req->rr_range_end); | |
8749 | + /* Deliberate fall through */ | |
8750 | + case GDLM_REMCMD_UNLOCKREQUEST: | |
8751 | + case GDLM_REMCMD_LOOKUP: | |
8752 | + case GDLM_REMCMD_LOCKGRANT: | |
8753 | + case GDLM_REMCMD_SENDBAST: | |
8754 | + case GDLM_REMCMD_SENDCAST: | |
8755 | + case GDLM_REMCMD_REM_RESDATA: | |
8756 | + req->rr_flags = cpu_to_le32(req->rr_flags); | |
8757 | + req->rr_status = cpu_to_le32(req->rr_status); | |
8758 | + break; | |
8759 | + | |
8760 | + case GDLM_REMCMD_LOCKREPLY: | |
b7b72b66 AM |
8761 | + rep->rl_lockstate = cpu_to_le32(rep->rl_lockstate); |
8762 | + rep->rl_nodeid = cpu_to_le32(rep->rl_nodeid); | |
8763 | + rep->rl_status = cpu_to_le32(rep->rl_status); | |
c1c6733f AM |
8764 | + break; |
8765 | + | |
8766 | + case GDLM_REMCMD_RECOVERMESSAGE: | |
8767 | + case GDLM_REMCMD_RECOVERREPLY: | |
8768 | + rc->rc_msgid = cpu_to_le32(rc->rc_msgid); | |
8769 | + rc->rc_datalen = cpu_to_le16(rc->rc_datalen); | |
8770 | + break; | |
8771 | + | |
8772 | + case GDLM_REMCMD_QUERY: | |
b7b72b66 AM |
8773 | + qreq->rq_mstlkid = cpu_to_le32(qreq->rq_mstlkid); |
8774 | + qreq->rq_query = cpu_to_le32(qreq->rq_query); | |
8775 | + qreq->rq_maxlocks = cpu_to_le32(qreq->rq_maxlocks); | |
c1c6733f AM |
8776 | + break; |
8777 | + | |
8778 | + case GDLM_REMCMD_QUERYREPLY: | |
b7b72b66 AM |
8779 | + qrep->rq_numlocks = cpu_to_le32(qrep->rq_numlocks); |
8780 | + qrep->rq_status = cpu_to_le32(qrep->rq_status); | |
8781 | + qrep->rq_grantcount = cpu_to_le32(qrep->rq_grantcount); | |
8782 | + qrep->rq_waitcount = cpu_to_le32(qrep->rq_waitcount); | |
8783 | + qrep->rq_convcount = cpu_to_le32(qrep->rq_convcount); | |
c1c6733f AM |
8784 | + break; |
8785 | + | |
8786 | + default: | |
8787 | + printk("dlm: warning, unknown REMCMD type %u\n", | |
8788 | + req->rr_header.rh_cmd); | |
8789 | + } | |
8790 | +} | |
8791 | + | |
8792 | +static void network_to_host(void *msg) | |
8793 | +{ | |
b7b72b66 AM |
8794 | + struct dlm_header *head = msg; |
8795 | + struct dlm_request *req = msg; | |
8796 | + struct dlm_reply *rep = msg; | |
8797 | + struct dlm_query_request *qreq = msg; | |
8798 | + struct dlm_query_reply *qrep = msg; | |
8799 | + struct dlm_rcom *rc = msg; | |
c1c6733f AM |
8800 | + |
8801 | + /* Force into host byte order */ | |
8802 | + | |
8803 | + /* | |
8804 | + * Do the common header first | |
8805 | + */ | |
8806 | + | |
8807 | + head->rh_length = le16_to_cpu(head->rh_length); | |
8808 | + head->rh_lockspace = le32_to_cpu(head->rh_lockspace); | |
8809 | + /* Leave the lkid alone as it is transparent at the remote end */ | |
8810 | + | |
8811 | + /* | |
8812 | + * Do the fields in the remlockrequest or remlockreply structs | |
8813 | + */ | |
8814 | + | |
8815 | + switch (req->rr_header.rh_cmd) { | |
8816 | + | |
8817 | + case GDLM_REMCMD_LOCKREQUEST: | |
8818 | + case GDLM_REMCMD_CONVREQUEST: | |
8819 | + req->rr_range_start = le64_to_cpu(req->rr_range_start); | |
8820 | + req->rr_range_end = le64_to_cpu(req->rr_range_end); | |
8821 | + case GDLM_REMCMD_LOOKUP: | |
8822 | + case GDLM_REMCMD_UNLOCKREQUEST: | |
8823 | + case GDLM_REMCMD_LOCKGRANT: | |
8824 | + case GDLM_REMCMD_SENDBAST: | |
8825 | + case GDLM_REMCMD_SENDCAST: | |
8826 | + case GDLM_REMCMD_REM_RESDATA: | |
8827 | + /* Actually, not much to do here as the remote lock IDs are | |
8828 | + * transparent too */ | |
8829 | + req->rr_flags = le32_to_cpu(req->rr_flags); | |
8830 | + req->rr_status = le32_to_cpu(req->rr_status); | |
8831 | + break; | |
8832 | + | |
8833 | + case GDLM_REMCMD_LOCKREPLY: | |
b7b72b66 AM |
8834 | + rep->rl_lockstate = le32_to_cpu(rep->rl_lockstate); |
8835 | + rep->rl_nodeid = le32_to_cpu(rep->rl_nodeid); | |
8836 | + rep->rl_status = le32_to_cpu(rep->rl_status); | |
c1c6733f AM |
8837 | + break; |
8838 | + | |
8839 | + case GDLM_REMCMD_RECOVERMESSAGE: | |
8840 | + case GDLM_REMCMD_RECOVERREPLY: | |
8841 | + rc->rc_msgid = le32_to_cpu(rc->rc_msgid); | |
8842 | + rc->rc_datalen = le16_to_cpu(rc->rc_datalen); | |
8843 | + break; | |
8844 | + | |
8845 | + | |
8846 | + case GDLM_REMCMD_QUERY: | |
b7b72b66 AM |
8847 | + qreq->rq_mstlkid = le32_to_cpu(qreq->rq_mstlkid); |
8848 | + qreq->rq_query = le32_to_cpu(qreq->rq_query); | |
8849 | + qreq->rq_maxlocks = le32_to_cpu(qreq->rq_maxlocks); | |
c1c6733f AM |
8850 | + break; |
8851 | + | |
8852 | + case GDLM_REMCMD_QUERYREPLY: | |
b7b72b66 AM |
8853 | + qrep->rq_numlocks = le32_to_cpu(qrep->rq_numlocks); |
8854 | + qrep->rq_status = le32_to_cpu(qrep->rq_status); | |
8855 | + qrep->rq_grantcount = le32_to_cpu(qrep->rq_grantcount); | |
8856 | + qrep->rq_waitcount = le32_to_cpu(qrep->rq_waitcount); | |
8857 | + qrep->rq_convcount = le32_to_cpu(qrep->rq_convcount); | |
c1c6733f AM |
8858 | + break; |
8859 | + | |
8860 | + default: | |
8861 | + printk("dlm: warning, unknown REMCMD type %u\n", | |
8862 | + req->rr_header.rh_cmd); | |
8863 | + } | |
8864 | +} | |
8865 | + | |
8866 | +static void copy_from_cb(void *dst, const void *base, unsigned offset, | |
8867 | + unsigned len, unsigned limit) | |
8868 | +{ | |
8869 | + unsigned copy = len; | |
8870 | + | |
8871 | + if ((copy + offset) > limit) | |
8872 | + copy = limit - offset; | |
8873 | + memcpy(dst, base + offset, copy); | |
8874 | + len -= copy; | |
8875 | + if (len) | |
8876 | + memcpy(dst + copy, base, len); | |
8877 | +} | |
8878 | + | |
8879 | +static void khexdump(const unsigned char *c, int len) | |
8880 | +{ | |
8881 | + while (len > 16) { | |
8882 | + printk(KERN_INFO | |
8883 | + "%02x %02x %02x %02x %02x %02x %02x %02x-%02x %02x %02x %02x %02x %02x %02x %02x\n", | |
8884 | + c[0], c[1], c[2], c[3], c[4], c[5], c[6], c[7], c[8], | |
8885 | + c[9], c[10], c[11], c[12], c[13], c[14], c[15]); | |
8886 | + len -= 16; | |
c783755a | 8887 | + c += 16; |
c1c6733f AM |
8888 | + } |
8889 | + while (len > 4) { | |
8890 | + printk(KERN_INFO "%02x %02x %02x %02x\n", c[0], c[1], c[2], | |
8891 | + c[3]); | |
8892 | + len -= 4; | |
c783755a | 8893 | + c += 4; |
c1c6733f AM |
8894 | + } |
8895 | + while (len > 0) { | |
8896 | + printk(KERN_INFO "%02x\n", c[0]); | |
8897 | + len--; | |
c783755a | 8898 | + c++; |
c1c6733f AM |
8899 | + } |
8900 | +} | |
8901 | + | |
8902 | +/* | |
8903 | + * Called from the low-level comms layer to process a buffer of | |
8904 | + * commands. | |
8905 | + * | |
8906 | + * Only complete messages are processed here, any "spare" bytes from | |
8907 | + * the end of a buffer are saved and tacked onto the front of the next | |
8908 | + * message that comes in. I doubt this will happen very often but we | |
8909 | + * need to be able to cope with it and I don't want the task to be waiting | |
8910 | + * for packets to come in when there is useful work to be done. | |
8911 | + * | |
8912 | + */ | |
8913 | +int midcomms_process_incoming_buffer(int nodeid, const void *base, | |
8914 | + unsigned offset, unsigned len, | |
8915 | + unsigned limit) | |
8916 | +{ | |
b7b72b66 AM |
8917 | + unsigned char __tmp[sizeof(struct dlm_header) + 64]; |
8918 | + struct dlm_header *msg = (struct dlm_header *) __tmp; | |
c1c6733f AM |
8919 | + int ret = 0; |
8920 | + int err = 0; | |
8921 | + unsigned msglen; | |
8922 | + __u32 id, space; | |
8923 | + | |
b7b72b66 | 8924 | + while (len > sizeof(struct dlm_header)) { |
c1c6733f | 8925 | + /* Get message header and check it over */ |
b7b72b66 | 8926 | + copy_from_cb(msg, base, offset, sizeof(struct dlm_header), |
c1c6733f AM |
8927 | + limit); |
8928 | + msglen = le16_to_cpu(msg->rh_length); | |
8929 | + id = msg->rh_lkid; | |
8930 | + space = msg->rh_lockspace; | |
8931 | + | |
8932 | + /* Check message size */ | |
8933 | + err = -EINVAL; | |
b7b72b66 | 8934 | + if (msglen < sizeof(struct dlm_header)) |
c1c6733f AM |
8935 | + break; |
8936 | + err = -E2BIG; | |
8937 | + if (msglen > dlm_config.buffer_size) { | |
c783755a AM |
8938 | + printk("dlm: message size from %d too big %d(pkt len=%d)\n", nodeid, msglen, len); |
8939 | + khexdump((const unsigned char *) msg, sizeof(struct dlm_header)); /* only the header was copied */ | |
c1c6733f AM |
8940 | + break; |
8941 | + } | |
8942 | + err = 0; | |
8943 | + | |
8944 | + /* Not enough in buffer yet? wait for some more */ | |
8945 | + if (msglen > len) | |
8946 | + break; | |
8947 | + | |
8948 | + /* Make sure our temp buffer is large enough */ | |
8949 | + if (msglen > sizeof(__tmp) && | |
b7b72b66 | 8950 | + msg == (struct dlm_header *) __tmp) { |
c1c6733f AM |
8951 | + msg = kmalloc(dlm_config.buffer_size, GFP_KERNEL); |
8952 | + if (msg == NULL) | |
8953 | + return ret; | |
8954 | + } | |
8955 | + | |
8956 | + copy_from_cb(msg, base, offset, msglen, limit); | |
8957 | + BUG_ON(id != msg->rh_lkid); | |
8958 | + BUG_ON(space != msg->rh_lockspace); | |
8959 | + ret += msglen; | |
8960 | + offset += msglen; | |
8961 | + offset &= (limit - 1); | |
8962 | + len -= msglen; | |
8963 | + network_to_host(msg); | |
8964 | + | |
8965 | + if ((msg->rh_cmd > 32) || | |
8966 | + (msg->rh_cmd == 0) || | |
b7b72b66 | 8967 | + (msg->rh_length < sizeof(struct dlm_header)) || |
c1c6733f AM |
8968 | + (msg->rh_length > dlm_config.buffer_size)) { |
8969 | + /* Bad header: log it, then leave via the common exit so msg is freed */ | |
8970 | + printk("dlm: midcomms: cmd=%u, flags=%u, length=%hu, " | |
8971 | + "lkid=%u, lockspace=%u\n", | |
8972 | + msg->rh_cmd, msg->rh_flags, msg->rh_length, | |
8973 | + msg->rh_lkid, msg->rh_lockspace); | |
8974 | + | |
8975 | + printk("dlm: midcomms: base=%p, offset=%u, len=%u, " | |
8976 | + "ret=%u, limit=%08x newbuf=%d\n", | |
8977 | + base, offset, len, ret, limit, | |
b7b72b66 | 8978 | + ((struct dlm_header *) __tmp == msg)); |
c1c6733f AM |
8979 | + |
8980 | + khexdump((const unsigned char *) msg, msg->rh_length); | |
8981 | + err = -EBADMSG; | |
8982 | + break; | |
8983 | + } | |
8984 | + | |
8985 | + switch (msg->rh_cmd) { | |
8986 | + case GDLM_REMCMD_RECOVERMESSAGE: | |
8987 | + case GDLM_REMCMD_RECOVERREPLY: | |
8988 | + process_recovery_comm(nodeid, msg); | |
8989 | + break; | |
8990 | + default: | |
8991 | + process_cluster_request(nodeid, msg, FALSE); | |
8992 | + } | |
8993 | + } | |
8994 | + | |
b7b72b66 | 8995 | + if (msg != (struct dlm_header *) __tmp) |
c1c6733f AM |
8996 | + kfree(msg); |
8997 | + | |
8998 | + return err ? err : ret; | |
8999 | +} | |
9000 | + | |
/*
 * Convert a message that was built in a lowcomms write-queue entry to
 * network byte order, then commit that entry.
 */

void midcomms_send_buffer(struct dlm_header *msg, struct writequeue_entry *e)
{
	host_to_network(msg);
	lowcomms_commit_buffer(e);
}
9010 | + | |
9011 | +/* | |
9012 | + * Make the message into network byte order and send it | |
9013 | + */ | |
9014 | + | |
b7b72b66 | 9015 | +int midcomms_send_message(uint32_t nodeid, struct dlm_header *msg, |
c1c6733f AM |
9016 | + int allocation) |
9017 | +{ | |
9018 | + int len = msg->rh_length; | |
9019 | + | |
9020 | + host_to_network(msg); | |
9021 | + | |
9022 | + /* | |
9023 | + * Loopback. In fact, the locking code pretty much prevents this from | |
9024 | + * being needed but it can happen when the directory node is also the | |
9025 | + * local node. | |
9026 | + */ | |
9027 | + | |
9028 | + if (nodeid == our_nodeid()) | |
9029 | + return midcomms_process_incoming_buffer(nodeid, (char *) msg, 0, | |
9030 | + len, len); | |
9031 | + | |
9032 | + return lowcomms_send_message(nodeid, (char *) msg, len, allocation); | |
9033 | +} | |
9034 | diff -urN linux-orig/cluster/dlm/midcomms.h linux-patched/cluster/dlm/midcomms.h | |
9035 | --- linux-orig/cluster/dlm/midcomms.h 1970-01-01 07:30:00.000000000 +0730 | |
bb1d8b11 | 9036 | +++ linux-patched/cluster/dlm/midcomms.h 2004-11-03 11:31:56.000000000 +0800 |
c1c6733f AM |
9037 | @@ -0,0 +1,24 @@ |
9038 | +/****************************************************************************** | |
9039 | +******************************************************************************* | |
9040 | +** | |
9041 | +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | |
9042 | +** Copyright (C) 2004 Red Hat, Inc. All rights reserved. | |
9043 | +** | |
9044 | +** This copyrighted material is made available to anyone wishing to use, | |
9045 | +** modify, copy, or redistribute it subject to the terms and conditions | |
9046 | +** of the GNU General Public License v.2. | |
9047 | +** | |
9048 | +******************************************************************************* | |
9049 | +******************************************************************************/ | |
9050 | + | |
9051 | +#ifndef __MIDCOMMS_DOT_H__ | |
9052 | +#define __MIDCOMMS_DOT_H__ | |
9053 | + | |
b7b72b66 | 9054 | +int midcomms_send_message(uint32_t csid, struct dlm_header *msg, |
c1c6733f AM |
9055 | + int allocation); |
9056 | +int midcomms_process_incoming_buffer(int csid, const void *buf, unsigned offset, | |
9057 | + unsigned len, unsigned limit); | |
b7b72b66 | 9058 | +void midcomms_send_buffer(struct dlm_header *msg, |
c1c6733f AM |
9059 | + struct writequeue_entry *e); |
9060 | + | |
9061 | +#endif /* __MIDCOMMS_DOT_H__ */ | |
9062 | diff -urN linux-orig/cluster/dlm/nodes.c linux-patched/cluster/dlm/nodes.c | |
9063 | --- linux-orig/cluster/dlm/nodes.c 1970-01-01 07:30:00.000000000 +0730 | |
bb1d8b11 AM |
9064 | +++ linux-patched/cluster/dlm/nodes.c 2004-11-03 11:31:56.000000000 +0800 |
9065 | @@ -0,0 +1,347 @@ | |
c1c6733f AM |
9066 | +/****************************************************************************** |
9067 | +******************************************************************************* | |
9068 | +** | |
9069 | +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | |
9070 | +** Copyright (C) 2004 Red Hat, Inc. All rights reserved. | |
9071 | +** | |
9072 | +** This copyrighted material is made available to anyone wishing to use, | |
9073 | +** modify, copy, or redistribute it subject to the terms and conditions | |
9074 | +** of the GNU General Public License v.2. | |
9075 | +** | |
9076 | +******************************************************************************* | |
9077 | +******************************************************************************/ | |
9078 | + | |
9079 | +#include <net/sock.h> | |
9080 | +#include <cluster/cnxman.h> | |
9081 | + | |
9082 | +#include "dlm_internal.h" | |
9083 | +#include "lowcomms.h" | |
9084 | +#include "nodes.h" | |
9085 | +#include "recover.h" | |
9086 | +#include "reccomms.h" | |
9087 | +#include "util.h" | |
9088 | + | |
9089 | +static struct list_head cluster_nodes; | |
9090 | +static spinlock_t node_lock; | |
c1c6733f AM |
9091 | + |
9092 | + | |
9093 | +void dlm_nodes_init(void) | |
9094 | +{ | |
9095 | + INIT_LIST_HEAD(&cluster_nodes); | |
9096 | + spin_lock_init(&node_lock); | |
c1c6733f AM |
9097 | +} |
9098 | + | |
b7b72b66 | 9099 | +static struct dlm_node *search_node(uint32_t nodeid) |
c1c6733f | 9100 | +{ |
b7b72b66 | 9101 | + struct dlm_node *node; |
c1c6733f | 9102 | + |
b7b72b66 AM |
9103 | + list_for_each_entry(node, &cluster_nodes, list) { |
9104 | + if (node->nodeid == nodeid) | |
c1c6733f AM |
9105 | + goto out; |
9106 | + } | |
9107 | + node = NULL; | |
c783755a | 9108 | + out: |
c1c6733f AM |
9109 | + return node; |
9110 | +} | |
9111 | + | |
b7b72b66 | 9112 | +static void put_node(struct dlm_node *node) |
c1c6733f AM |
9113 | +{ |
9114 | + spin_lock(&node_lock); | |
c783755a AM |
9115 | + if (atomic_dec_and_test(&node->refcount)) { |
9116 | + lowcomms_close(node->nodeid); | |
b7b72b66 | 9117 | + list_del(&node->list); |
c1c6733f AM |
9118 | + spin_unlock(&node_lock); |
9119 | + kfree(node); | |
9120 | + return; | |
9121 | + } | |
9122 | + spin_unlock(&node_lock); | |
9123 | +} | |
9124 | + | |
b7b72b66 | 9125 | +static int get_node(uint32_t nodeid, struct dlm_node **ndp) |
c1c6733f | 9126 | +{ |
b7b72b66 | 9127 | + struct dlm_node *node, *node2; |
c1c6733f AM |
9128 | + int error = -ENOMEM; |
9129 | + | |
9130 | + spin_lock(&node_lock); | |
9131 | + node = search_node(nodeid); | |
9132 | + if (node) | |
c783755a | 9133 | + atomic_inc(&node->refcount); |
c1c6733f AM |
9134 | + spin_unlock(&node_lock); |
9135 | + | |
9136 | + if (node) | |
9137 | + goto out; | |
9138 | + | |
b7b72b66 | 9139 | + node = (struct dlm_node *) kmalloc(sizeof(struct dlm_node), GFP_KERNEL); |
c1c6733f AM |
9140 | + if (!node) |
9141 | + goto fail; | |
9142 | + | |
b7b72b66 AM |
9143 | + memset(node, 0, sizeof(struct dlm_node)); |
9144 | + node->nodeid = nodeid; | |
c1c6733f AM |
9145 | + |
9146 | + spin_lock(&node_lock); | |
9147 | + node2 = search_node(nodeid); | |
9148 | + if (node2) { | |
c783755a | 9149 | + atomic_inc(&node2->refcount); |
c1c6733f AM |
9150 | + spin_unlock(&node_lock); |
9151 | + kfree(node); | |
9152 | + node = node2; | |
9153 | + goto out; | |
9154 | + } | |
9155 | + | |
c783755a | 9156 | + atomic_set(&node->refcount, 1); |
b7b72b66 | 9157 | + list_add_tail(&node->list, &cluster_nodes); |
c1c6733f AM |
9158 | + spin_unlock(&node_lock); |
9159 | + | |
c783755a | 9160 | + out: |
c1c6733f AM |
9161 | + *ndp = node; |
9162 | + return 0; | |
c783755a | 9163 | + fail: |
c1c6733f AM |
9164 | + return error; |
9165 | +} | |
9166 | + | |
b7b72b66 | 9167 | +int init_new_csb(uint32_t nodeid, struct dlm_csb **ret_csb) |
c1c6733f | 9168 | +{ |
b7b72b66 AM |
9169 | + struct dlm_csb *csb; |
9170 | + struct dlm_node *node; | |
c1c6733f AM |
9171 | + int error = -ENOMEM; |
9172 | + | |
b7b72b66 | 9173 | + csb = (struct dlm_csb *) kmalloc(sizeof(struct dlm_csb), GFP_KERNEL); |
c1c6733f AM |
9174 | + if (!csb) |
9175 | + goto fail; | |
9176 | + | |
b7b72b66 | 9177 | + memset(csb, 0, sizeof(struct dlm_csb)); |
c1c6733f AM |
9178 | + |
9179 | + error = get_node(nodeid, &node); | |
9180 | + if (error) | |
9181 | + goto fail_free; | |
9182 | + | |
b7b72b66 | 9183 | + csb->node = node; |
c1c6733f AM |
9184 | + *ret_csb = csb; |
9185 | + return 0; | |
9186 | + | |
c783755a | 9187 | + fail_free: |
c1c6733f | 9188 | + kfree(csb); |
c783755a | 9189 | + fail: |
c1c6733f AM |
9190 | + return error; |
9191 | +} | |
9192 | + | |
b7b72b66 | 9193 | +void release_csb(struct dlm_csb *csb) |
c1c6733f | 9194 | +{ |
b7b72b66 | 9195 | + put_node(csb->node); |
c1c6733f AM |
9196 | + kfree(csb); |
9197 | +} | |
9198 | + | |
9199 | +uint32_t our_nodeid(void) | |
9200 | +{ | |
9201 | + return lowcomms_our_nodeid(); | |
9202 | +} | |
9203 | + | |
bb1d8b11 AM |
9204 | +static void make_node_array(struct dlm_ls *ls) |
9205 | +{ | |
9206 | + struct dlm_csb *csb; | |
9207 | + uint32_t *array; | |
9208 | + int i = 0; | |
9209 | + | |
9210 | + if (ls->ls_node_array) { | |
9211 | + kfree(ls->ls_node_array); | |
9212 | + ls->ls_node_array = NULL; | |
9213 | + } | |
9214 | + | |
9215 | + array = kmalloc(sizeof(uint32_t) * ls->ls_num_nodes, GFP_KERNEL); | |
9216 | + if (!array) | |
9217 | + return; | |
9218 | + | |
9219 | + list_for_each_entry(csb, &ls->ls_nodes, list) | |
9220 | + array[i++] = csb->node->nodeid; | |
9221 | + | |
9222 | + ls->ls_node_array = array; | |
9223 | +} | |
9224 | + | |
b7b72b66 | 9225 | +int nodes_reconfig_wait(struct dlm_ls *ls) |
c1c6733f AM |
9226 | +{ |
9227 | + int error; | |
9228 | + | |
9229 | + if (ls->ls_low_nodeid == our_nodeid()) { | |
b7b72b66 | 9230 | + error = dlm_wait_status_all(ls, NODES_VALID); |
c1c6733f AM |
9231 | + if (!error) |
9232 | + set_bit(LSFL_ALL_NODES_VALID, &ls->ls_flags); | |
9233 | + | |
9234 | + /* Experimental: this delay should allow any final messages | |
9235 | + * from the previous node to be received before beginning | |
9236 | + * recovery. */ | |
9237 | + | |
9238 | + if (ls->ls_num_nodes == 1) { | |
9239 | + current->state = TASK_UNINTERRUPTIBLE; | |
9240 | + schedule_timeout((2) * HZ); | |
9241 | + } | |
9242 | + | |
9243 | + } else | |
b7b72b66 | 9244 | + error = dlm_wait_status_low(ls, NODES_ALL_VALID); |
c1c6733f AM |
9245 | + |
9246 | + return error; | |
9247 | +} | |
9248 | + | |
b7b72b66 | 9249 | +static void add_ordered_node(struct dlm_ls *ls, struct dlm_csb *new) |
c1c6733f | 9250 | +{ |
b7b72b66 | 9251 | + struct dlm_csb *csb = NULL; |
c1c6733f | 9252 | + struct list_head *tmp; |
b7b72b66 | 9253 | + struct list_head *newlist = &new->list; |
c1c6733f AM |
9254 | + struct list_head *head = &ls->ls_nodes; |
9255 | + | |
9256 | + list_for_each(tmp, head) { | |
b7b72b66 | 9257 | + csb = list_entry(tmp, struct dlm_csb, list); |
c1c6733f | 9258 | + |
b7b72b66 | 9259 | + if (new->node->nodeid < csb->node->nodeid) |
c1c6733f AM |
9260 | + break; |
9261 | + } | |
9262 | + | |
9263 | + if (!csb) | |
9264 | + list_add_tail(newlist, head); | |
9265 | + else { | |
9266 | + /* FIXME: can use list macro here */ | |
9267 | + newlist->prev = tmp->prev; | |
9268 | + newlist->next = tmp; | |
9269 | + tmp->prev->next = newlist; | |
9270 | + tmp->prev = newlist; | |
9271 | + } | |
9272 | +} | |
9273 | + | |
b7b72b66 | 9274 | +int ls_nodes_reconfig(struct dlm_ls *ls, struct dlm_recover *rv, int *neg_out) |
c1c6733f | 9275 | +{ |
b7b72b66 | 9276 | + struct dlm_csb *csb, *safe; |
c1c6733f AM |
9277 | + int error, i, found, pos = 0, neg = 0; |
9278 | + uint32_t low = (uint32_t) (-1); | |
9279 | + | |
9280 | + /* | |
9281 | + * Remove (and save) departed nodes from lockspace's nodes list | |
9282 | + */ | |
9283 | + | |
b7b72b66 | 9284 | + list_for_each_entry_safe(csb, safe, &ls->ls_nodes, list) { |
c1c6733f | 9285 | + found = FALSE; |
b7b72b66 AM |
9286 | + for (i = 0; i < rv->node_count; i++) { |
9287 | + if (csb->node->nodeid == rv->nodeids[i]) { | |
c1c6733f AM |
9288 | + found = TRUE; |
9289 | + break; | |
9290 | + } | |
9291 | + } | |
9292 | + | |
9293 | + if (!found) { | |
9294 | + neg++; | |
b7b72b66 AM |
9295 | + csb->gone_event = rv->event_id; |
9296 | + list_del(&csb->list); | |
9297 | + list_add_tail(&csb->list, &ls->ls_nodes_gone); | |
c1c6733f | 9298 | + ls->ls_num_nodes--; |
b7b72b66 | 9299 | + log_all(ls, "remove node %u", csb->node->nodeid); |
c1c6733f AM |
9300 | + } |
9301 | + } | |
9302 | + | |
9303 | + /* | |
9304 | + * Add new nodes to lockspace's nodes list | |
9305 | + */ | |
9306 | + | |
b7b72b66 | 9307 | + for (i = 0; i < rv->node_count; i++) { |
c1c6733f | 9308 | + found = FALSE; |
b7b72b66 AM |
9309 | + list_for_each_entry(csb, &ls->ls_nodes, list) { |
9310 | + if (csb->node->nodeid == rv->nodeids[i]) { | |
c1c6733f AM |
9311 | + found = TRUE; |
9312 | + break; | |
9313 | + } | |
9314 | + } | |
9315 | + | |
9316 | + if (!found) { | |
9317 | + pos++; | |
9318 | + | |
b7b72b66 AM |
9319 | + error = init_new_csb(rv->nodeids[i], &csb); |
9320 | + DLM_ASSERT(!error,); | |
c1c6733f AM |
9321 | + |
9322 | + add_ordered_node(ls, csb); | |
9323 | + ls->ls_num_nodes++; | |
b7b72b66 | 9324 | + log_all(ls, "add node %u", csb->node->nodeid); |
c1c6733f AM |
9325 | + } |
9326 | + } | |
9327 | + | |
b7b72b66 AM |
9328 | + list_for_each_entry(csb, &ls->ls_nodes, list) { |
9329 | + if (csb->node->nodeid < low) | |
9330 | + low = csb->node->nodeid; | |
c1c6733f AM |
9331 | + } |
9332 | + | |
c1c6733f | 9333 | + ls->ls_low_nodeid = low; |
c1c6733f AM |
9334 | + set_bit(LSFL_NODES_VALID, &ls->ls_flags); |
9335 | + *neg_out = neg; | |
bb1d8b11 | 9336 | + make_node_array(ls); |
c1c6733f AM |
9337 | + |
9338 | + error = nodes_reconfig_wait(ls); | |
9339 | + | |
9340 | + log_all(ls, "total nodes %d", ls->ls_num_nodes); | |
9341 | + | |
9342 | + return error; | |
9343 | +} | |
9344 | + | |
c783755a AM |
9345 | +static void nodes_clear(struct list_head *head) |
9346 | +{ | |
9347 | + struct dlm_csb *csb; | |
9348 | + | |
9349 | + while (!list_empty(head)) { | |
9350 | + csb = list_entry(head->next, struct dlm_csb, list); | |
9351 | + list_del(&csb->list); | |
9352 | + release_csb(csb); | |
9353 | + } | |
9354 | +} | |
9355 | + | |
9356 | +void ls_nodes_clear(struct dlm_ls *ls) | |
9357 | +{ | |
9358 | + nodes_clear(&ls->ls_nodes); | |
9359 | + ls->ls_num_nodes = 0; | |
9360 | +} | |
9361 | + | |
9362 | +void ls_nodes_gone_clear(struct dlm_ls *ls) | |
9363 | +{ | |
9364 | + nodes_clear(&ls->ls_nodes_gone); | |
9365 | +} | |
9366 | + | |
b7b72b66 | 9367 | +int ls_nodes_init(struct dlm_ls *ls, struct dlm_recover *rv) |
c1c6733f | 9368 | +{ |
b7b72b66 | 9369 | + struct dlm_csb *csb; |
c1c6733f AM |
9370 | + int i, error; |
9371 | + uint32_t low = (uint32_t) (-1); | |
9372 | + | |
c783755a AM |
9373 | + /* nodes may be left from a previous failed start */ |
9374 | + ls_nodes_clear(ls); | |
9375 | + | |
c1c6733f AM |
9376 | + log_all(ls, "add nodes"); |
9377 | + | |
b7b72b66 AM |
9378 | + for (i = 0; i < rv->node_count; i++) { |
9379 | + error = init_new_csb(rv->nodeids[i], &csb); | |
c1c6733f AM |
9380 | + if (error) |
9381 | + goto fail; | |
9382 | + | |
9383 | + add_ordered_node(ls, csb); | |
9384 | + ls->ls_num_nodes++; | |
9385 | + | |
b7b72b66 AM |
9386 | + if (csb->node->nodeid < low) |
9387 | + low = csb->node->nodeid; | |
c1c6733f AM |
9388 | + } |
9389 | + | |
9390 | + ls->ls_low_nodeid = low; | |
c1c6733f | 9391 | + set_bit(LSFL_NODES_VALID, &ls->ls_flags); |
bb1d8b11 | 9392 | + make_node_array(ls); |
c1c6733f AM |
9393 | + |
9394 | + error = nodes_reconfig_wait(ls); | |
9395 | + | |
9396 | + log_all(ls, "total nodes %d", ls->ls_num_nodes); | |
c1c6733f | 9397 | + return error; |
c783755a AM |
9398 | + fail: |
9399 | + ls_nodes_clear(ls); | |
c1c6733f AM |
9400 | + return error; |
9401 | +} | |
9402 | + | |
b7b72b66 | 9403 | +int in_nodes_gone(struct dlm_ls *ls, uint32_t nodeid) |
c1c6733f | 9404 | +{ |
b7b72b66 | 9405 | + struct dlm_csb *csb; |
c1c6733f | 9406 | + |
b7b72b66 AM |
9407 | + list_for_each_entry(csb, &ls->ls_nodes_gone, list) { |
9408 | + if (csb->node->nodeid == nodeid) | |
c1c6733f AM |
9409 | + return TRUE; |
9410 | + } | |
9411 | + return FALSE; | |
9412 | +} | |
9413 | diff -urN linux-orig/cluster/dlm/nodes.h linux-patched/cluster/dlm/nodes.h | |
9414 | --- linux-orig/cluster/dlm/nodes.h 1970-01-01 07:30:00.000000000 +0730 | |
bb1d8b11 | 9415 | +++ linux-patched/cluster/dlm/nodes.h 2004-11-03 11:31:56.000000000 +0800 |
c783755a | 9416 | @@ -0,0 +1,27 @@ |
c1c6733f AM |
9417 | +/****************************************************************************** |
9418 | +******************************************************************************* | |
9419 | +** | |
9420 | +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | |
9421 | +** Copyright (C) 2004 Red Hat, Inc. All rights reserved. | |
9422 | +** | |
9423 | +** This copyrighted material is made available to anyone wishing to use, | |
9424 | +** modify, copy, or redistribute it subject to the terms and conditions | |
9425 | +** of the GNU General Public License v.2. | |
9426 | +** | |
9427 | +******************************************************************************* | |
9428 | +******************************************************************************/ | |
9429 | + | |
9430 | +#ifndef __NODES_DOT_H__ | |
9431 | +#define __NODES_DOT_H__ | |
9432 | + | |
9433 | +void dlm_nodes_init(void); | |
b7b72b66 AM |
9434 | +int init_new_csb(uint32_t nodeid, struct dlm_csb ** ret_csb); |
9435 | +void release_csb(struct dlm_csb * csb); | |
c1c6733f | 9436 | +uint32_t our_nodeid(void); |
b7b72b66 AM |
9437 | +int ls_nodes_reconfig(struct dlm_ls * ls, struct dlm_recover * gr, int *neg); |
9438 | +int ls_nodes_init(struct dlm_ls * ls, struct dlm_recover * gr); | |
9439 | +int in_nodes_gone(struct dlm_ls * ls, uint32_t nodeid); | |
c783755a AM |
9440 | +void ls_nodes_clear(struct dlm_ls *ls); |
9441 | +void ls_nodes_gone_clear(struct dlm_ls *ls); | |
c1c6733f AM |
9442 | + |
9443 | +#endif /* __NODES_DOT_H__ */ | |
9444 | diff -urN linux-orig/cluster/dlm/proc.c linux-patched/cluster/dlm/proc.c | |
9445 | --- linux-orig/cluster/dlm/proc.c 1970-01-01 07:30:00.000000000 +0730 | |
bb1d8b11 AM |
9446 | +++ linux-patched/cluster/dlm/proc.c 2004-11-03 11:31:56.000000000 +0800 |
9447 | @@ -0,0 +1,652 @@ | |
c1c6733f AM |
9448 | +/****************************************************************************** |
9449 | +******************************************************************************* | |
9450 | +** | |
9451 | +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | |
9452 | +** Copyright (C) 2004 Red Hat, Inc. All rights reserved. | |
9453 | +** | |
9454 | +** This copyrighted material is made available to anyone wishing to use, | |
9455 | +** modify, copy, or redistribute it subject to the terms and conditions | |
9456 | +** of the GNU General Public License v.2. | |
9457 | +** | |
9458 | +******************************************************************************* | |
9459 | +******************************************************************************/ | |
9460 | + | |
9461 | +#include <linux/init.h> | |
9462 | +#include <linux/proc_fs.h> | |
9463 | +#include <linux/ctype.h> | |
9464 | +#include <linux/seq_file.h> | |
9465 | +#include <linux/module.h> | |
9466 | + | |
9467 | +#include "dlm_internal.h" | |
9468 | +#include "lockspace.h" | |
9469 | + | |
9470 | +#if defined(DLM_DEBUG) | |
9471 | +#define DLM_DEBUG_SIZE (1024) | |
9472 | +#define MAX_DEBUG_MSG_LEN (64) | |
9473 | +#else | |
9474 | +#define DLM_DEBUG_SIZE (0) | |
9475 | +#define MAX_DEBUG_MSG_LEN (0) | |
9476 | +#endif | |
9477 | + | |
9478 | +static char * debug_buf; | |
9479 | +static unsigned int debug_size; | |
9480 | +static unsigned int debug_point; | |
9481 | +static int debug_wrap; | |
9482 | +static spinlock_t debug_lock; | |
9483 | +static struct proc_dir_entry * debug_proc_entry = NULL; | |
c1c6733f AM |
9484 | +static char proc_ls_name[255] = ""; |
9485 | + | |
9486 | +#ifdef CONFIG_CLUSTER_DLM_PROCLOCKS | |
9487 | +static struct proc_dir_entry * locks_proc_entry = NULL; | |
9488 | +static struct seq_operations locks_info_op; | |
b7b72b66 AM |
9489 | +static struct proc_dir_entry * dir_proc_entry = NULL; |
9490 | +static struct seq_operations dir_info_op; | |
c1c6733f AM |
9491 | + |
9492 | + | |
b7b72b66 AM |
9493 | +/* |
9494 | + * /proc/cluster/dlm_locks - dump resources and locks | |
9495 | + */ | |
9496 | + | |
c1c6733f AM |
9497 | +static int locks_open(struct inode *inode, struct file *file) |
9498 | +{ | |
9499 | + return seq_open(file, &locks_info_op); | |
9500 | +} | |
9501 | + | |
9502 | +/* Write simply sets the lockspace to use */ | |
9503 | +static ssize_t locks_write(struct file *file, const char *buf, | |
9504 | + size_t count, loff_t * ppos) | |
9505 | +{ | |
9506 | + if (count < sizeof(proc_ls_name)) { | |
9507 | + copy_from_user(proc_ls_name, buf, count); | |
9508 | + proc_ls_name[count] = '\0'; | |
9509 | + | |
9510 | + /* Remove any trailing LF so that lazy users | |
9511 | + can just echo "lsname" > /proc/cluster/dlm_locks */ | |
9512 | + if (proc_ls_name[count - 1] == '\n') | |
9513 | + proc_ls_name[count - 1] = '\0'; | |
9514 | + | |
9515 | + return count; | |
9516 | + } | |
9517 | + return 0; | |
9518 | +} | |
9519 | + | |
9520 | +static struct file_operations locks_fops = { | |
b7b72b66 AM |
9521 | + open:locks_open, |
9522 | + write:locks_write, | |
9523 | + read:seq_read, | |
9524 | + llseek:seq_lseek, | |
9525 | + release:seq_release, | |
c1c6733f AM |
9526 | +}; |
9527 | + | |
/* Cursor used by the seq_file iterators while walking a hash table */
struct ls_dumpinfo {
	int entry;			/* index of the current hash bucket */
	struct list_head *next;		/* current position in that bucket,
					   NULL when a new bucket is needed */
	struct dlm_ls *ls;		/* lockspace being dumped */
	struct dlm_rsb *rsb;		/* current resource (lock dump) */
	struct dlm_direntry *de;	/* current entry (directory dump) */
};

static int print_resource(struct dlm_rsb *res, struct seq_file *s);
c1c6733f AM |
9537 | + |
9538 | +static struct ls_dumpinfo *next_rsb(struct ls_dumpinfo *di) | |
9539 | +{ | |
b7b72b66 AM |
9540 | + int i; |
9541 | + | |
c1c6733f AM |
9542 | + if (!di->next) { |
9543 | + /* Find the next non-empty hash bucket */ | |
b7b72b66 AM |
9544 | + for (i = di->entry; i < di->ls->ls_rsbtbl_size; i++) { |
9545 | + read_lock(&di->ls->ls_rsbtbl[i].lock); | |
9546 | + if (!list_empty(&di->ls->ls_rsbtbl[i].list)) { | |
9547 | + di->next = di->ls->ls_rsbtbl[i].list.next; | |
9548 | + read_unlock(&di->ls->ls_rsbtbl[i].lock); | |
9549 | + break; | |
9550 | + } | |
9551 | + read_unlock(&di->ls->ls_rsbtbl[i].lock); | |
c1c6733f | 9552 | + } |
b7b72b66 | 9553 | + di->entry = i; |
c1c6733f | 9554 | + |
b7b72b66 AM |
9555 | + if (di->entry >= di->ls->ls_rsbtbl_size) |
9556 | + return NULL; /* End of hash list */ | |
c1c6733f | 9557 | + } else { /* Find the next entry in the list */ |
b7b72b66 AM |
9558 | + i = di->entry; |
9559 | + read_lock(&di->ls->ls_rsbtbl[i].lock); | |
c1c6733f | 9560 | + di->next = di->next->next; |
b7b72b66 | 9561 | + if (di->next->next == di->ls->ls_rsbtbl[i].list.next) { |
c1c6733f AM |
9562 | + /* End of list - move to next bucket */ |
9563 | + di->next = NULL; | |
9564 | + di->entry++; | |
b7b72b66 | 9565 | + read_unlock(&di->ls->ls_rsbtbl[i].lock); |
c1c6733f AM |
9566 | + return next_rsb(di); /* do the top half of this conditional */ |
9567 | + } | |
b7b72b66 | 9568 | + read_unlock(&di->ls->ls_rsbtbl[i].lock); |
c1c6733f | 9569 | + } |
b7b72b66 | 9570 | + di->rsb = list_entry(di->next, struct dlm_rsb, res_hashchain); |
c1c6733f AM |
9571 | + |
9572 | + return di; | |
9573 | +} | |
9574 | + | |
b7b72b66 | 9575 | +static void *s_start(struct seq_file *m, loff_t *pos) |
c1c6733f AM |
9576 | +{ |
9577 | + struct ls_dumpinfo *di; | |
b7b72b66 | 9578 | + struct dlm_ls *ls; |
c1c6733f AM |
9579 | + int i; |
9580 | + | |
9581 | + ls = find_lockspace_by_name(proc_ls_name, strlen(proc_ls_name)); | |
9582 | + if (!ls) | |
9583 | + return NULL; | |
9584 | + | |
9585 | + di = kmalloc(sizeof(struct ls_dumpinfo), GFP_KERNEL); | |
9586 | + if (!di) | |
9587 | + return NULL; | |
9588 | + | |
9589 | + if (*pos == 0) | |
9590 | + seq_printf(m, "DLM lockspace '%s'\n", proc_ls_name); | |
9591 | + | |
9592 | + di->entry = 0; | |
9593 | + di->next = NULL; | |
9594 | + di->ls = ls; | |
b7b72b66 | 9595 | + di->de = NULL; |
c1c6733f AM |
9596 | + |
9597 | + for (i = 0; i < *pos; i++) | |
9598 | + if (next_rsb(di) == NULL) | |
9599 | + return NULL; | |
9600 | + | |
9601 | + return next_rsb(di); | |
9602 | +} | |
9603 | + | |
b7b72b66 | 9604 | +static void *s_next(struct seq_file *m, void *p, loff_t *pos) |
c1c6733f AM |
9605 | +{ |
9606 | + struct ls_dumpinfo *di = p; | |
9607 | + | |
9608 | + *pos += 1; | |
9609 | + | |
9610 | + return next_rsb(di); | |
9611 | +} | |
9612 | + | |
9613 | +static int s_show(struct seq_file *m, void *p) | |
9614 | +{ | |
9615 | + struct ls_dumpinfo *di = p; | |
9616 | + return print_resource(di->rsb, m); | |
9617 | +} | |
9618 | + | |
/* seq_file stop(): free the cursor handed back by start()/next() */
static void s_stop(struct seq_file *m, void *p)
{
	struct ls_dumpinfo *cursor = p;

	kfree(cursor);
}
9623 | + | |
9624 | +static struct seq_operations locks_info_op = { | |
b7b72b66 AM |
9625 | + start:s_start, |
9626 | + next:s_next, | |
9627 | + stop:s_stop, | |
9628 | + show:s_show | |
c1c6733f AM |
9629 | +}; |
9630 | + | |
9631 | +static char *print_lockmode(int mode) | |
9632 | +{ | |
9633 | + switch (mode) { | |
9634 | + case DLM_LOCK_IV: | |
9635 | + return "--"; | |
9636 | + case DLM_LOCK_NL: | |
9637 | + return "NL"; | |
9638 | + case DLM_LOCK_CR: | |
9639 | + return "CR"; | |
9640 | + case DLM_LOCK_CW: | |
9641 | + return "CW"; | |
9642 | + case DLM_LOCK_PR: | |
9643 | + return "PR"; | |
9644 | + case DLM_LOCK_PW: | |
9645 | + return "PW"; | |
9646 | + case DLM_LOCK_EX: | |
9647 | + return "EX"; | |
9648 | + default: | |
9649 | + return "??"; | |
9650 | + } | |
9651 | +} | |
9652 | + | |
b7b72b66 AM |
9653 | +static void print_lock(struct seq_file *s, struct dlm_lkb *lkb, |
9654 | + struct dlm_rsb *res) | |
c1c6733f AM |
9655 | +{ |
9656 | + | |
9657 | + seq_printf(s, "%08x %s", lkb->lkb_id, print_lockmode(lkb->lkb_grmode)); | |
9658 | + | |
9659 | + if (lkb->lkb_status == GDLM_LKSTS_CONVERT | |
9660 | + || lkb->lkb_status == GDLM_LKSTS_WAITING) | |
9661 | + seq_printf(s, " (%s)", print_lockmode(lkb->lkb_rqmode)); | |
9662 | + | |
9663 | + if (lkb->lkb_range) { | |
9664 | + /* This warns on Alpha. Tough. Only I see it */ | |
9665 | + if (lkb->lkb_status == GDLM_LKSTS_CONVERT | |
9666 | + || lkb->lkb_status == GDLM_LKSTS_GRANTED) | |
9667 | + seq_printf(s, " %" PRIx64 "-%" PRIx64, | |
9668 | + lkb->lkb_range[GR_RANGE_START], | |
9669 | + lkb->lkb_range[GR_RANGE_END]); | |
9670 | + if (lkb->lkb_status == GDLM_LKSTS_CONVERT | |
9671 | + || lkb->lkb_status == GDLM_LKSTS_WAITING) | |
9672 | + seq_printf(s, " (%" PRIx64 "-%" PRIx64 ")", | |
9673 | + lkb->lkb_range[RQ_RANGE_START], | |
9674 | + lkb->lkb_range[RQ_RANGE_END]); | |
9675 | + } | |
9676 | + | |
9677 | + if (lkb->lkb_nodeid) { | |
9678 | + if (lkb->lkb_nodeid != res->res_nodeid) | |
9679 | + seq_printf(s, " Remote: %3d %08x", lkb->lkb_nodeid, | |
9680 | + lkb->lkb_remid); | |
9681 | + else | |
9682 | + seq_printf(s, " Master: %08x", lkb->lkb_remid); | |
9683 | + } | |
9684 | + | |
9685 | + if (lkb->lkb_status != GDLM_LKSTS_GRANTED) | |
c783755a AM |
9686 | + seq_printf(s, " LQ: %d,0x%x", lkb->lkb_lockqueue_state, |
9687 | + lkb->lkb_lockqueue_flags); | |
c1c6733f AM |
9688 | + |
9689 | + seq_printf(s, "\n"); | |
9690 | +} | |
9691 | + | |
b7b72b66 | 9692 | +static int print_resource(struct dlm_rsb *res, struct seq_file *s) |
c1c6733f AM |
9693 | +{ |
9694 | + int i; | |
9695 | + struct list_head *locklist; | |
9696 | + | |
9697 | + seq_printf(s, "\nResource %p (parent %p). Name (len=%d) \"", res, | |
9698 | + res->res_parent, res->res_length); | |
9699 | + for (i = 0; i < res->res_length; i++) { | |
9700 | + if (isprint(res->res_name[i])) | |
9701 | + seq_printf(s, "%c", res->res_name[i]); | |
9702 | + else | |
9703 | + seq_printf(s, "%c", '.'); | |
9704 | + } | |
9705 | + if (res->res_nodeid) | |
9706 | + seq_printf(s, "\" \nLocal Copy, Master is node %d\n", | |
9707 | + res->res_nodeid); | |
9708 | + else | |
9709 | + seq_printf(s, "\" \nMaster Copy\n"); | |
9710 | + | |
9711 | + /* Print the LVB: */ | |
9712 | + if (res->res_lvbptr) { | |
9713 | + seq_printf(s, "LVB: "); | |
9714 | + for (i = 0; i < DLM_LVB_LEN; i++) { | |
9715 | + if (i == DLM_LVB_LEN / 2) | |
9716 | + seq_printf(s, "\n "); | |
9717 | + seq_printf(s, "%02x ", | |
9718 | + (unsigned char) res->res_lvbptr[i]); | |
9719 | + } | |
9720 | + seq_printf(s, "\n"); | |
9721 | + } | |
9722 | + | |
9723 | + /* Print the locks attached to this resource */ | |
9724 | + seq_printf(s, "Granted Queue\n"); | |
9725 | + list_for_each(locklist, &res->res_grantqueue) { | |
b7b72b66 AM |
9726 | + struct dlm_lkb *this_lkb = |
9727 | + list_entry(locklist, struct dlm_lkb, lkb_statequeue); | |
c1c6733f AM |
9728 | + print_lock(s, this_lkb, res); |
9729 | + } | |
9730 | + | |
9731 | + seq_printf(s, "Conversion Queue\n"); | |
9732 | + list_for_each(locklist, &res->res_convertqueue) { | |
b7b72b66 AM |
9733 | + struct dlm_lkb *this_lkb = |
9734 | + list_entry(locklist, struct dlm_lkb, lkb_statequeue); | |
c1c6733f AM |
9735 | + print_lock(s, this_lkb, res); |
9736 | + } | |
9737 | + | |
9738 | + seq_printf(s, "Waiting Queue\n"); | |
9739 | + list_for_each(locklist, &res->res_waitqueue) { | |
b7b72b66 AM |
9740 | + struct dlm_lkb *this_lkb = |
9741 | + list_entry(locklist, struct dlm_lkb, lkb_statequeue); | |
c1c6733f AM |
9742 | + print_lock(s, this_lkb, res); |
9743 | + } | |
b7b72b66 AM |
9744 | + |
9745 | + return 0; | |
9746 | +} | |
9747 | + | |
9748 | + | |
9749 | +/* | |
9750 | + * /proc/cluster/dlm_dir - dump resource directory | |
9751 | + */ | |
9752 | + | |
9753 | +static int print_de(struct dlm_direntry *de, struct seq_file *s) | |
9754 | +{ | |
9755 | + char strname[DLM_RESNAME_MAXLEN+1]; | |
9756 | + | |
9757 | + memset(strname, 0, DLM_RESNAME_MAXLEN+1); | |
9758 | + memcpy(strname, de->name, de->length); | |
9759 | + | |
9760 | + seq_printf(s, "%s %u\n", strname, de->master_nodeid); | |
c1c6733f AM |
9761 | + return 0; |
9762 | +} | |
b7b72b66 AM |
9763 | + |
9764 | +static int dir_open(struct inode *inode, struct file *file) | |
9765 | +{ | |
9766 | + return seq_open(file, &dir_info_op); | |
9767 | +} | |
9768 | + | |
9769 | +static ssize_t dir_write(struct file *file, const char *buf, | |
9770 | + size_t count, loff_t *ppos) | |
9771 | +{ | |
9772 | + return locks_write(file, buf, count, ppos); | |
9773 | +} | |
9774 | + | |
9775 | +static struct file_operations dir_fops = { | |
c783755a AM |
9776 | + .open = dir_open, |
9777 | + .write = dir_write, | |
9778 | + .read = seq_read, | |
9779 | + .llseek = seq_lseek, | |
9780 | + .release = seq_release, | |
9781 | + .owner = THIS_MODULE, | |
b7b72b66 AM |
9782 | +}; |
9783 | + | |
9784 | +static struct ls_dumpinfo *next_de(struct ls_dumpinfo *di) | |
9785 | +{ | |
9786 | + int i; | |
9787 | + | |
9788 | + if (!di->next) { | |
9789 | + /* Find the next non-empty hash bucket */ | |
9790 | + for (i = di->entry; i < di->ls->ls_dirtbl_size; i++) { | |
9791 | + read_lock(&di->ls->ls_dirtbl[i].lock); | |
9792 | + if (!list_empty(&di->ls->ls_dirtbl[i].list)) { | |
9793 | + di->next = di->ls->ls_dirtbl[i].list.next; | |
9794 | + read_unlock(&di->ls->ls_dirtbl[i].lock); | |
9795 | + break; | |
9796 | + } | |
9797 | + read_unlock(&di->ls->ls_dirtbl[i].lock); | |
9798 | + } | |
9799 | + di->entry = i; | |
9800 | + | |
9801 | + if (di->entry >= di->ls->ls_dirtbl_size) | |
9802 | + return NULL; /* End of hash list */ | |
9803 | + } else { /* Find the next entry in the list */ | |
9804 | + i = di->entry; | |
9805 | + read_lock(&di->ls->ls_dirtbl[i].lock); | |
9806 | + di->next = di->next->next; | |
9807 | + if (di->next->next == di->ls->ls_dirtbl[i].list.next) { | |
9808 | + /* End of list - move to next bucket */ | |
9809 | + di->next = NULL; | |
9810 | + di->entry++; | |
9811 | + read_unlock(&di->ls->ls_dirtbl[i].lock); | |
9812 | + return next_de(di); /* do the top half of this conditional */ | |
9813 | + } | |
9814 | + read_unlock(&di->ls->ls_dirtbl[i].lock); | |
9815 | + } | |
9816 | + di->de = list_entry(di->next, struct dlm_direntry, list); | |
9817 | + | |
9818 | + return di; | |
9819 | +} | |
9820 | + | |
9821 | +static void *dir_start(struct seq_file *m, loff_t *pos) | |
9822 | +{ | |
9823 | + struct ls_dumpinfo *di; | |
9824 | + struct dlm_ls *ls; | |
9825 | + int i; | |
9826 | + | |
9827 | + ls = find_lockspace_by_name(proc_ls_name, strlen(proc_ls_name)); | |
9828 | + if (!ls) | |
9829 | + return NULL; | |
9830 | + | |
9831 | + di = kmalloc(sizeof(struct ls_dumpinfo), GFP_KERNEL); | |
9832 | + if (!di) | |
9833 | + return NULL; | |
9834 | + | |
9835 | + if (*pos == 0) | |
9836 | + seq_printf(m, "DLM lockspace '%s'\n", proc_ls_name); | |
9837 | + /* Restart at the first bucket, then step forward to entry *pos */ | |
9838 | + di->entry = 0; | |
9839 | + di->next = NULL; | |
9840 | + di->ls = ls; | |
9841 | + for (i = 0; i <= *pos; i++) | |
9842 | + if (next_de(di) == NULL) { | |
9843 | + kfree(di); /* dir_stop() only gets the NULL we return */ | |
9844 | + return NULL; | |
9845 | + } | |
9846 | + return di; /* next_de() returned di itself on every step */ | |
9847 | +} | |
9848 | + | |
9849 | +static void *dir_next(struct seq_file *m, void *p, loff_t *pos) | |
9850 | +{ | |
9851 | + struct ls_dumpinfo *di = next_de(p); /* p itself, or NULL at the end */ | |
9852 | + *pos += 1; | |
9853 | + if (!di) | |
9854 | + kfree(p); /* dir_stop() will only be handed NULL, so free here */ | |
9855 | + return di; | |
9856 | +} | |
9857 | + | |
9858 | +static int dir_show(struct seq_file *m, void *p) | |
9859 | +{ | |
9860 | + struct ls_dumpinfo *di = p; | |
9861 | + return print_de(di->de, m); | |
9862 | +} | |
9863 | + | |
9864 | +static void dir_stop(struct seq_file *m, void *p) | |
9865 | +{ | |
9866 | + kfree(p); | |
9867 | +} | |
9868 | + | |
9869 | +static struct seq_operations dir_info_op = { | |
c783755a AM |
9870 | + .start = dir_start, |
9871 | + .next = dir_next, | |
9872 | + .stop = dir_stop, | |
9873 | + .show = dir_show, | |
b7b72b66 | 9874 | +}; |
c1c6733f AM |
9875 | +#endif /* CONFIG_CLUSTER_DLM_PROCLOCKS */ |
9876 | + | |
b7b72b66 | 9877 | +void dlm_debug_log(struct dlm_ls *ls, const char *fmt, ...) |
c1c6733f AM |
9878 | +{ |
9879 | + va_list va; | |
9880 | + int i, n, size, len; | |
9881 | + char buf[MAX_DEBUG_MSG_LEN+1]; | |
9882 | + | |
9883 | + spin_lock(&debug_lock); | |
9884 | + | |
9885 | + if (!debug_buf) | |
9886 | + goto out; | |
9887 | + | |
9888 | + size = MAX_DEBUG_MSG_LEN; | |
9889 | + memset(buf, 0, size+1); | |
9890 | + | |
9891 | + n = snprintf(buf, size, "%s ", ls->ls_name); | |
9892 | + size -= n; | |
9893 | + | |
9894 | + va_start(va, fmt); | |
9895 | + vsnprintf(buf+n, size, fmt, va); | |
9896 | + va_end(va); | |
9897 | + | |
9898 | + len = strlen(buf); | |
9899 | + if (len > MAX_DEBUG_MSG_LEN-1) | |
9900 | + len = MAX_DEBUG_MSG_LEN-1; | |
9901 | + buf[len] = '\n'; | |
9902 | + buf[len+1] = '\0'; | |
9903 | + | |
9904 | + for (i = 0; i < strlen(buf); i++) { | |
9905 | + debug_buf[debug_point++] = buf[i]; | |
9906 | + | |
9907 | + if (debug_point == debug_size) { | |
9908 | + debug_point = 0; | |
9909 | + debug_wrap = 1; | |
9910 | + } | |
9911 | + } | |
9912 | + out: | |
9913 | + spin_unlock(&debug_lock); | |
9914 | +} | |
9915 | + | |
9916 | +void dlm_debug_dump(void) | |
9917 | +{ | |
9918 | + int i; | |
9919 | + | |
9920 | + spin_lock(&debug_lock); | |
9921 | + if (debug_wrap) { | |
9922 | + for (i = debug_point; i < debug_size; i++) | |
9923 | + printk("%c", debug_buf[i]); | |
9924 | + } | |
9925 | + for (i = 0; i < debug_point; i++) | |
9926 | + printk("%c", debug_buf[i]); | |
9927 | + spin_unlock(&debug_lock); | |
9928 | +} | |
9929 | + | |
9930 | +void dlm_debug_setup(int size) | |
9931 | +{ | |
9932 | + char *b = NULL; | |
9933 | + | |
9934 | + if (size > PAGE_SIZE) | |
9935 | + size = PAGE_SIZE; | |
9936 | + if (size) | |
9937 | + b = kmalloc(size, GFP_KERNEL); /* allocated before the spinlock is taken */ | |
9938 | + | |
9939 | + spin_lock(&debug_lock); | |
9940 | + /* Replace the buffer and reset the ring state unconditionally: with | |
9941 | + * size 0 or a failed allocation debug_buf must become NULL instead | |
9942 | + * of being left pointing at the memory freed here */ | |
9943 | + kfree(debug_buf); | |
9944 | + debug_buf = b; | |
9945 | + debug_size = b ? size : 0; | |
9946 | + debug_point = 0; | |
9947 | + debug_wrap = 0; | |
9948 | + if (b) | |
9949 | + memset(debug_buf, 0, debug_size); | |
9950 | + spin_unlock(&debug_lock); | |
9951 | +} | |
9952 | + | |
9953 | +static void dlm_debug_init(void) | |
9954 | +{ | |
9955 | + debug_buf = NULL; | |
9956 | + debug_size = 0; | |
9957 | + debug_point = 0; | |
9958 | + debug_wrap = 0; | |
9959 | + spin_lock_init(&debug_lock); | |
9960 | + | |
9961 | + dlm_debug_setup(DLM_DEBUG_SIZE); | |
9962 | +} | |
9963 | + | |
9964 | +#ifdef CONFIG_PROC_FS | |
9965 | +int dlm_debug_info(char *b, char **start, off_t offset, int length) | |
9966 | +{ | |
9967 | + int i, n = 0; | |
9968 | + | |
9969 | + spin_lock(&debug_lock); | |
9970 | + | |
9971 | + if (debug_wrap) { | |
9972 | + for (i = debug_point; i < debug_size; i++) | |
9973 | + n += sprintf(b + n, "%c", debug_buf[i]); | |
9974 | + } | |
9975 | + for (i = 0; i < debug_point; i++) | |
9976 | + n += sprintf(b + n, "%c", debug_buf[i]); | |
9977 | + | |
9978 | + spin_unlock(&debug_lock); | |
9979 | + | |
9980 | + return n; | |
9981 | +} | |
bb1d8b11 | 9982 | +#endif |
c1c6733f | 9983 | + |
c783755a AM |
9984 | +#ifdef CONFIG_DLM_STATS |
9985 | +struct dlm_statinfo dlm_stats; | |
9986 | +static struct proc_dir_entry *stats_proc_entry = NULL; | |
9987 | +static int dlm_stats_info(char *b, char **start, off_t offset, int length) | |
9988 | +{ | |
9989 | + int n=0; | |
9990 | + int i; | |
9991 | + long lq_locks = 0; | |
9992 | + unsigned long lq_time = 0; | |
9993 | + | |
9994 | + n += sprintf(b+n, "DLM stats (HZ=%d)\n\n", HZ); | |
9995 | + n += sprintf(b+n, "Lock operations: %7d\n", dlm_stats.lockops); | |
9996 | + n += sprintf(b+n, "Unlock operations: %7d\n", dlm_stats.unlockops); | |
9997 | + n += sprintf(b+n, "Convert operations: %7d\n", dlm_stats.convertops); | |
9998 | + n += sprintf(b+n, "Completion ASTs: %7d\n", dlm_stats.cast); | |
9999 | + n += sprintf(b+n, "Blocking ASTs: %7d\n", dlm_stats.bast); | |
10000 | + n += sprintf(b+n, "\n"); | |
10001 | + n += sprintf(b+n, "Lockqueue num waittime ave\n"); | |
10002 | + for (i=1; i<=4 ; i++) { | |
10003 | + char *lq_reason="???"; | |
10004 | + switch (i){ | |
10005 | + case 1: lq_reason = "WAIT_RSB "; | |
10006 | + break; | |
10007 | + case 2: lq_reason = "WAIT_CONV "; | |
10008 | + break; | |
10009 | + case 3: lq_reason = "WAIT_GRANT "; | |
10010 | + break; | |
10011 | + case 4: lq_reason = "WAIT_UNLOCK"; | |
10012 | + break; | |
10013 | + } | |
10014 | + if (dlm_stats.lockqueue_locks[i]) | |
10015 | + n += sprintf(b+n, "%s %6lu %7lu %3lu\n", | |
10016 | + lq_reason, | |
10017 | + dlm_stats.lockqueue_locks[i], | |
10018 | + dlm_stats.lockqueue_time[i], | |
10019 | + dlm_stats.lockqueue_time[i]/ | |
10020 | + dlm_stats.lockqueue_locks[i]); | |
10021 | + | |
10022 | + lq_locks += dlm_stats.lockqueue_locks[i]; | |
10023 | + lq_time += dlm_stats.lockqueue_time[i]; | |
10024 | + } | |
10025 | + if (lq_locks) | |
10026 | + n += sprintf(b+n, "Total %6lu %7lu %3lu\n", | |
10027 | + lq_locks, lq_time, lq_time/lq_locks); | |
10028 | + return n; | |
10029 | +} | |
10030 | + | |
10031 | +static int dlm_stats_clear(struct file *file, const char __user *buffer, | |
10032 | + unsigned long count, void *data) | |
10033 | +{ | |
10034 | + memset(&dlm_stats, 0, sizeof(dlm_stats)); | |
10035 | + return count; | |
10036 | +} | |
bb1d8b11 | 10037 | +#endif /* CONFIG_DLM_STATS */ |
c1c6733f AM |
10038 | + |
10039 | +void dlm_proc_init(void) | |
10040 | +{ | |
10041 | +#ifdef CONFIG_PROC_FS | |
10042 | + debug_proc_entry = create_proc_entry("cluster/dlm_debug", S_IRUGO, | |
10043 | + NULL); | |
10044 | + if (!debug_proc_entry) | |
10045 | + return; | |
10046 | + | |
10047 | + debug_proc_entry->get_info = &dlm_debug_info; | |
c1c6733f | 10048 | +#endif |
bb1d8b11 | 10049 | + |
c783755a | 10050 | +#ifdef CONFIG_DLM_STATS |
bb1d8b11 AM |
10051 | + stats_proc_entry = create_proc_entry("cluster/dlm_stats", |
10052 | + S_IRUSR | S_IWUSR, NULL); | |
c783755a AM |
10053 | + if (!stats_proc_entry) |
10054 | + return; | |
10055 | + | |
10056 | + stats_proc_entry->get_info = &dlm_stats_info; | |
10057 | + stats_proc_entry->write_proc = &dlm_stats_clear; | |
10058 | +#endif | |
10059 | + | |
c1c6733f AM |
10060 | + dlm_debug_init(); |
10061 | + | |
10062 | +#ifdef CONFIG_CLUSTER_DLM_PROCLOCKS | |
10063 | + locks_proc_entry = create_proc_read_entry("cluster/dlm_locks", | |
10064 | + S_IFREG | 0400, | |
10065 | + NULL, NULL, NULL); | |
10066 | + if (!locks_proc_entry) | |
10067 | + return; | |
10068 | + locks_proc_entry->proc_fops = &locks_fops; | |
b7b72b66 AM |
10069 | + |
10070 | + dir_proc_entry = create_proc_read_entry("cluster/dlm_dir", | |
10071 | + S_IFREG | 0400, | |
10072 | + NULL, NULL, NULL); | |
10073 | + if (!dir_proc_entry) | |
10074 | + return; | |
10075 | + dir_proc_entry->proc_fops = &dir_fops; | |
c1c6733f AM |
10076 | +#endif |
10077 | +} | |
10078 | + | |
10079 | +void dlm_proc_exit(void) | |
10080 | +{ | |
10081 | +#ifdef CONFIG_PROC_FS | |
10082 | + if (debug_proc_entry) { | |
10083 | + remove_proc_entry("cluster/dlm_debug", NULL); | |
10084 | + dlm_debug_setup(0); | |
10085 | + } | |
c1c6733f | 10086 | +#endif |
bb1d8b11 | 10087 | + |
c783755a AM |
10088 | +#ifdef CONFIG_DLM_STATS |
10089 | + if (stats_proc_entry) | |
10090 | + remove_proc_entry("cluster/dlm_stats", NULL); | |
10091 | +#endif | |
c1c6733f AM |
10092 | + |
10093 | +#ifdef CONFIG_CLUSTER_DLM_PROCLOCKS | |
10094 | + if (locks_proc_entry) | |
10095 | + remove_proc_entry("cluster/dlm_locks", NULL); | |
b7b72b66 AM |
10096 | + if (dir_proc_entry) |
10097 | + remove_proc_entry("cluster/dlm_dir", NULL); | |
c1c6733f AM |
10098 | +#endif |
10099 | +} | |
10100 | diff -urN linux-orig/cluster/dlm/queries.c linux-patched/cluster/dlm/queries.c | |
10101 | --- linux-orig/cluster/dlm/queries.c 1970-01-01 07:30:00.000000000 +0730 | |
bb1d8b11 AM |
10102 | +++ linux-patched/cluster/dlm/queries.c 2004-11-03 11:31:56.000000000 +0800 |
10103 | @@ -0,0 +1,713 @@ | |
c1c6733f AM |
10104 | +/****************************************************************************** |
10105 | +******************************************************************************* | |
10106 | +** | |
10107 | +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | |
10108 | +** Copyright (C) 2004 Red Hat, Inc. All rights reserved. | |
10109 | +** | |
10110 | +** This copyrighted material is made available to anyone wishing to use, | |
10111 | +** modify, copy, or redistribute it subject to the terms and conditions | |
10112 | +** of the GNU General Public License v.2. | |
10113 | +** | |
10114 | +******************************************************************************* | |
10115 | +******************************************************************************/ | |
10116 | + | |
10117 | +/* | |
10118 | + * queries.c | |
10119 | + * | |
10120 | + * This file provides the kernel query interface to the DLM. | |
10121 | + * | |
10122 | + */ | |
10123 | + | |
10124 | +#define EXPORT_SYMTAB | |
10125 | +#include <linux/module.h> | |
10126 | + | |
10127 | +#include "dlm_internal.h" | |
b7b72b66 | 10128 | +#include "lockspace.h" |
c1c6733f AM |
10129 | +#include "lockqueue.h" |
10130 | +#include "locking.h" | |
10131 | +#include "lkb.h" | |
10132 | +#include "nodes.h" | |
10133 | +#include "dir.h" | |
10134 | +#include "ast.h" | |
10135 | +#include "memory.h" | |
10136 | +#include "lowcomms.h" | |
10137 | +#include "midcomms.h" | |
10138 | +#include "rsb.h" | |
10139 | + | |
b7b72b66 AM |
10140 | +static int query_resource(struct dlm_rsb *rsb, struct dlm_resinfo *resinfo); |
10141 | +static int query_locks(int query, struct dlm_lkb *lkb, struct dlm_queryinfo *qinfo); | |
c1c6733f AM |
10142 | + |
10143 | +/* | |
10144 | + * API entry point. | |
10145 | + */ | |
10146 | +int dlm_query(void *lockspace, | |
10147 | + struct dlm_lksb *lksb, | |
10148 | + int query, | |
10149 | + struct dlm_queryinfo *qinfo, | |
10150 | + void (ast_routine(void *)), | |
10151 | + void *astarg) | |
10152 | +{ | |
10153 | + int status = -EINVAL; | |
b7b72b66 AM |
10154 | + struct dlm_lkb *target_lkb; |
10155 | + struct dlm_lkb *query_lkb = NULL; /* Our temporary LKB */ | |
10156 | + struct dlm_ls *ls = find_lockspace_by_local_id(lockspace); | |
c1c6733f | 10157 | + |
b7b72b66 AM |
10158 | + if (!ls) |
10159 | + return -EINVAL; | |
c1c6733f AM |
10160 | + if (!qinfo) |
10161 | + goto out; | |
c1c6733f AM |
10162 | + if (!ast_routine) |
10163 | + goto out; | |
10164 | + if (!lksb) | |
10165 | + goto out; | |
10166 | + | |
10167 | + if (!qinfo->gqi_lockinfo) | |
10168 | + qinfo->gqi_locksize = 0; | |
10169 | + | |
10170 | + /* Find the lkid */ | |
10171 | + target_lkb = find_lock_by_id(ls, lksb->sb_lkid); | |
10172 | + if (!target_lkb) | |
10173 | + goto out; | |
10174 | + | |
10175 | + /* If the user wants a list of locks that are blocking or | |
10176 | + not blocking this lock, then it must be waiting | |
10177 | + for something | |
10178 | + */ | |
10179 | + if (((query & DLM_QUERY_MASK) == DLM_QUERY_LOCKS_BLOCKING || | |
10180 | + (query & DLM_QUERY_MASK) == DLM_QUERY_LOCKS_NOTBLOCK) && | |
10181 | + target_lkb->lkb_status == GDLM_LKSTS_GRANTED) | |
b7b72b66 | 10182 | + goto out; |
c1c6733f AM |
10183 | + |
10184 | + /* We now allocate an LKB for our own use (so we can hang | |
10185 | + * things like the AST routine and the lksb from it) */ | |
10186 | + lksb->sb_status = -EBUSY; | |
10187 | + query_lkb = create_lkb(ls); | |
10188 | + if (!query_lkb) { | |
10189 | + status = -ENOMEM; | |
10190 | + goto out; | |
10191 | + } | |
10192 | + query_lkb->lkb_astaddr = ast_routine; | |
10193 | + query_lkb->lkb_astparam = (long)astarg; | |
10194 | + query_lkb->lkb_resource = target_lkb->lkb_resource; | |
10195 | + query_lkb->lkb_lksb = lksb; | |
10196 | + | |
10197 | + /* Don't free the resource while we are querying it. This ref | |
10198 | + * will be dropped when the LKB is freed */ | |
10199 | + hold_rsb(query_lkb->lkb_resource); | |
10200 | + | |
10201 | + /* Fill in the stuff that's always local */ | |
10202 | + if (qinfo->gqi_resinfo) { | |
10203 | + if (target_lkb->lkb_resource->res_nodeid) | |
10204 | + qinfo->gqi_resinfo->rsi_masternode = | |
10205 | + target_lkb->lkb_resource->res_nodeid; | |
10206 | + else | |
10207 | + qinfo->gqi_resinfo->rsi_masternode = our_nodeid(); | |
10208 | + qinfo->gqi_resinfo->rsi_length = | |
10209 | + target_lkb->lkb_resource->res_length; | |
10210 | + memcpy(qinfo->gqi_resinfo->rsi_name, | |
10211 | + target_lkb->lkb_resource->res_name, | |
10212 | + qinfo->gqi_resinfo->rsi_length); | |
10213 | + } | |
10214 | + | |
10215 | + /* If the master is local (or the user doesn't want the overhead of a | |
10216 | + * remote call) - fill in the details here */ | |
10217 | + if (target_lkb->lkb_resource->res_nodeid == 0 || | |
10218 | + (query & DLM_QUERY_LOCAL)) { | |
10219 | + | |
10220 | + status = 0; | |
10221 | + /* Resource info */ | |
10222 | + if (qinfo->gqi_resinfo) { | |
10223 | + query_resource(target_lkb->lkb_resource, | |
10224 | + qinfo->gqi_resinfo); | |
10225 | + } | |
10226 | + | |
10227 | + /* Lock lists */ | |
10228 | + if (qinfo->gqi_lockinfo) { | |
10229 | + status = query_locks(query, target_lkb, qinfo); | |
10230 | + } | |
10231 | + | |
10232 | + query_lkb->lkb_retstatus = status; | |
b7b72b66 | 10233 | + queue_ast(query_lkb, AST_COMP | AST_DEL, 0); |
c1c6733f AM |
10234 | + wake_astd(); |
10235 | + | |
10236 | + /* An AST will be delivered so we must return success here */ | |
10237 | + status = 0; | |
10238 | + goto out; | |
10239 | + } | |
10240 | + | |
10241 | + /* Remote master */ | |
10242 | + if (target_lkb->lkb_resource->res_nodeid != 0) | |
10243 | + { | |
b7b72b66 | 10244 | + struct dlm_query_request *remquery; |
c1c6733f AM |
10245 | + struct writequeue_entry *e; |
10246 | + | |
10247 | + /* Clear this cos the receiving end adds to it with | |
10248 | + each incoming packet */ | |
10249 | + qinfo->gqi_lockcount = 0; | |
10250 | + | |
10251 | + /* Squirrel a pointer to the query info struct | |
10252 | + somewhere illegal */ | |
b7b72b66 | 10253 | + query_lkb->lkb_request = (struct dlm_request *) qinfo; |
c1c6733f AM |
10254 | + |
10255 | + e = lowcomms_get_buffer(query_lkb->lkb_resource->res_nodeid, | |
b7b72b66 | 10256 | + sizeof(struct dlm_query_request), |
c1c6733f AM |
10257 | + ls->ls_allocation, |
10258 | + (char **) &remquery); | |
10259 | + if (!e) { | |
10260 | + status = -ENOBUFS; | |
10261 | + goto out; | |
10262 | + } | |
10263 | + | |
10264 | + /* Build remote packet */ | |
b7b72b66 | 10265 | + memset(remquery, 0, sizeof(struct dlm_query_request)); |
c1c6733f AM |
10266 | + |
10267 | + remquery->rq_maxlocks = qinfo->gqi_locksize; | |
10268 | + remquery->rq_query = query; | |
10269 | + remquery->rq_mstlkid = target_lkb->lkb_remid; | |
10270 | + if (qinfo->gqi_lockinfo) | |
10271 | + remquery->rq_maxlocks = qinfo->gqi_locksize; | |
10272 | + | |
10273 | + remquery->rq_header.rh_cmd = GDLM_REMCMD_QUERY; | |
10274 | + remquery->rq_header.rh_flags = 0; | |
b7b72b66 | 10275 | + remquery->rq_header.rh_length = sizeof(struct dlm_query_request); |
c1c6733f AM |
10276 | + remquery->rq_header.rh_lkid = query_lkb->lkb_id; |
10277 | + remquery->rq_header.rh_lockspace = ls->ls_global_id; | |
10278 | + | |
10279 | + midcomms_send_buffer(&remquery->rq_header, e); | |
10280 | + status = 0; | |
10281 | + } | |
10282 | + | |
10283 | + out: | |
b7b72b66 | 10284 | + put_lockspace(ls); |
c1c6733f AM |
10285 | + return status; |
10286 | +} | |
10287 | + | |
10288 | +static inline int valid_range(struct dlm_range *r) | |
10289 | +{ | |
10290 | + if (r->ra_start != 0ULL || | |
10291 | + r->ra_end != 0xFFFFFFFFFFFFFFFFULL) | |
10292 | + return 1; | |
10293 | + else | |
10294 | + return 0; | |
10295 | +} | |
10296 | + | |
10297 | +static void put_int(int x, char *buf, int *offp) | |
10298 | +{ | |
10299 | + x = cpu_to_le32(x); | |
10300 | + memcpy(buf + *offp, &x, sizeof(int)); | |
10301 | + *offp += sizeof(int); | |
10302 | +} | |
10303 | + | |
10304 | +static void put_int64(uint64_t x, char *buf, int *offp) | |
10305 | +{ | |
10306 | + x = cpu_to_le64(x); | |
10307 | + memcpy(buf + *offp, &x, sizeof(uint64_t)); | |
10308 | + *offp += sizeof(uint64_t); | |
10309 | +} | |
10310 | + | |
10311 | +static int get_int(char *buf, int *offp) | |
10312 | +{ | |
10313 | + int value; | |
10314 | + memcpy(&value, buf + *offp, sizeof(int)); | |
10315 | + *offp += sizeof(int); | |
10316 | + return le32_to_cpu(value); | |
10317 | +} | |
10318 | + | |
10319 | +static uint64_t get_int64(char *buf, int *offp) | |
10320 | +{ | |
10321 | + uint64_t value; | |
10322 | + | |
10323 | + memcpy(&value, buf + *offp, sizeof(uint64_t)); | |
10324 | + *offp += sizeof(uint64_t); | |
10325 | + return le64_to_cpu(value); | |
10326 | +} | |
10327 | + | |
10328 | +#define LOCK_LEN (sizeof(int)*5 + sizeof(uint8_t)*4) /* lkid, mstlkid, parent, node, ownpid + state, grmode, rqmode, range flag */ | |
10329 | + | |
10330 | +/* Called from recvd to get lock info for a remote node */ | |
b7b72b66 | 10331 | +int remote_query(int nodeid, struct dlm_ls *ls, struct dlm_header *msg) |
c1c6733f | 10332 | +{ |
b7b72b66 AM |
10333 | + struct dlm_query_request *query = (struct dlm_query_request *) msg; |
10334 | + struct dlm_query_reply *reply; | |
c1c6733f AM |
10335 | + struct dlm_resinfo resinfo; |
10336 | + struct dlm_queryinfo qinfo; | |
10337 | + struct writequeue_entry *e; | |
10338 | + char *buf; | |
b7b72b66 | 10339 | + struct dlm_lkb *lkb; |
c1c6733f AM |
10340 | + int status = 0; |
10341 | + int bufidx; | |
10342 | + int finished = 0; | |
10343 | + int cur_lock = 0; | |
10344 | + int start_lock = 0; | |
10345 | + | |
10346 | + lkb = find_lock_by_id(ls, query->rq_mstlkid); | |
10347 | + if (!lkb) { | |
10348 | + status = -EINVAL; | |
10349 | + goto send_error; | |
10350 | + } | |
10351 | + | |
10352 | + qinfo.gqi_resinfo = &resinfo; | |
10353 | + qinfo.gqi_locksize = query->rq_maxlocks; | |
10354 | + | |
10355 | + /* Get the resource bits */ | |
10356 | + query_resource(lkb->lkb_resource, &resinfo); | |
10357 | + | |
10358 | + /* Now get the locks if wanted */ | |
10359 | + if (query->rq_maxlocks) { | |
10360 | + qinfo.gqi_lockinfo = kmalloc(sizeof(struct dlm_lockinfo) * query->rq_maxlocks, | |
10361 | + GFP_KERNEL); | |
10362 | + if (!qinfo.gqi_lockinfo) { | |
10363 | + status = -ENOMEM; | |
10364 | + goto send_error; | |
10365 | + } | |
10366 | + | |
10367 | + status = query_locks(query->rq_query, lkb, &qinfo); | |
10368 | + if (status && status != -E2BIG) { | |
10369 | + kfree(qinfo.gqi_lockinfo); | |
10370 | + goto send_error; | |
10371 | + } | |
10372 | + } | |
10373 | + else { | |
10374 | + qinfo.gqi_lockinfo = NULL; | |
10375 | + qinfo.gqi_lockcount = 0; | |
10376 | + } | |
10377 | + | |
10378 | + /* Send as many blocks as needed for all the locks */ | |
10379 | + do { | |
10380 | + int i; | |
b7b72b66 | 10381 | + int msg_len = sizeof(struct dlm_query_reply); |
c1c6733f AM |
10382 | + int last_msg_len = msg_len; /* keeps compiler quiet */ |
10383 | + int last_lock; | |
10384 | + | |
10385 | + /* First work out how many locks we can fit into a block */ | |
10386 | + for (i=cur_lock; i < qinfo.gqi_lockcount && msg_len < PAGE_SIZE; i++) { | |
10387 | + | |
10388 | + last_msg_len = msg_len; | |
10389 | + | |
10390 | + msg_len += LOCK_LEN; | |
10391 | + if (valid_range(&qinfo.gqi_lockinfo[i].lki_grrange) || | |
10392 | + valid_range(&qinfo.gqi_lockinfo[i].lki_rqrange)) { | |
10393 | + | |
10394 | + msg_len += sizeof(uint64_t) * 4; | |
10395 | + } | |
10396 | + } | |
10397 | + | |
10398 | + /* There must be a neater way of doing this... */ | |
10399 | + if (msg_len > PAGE_SIZE) { | |
10400 | + last_lock = i-1; | |
10401 | + msg_len = last_msg_len; | |
10402 | + } | |
10403 | + else { | |
10404 | + last_lock = i; | |
10405 | + } | |
10406 | + | |
10407 | + e = lowcomms_get_buffer(nodeid, | |
10408 | + msg_len, | |
10409 | + ls->ls_allocation, | |
10410 | + (char **) &reply); | |
10411 | + if (!e) { | |
10412 | + kfree(qinfo.gqi_lockinfo); | |
10413 | + status = -ENOBUFS; | |
10414 | + goto out; | |
10415 | + } | |
10416 | + | |
10417 | + reply->rq_header.rh_cmd = GDLM_REMCMD_QUERYREPLY; | |
10418 | + reply->rq_header.rh_length = msg_len; | |
10419 | + reply->rq_header.rh_lkid = msg->rh_lkid; | |
10420 | + reply->rq_header.rh_lockspace = msg->rh_lockspace; | |
10421 | + | |
10422 | + reply->rq_status = status; | |
10423 | + reply->rq_startlock = cur_lock; | |
10424 | + reply->rq_grantcount = qinfo.gqi_resinfo->rsi_grantcount; | |
10425 | + reply->rq_convcount = qinfo.gqi_resinfo->rsi_convcount; | |
10426 | + reply->rq_waitcount = qinfo.gqi_resinfo->rsi_waitcount; | |
10427 | + memcpy(reply->rq_valblk, qinfo.gqi_resinfo->rsi_valblk, DLM_LVB_LEN); | |
10428 | + | |
10429 | + buf = (char *)reply; | |
b7b72b66 | 10430 | + bufidx = sizeof(struct dlm_query_reply); |
c1c6733f AM |
10431 | + |
10432 | + for (; cur_lock < last_lock; cur_lock++) { | |
10433 | + | |
10434 | + buf[bufidx++] = qinfo.gqi_lockinfo[cur_lock].lki_state; | |
10435 | + buf[bufidx++] = qinfo.gqi_lockinfo[cur_lock].lki_grmode; | |
10436 | + buf[bufidx++] = qinfo.gqi_lockinfo[cur_lock].lki_rqmode; | |
10437 | + put_int(qinfo.gqi_lockinfo[cur_lock].lki_lkid, buf, &bufidx); | |
10438 | + put_int(qinfo.gqi_lockinfo[cur_lock].lki_mstlkid, buf, &bufidx); | |
10439 | + put_int(qinfo.gqi_lockinfo[cur_lock].lki_parent, buf, &bufidx); | |
10440 | + put_int(qinfo.gqi_lockinfo[cur_lock].lki_node, buf, &bufidx); | |
b7b72b66 | 10441 | + put_int(qinfo.gqi_lockinfo[cur_lock].lki_ownpid, buf, &bufidx); |
c1c6733f AM |
10442 | + |
10443 | + if (valid_range(&qinfo.gqi_lockinfo[cur_lock].lki_grrange) || | |
10444 | + valid_range(&qinfo.gqi_lockinfo[cur_lock].lki_rqrange)) { | |
10445 | + | |
10446 | + buf[bufidx++] = 1; | |
10447 | + put_int64(qinfo.gqi_lockinfo[cur_lock].lki_grrange.ra_start, buf, &bufidx); | |
10448 | + put_int64(qinfo.gqi_lockinfo[cur_lock].lki_grrange.ra_end, buf, &bufidx); | |
10449 | + put_int64(qinfo.gqi_lockinfo[cur_lock].lki_rqrange.ra_start, buf, &bufidx); | |
10450 | + put_int64(qinfo.gqi_lockinfo[cur_lock].lki_rqrange.ra_end, buf, &bufidx); | |
10451 | + } | |
10452 | + else { | |
10453 | + buf[bufidx++] = 0; | |
10454 | + } | |
10455 | + } | |
10456 | + | |
10457 | + if (cur_lock == qinfo.gqi_lockcount) { | |
10458 | + reply->rq_header.rh_flags = GDLM_REMFLAG_ENDQUERY; | |
10459 | + finished = 1; | |
10460 | + } | |
10461 | + else { | |
10462 | + reply->rq_header.rh_flags = 0; | |
10463 | + } | |
10464 | + | |
10465 | + reply->rq_numlocks = cur_lock - start_lock; | |
10466 | + start_lock = cur_lock; | |
10467 | + | |
10468 | + midcomms_send_buffer(&reply->rq_header, e); | |
10469 | + } while (!finished); | |
10470 | + | |
10471 | + kfree(qinfo.gqi_lockinfo); | |
10472 | + out: | |
10473 | + return status; | |
10474 | + | |
10475 | + send_error: | |
10476 | + e = lowcomms_get_buffer(nodeid, | |
b7b72b66 | 10477 | + sizeof(struct dlm_query_reply), |
c1c6733f AM |
10478 | + ls->ls_allocation, |
10479 | + (char **) &reply); | |
10480 | + if (!e) { | |
10481 | + status = -ENOBUFS; | |
10482 | + goto out; | |
10483 | + } | |
10484 | + reply->rq_header.rh_cmd = GDLM_REMCMD_QUERYREPLY; | |
b7b72b66 AM |
10485 | + reply->rq_header.rh_flags = GDLM_REMFLAG_ENDQUERY; |
10486 | + reply->rq_header.rh_length = sizeof(struct dlm_query_reply); | |
c1c6733f AM |
10487 | + reply->rq_header.rh_lkid = msg->rh_lkid; |
10488 | + reply->rq_header.rh_lockspace = msg->rh_lockspace; | |
10489 | + reply->rq_status = status; | |
10490 | + reply->rq_numlocks = 0; | |
10491 | + reply->rq_startlock = 0; | |
10492 | + reply->rq_grantcount = 0; | |
10493 | + reply->rq_convcount = 0; | |
10494 | + reply->rq_waitcount = 0; | |
10495 | + | |
10496 | + midcomms_send_buffer(&reply->rq_header, e); | |
10497 | + | |
10498 | + return status; | |
10499 | +} | |
10500 | + | |
10501 | +/* Reply to a remote query */ | |
b7b72b66 | 10502 | +int remote_query_reply(int nodeid, struct dlm_ls *ls, struct dlm_header *msg) |
c1c6733f | 10503 | +{ |
b7b72b66 | 10504 | + struct dlm_lkb *query_lkb; |
c1c6733f | 10505 | + struct dlm_queryinfo *qinfo; |
b7b72b66 | 10506 | + struct dlm_query_reply *reply; |
c1c6733f AM |
10507 | + char *buf; |
10508 | + int i; | |
10509 | + int bufidx; | |
10510 | + | |
10511 | + query_lkb = find_lock_by_id(ls, msg->rh_lkid); | |
10512 | + if (!query_lkb) | |
10513 | + return -EINVAL; | |
10514 | + | |
10515 | + qinfo = (struct dlm_queryinfo *) query_lkb->lkb_request; | |
b7b72b66 | 10516 | + reply = (struct dlm_query_reply *) msg; |
c1c6733f AM |
10517 | + |
10518 | + /* Copy the easy bits first */ | |
10519 | + qinfo->gqi_lockcount += reply->rq_numlocks; | |
10520 | + if (qinfo->gqi_resinfo) { | |
10521 | + qinfo->gqi_resinfo->rsi_grantcount = reply->rq_grantcount; | |
10522 | + qinfo->gqi_resinfo->rsi_convcount = reply->rq_convcount; | |
10523 | + qinfo->gqi_resinfo->rsi_waitcount = reply->rq_waitcount; | |
10524 | + memcpy(qinfo->gqi_resinfo->rsi_valblk, reply->rq_valblk, | |
10525 | + DLM_LVB_LEN); | |
10526 | + } | |
10527 | + | |
10528 | + /* Now unpack the locks */ | |
b7b72b66 | 10529 | + bufidx = sizeof(struct dlm_query_reply); |
c1c6733f AM |
10530 | + buf = (char *) msg; |
10531 | + | |
b7b72b66 | 10532 | + DLM_ASSERT(reply->rq_startlock + reply->rq_numlocks <= qinfo->gqi_locksize, |
c1c6733f AM |
10533 | + printk("start = %d, num + %d. Max= %d\n", |
10534 | + reply->rq_startlock, reply->rq_numlocks, qinfo->gqi_locksize);); | |
10535 | + | |
10536 | + for (i = reply->rq_startlock; | |
10537 | + i < reply->rq_startlock + reply->rq_numlocks; i++) { | |
10538 | + qinfo->gqi_lockinfo[i].lki_state = buf[bufidx++]; | |
10539 | + qinfo->gqi_lockinfo[i].lki_grmode = buf[bufidx++]; | |
10540 | + qinfo->gqi_lockinfo[i].lki_rqmode = buf[bufidx++]; | |
10541 | + qinfo->gqi_lockinfo[i].lki_lkid = get_int(buf, &bufidx); | |
10542 | + qinfo->gqi_lockinfo[i].lki_mstlkid = get_int(buf, &bufidx); | |
10543 | + qinfo->gqi_lockinfo[i].lki_parent = get_int(buf, &bufidx); | |
10544 | + qinfo->gqi_lockinfo[i].lki_node = get_int(buf, &bufidx); | |
b7b72b66 | 10545 | + qinfo->gqi_lockinfo[i].lki_ownpid = get_int(buf, &bufidx); |
c1c6733f AM |
10546 | + if (buf[bufidx++]) { |
10547 | + qinfo->gqi_lockinfo[i].lki_grrange.ra_start = get_int64(buf, &bufidx); | |
10548 | + qinfo->gqi_lockinfo[i].lki_grrange.ra_end = get_int64(buf, &bufidx); | |
10549 | + qinfo->gqi_lockinfo[i].lki_rqrange.ra_start = get_int64(buf, &bufidx); | |
10550 | + qinfo->gqi_lockinfo[i].lki_rqrange.ra_end = get_int64(buf, &bufidx); | |
10551 | + } | |
10552 | + else { | |
10553 | + qinfo->gqi_lockinfo[i].lki_grrange.ra_start = 0ULL; | |
10554 | + qinfo->gqi_lockinfo[i].lki_grrange.ra_end = 0xFFFFFFFFFFFFFFFFULL; | |
10555 | + qinfo->gqi_lockinfo[i].lki_rqrange.ra_start = 0ULL; | |
10556 | + qinfo->gqi_lockinfo[i].lki_rqrange.ra_end = 0xFFFFFFFFFFFFFFFFULL; | |
10557 | + } | |
10558 | + } | |
10559 | + | |
10560 | + /* If this was the last block then now tell the user */ | |
10561 | + if (msg->rh_flags & GDLM_REMFLAG_ENDQUERY) { | |
10562 | + query_lkb->lkb_retstatus = reply->rq_status; | |
b7b72b66 | 10563 | + queue_ast(query_lkb, AST_COMP | AST_DEL, 0); |
c1c6733f AM |
10564 | + wake_astd(); |
10565 | + } | |
10566 | + | |
10567 | + return 0; | |
10568 | +} | |
10569 | + | |
10570 | +/* Aggregate resource information */ | |
b7b72b66 | 10571 | +static int query_resource(struct dlm_rsb *rsb, struct dlm_resinfo *resinfo) |
c1c6733f AM |
10572 | +{ |
10573 | + struct list_head *tmp; | |
10574 | + | |
c1c6733f AM |
10575 | + memset(resinfo->rsi_valblk, 0, DLM_LVB_LEN); /* no LVB: report zeros, not stale memory */ |
10576 | + if (rsb->res_lvbptr) | |
10577 | + memcpy(resinfo->rsi_valblk, rsb->res_lvbptr, DLM_LVB_LEN); | |
bb1d8b11 | 10578 | + down_read(&rsb->res_lock); |
c1c6733f AM |
10579 | + resinfo->rsi_grantcount = 0; |
10580 | + list_for_each(tmp, &rsb->res_grantqueue) { | |
10581 | + resinfo->rsi_grantcount++; | |
10582 | + } | |
10583 | + | |
10584 | + resinfo->rsi_waitcount = 0; | |
10585 | + list_for_each(tmp, &rsb->res_waitqueue) { | |
10586 | + resinfo->rsi_waitcount++; | |
10587 | + } | |
10588 | + | |
10589 | + resinfo->rsi_convcount = 0; | |
10590 | + list_for_each(tmp, &rsb->res_convertqueue) { | |
10591 | + resinfo->rsi_convcount++; | |
10592 | + } | |
bb1d8b11 | 10593 | + up_read(&rsb->res_lock); |
c1c6733f AM |
10594 | + |
10595 | + return 0; | |
10596 | +} | |
10597 | + | |
b7b72b66 | 10598 | +static int add_lock(struct dlm_lkb *lkb, struct dlm_queryinfo *qinfo) |
c1c6733f AM |
10599 | +{ |
10600 | + int entry; | |
10601 | + | |
10602 | + /* Don't fill it in if the buffer is full; the caller gets -E2BIG */ | |
10603 | + if (qinfo->gqi_lockcount == qinfo->gqi_locksize) | |
10604 | + return -E2BIG; | |
10605 | + | |
10606 | + /* gqi_lockcount contains the number of locks we have returned */ | |
10607 | + entry = qinfo->gqi_lockcount++; | |
10608 | + | |
10609 | + /* Fun with master copies */ | |
10610 | + if (lkb->lkb_flags & GDLM_LKFLG_MSTCPY) { | |
10611 | + qinfo->gqi_lockinfo[entry].lki_lkid = lkb->lkb_remid; | |
10612 | + qinfo->gqi_lockinfo[entry].lki_mstlkid = lkb->lkb_id; | |
10613 | + } | |
10614 | + else { | |
10615 | + qinfo->gqi_lockinfo[entry].lki_lkid = lkb->lkb_id; | |
10616 | + qinfo->gqi_lockinfo[entry].lki_mstlkid = lkb->lkb_remid; | |
10617 | + } | |
10618 | + | |
10619 | + /* Also make sure we always have a valid nodeid in there, the | |
10620 | + calling end may not know which node "0" is */ | |
10621 | + if (lkb->lkb_nodeid) | |
10622 | + qinfo->gqi_lockinfo[entry].lki_node = lkb->lkb_nodeid; | |
10623 | + else | |
10624 | + qinfo->gqi_lockinfo[entry].lki_node = our_nodeid(); | |
10625 | + | |
10626 | + if (lkb->lkb_parent) | |
10627 | + qinfo->gqi_lockinfo[entry].lki_parent = lkb->lkb_parent->lkb_id; | |
10628 | + else | |
10629 | + qinfo->gqi_lockinfo[entry].lki_parent = 0; | |
10630 | + | |
10631 | + qinfo->gqi_lockinfo[entry].lki_state = lkb->lkb_status; | |
10632 | + qinfo->gqi_lockinfo[entry].lki_rqmode = lkb->lkb_rqmode; | |
10633 | + qinfo->gqi_lockinfo[entry].lki_grmode = lkb->lkb_grmode; | |
b7b72b66 | 10634 | + qinfo->gqi_lockinfo[entry].lki_ownpid = lkb->lkb_ownpid; |
c1c6733f AM |
10635 | + |
10636 | + if (lkb->lkb_range) { | |
10637 | + qinfo->gqi_lockinfo[entry].lki_grrange.ra_start = | |
10638 | + lkb->lkb_range[GR_RANGE_START]; | |
10639 | + qinfo->gqi_lockinfo[entry].lki_grrange.ra_end = | |
10640 | + lkb->lkb_range[GR_RANGE_END]; | |
10641 | + qinfo->gqi_lockinfo[entry].lki_rqrange.ra_start = | |
10642 | + lkb->lkb_range[RQ_RANGE_START]; | |
10643 | + qinfo->gqi_lockinfo[entry].lki_rqrange.ra_end = | |
10644 | + lkb->lkb_range[RQ_RANGE_END]; | |
10645 | + } else { /* no range on this lock: report the full 0..~0 span */ | |
10646 | + qinfo->gqi_lockinfo[entry].lki_grrange.ra_start = 0ULL; | |
10647 | + qinfo->gqi_lockinfo[entry].lki_grrange.ra_end = 0xffffffffffffffffULL; | |
10648 | + qinfo->gqi_lockinfo[entry].lki_rqrange.ra_start = 0ULL; | |
10649 | + qinfo->gqi_lockinfo[entry].lki_rqrange.ra_end = 0xffffffffffffffffULL; | |
10650 | + } | |
10651 | + return 0; | |
10652 | +} | |
10653 | + | |
bb1d8b11 AM |
10654 | +static int query_lkb_queue(struct dlm_rsb *rsb, |
10655 | + struct list_head *queue, int query, | |
c1c6733f AM |
10656 | + struct dlm_queryinfo *qinfo) |
10657 | +{ | |
10658 | + struct list_head *tmp; | |
10659 | + int status = 0; | |
10660 | + int mode = query & DLM_QUERY_MODE_MASK; | |
10661 | + | |
bb1d8b11 | 10662 | + down_read(&rsb->res_lock); |
c1c6733f | 10663 | + list_for_each(tmp, queue) { |
b7b72b66 | 10664 | + struct dlm_lkb *lkb = list_entry(tmp, struct dlm_lkb, lkb_statequeue); |
c1c6733f AM |
10665 | + int lkmode; |
10666 | + | |
10667 | + if (query & DLM_QUERY_RQMODE) | |
10668 | + lkmode = lkb->lkb_rqmode; | |
10669 | + else | |
10670 | + lkmode = lkb->lkb_grmode; | |
10671 | + | |
10672 | + /* Add the LKB info to the list if it matches the criteria in | |
10673 | + * the query bitmap */ | |
10674 | + switch (query & DLM_QUERY_MASK) { | |
10675 | + case DLM_QUERY_LOCKS_ALL: | |
10676 | + status = add_lock(lkb, qinfo); | |
10677 | + break; | |
10678 | + | |
10679 | + case DLM_QUERY_LOCKS_HIGHER: | |
10680 | + if (lkmode > mode) | |
10681 | + status = add_lock(lkb, qinfo); | |
10682 | + break; | |
10683 | + | |
10684 | + case DLM_QUERY_LOCKS_EQUAL: | |
10685 | + if (lkmode == mode) | |
10686 | + status = add_lock(lkb, qinfo); | |
10687 | + break; | |
10688 | + | |
10689 | + case DLM_QUERY_LOCKS_LOWER: | |
10690 | + if (lkmode < mode) | |
10691 | + status = add_lock(lkb, qinfo); | |
c783755a AM |
10692 | + |
10693 | + case DLM_QUERY_LOCKS_ORPHAN: | |
10694 | + if (lkb->lkb_flags & GDLM_LKFLG_ORPHAN) | |
10695 | + status = add_lock(lkb, qinfo); | |
c1c6733f AM |
10696 | + break; |
10697 | + } | |
10698 | + } | |
bb1d8b11 | 10699 | + up_read(&rsb->res_lock); |
c1c6733f AM |
10700 | + return status; |
10701 | +} | |
10702 | + | |
10703 | +/* | |
10704 | + * Return 1 if the locks' ranges overlap | |
10705 | + * If the lkb has no range then it is assumed to cover 0-ffffffff.ffffffff | |
10706 | + */ | |
b7b72b66 | 10707 | +static inline int ranges_overlap(struct dlm_lkb *lkb1, struct dlm_lkb *lkb2) |
c1c6733f AM |
10708 | +{ |
10709 | + if (!lkb1->lkb_range || !lkb2->lkb_range) | |
10710 | + return 1; | |
10711 | + | |
10712 | + if (lkb1->lkb_range[RQ_RANGE_END] <= lkb2->lkb_range[GR_RANGE_START] || | |
10713 | + lkb1->lkb_range[RQ_RANGE_START] >= lkb2->lkb_range[GR_RANGE_END]) | |
10714 | + return 0; | |
10715 | + | |
10716 | + return 1; | |
10717 | +} | |
10718 | +extern const int __dlm_compat_matrix[8][8]; | |
10719 | + | |
10720 | + | |
b7b72b66 | 10721 | +static int get_blocking_locks(struct dlm_lkb *qlkb, struct dlm_queryinfo *qinfo) |
c1c6733f AM |
10722 | +{ |
10723 | + struct list_head *tmp; | |
10724 | + int status = 0; | |
10725 | + | |
bb1d8b11 | 10726 | + down_read(&qlkb->lkb_resource->res_lock); |
c1c6733f | 10727 | + list_for_each(tmp, &qlkb->lkb_resource->res_grantqueue) { |
b7b72b66 | 10728 | + struct dlm_lkb *lkb = list_entry(tmp, struct dlm_lkb, lkb_statequeue); |
c1c6733f AM |
10729 | + |
10730 | + if (ranges_overlap(lkb, qlkb) && | |
10731 | + !__dlm_compat_matrix[lkb->lkb_grmode + 1][qlkb->lkb_rqmode + 1]) | |
10732 | + status = add_lock(lkb, qinfo); | |
10733 | + } | |
bb1d8b11 | 10734 | + up_read(&qlkb->lkb_resource->res_lock); |
c1c6733f AM |
10735 | + |
10736 | + return status; | |
10737 | +} | |
10738 | + | |
b7b72b66 | 10739 | +static int get_nonblocking_locks(struct dlm_lkb *qlkb, struct dlm_queryinfo *qinfo) |
c1c6733f AM |
10740 | +{ |
10741 | + struct list_head *tmp; | |
10742 | + int status = 0; | |
10743 | + | |
bb1d8b11 | 10744 | + down_read(&qlkb->lkb_resource->res_lock); |
c1c6733f | 10745 | + list_for_each(tmp, &qlkb->lkb_resource->res_grantqueue) { |
b7b72b66 | 10746 | + struct dlm_lkb *lkb = list_entry(tmp, struct dlm_lkb, lkb_statequeue); |
c1c6733f AM |
10747 | + |
10748 | + if (!(ranges_overlap(lkb, qlkb) && | |
10749 | + !__dlm_compat_matrix[lkb->lkb_grmode + 1][qlkb->lkb_rqmode + 1])) | |
10750 | + status = add_lock(lkb, qinfo); | |
10751 | + } | |
bb1d8b11 | 10752 | + up_read(&qlkb->lkb_resource->res_lock); |
c1c6733f AM |
10753 | + |
10754 | + return status; | |
10755 | +} | |
10756 | + | |
10757 | +/* Gather a list of appropriate locks */ | |
b7b72b66 | 10758 | +static int query_locks(int query, struct dlm_lkb *lkb, struct dlm_queryinfo *qinfo) |
c1c6733f AM |
10759 | +{ |
10760 | + int status = 0; | |
10761 | + | |
10762 | + | |
10763 | + /* Mask in the actual granted/requsted mode of the lock if LOCK_THIS | |
10764 | + * was requested as the mode | |
10765 | + */ | |
10766 | + if ((query & DLM_QUERY_MODE_MASK) == DLM_LOCK_THIS) { | |
10767 | + query &= ~DLM_QUERY_MODE_MASK; | |
10768 | + if (query & DLM_QUERY_RQMODE) | |
10769 | + query |= lkb->lkb_rqmode; | |
10770 | + else | |
10771 | + query |= lkb->lkb_grmode; | |
10772 | + } | |
10773 | + | |
10774 | + qinfo->gqi_lockcount = 0; | |
10775 | + | |
10776 | + /* BLOCKING/NOTBLOCK only look at the granted queue */ | |
10777 | + if ((query & DLM_QUERY_MASK) == DLM_QUERY_LOCKS_BLOCKING) | |
10778 | + return get_blocking_locks(lkb, qinfo); | |
10779 | + | |
10780 | + if ((query & DLM_QUERY_MASK) == DLM_QUERY_LOCKS_NOTBLOCK) | |
10781 | + return get_nonblocking_locks(lkb, qinfo); | |
10782 | + | |
10783 | + /* Do the lock queues that were requested */ | |
10784 | + if (query & DLM_QUERY_QUEUE_GRANT) { | |
bb1d8b11 AM |
10785 | + status = query_lkb_queue(lkb->lkb_resource, |
10786 | + &lkb->lkb_resource->res_grantqueue, | |
c1c6733f AM |
10787 | + query, qinfo); |
10788 | + } | |
10789 | + | |
10790 | + if (!status && (query & DLM_QUERY_QUEUE_CONVERT)) { | |
bb1d8b11 AM |
10791 | + status = query_lkb_queue(lkb->lkb_resource, |
10792 | + &lkb->lkb_resource->res_convertqueue, | |
c1c6733f AM |
10793 | + query, qinfo); |
10794 | + } | |
10795 | + | |
10796 | + if (!status && (query & DLM_QUERY_QUEUE_WAIT)) { | |
bb1d8b11 AM |
10797 | + status = query_lkb_queue(lkb->lkb_resource, |
10798 | + &lkb->lkb_resource->res_waitqueue, | |
c1c6733f AM |
10799 | + query, qinfo); |
10800 | + } | |
10801 | + | |
10802 | + | |
10803 | + return status; | |
10804 | +} | |
10805 | + | |
10806 | +EXPORT_SYMBOL(dlm_query); | |
10807 | +/* | |
10808 | + * Overrides for Emacs so that we follow Linus's tabbing style. | |
10809 | + * Emacs will notice this stuff at the end of the file and automatically | |
10810 | + * adjust the settings for this buffer only. This must remain at the end | |
10811 | + * of the file. | |
10812 | + * --------------------------------------------------------------------------- | |
10813 | + * Local variables: | |
10814 | + * c-file-style: "linux" | |
10815 | + * End: | |
10816 | + */ | |
10817 | diff -urN linux-orig/cluster/dlm/queries.h linux-patched/cluster/dlm/queries.h | |
10818 | --- linux-orig/cluster/dlm/queries.h 1970-01-01 07:30:00.000000000 +0730 | |
bb1d8b11 | 10819 | +++ linux-patched/cluster/dlm/queries.h 2004-11-03 11:31:56.000000000 +0800 |
c1c6733f AM |
10820 | @@ -0,0 +1,20 @@ |
10821 | +/****************************************************************************** | |
10822 | +******************************************************************************* | |
10823 | +** | |
10824 | +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | |
10825 | +** Copyright (C) 2004 Red Hat, Inc. All rights reserved. | |
10826 | +** | |
10827 | +** This copyrighted material is made available to anyone wishing to use, | |
10828 | +** modify, copy, or redistribute it subject to the terms and conditions | |
10829 | +** of the GNU General Public License v.2. | |
10830 | +** | |
10831 | +******************************************************************************* | |
10832 | +******************************************************************************/ | |
10833 | + | |
10834 | +#ifndef __QUERIES_DOT_H__ | |
10835 | +#define __QUERIES_DOT_H__ | |
10836 | + | |
b7b72b66 AM |
10837 | +extern int remote_query(int nodeid, struct dlm_ls *ls, struct dlm_header *msg); |
10838 | +extern int remote_query_reply(int nodeid, struct dlm_ls *ls, struct dlm_header *msg); | |
c1c6733f AM |
10839 | + |
10840 | +#endif /* __QUERIES_DOT_H__ */ | |
10841 | diff -urN linux-orig/cluster/dlm/rebuild.c linux-patched/cluster/dlm/rebuild.c | |
10842 | --- linux-orig/cluster/dlm/rebuild.c 1970-01-01 07:30:00.000000000 +0730 | |
bb1d8b11 | 10843 | +++ linux-patched/cluster/dlm/rebuild.c 2004-11-03 11:31:56.000000000 +0800 |
b7b72b66 | 10844 | @@ -0,0 +1,1280 @@ |
c1c6733f AM |
10845 | +/****************************************************************************** |
10846 | +******************************************************************************* | |
10847 | +** | |
10848 | +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | |
10849 | +** Copyright (C) 2004 Red Hat, Inc. All rights reserved. | |
b7b72b66 | 10850 | +** |
c1c6733f AM |
10851 | +** This copyrighted material is made available to anyone wishing to use, |
10852 | +** modify, copy, or redistribute it subject to the terms and conditions | |
10853 | +** of the GNU General Public License v.2. | |
10854 | +** | |
10855 | +******************************************************************************* | |
10856 | +******************************************************************************/ | |
10857 | + | |
b7b72b66 | 10858 | +/* |
c1c6733f AM |
10859 | + * Rebuild RSB's on new masters. Functions for transferring locks and |
10860 | + * subresources to new RSB masters during recovery. | |
10861 | + */ | |
10862 | + | |
10863 | +#include "dlm_internal.h" | |
10864 | +#include "reccomms.h" | |
10865 | +#include "lkb.h" | |
10866 | +#include "rsb.h" | |
10867 | +#include "nodes.h" | |
10868 | +#include "config.h" | |
10869 | +#include "memory.h" | |
10870 | +#include "recover.h" | |
10871 | + | |
10872 | + | |
10873 | +/* Types of entity serialised in remastering messages */ | |
10874 | +#define REMASTER_ROOTRSB 1 | |
10875 | +#define REMASTER_RSB 2 | |
10876 | +#define REMASTER_LKB 3 | |
10877 | + | |
/*
 * State carried while packing rsbs/lkbs into an outgoing buffer, including
 * the position to resume from when the buffer fills up.
 */
struct rcom_fill {
	char *outbuf;			/* start of the data being built */
	int offset;			/* current write offset into outbuf */
	int maxlen;			/* largest value offset may reach */
	int remasterid;			/* last remaster id handed out; bumped
					   for each sub-rsb that is packed */
	int count;			/* incremented for every lkb packed */
	struct dlm_rsb *rsb;		/* NOTE(review): used outside this
					   view - presumably the root rsb */
	struct dlm_rsb *subrsb;		/* sub-rsb that ran out of space */
	struct dlm_lkb *lkb;		/* lkb that did not fit */
	struct list_head *lkbqueue;	/* queue that lkb was found on */
	char more;			/* NOTE(review): set outside this view */
};
typedef struct rcom_fill rcom_fill_t;
10891 | + | |
10892 | + | |
10893 | +struct rebuild_node { | |
10894 | + struct list_head list; | |
10895 | + int nodeid; | |
b7b72b66 | 10896 | + struct dlm_rsb * rootrsb; |
c1c6733f AM |
10897 | +}; |
10898 | +typedef struct rebuild_node rebuild_node_t; | |
10899 | + | |
10900 | + | |
b7b72b66 | 10901 | +/* |
c1c6733f AM |
10902 | + * Root rsb passed in for which all lkb's (own and subrsbs) will be sent to new |
10903 | + * master. The rsb will be "done" with recovery when the new master has | |
10904 | + * replied with all the new remote lockid's for this rsb's lkb's. | |
10905 | + */ | |
10906 | + | |
b7b72b66 | 10907 | +void expect_new_lkids(struct dlm_rsb *rsb) |
c1c6733f AM |
10908 | +{ |
10909 | + rsb->res_newlkid_expect = 0; | |
10910 | + recover_list_add(rsb); | |
10911 | +} | |
10912 | + | |
b7b72b66 | 10913 | +/* |
c1c6733f AM |
10914 | + * This function is called on root rsb or subrsb when another lkb is being sent |
10915 | + * to the new master for which we expect to receive a corresponding remote lkid | |
10916 | + */ | |
10917 | + | |
b7b72b66 | 10918 | +void need_new_lkid(struct dlm_rsb *rsb) |
c1c6733f | 10919 | +{ |
b7b72b66 | 10920 | + struct dlm_rsb *root = rsb; |
c1c6733f AM |
10921 | + |
10922 | + if (rsb->res_parent) | |
10923 | + root = rsb->res_root; | |
10924 | + | |
10925 | + if (!root->res_newlkid_expect) | |
10926 | + recover_list_add(root); | |
10927 | + else | |
b7b72b66 | 10928 | + DLM_ASSERT(test_bit(RESFL_RECOVER_LIST, &root->res_flags),); |
c1c6733f AM |
10929 | + |
10930 | + root->res_newlkid_expect++; | |
10931 | +} | |
10932 | + | |
b7b72b66 | 10933 | +/* |
c1c6733f AM |
10934 | + * This function is called for each lkb for which a new remote lkid is |
10935 | + * received. Decrement the expected number of remote lkids expected for the | |
10936 | + * root rsb. | |
10937 | + */ | |
10938 | + | |
b7b72b66 | 10939 | +void have_new_lkid(struct dlm_lkb *lkb) |
c1c6733f | 10940 | +{ |
b7b72b66 | 10941 | + struct dlm_rsb *root = lkb->lkb_resource; |
c1c6733f AM |
10942 | + |
10943 | + if (root->res_parent) | |
10944 | + root = root->res_root; | |
10945 | + | |
10946 | + down_write(&root->res_lock); | |
10947 | + | |
b7b72b66 AM |
10948 | + DLM_ASSERT(root->res_newlkid_expect, |
10949 | + printk("newlkid_expect=%d\n", root->res_newlkid_expect);); | |
c1c6733f AM |
10950 | + |
10951 | + root->res_newlkid_expect--; | |
10952 | + | |
10953 | + if (!root->res_newlkid_expect) { | |
10954 | + clear_bit(RESFL_NEW_MASTER, &root->res_flags); | |
10955 | + recover_list_del(root); | |
10956 | + } | |
10957 | + up_write(&root->res_lock); | |
10958 | +} | |
10959 | + | |
b7b72b66 | 10960 | +/* |
c1c6733f AM |
10961 | + * Return the rebuild struct for a node - will create an entry on the rootrsb |
10962 | + * list if necessary. | |
10963 | + * | |
b7b72b66 | 10964 | + * Currently no locking is needed here as it all happens in the dlm_recvd |
c1c6733f AM |
10965 | + * thread |
10966 | + */ | |
10967 | + | |
b7b72b66 | 10968 | +static rebuild_node_t *find_rebuild_root(struct dlm_ls *ls, int nodeid) |
c1c6733f AM |
10969 | +{ |
10970 | + rebuild_node_t *node = NULL; | |
10971 | + | |
10972 | + list_for_each_entry(node, &ls->ls_rebuild_rootrsb_list, list) { | |
10973 | + if (node->nodeid == nodeid) | |
10974 | + return node; | |
10975 | + } | |
10976 | + | |
10977 | + /* Not found, add one */ | |
10978 | + node = kmalloc(sizeof(rebuild_node_t), GFP_KERNEL); | |
10979 | + if (!node) | |
10980 | + return NULL; | |
10981 | + | |
10982 | + node->nodeid = nodeid; | |
10983 | + node->rootrsb = NULL; | |
10984 | + list_add(&node->list, &ls->ls_rebuild_rootrsb_list); | |
10985 | + | |
10986 | + return node; | |
10987 | +} | |
10988 | + | |
b7b72b66 | 10989 | +/* |
c1c6733f AM |
10990 | + * Tidy up after a rebuild run. Called when all recovery has finished |
10991 | + */ | |
10992 | + | |
b7b72b66 | 10993 | +void rebuild_freemem(struct dlm_ls *ls) |
c1c6733f AM |
10994 | +{ |
10995 | + rebuild_node_t *node = NULL, *s; | |
10996 | + | |
10997 | + list_for_each_entry_safe(node, s, &ls->ls_rebuild_rootrsb_list, list) { | |
10998 | + list_del(&node->list); | |
10999 | + kfree(node); | |
11000 | + } | |
11001 | +} | |
11002 | + | |
/*
 * Store x at buf + *offp after passing it through cpu_to_le32(), then
 * advance *offp by sizeof(int).  The caller must make sure there is room.
 */
static void put_int(int x, char *buf, int *offp)
{
	int wire = cpu_to_le32(x);

	memcpy(&buf[*offp], &wire, sizeof(int));
	*offp += sizeof(int);
}
11009 | + | |
/*
 * Store x at buf + *offp after passing it through cpu_to_le64(), then
 * advance *offp by sizeof(uint64_t).  The caller must make sure there is
 * room.
 */
static void put_int64(uint64_t x, char *buf, int *offp)
{
	uint64_t wire = cpu_to_le64(x);

	memcpy(&buf[*offp], &wire, sizeof(uint64_t));
	*offp += sizeof(uint64_t);
}
11016 | + | |
/*
 * Store a counted byte string: first len itself (via put_int()), then the
 * len bytes at x.  *offp is advanced past both.
 */
static void put_bytes(char *x, int len, char *buf, int *offp)
{
	char *dst;

	put_int(len, buf, offp);

	dst = buf + *offp;
	memcpy(dst, x, len);
	*offp += len;
}
11023 | + | |
/* Store the single byte x at buf[*offp] and advance *offp by one. */
static void put_char(char x, char *buf, int *offp)
{
	buf[(*offp)++] = x;
}
11029 | + | |
/*
 * Read sizeof(int) bytes from buf + *offp, advance *offp past them and
 * return the value after passing it through le32_to_cpu().
 */
static int get_int(char *buf, int *offp)
{
	int wire;

	memcpy(&wire, &buf[*offp], sizeof(int));
	*offp += sizeof(int);

	return le32_to_cpu(wire);
}
11037 | + | |
/*
 * Read sizeof(uint64_t) bytes from buf + *offp, advance *offp past them and
 * return the value after passing it through le64_to_cpu().
 */
static uint64_t get_int64(char *buf, int *offp)
{
	uint64_t wire;

	memcpy(&wire, &buf[*offp], sizeof(uint64_t));
	*offp += sizeof(uint64_t);

	return le64_to_cpu(wire);
}
11046 | + | |
/* Return the byte at buf[*offp] and advance *offp by one. */
static char get_char(char *buf, int *offp)
{
	return buf[(*offp)++];
}
11054 | + | |
/*
 * Read a counted byte string written by put_bytes(): the length is read with
 * get_int() and returned through *len, then that many bytes are copied into
 * 'bytes'.  *offp is advanced past both.
 *
 * NOTE(review): the length comes straight from the buffer and is not checked
 * against the size of 'bytes' here - the caller has to guarantee it fits.
 */
static void get_bytes(char *bytes, int *len, char *buf, int *offp)
{
	int n = get_int(buf, offp);

	*len = n;
	memcpy(bytes, buf + *offp, n);
	*offp += n;
}
11061 | + | |
b7b72b66 | 11062 | +static int lkb_length(struct dlm_lkb *lkb) |
c1c6733f AM |
11063 | +{ |
11064 | + int len = 0; | |
11065 | + | |
11066 | + len += sizeof(int); /* lkb_id */ | |
11067 | + len += sizeof(int); /* lkb_resource->res_reamasterid */ | |
11068 | + len += sizeof(int); /* lkb_flags */ | |
11069 | + len += sizeof(int); /* lkb_status */ | |
11070 | + len += sizeof(char); /* lkb_rqmode */ | |
11071 | + len += sizeof(char); /* lkb_grmode */ | |
11072 | + len += sizeof(int); /* lkb_childcnt */ | |
11073 | + len += sizeof(int); /* lkb_parent->lkb_id */ | |
11074 | + len += sizeof(int); /* lkb_bastaddr */ | |
b7b72b66 | 11075 | + len += sizeof(int); /* lkb_ownpid */ |
c1c6733f AM |
11076 | + |
11077 | + if (lkb->lkb_flags & GDLM_LKFLG_VALBLK) { | |
11078 | + len += sizeof(int); /* number of lvb bytes */ | |
11079 | + len += DLM_LVB_LEN; | |
11080 | + } | |
11081 | + | |
11082 | + if (lkb->lkb_range) { | |
11083 | + len += sizeof(uint64_t); | |
11084 | + len += sizeof(uint64_t); | |
11085 | + if (lkb->lkb_status == GDLM_LKSTS_CONVERT) { | |
11086 | + len += sizeof(uint64_t); | |
11087 | + len += sizeof(uint64_t); | |
11088 | + } | |
11089 | + } | |
11090 | + | |
11091 | + return len; | |
11092 | +} | |
11093 | + | |
b7b72b66 | 11094 | +/* |
c1c6733f AM |
11095 | + * It's up to the caller to be sure there's enough space in the buffer. |
11096 | + */ | |
11097 | + | |
b7b72b66 | 11098 | +static void serialise_lkb(struct dlm_lkb *lkb, char *buf, int *offp) |
c1c6733f AM |
11099 | +{ |
11100 | + int flags; | |
11101 | + | |
11102 | + /* Need to tell the remote end if we have a range */ | |
11103 | + flags = lkb->lkb_flags; | |
11104 | + if (lkb->lkb_range) | |
11105 | + flags |= GDLM_LKFLG_RANGE; | |
11106 | + | |
b7b72b66 | 11107 | + /* |
c1c6733f AM |
11108 | + * See lkb_length() |
11109 | + * Total: 30 (no lvb) or 66 (with lvb) bytes | |
11110 | + */ | |
11111 | + | |
11112 | + put_int(lkb->lkb_id, buf, offp); | |
11113 | + put_int(lkb->lkb_resource->res_remasterid, buf, offp); | |
11114 | + put_int(flags, buf, offp); | |
11115 | + put_int(lkb->lkb_status, buf, offp); | |
11116 | + put_char(lkb->lkb_rqmode, buf, offp); | |
11117 | + put_char(lkb->lkb_grmode, buf, offp); | |
11118 | + put_int(atomic_read(&lkb->lkb_childcnt), buf, offp); | |
11119 | + | |
11120 | + if (lkb->lkb_parent) | |
11121 | + put_int(lkb->lkb_parent->lkb_id, buf, offp); | |
11122 | + else | |
11123 | + put_int(0, buf, offp); | |
11124 | + | |
11125 | + if (lkb->lkb_bastaddr) | |
11126 | + put_int(1, buf, offp); | |
11127 | + else | |
11128 | + put_int(0, buf, offp); | |
b7b72b66 | 11129 | + put_int(lkb->lkb_ownpid, buf, offp); |
c1c6733f AM |
11130 | + |
11131 | + if (lkb->lkb_flags & GDLM_LKFLG_VALBLK) { | |
b7b72b66 | 11132 | + DLM_ASSERT(lkb->lkb_lvbptr,); |
c1c6733f AM |
11133 | + put_bytes(lkb->lkb_lvbptr, DLM_LVB_LEN, buf, offp); |
11134 | + } | |
11135 | + | |
11136 | + /* Only send the range we actually need */ | |
11137 | + if (lkb->lkb_range) { | |
11138 | + switch (lkb->lkb_status) { | |
11139 | + case GDLM_LKSTS_CONVERT: | |
11140 | + put_int64(lkb->lkb_range[RQ_RANGE_START], buf, offp); | |
11141 | + put_int64(lkb->lkb_range[RQ_RANGE_END], buf, offp); | |
11142 | + put_int64(lkb->lkb_range[GR_RANGE_START], buf, offp); | |
11143 | + put_int64(lkb->lkb_range[GR_RANGE_END], buf, offp); | |
11144 | + break; | |
11145 | + case GDLM_LKSTS_WAITING: | |
11146 | + put_int64(lkb->lkb_range[RQ_RANGE_START], buf, offp); | |
11147 | + put_int64(lkb->lkb_range[RQ_RANGE_END], buf, offp); | |
11148 | + break; | |
11149 | + case GDLM_LKSTS_GRANTED: | |
11150 | + put_int64(lkb->lkb_range[GR_RANGE_START], buf, offp); | |
11151 | + put_int64(lkb->lkb_range[GR_RANGE_END], buf, offp); | |
11152 | + break; | |
11153 | + default: | |
b7b72b66 | 11154 | + DLM_ASSERT(0,); |
c1c6733f AM |
11155 | + } |
11156 | + } | |
11157 | +} | |
11158 | + | |
b7b72b66 | 11159 | +static int rsb_length(struct dlm_rsb *rsb) |
c1c6733f AM |
11160 | +{ |
11161 | + int len = 0; | |
11162 | + | |
11163 | + len += sizeof(int); /* number of res_name bytes */ | |
11164 | + len += rsb->res_length; /* res_name */ | |
11165 | + len += sizeof(int); /* res_remasterid */ | |
11166 | + len += sizeof(int); /* res_parent->res_remasterid */ | |
11167 | + | |
11168 | + return len; | |
11169 | +} | |
11170 | + | |
b7b72b66 | 11171 | +static inline struct dlm_rsb *next_subrsb(struct dlm_rsb *subrsb) |
c1c6733f AM |
11172 | +{ |
11173 | + struct list_head *tmp; | |
b7b72b66 | 11174 | + struct dlm_rsb *r; |
c1c6733f AM |
11175 | + |
11176 | + tmp = subrsb->res_subreslist.next; | |
b7b72b66 | 11177 | + r = list_entry(tmp, struct dlm_rsb, res_subreslist); |
c1c6733f AM |
11178 | + |
11179 | + return r; | |
11180 | +} | |
11181 | + | |
b7b72b66 | 11182 | +static inline int last_in_list(struct dlm_rsb *r, struct list_head *head) |
c1c6733f | 11183 | +{ |
b7b72b66 AM |
11184 | + struct dlm_rsb *last; |
11185 | + last = list_entry(head->prev, struct dlm_rsb, res_subreslist); | |
c1c6733f AM |
11186 | + if (last == r) |
11187 | + return 1; | |
11188 | + return 0; | |
11189 | +} | |
11190 | + | |
b7b72b66 AM |
11191 | +static int lkbs_to_remaster_list(struct list_head *head) |
11192 | +{ | |
11193 | + struct dlm_lkb *lkb; | |
11194 | + | |
11195 | + list_for_each_entry(lkb, head, lkb_statequeue) { | |
11196 | + if (lkb->lkb_flags & GDLM_LKFLG_NOREBUILD) | |
11197 | + continue; | |
11198 | + return TRUE; | |
11199 | + } | |
11200 | + return FALSE; | |
11201 | +} | |
11202 | + | |
11203 | +/* | |
c1c6733f | 11204 | + * Used to decide if an rsb should be rebuilt on a new master. An rsb only |
b7b72b66 AM |
11205 | + * needs to be rebuild if we have lkb's queued on it. NOREBUILD lkb's are not |
11206 | + * rebuilt. | |
c1c6733f AM |
11207 | + */ |
11208 | + | |
b7b72b66 | 11209 | +static int lkbs_to_remaster(struct dlm_rsb *r) |
c1c6733f | 11210 | +{ |
b7b72b66 | 11211 | + struct dlm_rsb *sub; |
c1c6733f | 11212 | + |
b7b72b66 | 11213 | + if (lkbs_to_remaster_list(&r->res_grantqueue)) |
c1c6733f | 11214 | + return TRUE; |
b7b72b66 AM |
11215 | + if (lkbs_to_remaster_list(&r->res_convertqueue)) |
11216 | + return TRUE; | |
11217 | + if (lkbs_to_remaster_list(&r->res_waitqueue)) | |
c1c6733f | 11218 | + return TRUE; |
c1c6733f AM |
11219 | + |
11220 | + list_for_each_entry(sub, &r->res_subreslist, res_subreslist) { | |
b7b72b66 | 11221 | + if (lkbs_to_remaster_list(&sub->res_grantqueue)) |
c1c6733f | 11222 | + return TRUE; |
b7b72b66 AM |
11223 | + if (lkbs_to_remaster_list(&sub->res_convertqueue)) |
11224 | + return TRUE; | |
11225 | + if (lkbs_to_remaster_list(&sub->res_waitqueue)) | |
c1c6733f | 11226 | + return TRUE; |
c1c6733f AM |
11227 | + } |
11228 | + | |
11229 | + return FALSE; | |
11230 | +} | |
11231 | + | |
b7b72b66 | 11232 | +static void serialise_rsb(struct dlm_rsb *rsb, char *buf, int *offp) |
c1c6733f | 11233 | +{ |
b7b72b66 | 11234 | + /* |
c1c6733f AM |
11235 | + * See rsb_length() |
11236 | + * Total: 36 bytes (4 + 24 + 4 + 4) | |
11237 | + */ | |
11238 | + | |
11239 | + put_bytes(rsb->res_name, rsb->res_length, buf, offp); | |
11240 | + put_int(rsb->res_remasterid, buf, offp); | |
11241 | + | |
11242 | + if (rsb->res_parent) | |
11243 | + put_int(rsb->res_parent->res_remasterid, buf, offp); | |
11244 | + else | |
11245 | + put_int(0, buf, offp); | |
11246 | + | |
b7b72b66 | 11247 | + DLM_ASSERT(!rsb->res_lvbptr,); |
c1c6733f AM |
11248 | +} |
11249 | + | |
b7b72b66 | 11250 | +/* |
c1c6733f AM |
11251 | + * Flatten an LKB into a buffer for sending to the new RSB master. As a |
11252 | + * side-effect the nodeid of the lock is set to the nodeid of the new RSB | |
11253 | + * master. | |
11254 | + */ | |
11255 | + | |
b7b72b66 AM |
11256 | +static int pack_one_lkb(struct dlm_rsb *r, struct dlm_lkb *lkb, |
11257 | + rcom_fill_t *fill) | |
c1c6733f AM |
11258 | +{ |
11259 | + if (fill->offset + 1 + lkb_length(lkb) > fill->maxlen) | |
11260 | + goto nospace; | |
11261 | + | |
11262 | + lkb->lkb_nodeid = r->res_nodeid; | |
11263 | + | |
11264 | + put_char(REMASTER_LKB, fill->outbuf, &fill->offset); | |
11265 | + serialise_lkb(lkb, fill->outbuf, &fill->offset); | |
11266 | + | |
11267 | + fill->count++; | |
11268 | + need_new_lkid(r); | |
11269 | + return 0; | |
11270 | + | |
11271 | + nospace: | |
11272 | + return -ENOSPC; | |
11273 | +} | |
11274 | + | |
b7b72b66 | 11275 | +/* |
c1c6733f AM |
11276 | + * Pack all LKB's from a given queue, except for those with the NOREBUILD flag. |
11277 | + */ | |
11278 | + | |
b7b72b66 | 11279 | +static int pack_lkb_queue(struct dlm_rsb *r, struct list_head *queue, |
c1c6733f AM |
11280 | + rcom_fill_t *fill) |
11281 | +{ | |
b7b72b66 | 11282 | + struct dlm_lkb *lkb; |
c1c6733f AM |
11283 | + int error; |
11284 | + | |
11285 | + list_for_each_entry(lkb, queue, lkb_statequeue) { | |
11286 | + if (lkb->lkb_flags & GDLM_LKFLG_NOREBUILD) | |
11287 | + continue; | |
11288 | + | |
11289 | + error = pack_one_lkb(r, lkb, fill); | |
11290 | + if (error) | |
11291 | + goto nospace; | |
11292 | + } | |
11293 | + | |
11294 | + return 0; | |
11295 | + | |
11296 | + nospace: | |
11297 | + fill->lkb = lkb; | |
11298 | + fill->lkbqueue = queue; | |
11299 | + | |
11300 | + return error; | |
11301 | +} | |
11302 | + | |
b7b72b66 | 11303 | +static int pack_lkb_queues(struct dlm_rsb *r, rcom_fill_t *fill) |
c1c6733f AM |
11304 | +{ |
11305 | + int error; | |
11306 | + | |
11307 | + error = pack_lkb_queue(r, &r->res_grantqueue, fill); | |
11308 | + if (error) | |
11309 | + goto nospace; | |
11310 | + | |
11311 | + error = pack_lkb_queue(r, &r->res_convertqueue, fill); | |
11312 | + if (error) | |
11313 | + goto nospace; | |
11314 | + | |
11315 | + error = pack_lkb_queue(r, &r->res_waitqueue, fill); | |
11316 | + | |
11317 | + nospace: | |
11318 | + return error; | |
11319 | +} | |
11320 | + | |
b7b72b66 | 11321 | +/* |
c1c6733f AM |
11322 | + * Pack remaining lkb's for rsb or subrsb. This may include a partial lkb |
11323 | + * queue and full lkb queues. | |
11324 | + */ | |
11325 | + | |
b7b72b66 | 11326 | +static int pack_lkb_remaining(struct dlm_rsb *r, rcom_fill_t *fill) |
c1c6733f AM |
11327 | +{ |
11328 | + struct list_head *tmp, *start, *end; | |
b7b72b66 | 11329 | + struct dlm_lkb *lkb; |
c1c6733f AM |
11330 | + int error; |
11331 | + | |
b7b72b66 | 11332 | + /* |
c1c6733f AM |
11333 | + * Beginning with fill->lkb, pack remaining lkb's on fill->lkbqueue. |
11334 | + */ | |
11335 | + | |
11336 | + error = pack_one_lkb(r, fill->lkb, fill); | |
11337 | + if (error) | |
11338 | + goto out; | |
11339 | + | |
11340 | + start = fill->lkb->lkb_statequeue.next; | |
11341 | + end = fill->lkbqueue; | |
11342 | + | |
11343 | + for (tmp = start; tmp != end; tmp = tmp->next) { | |
b7b72b66 | 11344 | + lkb = list_entry(tmp, struct dlm_lkb, lkb_statequeue); |
c1c6733f AM |
11345 | + |
11346 | + error = pack_one_lkb(r, lkb, fill); | |
11347 | + if (error) { | |
11348 | + fill->lkb = lkb; | |
11349 | + goto out; | |
11350 | + } | |
11351 | + } | |
11352 | + | |
b7b72b66 | 11353 | + /* |
c1c6733f AM |
11354 | + * Pack all lkb's on r's queues following fill->lkbqueue. |
11355 | + */ | |
11356 | + | |
11357 | + if (fill->lkbqueue == &r->res_waitqueue) | |
11358 | + goto out; | |
11359 | + if (fill->lkbqueue == &r->res_convertqueue) | |
11360 | + goto skip; | |
11361 | + | |
b7b72b66 | 11362 | + DLM_ASSERT(fill->lkbqueue == &r->res_grantqueue,); |
c1c6733f AM |
11363 | + |
11364 | + error = pack_lkb_queue(r, &r->res_convertqueue, fill); | |
11365 | + if (error) | |
11366 | + goto out; | |
11367 | + skip: | |
11368 | + error = pack_lkb_queue(r, &r->res_waitqueue, fill); | |
11369 | + | |
11370 | + out: | |
11371 | + return error; | |
11372 | +} | |
11373 | + | |
b7b72b66 AM |
11374 | +static int pack_one_subrsb(struct dlm_rsb *rsb, struct dlm_rsb *subrsb, |
11375 | + rcom_fill_t *fill) | |
c1c6733f AM |
11376 | +{ |
11377 | + int error; | |
11378 | + | |
11379 | + down_write(&subrsb->res_lock); | |
11380 | + | |
11381 | + if (fill->offset + 1 + rsb_length(subrsb) > fill->maxlen) | |
11382 | + goto nospace; | |
11383 | + | |
11384 | + subrsb->res_nodeid = rsb->res_nodeid; | |
11385 | + subrsb->res_remasterid = ++fill->remasterid; | |
11386 | + | |
11387 | + put_char(REMASTER_RSB, fill->outbuf, &fill->offset); | |
11388 | + serialise_rsb(subrsb, fill->outbuf, &fill->offset); | |
11389 | + | |
11390 | + error = pack_lkb_queues(subrsb, fill); | |
11391 | + if (error) | |
11392 | + goto nospace; | |
11393 | + | |
11394 | + up_write(&subrsb->res_lock); | |
11395 | + | |
11396 | + return 0; | |
11397 | + | |
11398 | + nospace: | |
11399 | + up_write(&subrsb->res_lock); | |
11400 | + fill->subrsb = subrsb; | |
11401 | + | |
11402 | + return -ENOSPC; | |
11403 | +} | |
11404 | + | |
b7b72b66 AM |
11405 | +static int pack_subrsbs(struct dlm_rsb *rsb, struct dlm_rsb *in_subrsb, |
11406 | + rcom_fill_t *fill) | |
c1c6733f | 11407 | +{ |
b7b72b66 | 11408 | + struct dlm_rsb *subrsb; |
c1c6733f AM |
11409 | + int error = 0; |
11410 | + | |
b7b72b66 | 11411 | + /* |
c1c6733f AM |
11412 | + * When an initial subrsb is given, we know it needs to be packed. |
11413 | + * When no initial subrsb is given, begin with the first (if any exist). | |
11414 | + */ | |
11415 | + | |
11416 | + if (!in_subrsb) { | |
11417 | + if (list_empty(&rsb->res_subreslist)) | |
11418 | + goto out; | |
11419 | + | |
b7b72b66 | 11420 | + subrsb = list_entry(rsb->res_subreslist.next, struct dlm_rsb, |
c1c6733f AM |
11421 | + res_subreslist); |
11422 | + } else | |
11423 | + subrsb = in_subrsb; | |
11424 | + | |
11425 | + for (;;) { | |
11426 | + error = pack_one_subrsb(rsb, subrsb, fill); | |
11427 | + if (error) | |
11428 | + goto out; | |
11429 | + | |
11430 | + if (last_in_list(subrsb, &rsb->res_subreslist)) | |
11431 | + break; | |
11432 | + | |
11433 | + subrsb = next_subrsb(subrsb); | |
11434 | + } | |
11435 | + | |
11436 | + out: | |
11437 | + return error; | |
11438 | +} | |
11439 | + | |
b7b72b66 | 11440 | +/* |
c1c6733f AM |
11441 | + * Finish packing whatever is left in an rsb tree. If space runs out while |
11442 | + * finishing, save subrsb/lkb and this will be called again for the same rsb. | |
11443 | + * | |
11444 | + * !subrsb && lkb, we left off part way through root rsb's lkbs. | |
11445 | + * subrsb && !lkb, we left off just before starting a new subrsb. | |
11446 | + * subrsb && lkb, we left off part way through a subrsb's lkbs. | |
11447 | + * !subrsb && !lkb, we shouldn't be in this function, but starting | |
11448 | + * a new rsb in pack_rsb_tree(). | |
11449 | + */ | |
11450 | + | |
b7b72b66 | 11451 | +static int pack_rsb_tree_remaining(struct dlm_ls *ls, struct dlm_rsb *rsb, |
c1c6733f AM |
11452 | + rcom_fill_t *fill) |
11453 | +{ | |
b7b72b66 | 11454 | + struct dlm_rsb *subrsb = NULL; |
c1c6733f AM |
11455 | + int error = 0; |
11456 | + | |
11457 | + if (!fill->subrsb && fill->lkb) { | |
11458 | + error = pack_lkb_remaining(rsb, fill); | |
11459 | + if (error) | |
11460 | + goto out; | |
11461 | + | |
11462 | + error = pack_subrsbs(rsb, NULL, fill); | |
11463 | + if (error) | |
11464 | + goto out; | |
11465 | + } | |
11466 | + | |
11467 | + else if (fill->subrsb && !fill->lkb) { | |
11468 | + error = pack_subrsbs(rsb, fill->subrsb, fill); | |
11469 | + if (error) | |
11470 | + goto out; | |
11471 | + } | |
11472 | + | |
11473 | + else if (fill->subrsb && fill->lkb) { | |
11474 | + error = pack_lkb_remaining(fill->subrsb, fill); | |
11475 | + if (error) | |
11476 | + goto out; | |
11477 | + | |
11478 | + if (last_in_list(fill->subrsb, &fill->rsb->res_subreslist)) | |
11479 | + goto out; | |
11480 | + | |
11481 | + subrsb = next_subrsb(fill->subrsb); | |
11482 | + | |
11483 | + error = pack_subrsbs(rsb, subrsb, fill); | |
11484 | + if (error) | |
11485 | + goto out; | |
11486 | + } | |
11487 | + | |
11488 | + fill->subrsb = NULL; | |
11489 | + fill->lkb = NULL; | |
11490 | + | |
11491 | + out: | |
11492 | + return error; | |
11493 | +} | |
11494 | + | |
b7b72b66 | 11495 | +/* |
c1c6733f AM |
11496 | + * Pack an RSB, all its LKB's, all its subrsb's and all their LKB's into a |
11497 | + * buffer. When the buffer runs out of space, save the place to restart (the | |
11498 | + * queue+lkb, subrsb, or subrsb+queue+lkb which wouldn't fit). | |
11499 | + */ | |
11500 | + | |
b7b72b66 AM |
11501 | +static int pack_rsb_tree(struct dlm_ls *ls, struct dlm_rsb *rsb, |
11502 | + rcom_fill_t *fill) | |
c1c6733f AM |
11503 | +{ |
11504 | + int error = -ENOSPC; | |
11505 | + | |
11506 | + fill->remasterid = 0; | |
11507 | + | |
b7b72b66 | 11508 | + /* |
c1c6733f AM |
11509 | + * Pack the root rsb itself. A 1 byte type precedes the serialised |
11510 | + * rsb. Then pack the lkb's for the root rsb. | |
11511 | + */ | |
11512 | + | |
11513 | + down_write(&rsb->res_lock); | |
11514 | + | |
11515 | + if (fill->offset + 1 + rsb_length(rsb) > fill->maxlen) | |
11516 | + goto out; | |
11517 | + | |
11518 | + rsb->res_remasterid = ++fill->remasterid; | |
11519 | + put_char(REMASTER_ROOTRSB, fill->outbuf, &fill->offset); | |
11520 | + serialise_rsb(rsb, fill->outbuf, &fill->offset); | |
11521 | + | |
11522 | + error = pack_lkb_queues(rsb, fill); | |
11523 | + if (error) | |
11524 | + goto out; | |
11525 | + | |
11526 | + up_write(&rsb->res_lock); | |
11527 | + | |
b7b72b66 | 11528 | + /* |
c1c6733f AM |
11529 | + * Pack subrsb/lkb's under the root rsb. |
11530 | + */ | |
11531 | + | |
11532 | + error = pack_subrsbs(rsb, NULL, fill); | |
11533 | + | |
11534 | + return error; | |
11535 | + | |
11536 | + out: | |
11537 | + up_write(&rsb->res_lock); | |
11538 | + return error; | |
11539 | +} | |
11540 | + | |
b7b72b66 | 11541 | +/* |
c1c6733f AM |
11542 | + * Given an RSB, return the next RSB that should be sent to a new master. |
11543 | + */ | |
11544 | + | |
b7b72b66 AM |
11545 | +static struct dlm_rsb *next_remastered_rsb(struct dlm_ls *ls, |
11546 | + struct dlm_rsb *rsb) | |
c1c6733f AM |
11547 | +{ |
11548 | + struct list_head *tmp, *start, *end; | |
b7b72b66 | 11549 | + struct dlm_rsb *r; |
c1c6733f AM |
11550 | + |
11551 | + if (!rsb) | |
11552 | + start = ls->ls_rootres.next; | |
11553 | + else | |
11554 | + start = rsb->res_rootlist.next; | |
11555 | + | |
11556 | + end = &ls->ls_rootres; | |
11557 | + | |
11558 | + for (tmp = start; tmp != end; tmp = tmp->next) { | |
b7b72b66 | 11559 | + r = list_entry(tmp, struct dlm_rsb, res_rootlist); |
c1c6733f AM |
11560 | + |
11561 | + if (test_bit(RESFL_NEW_MASTER, &r->res_flags)) { | |
11562 | + if (r->res_nodeid && lkbs_to_remaster(r)) { | |
11563 | + expect_new_lkids(r); | |
11564 | + return r; | |
11565 | + } else | |
11566 | + clear_bit(RESFL_NEW_MASTER, &r->res_flags); | |
11567 | + } | |
11568 | + } | |
11569 | + | |
11570 | + return NULL; | |
11571 | +} | |
11572 | + | |
b7b72b66 | 11573 | +/* |
c1c6733f AM |
11574 | + * Given an rcom buffer, fill it with RSB's that need to be sent to a single |
11575 | + * new master node. In the case where all the data to send to one node | |
11576 | + * requires multiple messages, this function needs to resume filling each | |
11577 | + * successive buffer from the point where it left off when the previous buffer | |
11578 | + * filled up. | |
11579 | + */ | |
11580 | + | |
b7b72b66 AM |
11581 | +static void fill_rcom_buffer(struct dlm_ls *ls, rcom_fill_t *fill, |
11582 | + uint32_t *nodeid) | |
c1c6733f | 11583 | +{ |
b7b72b66 | 11584 | + struct dlm_rsb *rsb, *prev_rsb = fill->rsb; |
c1c6733f AM |
11585 | + int error; |
11586 | + | |
11587 | + fill->offset = 0; | |
11588 | + | |
11589 | + if (!prev_rsb) { | |
11590 | + | |
b7b72b66 | 11591 | + /* |
c1c6733f AM |
11592 | + * The first time this function is called. |
11593 | + */ | |
11594 | + | |
11595 | + rsb = next_remastered_rsb(ls, NULL); | |
11596 | + if (!rsb) | |
11597 | + goto no_more; | |
11598 | + | |
11599 | + } else if (fill->subrsb || fill->lkb) { | |
11600 | + | |
b7b72b66 | 11601 | + /* |
c1c6733f AM |
11602 | + * Continue packing an rsb tree that was partially packed last |
11603 | + * time (fill->subrsb/lkb indicates where packing of last block | |
11604 | + * left off) | |
11605 | + */ | |
11606 | + | |
11607 | + rsb = prev_rsb; | |
11608 | + *nodeid = rsb->res_nodeid; | |
11609 | + | |
11610 | + error = pack_rsb_tree_remaining(ls, rsb, fill); | |
11611 | + if (error == -ENOSPC) | |
11612 | + goto more; | |
11613 | + | |
11614 | + rsb = next_remastered_rsb(ls, prev_rsb); | |
11615 | + if (!rsb) | |
11616 | + goto no_more; | |
11617 | + | |
11618 | + if (rsb->res_nodeid != prev_rsb->res_nodeid) | |
11619 | + goto more; | |
11620 | + } else { | |
11621 | + rsb = prev_rsb; | |
11622 | + } | |
11623 | + | |
b7b72b66 | 11624 | + /* |
c1c6733f AM |
11625 | + * Pack rsb trees into the buffer until we run out of space, run out of |
11626 | + * new rsb's or hit a new nodeid. | |
11627 | + */ | |
11628 | + | |
11629 | + *nodeid = rsb->res_nodeid; | |
11630 | + | |
11631 | + for (;;) { | |
11632 | + error = pack_rsb_tree(ls, rsb, fill); | |
11633 | + if (error == -ENOSPC) | |
11634 | + goto more; | |
11635 | + | |
11636 | + prev_rsb = rsb; | |
11637 | + | |
11638 | + rsb = next_remastered_rsb(ls, prev_rsb); | |
11639 | + if (!rsb) | |
11640 | + goto no_more; | |
11641 | + | |
11642 | + if (rsb->res_nodeid != prev_rsb->res_nodeid) | |
11643 | + goto more; | |
11644 | + } | |
11645 | + | |
11646 | + more: | |
11647 | + fill->more = 1; | |
11648 | + fill->rsb = rsb; | |
11649 | + return; | |
11650 | + | |
11651 | + no_more: | |
11652 | + fill->more = 0; | |
11653 | +} | |
11654 | + | |
b7b72b66 | 11655 | +/* |
c1c6733f AM |
11656 | + * Send lkb's (and subrsb/lkbs) for remastered root rsbs to new masters. |
11657 | + */ | |
11658 | + | |
b7b72b66 | 11659 | +int rebuild_rsbs_send(struct dlm_ls *ls) |
c1c6733f | 11660 | +{ |
b7b72b66 | 11661 | + struct dlm_rcom *rc; |
c1c6733f AM |
11662 | + rcom_fill_t fill; |
11663 | + uint32_t nodeid; | |
11664 | + int error; | |
11665 | + | |
b7b72b66 | 11666 | + DLM_ASSERT(recover_list_empty(ls),); |
c1c6733f AM |
11667 | + |
11668 | + log_all(ls, "rebuild locks"); | |
11669 | + | |
11670 | + error = -ENOMEM; | |
11671 | + rc = allocate_rcom_buffer(ls); | |
11672 | + if (!rc) | |
11673 | + goto ret; | |
11674 | + | |
b7b72b66 AM |
11675 | + down_read(&ls->ls_root_lock); |
11676 | + | |
c1c6733f AM |
11677 | + error = 0; |
11678 | + memset(&fill, 0, sizeof(rcom_fill_t)); | |
11679 | + fill.outbuf = rc->rc_buf; | |
b7b72b66 | 11680 | + fill.maxlen = dlm_config.buffer_size - sizeof(struct dlm_rcom); |
c1c6733f AM |
11681 | + |
11682 | + do { | |
11683 | + fill_rcom_buffer(ls, &fill, &nodeid); | |
11684 | + if (!fill.offset) | |
11685 | + break; | |
11686 | + | |
11687 | + rc->rc_datalen = fill.offset; | |
11688 | + error = rcom_send_message(ls, nodeid, RECCOMM_NEWLOCKS, rc, 0); | |
b7b72b66 AM |
11689 | + if (error) { |
11690 | + up_read(&ls->ls_root_lock); | |
c1c6733f | 11691 | + goto out; |
b7b72b66 | 11692 | + } |
c1c6733f AM |
11693 | + |
11694 | + schedule(); | |
b7b72b66 AM |
11695 | + error = dlm_recovery_stopped(ls); |
11696 | + if (error) { | |
11697 | + up_read(&ls->ls_root_lock); | |
c1c6733f | 11698 | + goto out; |
b7b72b66 | 11699 | + } |
c1c6733f AM |
11700 | + } |
11701 | + while (fill.more); | |
11702 | + | |
b7b72b66 AM |
11703 | + up_read(&ls->ls_root_lock); |
11704 | + | |
11705 | + error = dlm_wait_function(ls, &recover_list_empty); | |
c1c6733f AM |
11706 | + |
11707 | + log_all(ls, "rebuilt %d locks", fill.count); | |
11708 | + | |
11709 | + out: | |
c1c6733f AM |
11710 | + free_rcom_buffer(rc); |
11711 | + | |
11712 | + ret: | |
11713 | + return error; | |
11714 | +} | |
11715 | + | |
b7b72b66 AM |
11716 | +static struct dlm_rsb *find_by_remasterid(struct dlm_ls *ls, int remasterid, |
11717 | + struct dlm_rsb *rootrsb) | |
c1c6733f | 11718 | +{ |
b7b72b66 | 11719 | + struct dlm_rsb *rsb; |
c1c6733f | 11720 | + |
b7b72b66 | 11721 | + DLM_ASSERT(rootrsb,); |
c1c6733f AM |
11722 | + |
11723 | + if (rootrsb->res_remasterid == remasterid) { | |
11724 | + rsb = rootrsb; | |
11725 | + goto out; | |
11726 | + } | |
11727 | + | |
11728 | + list_for_each_entry(rsb, &rootrsb->res_subreslist, res_subreslist) { | |
11729 | + if (rsb->res_remasterid == remasterid) | |
11730 | + goto out; | |
11731 | + } | |
11732 | + rsb = NULL; | |
11733 | + | |
11734 | + out: | |
11735 | + return rsb; | |
11736 | +} | |
11737 | + | |
b7b72b66 | 11738 | +/* |
c1c6733f AM |
11739 | + * Search a queue for the given remote lock id (remlkid). |
11740 | + */ | |
11741 | + | |
b7b72b66 AM |
11742 | +static struct dlm_lkb *search_remlkid(struct list_head *statequeue, int nodeid, |
11743 | + int remid) | |
c1c6733f | 11744 | +{ |
b7b72b66 | 11745 | + struct dlm_lkb *lkb; |
c1c6733f AM |
11746 | + |
11747 | + list_for_each_entry(lkb, statequeue, lkb_statequeue) { | |
11748 | + if (lkb->lkb_nodeid == nodeid && lkb->lkb_remid == remid) { | |
11749 | + return lkb; | |
11750 | + } | |
11751 | + } | |
11752 | + | |
11753 | + return NULL; | |
11754 | +} | |
11755 | + | |
b7b72b66 | 11756 | +/* |
c1c6733f AM |
11757 | + * Given a remote lock ID (and a parent resource), return the local LKB for it |
11758 | + * Hopefully we dont need to do this too often on deep lock trees. This is | |
11759 | + * VERY suboptimal for anything but the smallest lock trees. It searches the | |
11760 | + * lock tree for an LKB with the remote id "remid" and the node "nodeid" and | |
11761 | + * returns the LKB address. OPTIMISATION: we should keep a list of these while | |
11762 | + * we are building up the remastered LKBs | |
11763 | + */ | |
11764 | + | |
b7b72b66 AM |
11765 | +static struct dlm_lkb *find_by_remlkid(struct dlm_rsb *rootrsb, int nodeid, |
11766 | + int remid) | |
c1c6733f | 11767 | +{ |
b7b72b66 AM |
11768 | + struct dlm_lkb *lkb; |
11769 | + struct dlm_rsb *rsb; | |
c1c6733f AM |
11770 | + |
11771 | + lkb = search_remlkid(&rootrsb->res_grantqueue, nodeid, remid); | |
11772 | + if (lkb) | |
11773 | + goto out; | |
11774 | + | |
11775 | + lkb = search_remlkid(&rootrsb->res_convertqueue, nodeid, remid); | |
11776 | + if (lkb) | |
11777 | + goto out; | |
11778 | + | |
11779 | + lkb = search_remlkid(&rootrsb->res_waitqueue, nodeid, remid); | |
11780 | + if (lkb) | |
11781 | + goto out; | |
11782 | + | |
11783 | + list_for_each_entry(rsb, &rootrsb->res_subreslist, res_subreslist) { | |
11784 | + lkb = search_remlkid(&rsb->res_grantqueue, nodeid, remid); | |
11785 | + if (lkb) | |
11786 | + goto out; | |
11787 | + | |
11788 | + lkb = search_remlkid(&rsb->res_convertqueue, nodeid, remid); | |
11789 | + if (lkb) | |
11790 | + goto out; | |
11791 | + | |
11792 | + lkb = search_remlkid(&rsb->res_waitqueue, nodeid, remid); | |
11793 | + if (lkb) | |
11794 | + goto out; | |
11795 | + } | |
11796 | + lkb = NULL; | |
11797 | + | |
11798 | + out: | |
11799 | + return lkb; | |
11800 | +} | |
11801 | + | |
b7b72b66 | 11802 | +/* |
c1c6733f AM |
11803 | + * Unpack an LKB from a remaster operation |
11804 | + */ | |
11805 | + | |
b7b72b66 AM |
11806 | +static int deserialise_lkb(struct dlm_ls *ls, int rem_nodeid, |
11807 | + struct dlm_rsb *rootrsb, char *buf, int *ptr, | |
11808 | + char *outbuf, int *outoffp) | |
c1c6733f | 11809 | +{ |
b7b72b66 AM |
11810 | + struct dlm_lkb *lkb, *exist_lkb = NULL; |
11811 | + struct dlm_rsb *rsb; | |
c1c6733f AM |
11812 | + int error = -ENOMEM, parentid, rsb_rmid, remote_lkid, status, temp; |
11813 | + | |
11814 | + remote_lkid = get_int(buf, ptr); | |
11815 | + | |
11816 | + rsb_rmid = get_int(buf, ptr); | |
11817 | + rsb = find_by_remasterid(ls, rsb_rmid, rootrsb); | |
b7b72b66 | 11818 | + DLM_ASSERT(rsb, printk("no RSB for remasterid %d\n", rsb_rmid);); |
c1c6733f | 11819 | + |
b7b72b66 | 11820 | + /* |
c1c6733f | 11821 | + * We could have received this lkb already from a previous recovery |
b7b72b66 AM |
11822 | + * that was interrupted. We still need to advance ptr so read in |
11823 | + * lkb and then release it. FIXME: verify this is valid. | |
c1c6733f AM |
11824 | + */ |
11825 | + lkb = find_by_remlkid(rsb, rem_nodeid, remote_lkid); | |
b7b72b66 AM |
11826 | + if (lkb) { |
11827 | + log_all(ls, "lkb %x exists %s", remote_lkid, rsb->res_name); | |
11828 | + exist_lkb = lkb; | |
11829 | + } | |
c1c6733f | 11830 | + |
b7b72b66 | 11831 | + lkb = create_lkb(ls); |
c1c6733f AM |
11832 | + if (!lkb) |
11833 | + goto out; | |
11834 | + | |
11835 | + lkb->lkb_remid = remote_lkid; | |
11836 | + lkb->lkb_flags = get_int(buf, ptr); | |
11837 | + status = get_int(buf, ptr); | |
11838 | + lkb->lkb_rqmode = get_char(buf, ptr); | |
11839 | + lkb->lkb_grmode = get_char(buf, ptr); | |
11840 | + atomic_set(&lkb->lkb_childcnt, get_int(buf, ptr)); | |
11841 | + | |
11842 | + parentid = get_int(buf, ptr); | |
11843 | + lkb->lkb_bastaddr = (void *) (long) get_int(buf, ptr); | |
b7b72b66 | 11844 | + lkb->lkb_ownpid = get_int(buf, ptr); |
c1c6733f AM |
11845 | + |
11846 | + if (lkb->lkb_flags & GDLM_LKFLG_VALBLK) { | |
11847 | + lkb->lkb_lvbptr = allocate_lvb(ls); | |
11848 | + if (!lkb->lkb_lvbptr) | |
11849 | + goto out; | |
11850 | + get_bytes(lkb->lkb_lvbptr, &temp, buf, ptr); | |
11851 | + } | |
11852 | + | |
11853 | + if (lkb->lkb_flags & GDLM_LKFLG_RANGE) { | |
11854 | + uint64_t start, end; | |
11855 | + | |
11856 | + /* Don't need to keep the range flag, for comms use only */ | |
11857 | + lkb->lkb_flags &= ~GDLM_LKFLG_RANGE; | |
11858 | + start = get_int64(buf, ptr); | |
11859 | + end = get_int64(buf, ptr); | |
11860 | + | |
b7b72b66 | 11861 | + lkb->lkb_range = allocate_range(ls); |
c1c6733f AM |
11862 | + if (!lkb->lkb_range) |
11863 | + goto out; | |
11864 | + | |
11865 | + switch (status) { | |
11866 | + case GDLM_LKSTS_CONVERT: | |
11867 | + lkb->lkb_range[RQ_RANGE_START] = start; | |
11868 | + lkb->lkb_range[RQ_RANGE_END] = end; | |
11869 | + start = get_int64(buf, ptr); | |
11870 | + end = get_int64(buf, ptr); | |
11871 | + lkb->lkb_range[GR_RANGE_START] = start; | |
11872 | + lkb->lkb_range[GR_RANGE_END] = end; | |
11873 | + | |
11874 | + case GDLM_LKSTS_WAITING: | |
11875 | + lkb->lkb_range[RQ_RANGE_START] = start; | |
11876 | + lkb->lkb_range[RQ_RANGE_END] = end; | |
11877 | + break; | |
11878 | + | |
11879 | + case GDLM_LKSTS_GRANTED: | |
11880 | + lkb->lkb_range[GR_RANGE_START] = start; | |
11881 | + lkb->lkb_range[GR_RANGE_END] = end; | |
11882 | + break; | |
11883 | + default: | |
b7b72b66 | 11884 | + DLM_ASSERT(0,); |
c1c6733f AM |
11885 | + } |
11886 | + } | |
11887 | + | |
b7b72b66 AM |
11888 | + if (exist_lkb) { |
11889 | + /* verify lkb and exist_lkb values match? */ | |
11890 | + release_lkb(ls, lkb); | |
11891 | + lkb = exist_lkb; | |
11892 | + goto put_lkid; | |
11893 | + } | |
11894 | + | |
c1c6733f AM |
11895 | + /* Resolve local lock LKB address from parent ID */ |
11896 | + if (parentid) | |
11897 | + lkb->lkb_parent = find_by_remlkid(rootrsb, rem_nodeid, | |
11898 | + parentid); | |
11899 | + | |
11900 | + atomic_inc(&rsb->res_ref); | |
11901 | + lkb->lkb_resource = rsb; | |
11902 | + | |
11903 | + lkb->lkb_flags |= GDLM_LKFLG_MSTCPY; | |
11904 | + lkb->lkb_nodeid = rem_nodeid; | |
11905 | + | |
b7b72b66 | 11906 | + /* |
c1c6733f AM |
11907 | + * Put the lkb on an RSB queue. An lkb that's in the midst of a |
11908 | + * conversion request (on the requesting node's lockqueue and has | |
11909 | + * LQCONVERT set) should be put on the granted queue. The convert | |
11910 | + * request will be resent by the requesting node. | |
11911 | + */ | |
11912 | + | |
11913 | + if (lkb->lkb_flags & GDLM_LKFLG_LQCONVERT) { | |
11914 | + lkb->lkb_flags &= ~GDLM_LKFLG_LQCONVERT; | |
b7b72b66 | 11915 | + DLM_ASSERT(status == GDLM_LKSTS_CONVERT, |
c1c6733f AM |
11916 | + printk("status=%d\n", status);); |
11917 | + lkb->lkb_rqmode = DLM_LOCK_IV; | |
11918 | + status = GDLM_LKSTS_GRANTED; | |
11919 | + } | |
11920 | + | |
11921 | + lkb_enqueue(rsb, lkb, status); | |
11922 | + | |
b7b72b66 | 11923 | + /* |
c1c6733f AM |
11924 | + * Update the rsb lvb if the lkb's lvb is up to date (grmode > NL). |
11925 | + */ | |
11926 | + | |
11927 | + if ((lkb->lkb_flags & GDLM_LKFLG_VALBLK) | |
11928 | + && lkb->lkb_grmode > DLM_LOCK_NL) { | |
11929 | + if (!rsb->res_lvbptr) | |
b7b72b66 | 11930 | + rsb->res_lvbptr = allocate_lvb(ls); |
c1c6733f AM |
11931 | + if (!rsb->res_lvbptr) |
11932 | + goto out; | |
11933 | + memcpy(rsb->res_lvbptr, lkb->lkb_lvbptr, DLM_LVB_LEN); | |
11934 | + } | |
11935 | + | |
b7b72b66 | 11936 | + /* |
c1c6733f AM |
11937 | + * Clear flags that may have been sent over that are only relevant in |
11938 | + * the context of the sender. | |
11939 | + */ | |
11940 | + | |
b7b72b66 AM |
11941 | + lkb->lkb_flags &= ~(GDLM_LKFLG_DELETED | GDLM_LKFLG_LQRESEND | |
11942 | + GDLM_LKFLG_NOREBUILD | GDLM_LKFLG_DEMOTED); | |
c1c6733f AM |
11943 | + |
11944 | + put_lkid: | |
11945 | + /* Return the new LKID to the caller's buffer */ | |
11946 | + put_int(lkb->lkb_id, outbuf, outoffp); | |
11947 | + put_int(lkb->lkb_remid, outbuf, outoffp); | |
11948 | + error = 0; | |
11949 | + | |
11950 | + out: | |
11951 | + return error; | |
11952 | +} | |
11953 | + | |
b7b72b66 AM |
11954 | +static struct dlm_rsb *deserialise_rsb(struct dlm_ls *ls, int nodeid, |
11955 | + struct dlm_rsb *rootrsb, char *buf, | |
11956 | + int *ptr) | |
c1c6733f AM |
11957 | +{ |
11958 | + int length; | |
11959 | + int remasterid; | |
11960 | + int parent_remasterid; | |
11961 | + char name[DLM_RESNAME_MAXLEN]; | |
11962 | + int error; | |
b7b72b66 AM |
11963 | + struct dlm_rsb *parent = NULL; |
11964 | + struct dlm_rsb *rsb; | |
c1c6733f AM |
11965 | + |
11966 | + get_bytes(name, &length, buf, ptr); | |
11967 | + remasterid = get_int(buf, ptr); | |
11968 | + parent_remasterid = get_int(buf, ptr); | |
11969 | + | |
11970 | + if (parent_remasterid) | |
11971 | + parent = find_by_remasterid(ls, parent_remasterid, rootrsb); | |
11972 | + | |
b7b72b66 | 11973 | + /* |
c1c6733f AM |
11974 | + * The rsb reference from this find_or_create_rsb() will keep the rsb |
11975 | + * around while we add new lkb's to it from deserialise_lkb. Each of | |
11976 | + * the lkb's will add an rsb reference. The reference added here is | |
11977 | + * removed by release_rsb() after all lkb's are added. | |
11978 | + */ | |
11979 | + | |
b7b72b66 AM |
11980 | + error = find_rsb(ls, parent, name, length, CREATE, &rsb); |
11981 | + DLM_ASSERT(!error,); | |
11982 | + | |
11983 | + set_bit(RESFL_MASTER, &rsb->res_flags); | |
c1c6733f AM |
11984 | + |
11985 | + /* There is a case where the above needs to create the RSB. */ | |
11986 | + if (rsb->res_nodeid == -1) | |
11987 | + rsb->res_nodeid = our_nodeid(); | |
11988 | + | |
11989 | + rsb->res_remasterid = remasterid; | |
11990 | + | |
11991 | + return rsb; | |
11992 | +} | |
11993 | + | |
b7b72b66 | 11994 | +/* |
c1c6733f AM |
11995 | + * Processing at the receiving end of a NEWLOCKS message from a node in |
11996 | + * rebuild_rsbs_send(). Rebuild a remastered lock tree. Nodeid is the remote | |
11997 | + * node whose locks we are now mastering. For a reply we need to send back the | |
11998 | + * new lockids of the remastered locks so that remote ops can find them. | |
11999 | + */ | |
12000 | + | |
b7b72b66 | 12001 | +int rebuild_rsbs_recv(struct dlm_ls *ls, int nodeid, char *buf, int len) |
c1c6733f | 12002 | +{ |
b7b72b66 AM |
12003 | + struct dlm_rcom *rc; |
12004 | + struct dlm_rsb *rsb = NULL; | |
c1c6733f AM |
12005 | + rebuild_node_t *rnode; |
12006 | + char *outbuf; | |
12007 | + int outptr, ptr = 0, error = -ENOMEM; | |
12008 | + | |
12009 | + rnode = find_rebuild_root(ls, nodeid); | |
12010 | + if (!rnode) | |
12011 | + goto out; | |
12012 | + | |
b7b72b66 | 12013 | + /* |
c1c6733f AM |
12014 | + * Allocate a buffer for the reply message which is a list of remote |
12015 | + * lock IDs and their (new) local lock ids. It will always be big | |
12016 | + * enough to fit <n> ID pairs if it already fit <n> LKBs. | |
12017 | + */ | |
12018 | + | |
12019 | + rc = allocate_rcom_buffer(ls); | |
12020 | + if (!rc) | |
12021 | + goto out; | |
12022 | + outbuf = rc->rc_buf; | |
12023 | + outptr = 0; | |
12024 | + | |
b7b72b66 | 12025 | + /* |
c1c6733f AM |
12026 | + * Unpack RSBs and LKBs, saving new LKB id's in outbuf as they're |
12027 | + * created. Each deserialise_rsb adds an rsb reference that must be | |
12028 | + * removed with release_rsb once all new lkb's for an rsb have been | |
12029 | + * added. | |
12030 | + */ | |
12031 | + | |
12032 | + while (ptr < len) { | |
12033 | + int type; | |
12034 | + | |
12035 | + type = get_char(buf, &ptr); | |
12036 | + | |
12037 | + switch (type) { | |
12038 | + case REMASTER_ROOTRSB: | |
12039 | + if (rsb) | |
12040 | + release_rsb(rsb); | |
12041 | + rsb = deserialise_rsb(ls, nodeid, rnode->rootrsb, buf, | |
12042 | + &ptr); | |
12043 | + rnode->rootrsb = rsb; | |
12044 | + break; | |
12045 | + | |
12046 | + case REMASTER_RSB: | |
12047 | + if (rsb) | |
12048 | + release_rsb(rsb); | |
12049 | + rsb = deserialise_rsb(ls, nodeid, rnode->rootrsb, buf, | |
12050 | + &ptr); | |
12051 | + break; | |
12052 | + | |
12053 | + case REMASTER_LKB: | |
12054 | + deserialise_lkb(ls, nodeid, rnode->rootrsb, buf, &ptr, | |
12055 | + outbuf, &outptr); | |
12056 | + break; | |
12057 | + | |
12058 | + default: | |
b7b72b66 | 12059 | + DLM_ASSERT(0, printk("type=%d nodeid=%u ptr=%d " |
c1c6733f AM |
12060 | + "len=%d\n", type, nodeid, ptr, |
12061 | + len);); | |
12062 | + } | |
12063 | + } | |
12064 | + | |
12065 | + if (rsb) | |
12066 | + release_rsb(rsb); | |
12067 | + | |
b7b72b66 | 12068 | + /* |
c1c6733f AM |
12069 | + * Reply with the new lock IDs. |
12070 | + */ | |
12071 | + | |
12072 | + rc->rc_datalen = outptr; | |
12073 | + error = rcom_send_message(ls, nodeid, RECCOMM_NEWLOCKIDS, rc, 0); | |
12074 | + | |
12075 | + free_rcom_buffer(rc); | |
12076 | + | |
12077 | + out: | |
12078 | + return error; | |
12079 | +} | |
12080 | + | |
b7b72b66 | 12081 | +/* |
c1c6733f AM |
12082 | + * Processing for a NEWLOCKIDS message. Called when we get the reply from the |
12083 | + * new master telling us what the new remote lock IDs are for the remastered | |
12084 | + * locks | |
12085 | + */ | |
12086 | + | |
b7b72b66 | 12087 | +int rebuild_rsbs_lkids_recv(struct dlm_ls *ls, int nodeid, char *buf, int len) |
c1c6733f AM |
12088 | +{ |
12089 | + int offset = 0; | |
12090 | + | |
12091 | + if (len == 1) | |
12092 | + len = 0; | |
12093 | + | |
12094 | + while (offset < len) { | |
12095 | + int remote_id; | |
12096 | + int local_id; | |
b7b72b66 | 12097 | + struct dlm_lkb *lkb; |
c1c6733f AM |
12098 | + |
12099 | + if (offset + 8 > len) { | |
12100 | + log_error(ls, "rebuild_rsbs_lkids_recv: bad data " | |
12101 | + "length nodeid=%d offset=%d len=%d", | |
12102 | + nodeid, offset, len); | |
12103 | + break; | |
12104 | + } | |
12105 | + | |
12106 | + remote_id = get_int(buf, &offset); | |
12107 | + local_id = get_int(buf, &offset); | |
12108 | + | |
12109 | + lkb = find_lock_by_id(ls, local_id); | |
12110 | + if (lkb) { | |
12111 | + lkb->lkb_remid = remote_id; | |
12112 | + have_new_lkid(lkb); | |
12113 | + } else { | |
12114 | + log_error(ls, "rebuild_rsbs_lkids_recv: unknown lkid " | |
12115 | + "nodeid=%d id=%x remid=%x offset=%d len=%d", | |
12116 | + nodeid, local_id, remote_id, offset, len); | |
12117 | + } | |
12118 | + } | |
12119 | + | |
12120 | + if (recover_list_empty(ls)) | |
12121 | + wake_up(&ls->ls_wait_general); | |
12122 | + | |
12123 | + return 0; | |
12124 | +} | |
12125 | diff -urN linux-orig/cluster/dlm/rebuild.h linux-patched/cluster/dlm/rebuild.h | |
12126 | --- linux-orig/cluster/dlm/rebuild.h 1970-01-01 07:30:00.000000000 +0730 | |
bb1d8b11 | 12127 | +++ linux-patched/cluster/dlm/rebuild.h 2004-11-03 11:31:56.000000000 +0800 |
c1c6733f AM |
12128 | @@ -0,0 +1,22 @@ |
12129 | +/****************************************************************************** | |
12130 | +******************************************************************************* | |
12131 | +** | |
12132 | +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | |
12133 | +** Copyright (C) 2004 Red Hat, Inc. All rights reserved. | |
12134 | +** | |
12135 | +** This copyrighted material is made available to anyone wishing to use, | |
12136 | +** modify, copy, or redistribute it subject to the terms and conditions | |
12137 | +** of the GNU General Public License v.2. | |
12138 | +** | |
12139 | +******************************************************************************* | |
12140 | +******************************************************************************/ | |
12141 | + | |
12142 | +#ifndef __REBUILD_DOT_H__ | |
12143 | +#define __REBUILD_DOT_H__ | |
12144 | + | |
b7b72b66 AM |
12145 | +int rebuild_rsbs_send(struct dlm_ls *ls); |
12146 | +int rebuild_rsbs_recv(struct dlm_ls *ls, int nodeid, char *buf, int len); | |
12147 | +int rebuild_rsbs_lkids_recv(struct dlm_ls *ls, int nodeid, char *buf, int len); | |
12148 | +int rebuild_freemem(struct dlm_ls *ls); | |
c1c6733f AM |
12149 | + |
12150 | +#endif /* __REBUILD_DOT_H__ */ | |
12151 | diff -urN linux-orig/cluster/dlm/reccomms.c linux-patched/cluster/dlm/reccomms.c | |
12152 | --- linux-orig/cluster/dlm/reccomms.c 1970-01-01 07:30:00.000000000 +0730 | |
bb1d8b11 AM |
12153 | +++ linux-patched/cluster/dlm/reccomms.c 2004-11-03 11:31:56.000000000 +0800 |
12154 | @@ -0,0 +1,447 @@ | |
c1c6733f AM |
12155 | +/****************************************************************************** |
12156 | +******************************************************************************* | |
12157 | +** | |
12158 | +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | |
12159 | +** Copyright (C) 2004 Red Hat, Inc. All rights reserved. | |
12160 | +** | |
12161 | +** This copyrighted material is made available to anyone wishing to use, | |
12162 | +** modify, copy, or redistribute it subject to the terms and conditions | |
12163 | +** of the GNU General Public License v.2. | |
12164 | +** | |
12165 | +******************************************************************************* | |
12166 | +******************************************************************************/ | |
12167 | + | |
12168 | +#include "dlm_internal.h" | |
12169 | +#include "lowcomms.h" | |
12170 | +#include "midcomms.h" | |
12171 | +#include "reccomms.h" | |
12172 | +#include "nodes.h" | |
12173 | +#include "lockspace.h" | |
12174 | +#include "recover.h" | |
12175 | +#include "dir.h" | |
12176 | +#include "config.h" | |
12177 | +#include "rebuild.h" | |
12178 | +#include "memory.h" | |
12179 | + | |
12180 | +/* Running on the basis that only a single recovery communication will be done | |
12181 | + * at a time per lockspace */ | |
12182 | + | |
bb1d8b11 | 12183 | +static void rcom_process_message(struct dlm_ls *ls, uint32_t nodeid, struct dlm_rcom *rc); |
c1c6733f | 12184 | + |
b7b72b66 | 12185 | +static int rcom_response(struct dlm_ls *ls) |
c1c6733f AM |
12186 | +{ |
12187 | + return test_bit(LSFL_RECCOMM_READY, &ls->ls_flags); | |
12188 | +} | |
12189 | + | |
12190 | +/** | |
12191 | + * rcom_send_message - send or request recovery data | |
12192 | + * @ls: the lockspace | |
12193 | + * @nodeid: node to which the message is sent | |
12194 | + * @type: type of recovery message | |
12195 | + * @rc: the rc buffer to send | |
12196 | + * @need_reply: wait for reply if this is set | |
12197 | + * | |
12198 | + * Using this interface | |
12199 | + * i) Allocate an rc buffer: | |
12200 | + * rc = allocate_rcom_buffer(ls); | |
12201 | + * ii) Copy data to send beginning at rc->rc_buf: | |
12202 | + * memcpy(rc->rc_buf, mybuf, mylen); | |
12203 | + * iii) Set rc->rc_datalen to the number of bytes copied in (ii): | |
12204 | + * rc->rc_datalen = mylen | |
12205 | + * iv) Submit the rc to this function: | |
12206 | + * rcom_send_message(rc); | |
12207 | + * | |
b7b72b66 AM |
12208 | + * The max value of "mylen" is dlm_config.buffer_size - sizeof(struct |
12209 | + * dlm_rcom). If more data must be passed in one send, use | |
12210 | + * rcom_expand_buffer() which incrementally increases the size of the rc buffer | |
12211 | + * by dlm_config.buffer_size bytes. | |
c1c6733f AM |
12212 | + * |
12213 | + * Any data returned for the message (when need_reply is set) will be saved in | |
12214 | + * rc->rc_buf when this function returns and rc->rc_datalen will be set to the | |
12215 | + * number of bytes copied into rc->rc_buf. | |
12216 | + * | |
12217 | + * Returns: 0 on success, -EXXX on failure | |
12218 | + */ | |
12219 | + | |
b7b72b66 AM |
12220 | +int rcom_send_message(struct dlm_ls *ls, uint32_t nodeid, int type, |
12221 | + struct dlm_rcom *rc, int need_reply) | |
c1c6733f AM |
12222 | +{ |
12223 | + int error = 0; | |
12224 | + | |
12225 | + if (!rc->rc_datalen) | |
12226 | + rc->rc_datalen = 1; | |
12227 | + | |
12228 | + /* | |
12229 | + * Fill in the header. | |
12230 | + */ | |
12231 | + | |
12232 | + rc->rc_header.rh_cmd = GDLM_REMCMD_RECOVERMESSAGE; | |
12233 | + rc->rc_header.rh_lockspace = ls->ls_global_id; | |
b7b72b66 | 12234 | + rc->rc_header.rh_length = sizeof(struct dlm_rcom) + rc->rc_datalen - 1; |
c1c6733f AM |
12235 | + rc->rc_subcmd = type; |
12236 | + rc->rc_msgid = ++ls->ls_rcom_msgid; | |
12237 | + | |
c1c6733f AM |
12238 | + /* |
12239 | + * When a reply is received, the reply data goes back into this buffer. | |
12240 | + * Synchronous rcom requests (need_reply=1) are serialised because of | |
12241 | + * the single ls_rcom. | |
12242 | + */ | |
12243 | + | |
12244 | + if (need_reply) { | |
12245 | + down(&ls->ls_rcom_lock); | |
12246 | + ls->ls_rcom = rc; | |
12247 | + } | |
12248 | + | |
12249 | + /* | |
12250 | + * After sending the message we'll wait at the end of this function to | |
12251 | + * get a reply. The READY flag will be set when the reply has been | |
12252 | + * received and requested data has been copied into | |
12253 | + * ls->ls_rcom->rc_buf; | |
12254 | + */ | |
12255 | + | |
b7b72b66 | 12256 | + DLM_ASSERT(!test_bit(LSFL_RECCOMM_READY, &ls->ls_flags),); |
c1c6733f AM |
12257 | + |
12258 | + /* | |
12259 | + * The WAIT bit indicates that we're waiting for and willing to accept a | |
12260 | + * reply. Any replies are ignored unless this bit is set. | |
12261 | + */ | |
12262 | + | |
12263 | + set_bit(LSFL_RECCOMM_WAIT, &ls->ls_flags); | |
12264 | + | |
12265 | + /* | |
12266 | + * Process the message locally. | |
12267 | + */ | |
12268 | + | |
12269 | + if (nodeid == our_nodeid()) { | |
12270 | + rcom_process_message(ls, nodeid, rc); | |
12271 | + goto out; | |
12272 | + } | |
12273 | + | |
12274 | + /* | |
12275 | + * Send the message. | |
12276 | + */ | |
12277 | + | |
12278 | + log_debug(ls, "rcom send %d to %u id %u", type, nodeid, rc->rc_msgid); | |
12279 | + | |
b7b72b66 | 12280 | + error = midcomms_send_message(nodeid, (struct dlm_header *) rc, |
c1c6733f | 12281 | + GFP_KERNEL); |
b7b72b66 | 12282 | + DLM_ASSERT(error >= 0, printk("error = %d\n", error);); |
c1c6733f AM |
12283 | + error = 0; |
12284 | + | |
12285 | + /* | |
12286 | + * Wait for a reply. Once a reply is processed from midcomms, the | |
b7b72b66 | 12287 | + * READY bit will be set and we'll be awoken (dlm_wait_function will |
c1c6733f AM |
12288 | + * return 0). |
12289 | + */ | |
12290 | + | |
12291 | + if (need_reply) { | |
b7b72b66 | 12292 | + error = dlm_wait_function(ls, &rcom_response); |
c1c6733f AM |
12293 | + if (error) |
12294 | + log_debug(ls, "rcom wait error %d", error); | |
12295 | + } | |
12296 | + | |
12297 | + out: | |
12298 | + clear_bit(LSFL_RECCOMM_WAIT, &ls->ls_flags); | |
12299 | + clear_bit(LSFL_RECCOMM_READY, &ls->ls_flags); | |
12300 | + | |
12301 | + if (need_reply) | |
12302 | + up(&ls->ls_rcom_lock); | |
12303 | + | |
12304 | + return error; | |
12305 | +} | |
12306 | + | |
12307 | +/* | |
12308 | + * Runs in same context as midcomms. | |
12309 | + */ | |
12310 | + | |
b7b72b66 | 12311 | +static void rcom_process_message(struct dlm_ls *ls, uint32_t nodeid, struct dlm_rcom *rc) |
c1c6733f | 12312 | +{ |
b7b72b66 AM |
12313 | + struct dlm_rcom rc_stack; |
12314 | + struct dlm_rcom *reply = NULL; | |
c1c6733f | 12315 | + int status, datalen, maxlen; |
b7b72b66 | 12316 | + uint32_t r_nodeid, be_nodeid; |
c1c6733f AM |
12317 | + |
12318 | + if (!ls) | |
12319 | + return; | |
12320 | + | |
b7b72b66 | 12321 | + if (dlm_recovery_stopped(ls) && (rc->rc_subcmd != RECCOMM_STATUS)) { |
c1c6733f AM |
12322 | + log_error(ls, "ignoring recovery message %x from %u", |
12323 | + rc->rc_subcmd, nodeid); | |
12324 | + return; | |
12325 | + } | |
12326 | + | |
12327 | + switch (rc->rc_subcmd) { | |
12328 | + | |
12329 | + case RECCOMM_STATUS: | |
12330 | + | |
b7b72b66 | 12331 | + memset(&rc_stack, 0, sizeof(struct dlm_rcom)); |
c1c6733f AM |
12332 | + reply = &rc_stack; |
12333 | + | |
12334 | + reply->rc_header.rh_cmd = GDLM_REMCMD_RECOVERREPLY; | |
12335 | + reply->rc_header.rh_lockspace = rc->rc_header.rh_lockspace; | |
12336 | + reply->rc_subcmd = rc->rc_subcmd; | |
12337 | + reply->rc_msgid = rc->rc_msgid; | |
12338 | + reply->rc_buf[0] = 0; | |
12339 | + | |
12340 | + if (test_bit(LSFL_RESDIR_VALID, &ls->ls_flags)) | |
12341 | + reply->rc_buf[0] |= RESDIR_VALID; | |
12342 | + | |
12343 | + if (test_bit(LSFL_ALL_RESDIR_VALID, &ls->ls_flags)) | |
12344 | + reply->rc_buf[0] |= RESDIR_ALL_VALID; | |
12345 | + | |
12346 | + if (test_bit(LSFL_NODES_VALID, &ls->ls_flags)) | |
12347 | + reply->rc_buf[0] |= NODES_VALID; | |
12348 | + | |
12349 | + if (test_bit(LSFL_ALL_NODES_VALID, &ls->ls_flags)) | |
12350 | + reply->rc_buf[0] |= NODES_ALL_VALID; | |
12351 | + | |
12352 | + reply->rc_datalen = 1; | |
12353 | + reply->rc_header.rh_length = | |
b7b72b66 | 12354 | + sizeof(struct dlm_rcom) + reply->rc_datalen - 1; |
c1c6733f AM |
12355 | + |
12356 | + log_debug(ls, "rcom status %x to %u", reply->rc_buf[0], nodeid); | |
12357 | + break; | |
12358 | + | |
12359 | + case RECCOMM_RECOVERNAMES: | |
12360 | + | |
12361 | + reply = allocate_rcom_buffer(ls); | |
b7b72b66 AM |
12362 | + DLM_ASSERT(reply,); |
12363 | + maxlen = dlm_config.buffer_size - sizeof(struct dlm_rcom); | |
c1c6733f AM |
12364 | + |
12365 | + reply->rc_header.rh_cmd = GDLM_REMCMD_RECOVERREPLY; | |
12366 | + reply->rc_header.rh_lockspace = rc->rc_header.rh_lockspace; | |
12367 | + reply->rc_subcmd = rc->rc_subcmd; | |
12368 | + reply->rc_msgid = rc->rc_msgid; | |
12369 | + | |
12370 | + /* | |
12371 | + * The other node wants a bunch of resource names. The name of | |
12372 | + * the resource to begin with is in rc->rc_buf. | |
12373 | + */ | |
12374 | + | |
b7b72b66 AM |
12375 | + datalen = dlm_dir_rebuild_send(ls, rc->rc_buf, rc->rc_datalen, |
12376 | + reply->rc_buf, maxlen, nodeid); | |
c1c6733f AM |
12377 | + |
12378 | + reply->rc_datalen = datalen; | |
12379 | + reply->rc_header.rh_length = | |
b7b72b66 | 12380 | + sizeof(struct dlm_rcom) + reply->rc_datalen - 1; |
c1c6733f AM |
12381 | + |
12382 | + log_debug(ls, "rcom names len %d to %u id %u", datalen, nodeid, | |
12383 | + reply->rc_msgid); | |
12384 | + break; | |
12385 | + | |
12386 | + case RECCOMM_GETMASTER: | |
12387 | + | |
12388 | + reply = allocate_rcom_buffer(ls); | |
b7b72b66 | 12389 | + DLM_ASSERT(reply,); |
c1c6733f AM |
12390 | + |
12391 | + reply->rc_header.rh_cmd = GDLM_REMCMD_RECOVERREPLY; | |
12392 | + reply->rc_header.rh_lockspace = rc->rc_header.rh_lockspace; | |
12393 | + reply->rc_subcmd = rc->rc_subcmd; | |
12394 | + reply->rc_msgid = rc->rc_msgid; | |
12395 | + | |
12396 | + /* | |
12397 | + * The other node wants to know the master of a named resource. | |
12398 | + */ | |
12399 | + | |
b7b72b66 AM |
12400 | + status = dlm_dir_lookup(ls, nodeid, rc->rc_buf, rc->rc_datalen, |
12401 | + &r_nodeid); | |
c1c6733f | 12402 | + if (status != 0) { |
b7b72b66 | 12403 | + log_all(ls, "rcom lookup error %d", status); |
c1c6733f AM |
12404 | + free_rcom_buffer(reply); |
12405 | + reply = NULL; | |
12406 | + return; | |
12407 | + } | |
b7b72b66 | 12408 | + be_nodeid = cpu_to_be32(r_nodeid); |
c1c6733f AM |
12409 | + memcpy(reply->rc_buf, &be_nodeid, sizeof(uint32_t)); |
12410 | + reply->rc_datalen = sizeof(uint32_t); | |
12411 | + reply->rc_header.rh_length = | |
b7b72b66 | 12412 | + sizeof(struct dlm_rcom) + reply->rc_datalen - 1; |
c1c6733f AM |
12413 | + break; |
12414 | + | |
12415 | + case RECCOMM_BULKLOOKUP: | |
12416 | + | |
12417 | + reply = allocate_rcom_buffer(ls); | |
b7b72b66 | 12418 | + DLM_ASSERT(reply,); |
c1c6733f AM |
12419 | + |
12420 | + reply->rc_header.rh_cmd = GDLM_REMCMD_RECOVERREPLY; | |
12421 | + reply->rc_header.rh_lockspace = rc->rc_header.rh_lockspace; | |
12422 | + reply->rc_subcmd = rc->rc_subcmd; | |
12423 | + reply->rc_msgid = rc->rc_msgid; | |
12424 | + | |
12425 | + /* | |
12426 | + * This is a bulk version of the above and just returns a | |
12427 | + * buffer full of node ids to match the resources | |
12428 | + */ | |
12429 | + | |
12430 | + datalen = bulk_master_lookup(ls, nodeid, rc->rc_buf, | |
12431 | + rc->rc_datalen, reply->rc_buf); | |
12432 | + if (datalen < 0) { | |
12433 | + free_rcom_buffer(reply); | |
12434 | + reply = NULL; | |
12435 | + return; | |
12436 | + } | |
12437 | + | |
12438 | + reply->rc_datalen = datalen; | |
12439 | + reply->rc_header.rh_length = | |
b7b72b66 | 12440 | + sizeof(struct dlm_rcom) + reply->rc_datalen - 1; |
c1c6733f AM |
12441 | + break; |
12442 | + | |
12443 | + /* | |
12444 | + * These RECCOMM messages don't need replies. | |
12445 | + */ | |
12446 | + | |
12447 | + case RECCOMM_NEWLOCKS: | |
12448 | + rebuild_rsbs_recv(ls, nodeid, rc->rc_buf, rc->rc_datalen); | |
12449 | + break; | |
12450 | + | |
12451 | + case RECCOMM_NEWLOCKIDS: | |
12452 | + rebuild_rsbs_lkids_recv(ls, nodeid, rc->rc_buf, rc->rc_datalen); | |
12453 | + break; | |
12454 | + | |
12455 | + case RECCOMM_REMRESDATA: | |
b7b72b66 | 12456 | + dlm_dir_remove(ls, nodeid, rc->rc_buf, rc->rc_datalen); |
c1c6733f AM |
12457 | + break; |
12458 | + | |
12459 | + default: | |
b7b72b66 | 12460 | + DLM_ASSERT(0, printk("cmd=%x\n", rc->rc_subcmd);); |
c1c6733f AM |
12461 | + } |
12462 | + | |
12463 | + if (reply) { | |
12464 | + if (nodeid == our_nodeid()) { | |
b7b72b66 | 12465 | + DLM_ASSERT(rc == ls->ls_rcom,); |
c1c6733f AM |
12466 | + memcpy(rc->rc_buf, reply->rc_buf, reply->rc_datalen); |
12467 | + rc->rc_datalen = reply->rc_datalen; | |
12468 | + } else { | |
12469 | + midcomms_send_message(nodeid, | |
b7b72b66 | 12470 | + (struct dlm_header *) reply, |
c1c6733f AM |
12471 | + GFP_KERNEL); |
12472 | + } | |
12473 | + | |
12474 | + if (reply != &rc_stack) | |
12475 | + free_rcom_buffer(reply); | |
12476 | + } | |
12477 | +} | |
12478 | + | |
b7b72b66 AM |
12479 | +static void process_reply_sync(struct dlm_ls *ls, uint32_t nodeid, |
12480 | + struct dlm_rcom *reply) | |
c1c6733f | 12481 | +{ |
b7b72b66 | 12482 | + struct dlm_rcom *rc = ls->ls_rcom; |
c1c6733f AM |
12483 | + |
12484 | + if (!test_bit(LSFL_RECCOMM_WAIT, &ls->ls_flags)) { | |
12485 | + log_error(ls, "unexpected rcom reply nodeid=%u", nodeid); | |
12486 | + return; | |
12487 | + } | |
12488 | + | |
12489 | + if (reply->rc_msgid != le32_to_cpu(rc->rc_msgid)) { | |
12490 | + log_error(ls, "unexpected rcom msgid %x/%x nodeid=%u", | |
12491 | + reply->rc_msgid, le32_to_cpu(rc->rc_msgid), nodeid); | |
12492 | + return; | |
12493 | + } | |
12494 | + | |
12495 | + memcpy(rc->rc_buf, reply->rc_buf, reply->rc_datalen); | |
12496 | + rc->rc_datalen = reply->rc_datalen; | |
12497 | + | |
12498 | + /* | |
12499 | + * Tell the thread waiting in rcom_send_message() that it can go ahead. | |
12500 | + */ | |
12501 | + | |
12502 | + set_bit(LSFL_RECCOMM_READY, &ls->ls_flags); | |
12503 | + wake_up(&ls->ls_wait_general); | |
12504 | +} | |
12505 | + | |
b7b72b66 AM |
12506 | +static void process_reply_async(struct dlm_ls *ls, uint32_t nodeid, |
12507 | + struct dlm_rcom *reply) | |
c1c6733f AM |
12508 | +{ |
12509 | + restbl_rsb_update_recv(ls, nodeid, reply->rc_buf, reply->rc_datalen, | |
12510 | + reply->rc_msgid); | |
12511 | +} | |
12512 | + | |
12513 | +/* | |
12514 | + * Runs in same context as midcomms. | |
12515 | + */ | |
12516 | + | |
b7b72b66 AM |
12517 | +static void rcom_process_reply(struct dlm_ls *ls, uint32_t nodeid, |
12518 | + struct dlm_rcom *reply) | |
c1c6733f | 12519 | +{ |
b7b72b66 | 12520 | + if (dlm_recovery_stopped(ls)) { |
c1c6733f AM |
12521 | + log_error(ls, "ignoring recovery reply %x from %u", |
12522 | + reply->rc_subcmd, nodeid); | |
12523 | + return; | |
12524 | + } | |
12525 | + | |
12526 | + switch (reply->rc_subcmd) { | |
12527 | + case RECCOMM_GETMASTER: | |
12528 | + process_reply_async(ls, nodeid, reply); | |
12529 | + break; | |
12530 | + case RECCOMM_STATUS: | |
12531 | + case RECCOMM_NEWLOCKS: | |
12532 | + case RECCOMM_NEWLOCKIDS: | |
12533 | + case RECCOMM_RECOVERNAMES: | |
12534 | + process_reply_sync(ls, nodeid, reply); | |
12535 | + break; | |
12536 | + default: | |
12537 | + log_error(ls, "unknown rcom reply subcmd=%x nodeid=%u", | |
12538 | + reply->rc_subcmd, nodeid); | |
12539 | + } | |
12540 | +} | |
12541 | + | |
12542 | + | |
b7b72b66 | 12543 | +static int send_ls_not_ready(uint32_t nodeid, struct dlm_header *header) |
c1c6733f AM |
12544 | +{ |
12545 | + struct writequeue_entry *wq; | |
b7b72b66 AM |
12546 | + struct dlm_rcom *rc = (struct dlm_rcom *) header; |
12547 | + struct dlm_rcom *reply; | |
c1c6733f | 12548 | + |
b7b72b66 | 12549 | + wq = lowcomms_get_buffer(nodeid, sizeof(struct dlm_rcom), GFP_KERNEL, |
c1c6733f AM |
12550 | + (char **)&reply); |
12551 | + if (!wq) | |
12552 | + return -ENOMEM; | |
12553 | + | |
12554 | + reply->rc_header.rh_cmd = GDLM_REMCMD_RECOVERREPLY; | |
12555 | + reply->rc_header.rh_lockspace = rc->rc_header.rh_lockspace; | |
12556 | + reply->rc_subcmd = rc->rc_subcmd; | |
12557 | + reply->rc_msgid = rc->rc_msgid; | |
12558 | + reply->rc_buf[0] = 0; | |
12559 | + | |
12560 | + reply->rc_datalen = 1; | |
b7b72b66 | 12561 | + reply->rc_header.rh_length = sizeof(struct dlm_rcom) + reply->rc_datalen - 1; |
c1c6733f | 12562 | + |
b7b72b66 | 12563 | + midcomms_send_buffer((struct dlm_header *)reply, wq); |
c1c6733f AM |
12564 | + return 0; |
12565 | +} | |
12566 | + | |
12567 | + | |
12568 | +/* | |
12569 | + * Runs in same context as midcomms. Both recovery requests and recovery | |
12570 | + * replies come through this function. | |
12571 | + */ | |
12572 | + | |
b7b72b66 | 12573 | +void process_recovery_comm(uint32_t nodeid, struct dlm_header *header) |
c1c6733f | 12574 | +{ |
b7b72b66 AM |
12575 | + struct dlm_ls *ls = find_lockspace_by_global_id(header->rh_lockspace); |
12576 | + struct dlm_rcom *rc = (struct dlm_rcom *) header; | |
c1c6733f AM |
12577 | + |
12578 | + /* If the lockspace doesn't exist then still send a status message | |
b7b72b66 AM |
12579 | + back; it's possible that it just doesn't have its global_id yet. */ |
12580 | + | |
c1c6733f AM |
12581 | + if (!ls) { |
12582 | + send_ls_not_ready(nodeid, header); | |
12583 | + return; | |
12584 | + } | |
12585 | + | |
12586 | + switch (header->rh_cmd) { | |
12587 | + case GDLM_REMCMD_RECOVERMESSAGE: | |
c1c6733f | 12588 | + rcom_process_message(ls, nodeid, rc); |
c1c6733f AM |
12589 | + break; |
12590 | + | |
12591 | + case GDLM_REMCMD_RECOVERREPLY: | |
12592 | + rcom_process_reply(ls, nodeid, rc); | |
12593 | + break; | |
12594 | + | |
12595 | + default: | |
b7b72b66 | 12596 | + DLM_ASSERT(0, printk("cmd=%x\n", header->rh_cmd);); |
c1c6733f | 12597 | + } |
b7b72b66 AM |
12598 | + |
12599 | + put_lockspace(ls); | |
c1c6733f AM |
12600 | +} |
12601 | + | |
12602 | diff -urN linux-orig/cluster/dlm/reccomms.h linux-patched/cluster/dlm/reccomms.h | |
12603 | --- linux-orig/cluster/dlm/reccomms.h 1970-01-01 07:30:00.000000000 +0730 | |
bb1d8b11 AM |
12604 | +++ linux-patched/cluster/dlm/reccomms.h 2004-11-03 11:31:56.000000000 +0800 |
12605 | @@ -0,0 +1,36 @@ | |
c1c6733f AM |
12606 | +/****************************************************************************** |
12607 | +******************************************************************************* | |
12608 | +** | |
12609 | +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | |
12610 | +** Copyright (C) 2004 Red Hat, Inc. All rights reserved. | |
12611 | +** | |
12612 | +** This copyrighted material is made available to anyone wishing to use, | |
12613 | +** modify, copy, or redistribute it subject to the terms and conditions | |
12614 | +** of the GNU General Public License v.2. | |
12615 | +** | |
12616 | +******************************************************************************* | |
12617 | +******************************************************************************/ | |
12618 | + | |
12619 | +#ifndef __RECCOMMS_DOT_H__ | |
12620 | +#define __RECCOMMS_DOT_H__ | |
12621 | + | |
12622 | +/* Bit flags */ | |
12623 | + | |
12624 | +#define RESDIR_VALID (1) | |
12625 | +#define RESDIR_ALL_VALID (2) | |
12626 | +#define NODES_VALID (4) | |
12627 | +#define NODES_ALL_VALID (8) | |
12628 | + | |
12629 | +#define RECCOMM_STATUS (1) | |
12630 | +#define RECCOMM_RECOVERNAMES (2) | |
12631 | +#define RECCOMM_GETMASTER (3) | |
12632 | +#define RECCOMM_BULKLOOKUP (4) | |
12633 | +#define RECCOMM_NEWLOCKS (5) | |
12634 | +#define RECCOMM_NEWLOCKIDS (6) | |
12635 | +#define RECCOMM_REMRESDATA (7) | |
12636 | + | |
b7b72b66 AM |
12637 | +int rcom_send_message(struct dlm_ls *ls, uint32_t nodeid, int type, |
12638 | + struct dlm_rcom *rc, int need_reply); | |
12639 | +void process_recovery_comm(uint32_t nodeid, struct dlm_header *header); | |
c1c6733f AM |
12640 | + |
12641 | +#endif | |
12642 | diff -urN linux-orig/cluster/dlm/recover.c linux-patched/cluster/dlm/recover.c | |
12643 | --- linux-orig/cluster/dlm/recover.c 1970-01-01 07:30:00.000000000 +0730 | |
bb1d8b11 | 12644 | +++ linux-patched/cluster/dlm/recover.c 2004-11-03 11:31:56.000000000 +0800 |
b7b72b66 | 12645 | @@ -0,0 +1,611 @@ |
c1c6733f AM |
12646 | +/****************************************************************************** |
12647 | +******************************************************************************* | |
12648 | +** | |
12649 | +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | |
12650 | +** Copyright (C) 2004 Red Hat, Inc. All rights reserved. | |
12651 | +** | |
12652 | +** This copyrighted material is made available to anyone wishing to use, | |
12653 | +** modify, copy, or redistribute it subject to the terms and conditions | |
12654 | +** of the GNU General Public License v.2. | |
12655 | +** | |
12656 | +******************************************************************************* | |
12657 | +******************************************************************************/ | |
12658 | + | |
12659 | +#include "dlm_internal.h" | |
12660 | +#include "reccomms.h" | |
12661 | +#include "dir.h" | |
12662 | +#include "locking.h" | |
12663 | +#include "rsb.h" | |
12664 | +#include "lockspace.h" | |
12665 | +#include "lkb.h" | |
12666 | +#include "nodes.h" | |
12667 | +#include "config.h" | |
12668 | +#include "ast.h" | |
12669 | +#include "memory.h" | |
12670 | + | |
12671 | +/* | |
12672 | + * Called in recovery routines to check whether the recovery process has been | |
12673 | + * interrupted/stopped by another transition. A recovery in-process will abort | |
12674 | + * if the lockspace is "stopped" so that a new recovery process can start from | |
12675 | + * the beginning when the lockspace is "started" again. | |
12676 | + */ | |
12677 | + | |
b7b72b66 | 12678 | +int dlm_recovery_stopped(struct dlm_ls *ls) |
c1c6733f AM |
12679 | +{ |
12680 | + return test_bit(LSFL_LS_STOP, &ls->ls_flags); | |
12681 | +} | |
12682 | + | |
b7b72b66 | 12683 | +static void dlm_wait_timer_fn(unsigned long data) |
c1c6733f | 12684 | +{ |
b7b72b66 | 12685 | + struct dlm_ls *ls = (struct dlm_ls *) data; |
c1c6733f AM |
12686 | + |
12687 | + wake_up(&ls->ls_wait_general); | |
12688 | +} | |
12689 | + | |
12690 | +/* | |
12691 | + * Wait until given function returns non-zero or lockspace is stopped (LS_STOP | |
12692 | + * set due to failure of a node in ls_nodes). When another function thinks it | |
12693 | + * could have completed the waited-on task, it should wake up ls_wait_general | |
12694 | + * to get an immediate response rather than waiting for the timer to detect the | |
12695 | + * result. A timer wakes us up periodically while waiting to see if we should | |
12696 | + * abort due to a node failure. | |
12697 | + */ | |
12698 | + | |
b7b72b66 | 12699 | +int dlm_wait_function(struct dlm_ls *ls, int (*testfn) (struct dlm_ls *ls)) |
c1c6733f AM |
12700 | +{ |
12701 | + struct timer_list timer; | |
12702 | + int error = 0; | |
12703 | + | |
12704 | + init_timer(&timer); | |
b7b72b66 | 12705 | + timer.function = dlm_wait_timer_fn; |
c1c6733f AM |
12706 | + timer.data = (long) ls; |
12707 | + | |
12708 | + for (;;) { | |
b7b72b66 | 12709 | + mod_timer(&timer, jiffies + (dlm_config.recover_timer * HZ)); |
c1c6733f AM |
12710 | + |
12711 | + wchan_cond_sleep_intr(ls->ls_wait_general, | |
12712 | + !testfn(ls) && | |
12713 | + !test_bit(LSFL_LS_STOP, &ls->ls_flags)); | |
12714 | + | |
12715 | + if (timer_pending(&timer)) | |
12716 | + del_timer(&timer); | |
12717 | + | |
12718 | + if (testfn(ls)) | |
12719 | + break; | |
12720 | + | |
12721 | + if (test_bit(LSFL_LS_STOP, &ls->ls_flags)) { | |
12722 | + error = -1; | |
12723 | + break; | |
12724 | + } | |
12725 | + } | |
12726 | + | |
12727 | + return error; | |
12728 | +} | |
12729 | + | |
b7b72b66 | 12730 | +int dlm_wait_status_all(struct dlm_ls *ls, unsigned int wait_status) |
c1c6733f | 12731 | +{ |
b7b72b66 AM |
12732 | + struct dlm_rcom rc_stack, *rc; |
12733 | + struct dlm_csb *csb; | |
c1c6733f AM |
12734 | + int status; |
12735 | + int error = 0; | |
12736 | + | |
b7b72b66 | 12737 | + memset(&rc_stack, 0, sizeof(struct dlm_rcom)); |
c1c6733f AM |
12738 | + rc = &rc_stack; |
12739 | + rc->rc_datalen = 0; | |
12740 | + | |
b7b72b66 | 12741 | + list_for_each_entry(csb, &ls->ls_nodes, list) { |
c1c6733f | 12742 | + for (;;) { |
b7b72b66 | 12743 | + error = dlm_recovery_stopped(ls); |
c1c6733f AM |
12744 | + if (error) |
12745 | + goto out; | |
12746 | + | |
b7b72b66 | 12747 | + error = rcom_send_message(ls, csb->node->nodeid, |
c1c6733f AM |
12748 | + RECCOMM_STATUS, rc, 1); |
12749 | + if (error) | |
12750 | + goto out; | |
12751 | + | |
12752 | + status = rc->rc_buf[0]; | |
12753 | + if (status & wait_status) | |
12754 | + break; | |
12755 | + else { | |
12756 | + set_current_state(TASK_INTERRUPTIBLE); | |
12757 | + schedule_timeout(HZ >> 1); | |
12758 | + } | |
12759 | + } | |
12760 | + } | |
12761 | + | |
12762 | + out: | |
12763 | + return error; | |
12764 | +} | |
12765 | + | |
b7b72b66 | 12766 | +int dlm_wait_status_low(struct dlm_ls *ls, unsigned int wait_status) |
c1c6733f | 12767 | +{ |
b7b72b66 | 12768 | + struct dlm_rcom rc_stack, *rc; |
c1c6733f AM |
12769 | + uint32_t nodeid = ls->ls_low_nodeid; |
12770 | + int status; | |
12771 | + int error = 0; | |
12772 | + | |
b7b72b66 | 12773 | + memset(&rc_stack, 0, sizeof(struct dlm_rcom)); |
c1c6733f AM |
12774 | + rc = &rc_stack; |
12775 | + rc->rc_datalen = 0; | |
12776 | + | |
12777 | + for (;;) { | |
b7b72b66 | 12778 | + error = dlm_recovery_stopped(ls); |
c1c6733f AM |
12779 | + if (error) |
12780 | + goto out; | |
12781 | + | |
12782 | + error = rcom_send_message(ls, nodeid, RECCOMM_STATUS, rc, 1); | |
12783 | + if (error) | |
12784 | + break; | |
12785 | + | |
12786 | + status = rc->rc_buf[0]; | |
12787 | + if (status & wait_status) | |
12788 | + break; | |
12789 | + else { | |
12790 | + set_current_state(TASK_INTERRUPTIBLE); | |
12791 | + schedule_timeout(HZ >> 1); | |
12792 | + } | |
12793 | + } | |
12794 | + | |
12795 | + out: | |
12796 | + return error; | |
12797 | +} | |
12798 | + | |
b7b72b66 | 12799 | +static int purge_queue(struct dlm_ls *ls, struct list_head *queue) |
c1c6733f | 12800 | +{ |
b7b72b66 AM |
12801 | + struct dlm_lkb *lkb, *safe; |
12802 | + struct dlm_rsb *rsb; | |
c1c6733f AM |
12803 | + int count = 0; |
12804 | + | |
12805 | + list_for_each_entry_safe(lkb, safe, queue, lkb_statequeue) { | |
12806 | + if (!lkb->lkb_nodeid) | |
12807 | + continue; | |
12808 | + | |
b7b72b66 | 12809 | + DLM_ASSERT(lkb->lkb_flags & GDLM_LKFLG_MSTCPY,); |
c1c6733f AM |
12810 | + |
12811 | + if (in_nodes_gone(ls, lkb->lkb_nodeid)) { | |
12812 | + list_del(&lkb->lkb_statequeue); | |
12813 | + | |
12814 | + rsb = lkb->lkb_resource; | |
12815 | + lkb->lkb_status = 0; | |
12816 | + | |
12817 | + if (lkb->lkb_status == GDLM_LKSTS_CONVERT | |
12818 | + && &lkb->lkb_duetime) | |
12819 | + remove_from_deadlockqueue(lkb); | |
12820 | + | |
12821 | + release_lkb(ls, lkb); | |
b7b72b66 | 12822 | + release_rsb_locked(rsb); |
c1c6733f AM |
12823 | + count++; |
12824 | + } | |
12825 | + } | |
12826 | + | |
12827 | + return count; | |
12828 | +} | |
12829 | + | |
12830 | +/* | |
12831 | + * Go through local restbl and for each rsb we're master of, clear out any | |
12832 | + * lkb's held by departed nodes. | |
12833 | + */ | |
12834 | + | |
b7b72b66 | 12835 | +int restbl_lkb_purge(struct dlm_ls *ls) |
c1c6733f AM |
12836 | +{ |
12837 | + struct list_head *tmp2, *safe2; | |
12838 | + int count = 0; | |
b7b72b66 | 12839 | + struct dlm_rsb *rootrsb, *safe, *rsb; |
c1c6733f AM |
12840 | + |
12841 | + log_all(ls, "purge locks of departed nodes"); | |
b7b72b66 | 12842 | + down_write(&ls->ls_root_lock); |
c1c6733f AM |
12843 | + |
12844 | + list_for_each_entry_safe(rootrsb, safe, &ls->ls_rootres, res_rootlist) { | |
12845 | + | |
c1c6733f AM |
12846 | + if (rootrsb->res_nodeid) |
12847 | + continue; | |
12848 | + | |
12849 | + hold_rsb(rootrsb); | |
12850 | + down_write(&rootrsb->res_lock); | |
12851 | + | |
12852 | + /* This traverses the subreslist in reverse order so we purge | |
12853 | + * the children before their parents. */ | |
12854 | + | |
12855 | + for (tmp2 = rootrsb->res_subreslist.prev, safe2 = tmp2->prev; | |
12856 | + tmp2 != &rootrsb->res_subreslist; | |
12857 | + tmp2 = safe2, safe2 = safe2->prev) { | |
b7b72b66 | 12858 | + rsb = list_entry(tmp2, struct dlm_rsb, res_subreslist); |
c1c6733f AM |
12859 | + |
12860 | + hold_rsb(rsb); | |
12861 | + purge_queue(ls, &rsb->res_grantqueue); | |
12862 | + purge_queue(ls, &rsb->res_convertqueue); | |
12863 | + purge_queue(ls, &rsb->res_waitqueue); | |
b7b72b66 | 12864 | + release_rsb_locked(rsb); |
c1c6733f AM |
12865 | + } |
12866 | + count += purge_queue(ls, &rootrsb->res_grantqueue); | |
12867 | + count += purge_queue(ls, &rootrsb->res_convertqueue); | |
12868 | + count += purge_queue(ls, &rootrsb->res_waitqueue); | |
12869 | + | |
12870 | + up_write(&rootrsb->res_lock); | |
b7b72b66 | 12871 | + release_rsb_locked(rootrsb); |
c1c6733f AM |
12872 | + } |
12873 | + | |
b7b72b66 | 12874 | + up_write(&ls->ls_root_lock); |
c1c6733f AM |
12875 | + log_all(ls, "purged %d locks", count); |
12876 | + | |
12877 | + return 0; | |
12878 | +} | |
12879 | + | |
12880 | +/* | |
12881 | + * Grant any locks that have become grantable after a purge | |
12882 | + */ | |
12883 | + | |
b7b72b66 | 12884 | +int restbl_grant_after_purge(struct dlm_ls *ls) |
c1c6733f | 12885 | +{ |
b7b72b66 | 12886 | + struct dlm_rsb *root, *rsb, *safe; |
c1c6733f AM |
12887 | + int error = 0; |
12888 | + | |
b7b72b66 | 12889 | + down_read(&ls->ls_root_lock); |
c1c6733f AM |
12890 | + |
12891 | + list_for_each_entry_safe(root, safe, &ls->ls_rootres, res_rootlist) { | |
12892 | + /* only the rsb master grants locks */ | |
12893 | + if (root->res_nodeid) | |
12894 | + continue; | |
12895 | + | |
12896 | + if (!test_bit(LSFL_LS_RUN, &ls->ls_flags)) { | |
12897 | + log_debug(ls, "restbl_grant_after_purge aborted"); | |
12898 | + error = -EINTR; | |
b7b72b66 | 12899 | + up_read(&ls->ls_root_lock); |
c1c6733f AM |
12900 | + goto out; |
12901 | + } | |
12902 | + | |
12903 | + down_write(&root->res_lock); | |
12904 | + grant_pending_locks(root); | |
12905 | + up_write(&root->res_lock); | |
12906 | + | |
12907 | + list_for_each_entry(rsb, &root->res_subreslist, res_subreslist){ | |
12908 | + down_write(&rsb->res_lock); | |
12909 | + grant_pending_locks(rsb); | |
12910 | + up_write(&rsb->res_lock); | |
12911 | + } | |
12912 | + } | |
b7b72b66 | 12913 | + up_read(&ls->ls_root_lock); |
c1c6733f AM |
12914 | + wake_astd(); |
12915 | + out: | |
12916 | + return error; | |
12917 | +} | |
12918 | + | |
12919 | +/* | |
12920 | + * Set the lock master for all LKBs in a lock queue | |
12921 | + */ | |
12922 | + | |
12923 | +static void set_lock_master(struct list_head *queue, int nodeid) | |
12924 | +{ | |
b7b72b66 | 12925 | + struct dlm_lkb *lkb; |
c1c6733f AM |
12926 | + |
12927 | + list_for_each_entry(lkb, queue, lkb_statequeue) { | |
12928 | + /* Don't muck around with pre-existing sublocks */ | |
12929 | + if (!(lkb->lkb_flags & GDLM_LKFLG_MSTCPY)) | |
12930 | + lkb->lkb_nodeid = nodeid; | |
12931 | + } | |
12932 | +} | |
12933 | + | |
b7b72b66 | 12934 | +static void set_master_lkbs(struct dlm_rsb *rsb) |
c1c6733f AM |
12935 | +{ |
12936 | + set_lock_master(&rsb->res_grantqueue, rsb->res_nodeid); | |
12937 | + set_lock_master(&rsb->res_convertqueue, rsb->res_nodeid); | |
12938 | + set_lock_master(&rsb->res_waitqueue, rsb->res_nodeid); | |
12939 | +} | |
12940 | + | |
12941 | +/* | |
12942 | + * This rsb struct is now the master so it is responsible for keeping the | |
12943 | + * latest rsb. Find if any current lkb's have an up to date copy of the lvb to | |
12944 | + * be used as the rsb copy. An equivalent step occurs as new lkb's arrive for | |
12945 | + * this rsb in deserialise_lkb. | |
12946 | + */ | |
12947 | + | |
b7b72b66 | 12948 | +static void set_rsb_lvb(struct dlm_rsb *rsb) |
c1c6733f | 12949 | +{ |
b7b72b66 | 12950 | + struct dlm_lkb *lkb; |
c1c6733f AM |
12951 | + |
12952 | + list_for_each_entry(lkb, &rsb->res_grantqueue, lkb_statequeue) { | |
12953 | + | |
12954 | + if (!(lkb->lkb_flags & GDLM_LKFLG_DELETED) && | |
12955 | + (lkb->lkb_flags & GDLM_LKFLG_VALBLK) && | |
12956 | + (lkb->lkb_grmode > DLM_LOCK_NL)) | |
12957 | + { | |
12958 | + if (!rsb->res_lvbptr) | |
12959 | + rsb->res_lvbptr = allocate_lvb(rsb->res_ls); | |
12960 | + | |
12961 | + memcpy(rsb->res_lvbptr, lkb->lkb_lvbptr, DLM_LVB_LEN); | |
12962 | + return; | |
12963 | + } | |
12964 | + } | |
12965 | + | |
12966 | + list_for_each_entry(lkb, &rsb->res_convertqueue, lkb_statequeue) { | |
12967 | + | |
12968 | + if (!(lkb->lkb_flags & GDLM_LKFLG_DELETED) && | |
12969 | + (lkb->lkb_flags & GDLM_LKFLG_VALBLK) && | |
12970 | + (lkb->lkb_grmode > DLM_LOCK_NL)) | |
12971 | + { | |
12972 | + if (!rsb->res_lvbptr) | |
12973 | + rsb->res_lvbptr = allocate_lvb(rsb->res_ls); | |
12974 | + | |
12975 | + memcpy(rsb->res_lvbptr, lkb->lkb_lvbptr, DLM_LVB_LEN); | |
12976 | + return; | |
12977 | + } | |
12978 | + } | |
12979 | +} | |
12980 | + | |
12981 | +/* | |
12982 | + * Propagate the new master nodeid to locks, subrsbs, sublocks. | |
12983 | + * The NEW_MASTER flag tells rebuild_rsbs_send() which rsb's to consider. | |
12984 | + */ | |
12985 | + | |
b7b72b66 | 12986 | +static void set_new_master(struct dlm_rsb *rsb, uint32_t nodeid) |
c1c6733f | 12987 | +{ |
b7b72b66 | 12988 | + struct dlm_rsb *subrsb; |
c1c6733f AM |
12989 | + |
12990 | + down_write(&rsb->res_lock); | |
12991 | + | |
b7b72b66 AM |
12992 | + if (nodeid == our_nodeid()) { |
12993 | + set_bit(RESFL_MASTER, &rsb->res_flags); | |
c1c6733f AM |
12994 | + rsb->res_nodeid = 0; |
12995 | + set_rsb_lvb(rsb); | |
b7b72b66 AM |
12996 | + } else |
12997 | + rsb->res_nodeid = nodeid; | |
c1c6733f AM |
12998 | + |
12999 | + set_master_lkbs(rsb); | |
13000 | + | |
13001 | + list_for_each_entry(subrsb, &rsb->res_subreslist, res_subreslist) { | |
13002 | + subrsb->res_nodeid = rsb->res_nodeid; | |
13003 | + set_master_lkbs(subrsb); | |
13004 | + } | |
13005 | + | |
13006 | + up_write(&rsb->res_lock); | |
13007 | + | |
13008 | + set_bit(RESFL_NEW_MASTER, &rsb->res_flags); | |
13009 | +} | |
13010 | + | |
13011 | +/* | |
13012 | + * The recover_list contains all the rsb's for which we've requested the new | |
13013 | + * master nodeid. As replies are returned from the resource directories the | |
13014 | + * rsb's are removed from the list. When the list is empty we're done. | |
13015 | + * | |
13016 | + * The recover_list is later similarly used for all rsb's for which we've sent | |
13017 | + * new lkb's and need to receive new corresponding lkid's. | |
13018 | + */ | |
13019 | + | |
b7b72b66 | 13020 | +int recover_list_empty(struct dlm_ls *ls) |
c1c6733f AM |
13021 | +{ |
13022 | + int empty; | |
13023 | + | |
13024 | + spin_lock(&ls->ls_recover_list_lock); | |
13025 | + empty = list_empty(&ls->ls_recover_list); | |
13026 | + spin_unlock(&ls->ls_recover_list_lock); | |
13027 | + | |
13028 | + return empty; | |
13029 | +} | |
13030 | + | |
b7b72b66 | 13031 | +int recover_list_count(struct dlm_ls *ls) |
c1c6733f AM |
13032 | +{ |
13033 | + int count; | |
13034 | + | |
13035 | + spin_lock(&ls->ls_recover_list_lock); | |
13036 | + count = ls->ls_recover_list_count; | |
13037 | + spin_unlock(&ls->ls_recover_list_lock); | |
13038 | + | |
13039 | + return count; | |
13040 | +} | |
13041 | + | |
b7b72b66 | 13042 | +void recover_list_add(struct dlm_rsb *rsb) |
c1c6733f | 13043 | +{ |
b7b72b66 | 13044 | + struct dlm_ls *ls = rsb->res_ls; |
c1c6733f AM |
13045 | + |
13046 | + spin_lock(&ls->ls_recover_list_lock); | |
13047 | + if (!test_and_set_bit(RESFL_RECOVER_LIST, &rsb->res_flags)) { | |
13048 | + list_add_tail(&rsb->res_recover_list, &ls->ls_recover_list); | |
13049 | + ls->ls_recover_list_count++; | |
13050 | + hold_rsb(rsb); | |
13051 | + } | |
13052 | + spin_unlock(&ls->ls_recover_list_lock); | |
13053 | +} | |
13054 | + | |
b7b72b66 | 13055 | +void recover_list_del(struct dlm_rsb *rsb) |
c1c6733f | 13056 | +{ |
b7b72b66 | 13057 | + struct dlm_ls *ls = rsb->res_ls; |
c1c6733f AM |
13058 | + |
13059 | + spin_lock(&ls->ls_recover_list_lock); | |
13060 | + clear_bit(RESFL_RECOVER_LIST, &rsb->res_flags); | |
13061 | + list_del(&rsb->res_recover_list); | |
13062 | + ls->ls_recover_list_count--; | |
13063 | + spin_unlock(&ls->ls_recover_list_lock); | |
13064 | + | |
13065 | + release_rsb(rsb); | |
13066 | +} | |
13067 | + | |
b7b72b66 | 13068 | +static struct dlm_rsb *recover_list_find(struct dlm_ls *ls, int msgid) |
c1c6733f | 13069 | +{ |
b7b72b66 | 13070 | + struct dlm_rsb *rsb = NULL; |
c1c6733f AM |
13071 | + |
13072 | + spin_lock(&ls->ls_recover_list_lock); | |
13073 | + | |
13074 | + list_for_each_entry(rsb, &ls->ls_recover_list, res_recover_list) { | |
13075 | + if (rsb->res_recover_msgid == msgid) | |
13076 | + goto rec_found; | |
13077 | + } | |
13078 | + rsb = NULL; | |
13079 | + | |
13080 | + rec_found: | |
13081 | + spin_unlock(&ls->ls_recover_list_lock); | |
13082 | + return rsb; | |
13083 | +} | |
13084 | + | |
b7b72b66 | 13085 | +static int rsb_master_lookup(struct dlm_rsb *rsb, struct dlm_rcom *rc) |
c1c6733f | 13086 | +{ |
b7b72b66 AM |
13087 | + struct dlm_ls *ls = rsb->res_ls; |
13088 | + uint32_t dir_nodeid, r_nodeid; | |
c1c6733f AM |
13089 | + int error; |
13090 | + | |
13091 | + dir_nodeid = get_directory_nodeid(rsb); | |
13092 | + | |
13093 | + if (dir_nodeid == our_nodeid()) { | |
b7b72b66 AM |
13094 | + error = dlm_dir_lookup(ls, dir_nodeid, rsb->res_name, |
13095 | + rsb->res_length, &r_nodeid); | |
13096 | + if (error == -EEXIST) { | |
13097 | + log_all(ls, "rsb_master_lookup %u EEXIST %s", | |
13098 | + r_nodeid, rsb->res_name); | |
13099 | + } else if (error) | |
c1c6733f AM |
13100 | + goto fail; |
13101 | + | |
b7b72b66 | 13102 | + set_new_master(rsb, r_nodeid); |
c1c6733f AM |
13103 | + } else { |
13104 | + /* As we are the only thread doing recovery this | |
13105 | + should be safe. if not then we need to use a different | |
13106 | + ID somehow. We must set it in the RSB before rcom_send_msg | |
13107 | + completes cos we may get a reply quite quickly. | |
13108 | + */ | |
13109 | + rsb->res_recover_msgid = ls->ls_rcom_msgid + 1; | |
13110 | + | |
13111 | + recover_list_add(rsb); | |
13112 | + | |
13113 | + memcpy(rc->rc_buf, rsb->res_name, rsb->res_length); | |
13114 | + rc->rc_datalen = rsb->res_length; | |
13115 | + | |
13116 | + error = rcom_send_message(ls, dir_nodeid, RECCOMM_GETMASTER, | |
13117 | + rc, 0); | |
13118 | + if (error) | |
13119 | + goto fail; | |
13120 | + } | |
13121 | + | |
b7b72b66 | 13122 | + fail: |
c1c6733f AM |
13123 | + return error; |
13124 | +} | |
13125 | + | |
b7b72b66 AM |
13126 | +static int needs_update(struct dlm_ls *ls, struct dlm_rsb *r) |
13127 | +{ | |
13128 | + if (!r->res_nodeid) | |
13129 | + return FALSE; | |
13130 | + | |
13131 | + if (r->res_nodeid == -1) | |
13132 | + return FALSE; | |
13133 | + | |
13134 | + if (in_nodes_gone(ls, r->res_nodeid)) | |
13135 | + return TRUE; | |
13136 | + | |
13137 | + return FALSE; | |
13138 | +} | |
13139 | + | |
c1c6733f AM |
13140 | +/* |
13141 | + * Go through local root resources and for each rsb which has a master which | |
13142 | + * has departed, get the new master nodeid from the resdir. The resdir will | |
13143 | + * assign mastery to the first node to look up the new master. That means | |
13144 | + * we'll discover in this lookup if we're the new master of any rsb's. | |
13145 | + * | |
13146 | + * We fire off all the resdir requests individually and asynchronously to the | |
13147 | + * correct resdir node. The replies are processed in rsb_master_recv(). | |
13148 | + */ | |
13149 | + | |
b7b72b66 | 13150 | +int restbl_rsb_update(struct dlm_ls *ls) |
c1c6733f | 13151 | +{ |
b7b72b66 AM |
13152 | + struct dlm_rsb *rsb, *safe; |
13153 | + struct dlm_rcom *rc; | |
c1c6733f AM |
13154 | + int error = -ENOMEM; |
13155 | + int count = 0; | |
13156 | + | |
13157 | + log_all(ls, "update remastered resources"); | |
13158 | + | |
13159 | + rc = allocate_rcom_buffer(ls); | |
13160 | + if (!rc) | |
13161 | + goto out; | |
13162 | + | |
b7b72b66 | 13163 | + down_read(&ls->ls_root_lock); |
c1c6733f | 13164 | + |
b7b72b66 AM |
13165 | + list_for_each_entry_safe(rsb, safe, &ls->ls_rootres, res_rootlist) { |
13166 | + error = dlm_recovery_stopped(ls); | |
13167 | + if (error) { | |
13168 | + up_read(&ls->ls_root_lock); | |
c1c6733f | 13169 | + goto out_free; |
b7b72b66 | 13170 | + } |
c1c6733f | 13171 | + |
b7b72b66 | 13172 | + if (needs_update(ls, rsb)) { |
c1c6733f | 13173 | + error = rsb_master_lookup(rsb, rc); |
b7b72b66 AM |
13174 | + if (error) { |
13175 | + up_read(&ls->ls_root_lock); | |
c1c6733f | 13176 | + goto out_free; |
b7b72b66 | 13177 | + } |
c1c6733f AM |
13178 | + count++; |
13179 | + } | |
13180 | + } | |
b7b72b66 | 13181 | + up_read(&ls->ls_root_lock); |
c1c6733f | 13182 | + |
b7b72b66 | 13183 | + error = dlm_wait_function(ls, &recover_list_empty); |
c1c6733f AM |
13184 | + |
13185 | + log_all(ls, "updated %d resources", count); | |
b7b72b66 | 13186 | + out_free: |
c1c6733f | 13187 | + free_rcom_buffer(rc); |
b7b72b66 | 13188 | + out: |
c1c6733f AM |
13189 | + return error; |
13190 | +} | |
13191 | + | |
b7b72b66 AM |
13192 | +int restbl_rsb_update_recv(struct dlm_ls *ls, uint32_t nodeid, char *buf, |
13193 | + int length, int msgid) | |
c1c6733f | 13194 | +{ |
b7b72b66 | 13195 | + struct dlm_rsb *rsb; |
c1c6733f AM |
13196 | + uint32_t be_nodeid; |
13197 | + | |
13198 | + rsb = recover_list_find(ls, msgid); | |
13199 | + if (!rsb) { | |
13200 | + log_error(ls, "restbl_rsb_update_recv rsb not found %d", msgid); | |
13201 | + goto out; | |
13202 | + } | |
13203 | + | |
13204 | + memcpy(&be_nodeid, buf, sizeof(uint32_t)); | |
b7b72b66 | 13205 | + set_new_master(rsb, be32_to_cpu(be_nodeid)); |
c1c6733f AM |
13206 | + recover_list_del(rsb); |
13207 | + | |
13208 | + if (recover_list_empty(ls)) | |
13209 | + wake_up(&ls->ls_wait_general); | |
13210 | + | |
b7b72b66 | 13211 | + out: |
c1c6733f AM |
13212 | + return 0; |
13213 | +} | |
13214 | + | |
13215 | +/* | |
13216 | + * This function not used any longer. | |
13217 | + */ | |
13218 | + | |
b7b72b66 | 13219 | +int bulk_master_lookup(struct dlm_ls *ls, int nodeid, char *inbuf, int inlen, |
c1c6733f AM |
13220 | + char *outbuf) |
13221 | +{ | |
13222 | + char *inbufptr, *outbufptr; | |
13223 | + | |
13224 | + /* | |
13225 | + * The other node wants nodeids matching the resource names in inbuf. | |
13226 | + * The resource names are packed into inbuf as | |
13227 | + * [len1][name1][len2][name2]... where lenX is 1 byte and nameX is | |
13228 | + * lenX bytes. Matching nodeids are packed into outbuf in order | |
13229 | + * [nodeid1][nodeid2]... | |
13230 | + */ | |
13231 | + | |
13232 | + inbufptr = inbuf; | |
13233 | + outbufptr = outbuf; | |
13234 | + | |
13235 | + while (inbufptr < inbuf + inlen) { | |
b7b72b66 | 13236 | + uint32_t r_nodeid, be_nodeid; |
c1c6733f AM |
13237 | + int status; |
13238 | + | |
b7b72b66 AM |
13239 | + status = dlm_dir_lookup(ls, nodeid, inbufptr + 1, *inbufptr, |
13240 | + &r_nodeid); | |
c1c6733f AM |
13241 | + if (status != 0) |
13242 | + goto fail; | |
13243 | + | |
13244 | + inbufptr += *inbufptr + 1; | |
13245 | + | |
b7b72b66 | 13246 | + be_nodeid = cpu_to_be32(r_nodeid); |
c1c6733f AM |
13247 | + memcpy(outbufptr, &be_nodeid, sizeof(uint32_t)); |
13248 | + outbufptr += sizeof(uint32_t); | |
13249 | + | |
13250 | + /* add assertion that outbufptr - outbuf is not > than ... */ | |
13251 | + } | |
13252 | + | |
13253 | + return (outbufptr - outbuf); | |
b7b72b66 | 13254 | + fail: |
c1c6733f AM |
13255 | + return -1; |
13256 | +} | |
13257 | diff -urN linux-orig/cluster/dlm/recover.h linux-patched/cluster/dlm/recover.h | |
13258 | --- linux-orig/cluster/dlm/recover.h 1970-01-01 07:30:00.000000000 +0730 | |
bb1d8b11 | 13259 | +++ linux-patched/cluster/dlm/recover.h 2004-11-03 11:31:56.000000000 +0800 |
b7b72b66 | 13260 | @@ -0,0 +1,33 @@ |
c1c6733f AM |
13261 | +/****************************************************************************** |
13262 | +******************************************************************************* | |
13263 | +** | |
13264 | +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | |
13265 | +** Copyright (C) 2004 Red Hat, Inc. All rights reserved. | |
13266 | +** | |
13267 | +** This copyrighted material is made available to anyone wishing to use, | |
13268 | +** modify, copy, or redistribute it subject to the terms and conditions | |
13269 | +** of the GNU General Public License v.2. | |
13270 | +** | |
13271 | +******************************************************************************* | |
13272 | +******************************************************************************/ | |
13273 | + | |
13274 | +#ifndef __RECOVER_DOT_H__ | |
13275 | +#define __RECOVER_DOT_H__ | |
13276 | + | |
b7b72b66 AM |
13277 | +int dlm_wait_function(struct dlm_ls *ls, int (*testfn) (struct dlm_ls * ls)); |
13278 | +int dlm_wait_status_all(struct dlm_ls *ls, unsigned int wait_status); | |
13279 | +int dlm_wait_status_low(struct dlm_ls *ls, unsigned int wait_status); | |
13280 | +int dlm_recovery_stopped(struct dlm_ls *ls); | |
13281 | +int recover_list_empty(struct dlm_ls *ls); | |
13282 | +int recover_list_count(struct dlm_ls *ls); | |
13283 | +void recover_list_add(struct dlm_rsb *rsb); | |
13284 | +void recover_list_del(struct dlm_rsb *rsb); | |
13285 | +int restbl_lkb_purge(struct dlm_ls *ls); | |
13286 | +void restbl_grant_after_purge(struct dlm_ls *ls); | |
13287 | +int restbl_rsb_update(struct dlm_ls *ls); | |
13288 | +int restbl_rsb_update_recv(struct dlm_ls *ls, int nodeid, char *buf, int len, | |
c1c6733f | 13289 | + int msgid); |
b7b72b66 | 13290 | +int bulk_master_lookup(struct dlm_ls *ls, int nodeid, char *inbuf, int inlen, |
c1c6733f AM |
13291 | + char *outbuf); |
13292 | + | |
13293 | +#endif /* __RECOVER_DOT_H__ */ | |
13294 | diff -urN linux-orig/cluster/dlm/recoverd.c linux-patched/cluster/dlm/recoverd.c | |
13295 | --- linux-orig/cluster/dlm/recoverd.c 1970-01-01 07:30:00.000000000 +0730 | |
bb1d8b11 | 13296 | +++ linux-patched/cluster/dlm/recoverd.c 2004-11-03 11:31:56.000000000 +0800 |
c783755a | 13297 | @@ -0,0 +1,713 @@ |
c1c6733f AM |
13298 | +/****************************************************************************** |
13299 | +******************************************************************************* | |
13300 | +** | |
13301 | +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | |
13302 | +** Copyright (C) 2004 Red Hat, Inc. All rights reserved. | |
b7b72b66 | 13303 | +** |
c1c6733f AM |
13304 | +** This copyrighted material is made available to anyone wishing to use, |
13305 | +** modify, copy, or redistribute it subject to the terms and conditions | |
13306 | +** of the GNU General Public License v.2. | |
13307 | +** | |
13308 | +******************************************************************************* | |
13309 | +******************************************************************************/ | |
13310 | + | |
13311 | +#include "dlm_internal.h" | |
13312 | +#include "nodes.h" | |
13313 | +#include "dir.h" | |
13314 | +#include "ast.h" | |
13315 | +#include "recover.h" | |
13316 | +#include "lockspace.h" | |
13317 | +#include "lowcomms.h" | |
13318 | +#include "lockqueue.h" | |
13319 | +#include "lkb.h" | |
13320 | +#include "rebuild.h" | |
13321 | + | |
b7b72b66 | 13322 | +/* |
c1c6733f AM |
13323 | + * next_move actions |
13324 | + */ | |
13325 | + | |
13326 | +#define DO_STOP (1) | |
13327 | +#define DO_START (2) | |
13328 | +#define DO_FINISH (3) | |
13329 | +#define DO_FINISH_STOP (4) | |
13330 | +#define DO_FINISH_START (5) | |
13331 | + | |
b7b72b66 AM |
13332 | +/* |
13333 | + * Queue of lockspaces (dlm_recover structs) which need to be | |
c1c6733f AM |
13334 | + * started/recovered |
13335 | + */ | |
13336 | + | |
b7b72b66 | 13337 | +static int enable_locking(struct dlm_ls *ls, int event_id) |
c1c6733f AM |
13338 | +{ |
13339 | + int error = 0; | |
13340 | + | |
13341 | + spin_lock(&ls->ls_recover_lock); | |
13342 | + if (ls->ls_last_stop < event_id) { | |
13343 | + set_bit(LSFL_LS_RUN, &ls->ls_flags); | |
13344 | + up_write(&ls->ls_in_recovery); | |
13345 | + } else { | |
13346 | + error = -EINTR; | |
13347 | + log_debug(ls, "enable_locking: abort %d", event_id); | |
13348 | + } | |
13349 | + spin_unlock(&ls->ls_recover_lock); | |
13350 | + return error; | |
13351 | +} | |
13352 | + | |
b7b72b66 | 13353 | +static int ls_first_start(struct dlm_ls *ls, struct dlm_recover *rv) |
c1c6733f AM |
13354 | +{ |
13355 | + int error; | |
13356 | + | |
b7b72b66 | 13357 | + log_all(ls, "recover event %u (first)", rv->event_id); |
c1c6733f AM |
13358 | + |
13359 | + kcl_global_service_id(ls->ls_local_id, &ls->ls_global_id); | |
13360 | + | |
b7b72b66 | 13361 | + error = ls_nodes_init(ls, rv); |
c1c6733f AM |
13362 | + if (error) { |
13363 | + log_error(ls, "nodes_init failed %d", error); | |
13364 | + goto out; | |
13365 | + } | |
13366 | + | |
b7b72b66 | 13367 | + error = dlm_dir_rebuild_local(ls); |
c1c6733f | 13368 | + if (error) { |
b7b72b66 | 13369 | + log_error(ls, "dlm_dir_rebuild_local failed %d", error); |
c1c6733f AM |
13370 | + goto out; |
13371 | + } | |
13372 | + | |
b7b72b66 | 13373 | + error = dlm_dir_rebuild_wait(ls); |
c1c6733f | 13374 | + if (error) { |
b7b72b66 | 13375 | + log_error(ls, "dlm_dir_rebuild_wait failed %d", error); |
c1c6733f AM |
13376 | + goto out; |
13377 | + } | |
13378 | + | |
b7b72b66 AM |
13379 | + log_all(ls, "recover event %u done", rv->event_id); |
13380 | + kcl_start_done(ls->ls_local_id, rv->event_id); | |
c1c6733f | 13381 | + |
c783755a | 13382 | + out: |
c1c6733f AM |
13383 | + return error; |
13384 | +} | |
13385 | + | |
b7b72b66 | 13386 | +/* |
c1c6733f AM |
13387 | + * We are given here a new group of nodes which are in the lockspace. We first |
13388 | + * figure out the differences in ls membership from when we were last running. | |
13389 | + * If nodes from before are gone, then there will be some lock recovery to do. | |
13390 | + * If there are only nodes which have joined, then there's no lock recovery. | |
13391 | + * | |
13392 | + * note: cman requires an rc to finish starting on an revent (where nodes die) | |
13393 | + * before it allows an sevent (where nodes join) to be processed. This means | |
13394 | + * that we won't get start1 with nodeA gone, stop/cancel, start2 with nodeA | |
13395 | + * joined. | |
13396 | + */ | |
13397 | + | |
b7b72b66 | 13398 | +static int ls_reconfig(struct dlm_ls *ls, struct dlm_recover *rv) |
c1c6733f AM |
13399 | +{ |
13400 | + int error, neg = 0; | |
13401 | + | |
b7b72b66 | 13402 | + log_all(ls, "recover event %u", rv->event_id); |
c1c6733f | 13403 | + |
b7b72b66 AM |
13404 | + /* |
13405 | + * this list may be left over from a previous aborted recovery | |
13406 | + */ | |
13407 | + | |
13408 | + rebuild_freemem(ls); | |
13409 | + | |
13410 | + /* | |
c1c6733f AM |
13411 | + * Add or remove nodes from the lockspace's ls_nodes list. |
13412 | + */ | |
13413 | + | |
b7b72b66 | 13414 | + error = ls_nodes_reconfig(ls, rv, &neg); |
c1c6733f AM |
13415 | + if (error) { |
13416 | + log_error(ls, "nodes_reconfig failed %d", error); | |
13417 | + goto fail; | |
13418 | + } | |
13419 | + | |
b7b72b66 | 13420 | + /* |
c1c6733f AM |
13421 | + * Rebuild our own share of the resdir by collecting from all other |
13422 | + * nodes rsb name/master pairs for which the name hashes to us. | |
13423 | + */ | |
13424 | + | |
b7b72b66 | 13425 | + error = dlm_dir_rebuild_local(ls); |
c1c6733f | 13426 | + if (error) { |
b7b72b66 | 13427 | + log_error(ls, "dlm_dir_rebuild_local failed %d", error); |
c1c6733f AM |
13428 | + goto fail; |
13429 | + } | |
13430 | + | |
b7b72b66 | 13431 | + /* |
c1c6733f AM |
13432 | + * Purge resdir-related requests that are being held in requestqueue. |
13433 | + * All resdir requests from before recovery started are invalid now due | |
13434 | + * to the resdir rebuild and will be resent by the requesting nodes. | |
13435 | + */ | |
13436 | + | |
13437 | + purge_requestqueue(ls); | |
13438 | + set_bit(LSFL_REQUEST_WARN, &ls->ls_flags); | |
13439 | + | |
b7b72b66 | 13440 | + /* |
c1c6733f AM |
13441 | + * Wait for all nodes to complete resdir rebuild. |
13442 | + */ | |
13443 | + | |
b7b72b66 | 13444 | + error = dlm_dir_rebuild_wait(ls); |
c1c6733f | 13445 | + if (error) { |
b7b72b66 | 13446 | + log_error(ls, "dlm_dir_rebuild_wait failed %d", error); |
c1c6733f AM |
13447 | + goto fail; |
13448 | + } | |
13449 | + | |
b7b72b66 | 13450 | + /* |
c1c6733f AM |
13451 | + * Mark our own lkb's waiting in the lockqueue for remote replies from |
13452 | + * nodes that are now departed. These will be resent to the new | |
13453 | + * masters in resend_cluster_requests. Also mark resdir lookup | |
13454 | + * requests for resending. | |
13455 | + */ | |
13456 | + | |
13457 | + lockqueue_lkb_mark(ls); | |
13458 | + | |
b7b72b66 | 13459 | + error = dlm_recovery_stopped(ls); |
c1c6733f AM |
13460 | + if (error) |
13461 | + goto fail; | |
13462 | + | |
13463 | + if (neg) { | |
b7b72b66 | 13464 | + /* |
c1c6733f AM |
13465 | + * Clear lkb's for departed nodes. This can't fail since it |
13466 | + * doesn't involve communicating with other nodes. | |
13467 | + */ | |
13468 | + | |
c1c6733f | 13469 | + restbl_lkb_purge(ls); |
c1c6733f | 13470 | + |
b7b72b66 | 13471 | + /* |
c1c6733f AM |
13472 | + * Get new master id's for rsb's of departed nodes. This fails |
13473 | + * if we can't communicate with other nodes. | |
13474 | + */ | |
13475 | + | |
13476 | + error = restbl_rsb_update(ls); | |
13477 | + if (error) { | |
13478 | + log_error(ls, "restbl_rsb_update failed %d", error); | |
b7b72b66 | 13479 | + goto fail; |
c1c6733f AM |
13480 | + } |
13481 | + | |
b7b72b66 | 13482 | + /* |
c1c6733f AM |
13483 | + * Send our lkb info to new masters. This fails if we can't |
13484 | + * communicate with a node. | |
13485 | + */ | |
13486 | + | |
13487 | + error = rebuild_rsbs_send(ls); | |
13488 | + if (error) { | |
13489 | + log_error(ls, "rebuild_rsbs_send failed %d", error); | |
b7b72b66 | 13490 | + goto fail; |
c1c6733f | 13491 | + } |
c783755a | 13492 | + } |
c1c6733f AM |
13493 | + |
13494 | + clear_bit(LSFL_REQUEST_WARN, &ls->ls_flags); | |
13495 | + | |
b7b72b66 AM |
13496 | + log_all(ls, "recover event %u done", rv->event_id); |
13497 | + kcl_start_done(ls->ls_local_id, rv->event_id); | |
c1c6733f AM |
13498 | + return 0; |
13499 | + | |
c1c6733f | 13500 | + fail: |
b7b72b66 | 13501 | + log_all(ls, "recover event %d error %d", rv->event_id, error); |
c1c6733f AM |
13502 | + return error; |
13503 | +} | |
13504 | + | |
b7b72b66 | 13505 | +static void clear_finished_nodes(struct dlm_ls *ls, int finish_event) |
c1c6733f | 13506 | +{ |
b7b72b66 | 13507 | + struct dlm_csb *csb, *safe; |
c1c6733f | 13508 | + |
b7b72b66 AM |
13509 | + list_for_each_entry_safe(csb, safe, &ls->ls_nodes_gone, list) { |
13510 | + if (csb->gone_event <= finish_event) { | |
13511 | + list_del(&csb->list); | |
c1c6733f AM |
13512 | + release_csb(csb); |
13513 | + } | |
13514 | + } | |
13515 | +} | |
13516 | + | |
b7b72b66 | 13517 | +/* |
c1c6733f AM |
13518 | + * Between calls to this routine for a ls, there can be multiple stop/start |
13519 | + * events from cman where every start but the latest is cancelled by stops. | |
13520 | + * There can only be a single finish from cman because every finish requires us | |
13521 | + * to call start_done. A single finish event could be followed by multiple | |
13522 | + * stop/start events. This routine takes any combination of events from cman | |
13523 | + * and boils them down to one course of action. | |
13524 | + */ | |
13525 | + | |
b7b72b66 AM |
13526 | +static int next_move(struct dlm_ls *ls, struct dlm_recover **rv_out, |
13527 | + int *finish_out) | |
c1c6733f AM |
13528 | +{ |
13529 | + LIST_HEAD(events); | |
13530 | + unsigned int cmd = 0, stop, start, finish; | |
13531 | + unsigned int last_stop, last_start, last_finish; | |
b7b72b66 | 13532 | + struct dlm_recover *rv = NULL, *start_rv = NULL; |
c1c6733f | 13533 | + |
b7b72b66 | 13534 | + /* |
c1c6733f AM |
13535 | + * Grab the current state of cman/sm events. |
13536 | + */ | |
13537 | + | |
13538 | + spin_lock(&ls->ls_recover_lock); | |
13539 | + | |
13540 | + stop = test_and_clear_bit(LSFL_LS_STOP, &ls->ls_flags) ? 1 : 0; | |
13541 | + start = test_and_clear_bit(LSFL_LS_START, &ls->ls_flags) ? 1 : 0; | |
13542 | + finish = test_and_clear_bit(LSFL_LS_FINISH, &ls->ls_flags) ? 1 : 0; | |
13543 | + | |
13544 | + last_stop = ls->ls_last_stop; | |
13545 | + last_start = ls->ls_last_start; | |
13546 | + last_finish = ls->ls_last_finish; | |
13547 | + | |
13548 | + while (!list_empty(&ls->ls_recover)) { | |
b7b72b66 AM |
13549 | + rv = list_entry(ls->ls_recover.next, struct dlm_recover, list); |
13550 | + list_del(&rv->list); | |
13551 | + list_add_tail(&rv->list, &events); | |
13552 | + } | |
13553 | + | |
c783755a AM |
13554 | + /* |
13555 | + * There are two cases where we need to adjust these event values: | |
13556 | + * 1. - we get a first start | |
13557 | + * - we get a stop | |
13558 | + * - we process the start + stop here and notice this special case | |
13559 | + * | |
13560 | + * 2. - we get a first start | |
13561 | + * - we process the start | |
13562 | + * - we get a stop | |
13563 | + * - we process the stop here and notice this special case | |
13564 | + * | |
13565 | + * In both cases, the first start we received was aborted by a | |
13566 | + * stop before we received a finish. last_finish being zero is the | |
13567 | + * indication that this is the "first" start, i.e. we've not yet | |
13568 | + * finished a start; if we had, last_finish would be non-zero. | |
13569 | + * Part of the problem arises from the fact that when we initially | |
13570 | + * get start/stop/start, SM uses the same event id for both starts | |
13571 | + * (since the first was cancelled). | |
13572 | + * | |
13573 | + * In both cases, last_start and last_stop will be equal. | |
13574 | + * In both cases, finish=0. | |
13575 | + * In the first case start=1 && stop=1. | |
13576 | + * In the second case start=0 && stop=1. | |
13577 | + * | |
13578 | + * In both cases, we need to make adjustments to values so: | |
13579 | + * - we process the current event (now) as a normal stop | |
13580 | + * - the next start we receive will be processed normally | |
13581 | + * (taking into account the assertions below) | |
13582 | + * | |
13583 | + * In the first case, dlm_ls_start() will have printed the | |
13584 | + * "repeated start" warning. | |
13585 | + * | |
13586 | + * In the first case we need to get rid of the recover event struct. | |
13587 | + * | |
13588 | + * - set stop=1, start=0, finish=0 for case 4 below | |
13589 | + * - last_stop and last_start must be set equal per the case 4 assert | |
13590 | + * - ls_last_stop = 0 so the next start will be larger | |
13591 | + * - ls_last_start = 0 not really necessary (avoids dlm_ls_start print) | |
13592 | + */ | |
b7b72b66 | 13593 | + |
c783755a AM |
13594 | + if (!last_finish && (last_start == last_stop)) { |
13595 | + log_all(ls, "move reset %u,%u,%u ids %u,%u,%u", stop, | |
13596 | + start, finish, last_stop, last_start, last_finish); | |
13597 | + stop = 1; | |
13598 | + start = 0; | |
13599 | + finish = 0; | |
b7b72b66 AM |
13600 | + last_stop = 0; |
13601 | + last_start = 0; | |
c783755a AM |
13602 | + ls->ls_last_stop = 0; |
13603 | + ls->ls_last_start = 0; | |
13604 | + | |
13605 | + while (!list_empty(&events)) { | |
13606 | + rv = list_entry(events.next, struct dlm_recover, list); | |
13607 | + list_del(&rv->list); | |
13608 | + kfree(rv->nodeids); | |
13609 | + kfree(rv); | |
13610 | + } | |
c1c6733f AM |
13611 | + } |
13612 | + spin_unlock(&ls->ls_recover_lock); | |
13613 | + | |
13614 | + log_debug(ls, "move flags %u,%u,%u ids %u,%u,%u", stop, start, finish, | |
13615 | + last_stop, last_start, last_finish); | |
13616 | + | |
b7b72b66 | 13617 | + /* |
c1c6733f AM |
13618 | + * Toss start events which have since been cancelled. |
13619 | + */ | |
13620 | + | |
13621 | + while (!list_empty(&events)) { | |
b7b72b66 AM |
13622 | + DLM_ASSERT(start,); |
13623 | + rv = list_entry(events.next, struct dlm_recover, list); | |
13624 | + list_del(&rv->list); | |
13625 | + | |
13626 | + if (rv->event_id <= last_stop) { | |
13627 | + log_debug(ls, "move skip event %u", rv->event_id); | |
13628 | + kfree(rv->nodeids); | |
13629 | + kfree(rv); | |
13630 | + rv = NULL; | |
c1c6733f | 13631 | + } else { |
b7b72b66 AM |
13632 | + log_debug(ls, "move use event %u", rv->event_id); |
13633 | + DLM_ASSERT(!start_rv,); | |
13634 | + start_rv = rv; | |
c1c6733f AM |
13635 | + } |
13636 | + } | |
13637 | + | |
b7b72b66 | 13638 | + /* |
c1c6733f AM |
13639 | + * Eight possible combinations of events. |
13640 | + */ | |
13641 | + | |
13642 | + /* 0 */ | |
13643 | + if (!stop && !start && !finish) { | |
b7b72b66 | 13644 | + DLM_ASSERT(!start_rv,); |
c1c6733f AM |
13645 | + cmd = 0; |
13646 | + goto out; | |
13647 | + } | |
13648 | + | |
13649 | + /* 1 */ | |
13650 | + if (!stop && !start && finish) { | |
b7b72b66 AM |
13651 | + DLM_ASSERT(!start_rv,); |
13652 | + DLM_ASSERT(last_start > last_stop,); | |
13653 | + DLM_ASSERT(last_finish == last_start,); | |
c1c6733f AM |
13654 | + cmd = DO_FINISH; |
13655 | + *finish_out = last_finish; | |
13656 | + goto out; | |
13657 | + } | |
13658 | + | |
13659 | + /* 2 */ | |
13660 | + if (!stop && start && !finish) { | |
b7b72b66 AM |
13661 | + DLM_ASSERT(start_rv,); |
13662 | + DLM_ASSERT(last_start > last_stop,); | |
c1c6733f | 13663 | + cmd = DO_START; |
b7b72b66 | 13664 | + *rv_out = start_rv; |
c1c6733f AM |
13665 | + goto out; |
13666 | + } | |
13667 | + | |
13668 | + /* 3 */ | |
13669 | + if (!stop && start && finish) { | |
b7b72b66 | 13670 | + DLM_ASSERT(0, printk("finish and start with no stop\n");); |
c1c6733f AM |
13671 | + } |
13672 | + | |
13673 | + /* 4 */ | |
13674 | + if (stop && !start && !finish) { | |
b7b72b66 AM |
13675 | + DLM_ASSERT(!start_rv,); |
13676 | + DLM_ASSERT(last_start == last_stop,); | |
c1c6733f AM |
13677 | + cmd = DO_STOP; |
13678 | + goto out; | |
13679 | + } | |
13680 | + | |
13681 | + /* 5 */ | |
13682 | + if (stop && !start && finish) { | |
b7b72b66 AM |
13683 | + DLM_ASSERT(!start_rv,); |
13684 | + DLM_ASSERT(last_finish == last_start,); | |
13685 | + DLM_ASSERT(last_stop == last_start,); | |
c1c6733f AM |
13686 | + cmd = DO_FINISH_STOP; |
13687 | + *finish_out = last_finish; | |
13688 | + goto out; | |
13689 | + } | |
13690 | + | |
13691 | + /* 6 */ | |
13692 | + if (stop && start && !finish) { | |
b7b72b66 AM |
13693 | + if (start_rv) { |
13694 | + DLM_ASSERT(last_start > last_stop,); | |
c1c6733f | 13695 | + cmd = DO_START; |
b7b72b66 | 13696 | + *rv_out = start_rv; |
c1c6733f | 13697 | + } else { |
b7b72b66 | 13698 | + DLM_ASSERT(last_stop == last_start,); |
c1c6733f AM |
13699 | + cmd = DO_STOP; |
13700 | + } | |
13701 | + goto out; | |
13702 | + } | |
13703 | + | |
13704 | + /* 7 */ | |
13705 | + if (stop && start && finish) { | |
b7b72b66 AM |
13706 | + if (start_rv) { |
13707 | + DLM_ASSERT(last_start > last_stop,); | |
13708 | + DLM_ASSERT(last_start > last_finish,); | |
c1c6733f AM |
13709 | + cmd = DO_FINISH_START; |
13710 | + *finish_out = last_finish; | |
b7b72b66 | 13711 | + *rv_out = start_rv; |
c1c6733f | 13712 | + } else { |
b7b72b66 AM |
13713 | + DLM_ASSERT(last_start == last_stop,); |
13714 | + DLM_ASSERT(last_start > last_finish,); | |
c1c6733f AM |
13715 | + cmd = DO_FINISH_STOP; |
13716 | + *finish_out = last_finish; | |
13717 | + } | |
13718 | + goto out; | |
13719 | + } | |
13720 | + | |
c783755a | 13721 | + out: |
c1c6733f AM |
13722 | + return cmd; |
13723 | +} | |
13724 | + | |
b7b72b66 | 13725 | +/* |
c1c6733f AM |
13726 | + * This function decides what to do given every combination of current |
13727 | + * lockspace state and next lockspace state. | |
13728 | + */ | |
13729 | + | |
b7b72b66 | 13730 | +static void do_ls_recovery(struct dlm_ls *ls) |
c1c6733f | 13731 | +{ |
b7b72b66 | 13732 | + struct dlm_recover *rv = NULL; |
c1c6733f AM |
13733 | + int error, cur_state, next_state = 0, do_now, finish_event = 0; |
13734 | + | |
b7b72b66 | 13735 | + do_now = next_move(ls, &rv, &finish_event); |
c1c6733f AM |
13736 | + if (!do_now) |
13737 | + goto out; | |
13738 | + | |
13739 | + cur_state = ls->ls_state; | |
13740 | + next_state = 0; | |
13741 | + | |
b7b72b66 | 13742 | + DLM_ASSERT(!test_bit(LSFL_LS_RUN, &ls->ls_flags), |
c1c6733f AM |
13743 | + log_error(ls, "curstate=%d donow=%d", cur_state, do_now);); |
13744 | + | |
b7b72b66 | 13745 | + /* |
c1c6733f AM |
13746 | + * LSST_CLEAR - we're not in any recovery state. We can get a stop or |
13747 | + * a stop and start which equates with a START. | |
13748 | + */ | |
13749 | + | |
13750 | + if (cur_state == LSST_CLEAR) { | |
13751 | + switch (do_now) { | |
13752 | + case DO_STOP: | |
13753 | + next_state = LSST_WAIT_START; | |
13754 | + break; | |
13755 | + | |
13756 | + case DO_START: | |
b7b72b66 | 13757 | + error = ls_reconfig(ls, rv); |
c1c6733f AM |
13758 | + if (error) |
13759 | + next_state = LSST_WAIT_START; | |
13760 | + else | |
13761 | + next_state = LSST_RECONFIG_DONE; | |
13762 | + break; | |
13763 | + | |
13764 | + case DO_FINISH: /* invalid */ | |
13765 | + case DO_FINISH_STOP: /* invalid */ | |
13766 | + case DO_FINISH_START: /* invalid */ | |
13767 | + default: | |
b7b72b66 | 13768 | + DLM_ASSERT(0,); |
c1c6733f AM |
13769 | + } |
13770 | + goto out; | |
13771 | + } | |
13772 | + | |
b7b72b66 | 13773 | + /* |
c1c6733f AM |
13774 | + * LSST_WAIT_START - we're not running because of getting a stop or |
13775 | + * failing a start. We wait in this state for another stop/start or | |
13776 | + * just the next start to begin another reconfig attempt. | |
13777 | + */ | |
13778 | + | |
13779 | + if (cur_state == LSST_WAIT_START) { | |
13780 | + switch (do_now) { | |
13781 | + case DO_STOP: | |
13782 | + break; | |
13783 | + | |
13784 | + case DO_START: | |
b7b72b66 | 13785 | + error = ls_reconfig(ls, rv); |
c1c6733f AM |
13786 | + if (error) |
13787 | + next_state = LSST_WAIT_START; | |
13788 | + else | |
13789 | + next_state = LSST_RECONFIG_DONE; | |
13790 | + break; | |
13791 | + | |
13792 | + case DO_FINISH: /* invalid */ | |
13793 | + case DO_FINISH_STOP: /* invalid */ | |
13794 | + case DO_FINISH_START: /* invalid */ | |
13795 | + default: | |
b7b72b66 | 13796 | + DLM_ASSERT(0,); |
c1c6733f AM |
13797 | + } |
13798 | + goto out; | |
13799 | + } | |
13800 | + | |
b7b72b66 | 13801 | + /* |
c1c6733f AM |
13802 | + * LSST_RECONFIG_DONE - we entered this state after successfully |
13803 | + * completing ls_reconfig and calling kcl_start_done. We expect to get | |
13804 | + * a finish if everything goes ok. A finish could be followed by stop | |
13805 | + * or stop/start before we get here to check it. Or a finish may never | |
13806 | + * happen, only stop or stop/start. | |
13807 | + */ | |
13808 | + | |
13809 | + if (cur_state == LSST_RECONFIG_DONE) { | |
13810 | + switch (do_now) { | |
13811 | + case DO_FINISH: | |
b7b72b66 AM |
13812 | + rebuild_freemem(ls); |
13813 | + | |
c1c6733f AM |
13814 | + clear_finished_nodes(ls, finish_event); |
13815 | + next_state = LSST_CLEAR; | |
13816 | + | |
13817 | + error = enable_locking(ls, finish_event); | |
13818 | + if (error) | |
13819 | + break; | |
13820 | + | |
13821 | + error = process_requestqueue(ls); | |
13822 | + if (error) | |
13823 | + break; | |
13824 | + | |
13825 | + error = resend_cluster_requests(ls); | |
13826 | + if (error) | |
13827 | + break; | |
13828 | + | |
13829 | + restbl_grant_after_purge(ls); | |
13830 | + | |
13831 | + log_all(ls, "recover event %u finished", finish_event); | |
13832 | + break; | |
13833 | + | |
13834 | + case DO_STOP: | |
13835 | + next_state = LSST_WAIT_START; | |
13836 | + break; | |
13837 | + | |
13838 | + case DO_FINISH_STOP: | |
13839 | + clear_finished_nodes(ls, finish_event); | |
13840 | + next_state = LSST_WAIT_START; | |
13841 | + break; | |
13842 | + | |
13843 | + case DO_FINISH_START: | |
13844 | + clear_finished_nodes(ls, finish_event); | |
13845 | + /* fall into DO_START */ | |
13846 | + | |
13847 | + case DO_START: | |
b7b72b66 | 13848 | + error = ls_reconfig(ls, rv); |
c1c6733f AM |
13849 | + if (error) |
13850 | + next_state = LSST_WAIT_START; | |
13851 | + else | |
13852 | + next_state = LSST_RECONFIG_DONE; | |
13853 | + break; | |
13854 | + | |
13855 | + default: | |
b7b72b66 | 13856 | + DLM_ASSERT(0,); |
c1c6733f AM |
13857 | + } |
13858 | + goto out; | |
13859 | + } | |
13860 | + | |
b7b72b66 | 13861 | + /* |
c1c6733f AM |
13862 | + * LSST_INIT - state after ls is created and before it has been |
13863 | + * started. A start operation will cause the ls to be started for the | |
13864 | + * first time. A failed start will cause us to just wait in INIT for | |
13865 | + * another stop/start. | |
13866 | + */ | |
13867 | + | |
13868 | + if (cur_state == LSST_INIT) { | |
13869 | + switch (do_now) { | |
13870 | + case DO_START: | |
b7b72b66 | 13871 | + error = ls_first_start(ls, rv); |
c1c6733f AM |
13872 | + if (!error) |
13873 | + next_state = LSST_INIT_DONE; | |
13874 | + break; | |
13875 | + | |
13876 | + case DO_STOP: | |
13877 | + break; | |
13878 | + | |
13879 | + case DO_FINISH: /* invalid */ | |
13880 | + case DO_FINISH_STOP: /* invalid */ | |
13881 | + case DO_FINISH_START: /* invalid */ | |
13882 | + default: | |
b7b72b66 | 13883 | + DLM_ASSERT(0,); |
c1c6733f AM |
13884 | + } |
13885 | + goto out; | |
13886 | + } | |
13887 | + | |
b7b72b66 | 13888 | + /* |
c1c6733f AM |
13889 | + * LSST_INIT_DONE - after the first start operation is completed |
13890 | + * successfully and kcl_start_done() called. If there are no errors, a | |
13891 | + * finish will arrive next and we'll move to LSST_CLEAR. | |
13892 | + */ | |
13893 | + | |
13894 | + if (cur_state == LSST_INIT_DONE) { | |
13895 | + switch (do_now) { | |
13896 | + case DO_STOP: | |
13897 | + case DO_FINISH_STOP: | |
13898 | + next_state = LSST_WAIT_START; | |
13899 | + break; | |
13900 | + | |
13901 | + case DO_START: | |
13902 | + case DO_FINISH_START: | |
b7b72b66 | 13903 | + error = ls_reconfig(ls, rv); |
c1c6733f AM |
13904 | + if (error) |
13905 | + next_state = LSST_WAIT_START; | |
13906 | + else | |
13907 | + next_state = LSST_RECONFIG_DONE; | |
13908 | + break; | |
13909 | + | |
13910 | + case DO_FINISH: | |
13911 | + next_state = LSST_CLEAR; | |
c783755a | 13912 | + |
c1c6733f | 13913 | + enable_locking(ls, finish_event); |
c783755a AM |
13914 | + |
13915 | + process_requestqueue(ls); | |
13916 | + | |
c1c6733f AM |
13917 | + log_all(ls, "recover event %u finished", finish_event); |
13918 | + break; | |
13919 | + | |
13920 | + default: | |
b7b72b66 | 13921 | + DLM_ASSERT(0,); |
c1c6733f AM |
13922 | + } |
13923 | + goto out; | |
13924 | + } | |
13925 | + | |
c783755a | 13926 | + out: |
c1c6733f AM |
13927 | + if (next_state) |
13928 | + ls->ls_state = next_state; | |
13929 | + | |
b7b72b66 AM |
13930 | + if (rv) { |
13931 | + kfree(rv->nodeids); | |
13932 | + kfree(rv); | |
c1c6733f AM |
13933 | + } |
13934 | +} | |
13935 | + | |
b7b72b66 | 13936 | +int dlm_recoverd(void *arg) |
c1c6733f | 13937 | +{ |
b7b72b66 | 13938 | + struct dlm_ls *ls = arg; |
c1c6733f | 13939 | + |
b7b72b66 | 13940 | + hold_lockspace(ls); |
c1c6733f | 13941 | + |
c783755a | 13942 | + for (;;) { |
b7b72b66 AM |
13943 | + set_current_state(TASK_INTERRUPTIBLE); |
13944 | + if (!test_bit(LSFL_WORK, &ls->ls_flags)) | |
13945 | + schedule(); | |
13946 | + set_current_state(TASK_RUNNING); | |
c1c6733f | 13947 | + |
c783755a AM |
13948 | + if (test_bit(LSFL_RECOVERD_EXIT, &ls->ls_flags)) { |
13949 | + down(&ls->ls_recoverd_lock); | |
13950 | + ls->ls_recoverd_task = NULL; | |
13951 | + up(&ls->ls_recoverd_lock); | |
13952 | + goto out; | |
13953 | + } | |
13954 | + | |
b7b72b66 | 13955 | + if (test_and_clear_bit(LSFL_WORK, &ls->ls_flags)) { |
c1c6733f | 13956 | + do_ls_recovery(ls); |
c783755a AM |
13957 | + |
13958 | + down(&ls->ls_recoverd_lock); | |
13959 | + if (ls->ls_state == LSST_CLEAR && | |
13960 | + !test_bit(LSFL_WORK, &ls->ls_flags)) { | |
13961 | + ls->ls_recoverd_task = NULL; | |
13962 | + up(&ls->ls_recoverd_lock); | |
13963 | + goto out; | |
13964 | + } | |
13965 | + up(&ls->ls_recoverd_lock); | |
b7b72b66 | 13966 | + } |
c1c6733f AM |
13967 | + } |
13968 | + | |
c783755a | 13969 | + out: |
b7b72b66 | 13970 | + put_lockspace(ls); |
c1c6733f AM |
13971 | + return 0; |
13972 | +} | |
13973 | + | |
b7b72b66 | 13974 | +void dlm_recoverd_kick(struct dlm_ls *ls) |
c1c6733f | 13975 | +{ |
b7b72b66 | 13976 | + struct task_struct *p; |
c1c6733f | 13977 | + |
c783755a | 13978 | + down(&ls->ls_recoverd_lock); |
b7b72b66 | 13979 | + set_bit(LSFL_WORK, &ls->ls_flags); |
c783755a AM |
13980 | + |
13981 | + if (!ls->ls_recoverd_task) { | |
d3b4771f | 13982 | + p = kthread_run(dlm_recoverd, (void *) ls, 0, "dlm_recoverd"); |
b7b72b66 | 13983 | + if (IS_ERR(p)) { |
c783755a AM |
13984 | + log_error(ls, "can't start dlm_recoverd %ld", |
13985 | + PTR_ERR(p)); | |
13986 | + goto out; | |
b7b72b66 | 13987 | + } |
b7b72b66 | 13988 | + ls->ls_recoverd_task = p; |
c783755a AM |
13989 | + } else |
13990 | + wake_up_process(ls->ls_recoverd_task); | |
13991 | + out: | |
13992 | + up(&ls->ls_recoverd_lock); | |
13993 | +} | |
c1c6733f | 13994 | + |
c783755a AM |
13995 | +void dlm_recoverd_stop(struct dlm_ls *ls) |
13996 | +{ | |
13997 | + set_bit(LSFL_RECOVERD_EXIT, &ls->ls_flags); | |
13998 | + | |
13999 | + for (;;) { | |
14000 | + down(&ls->ls_recoverd_lock); | |
14001 | + if (!ls->ls_recoverd_task) { | |
14002 | + up(&ls->ls_recoverd_lock); | |
14003 | + break; | |
14004 | + } | |
14005 | + wake_up_process(ls->ls_recoverd_task); | |
14006 | + up(&ls->ls_recoverd_lock); | |
14007 | + msleep(100); | |
b7b72b66 | 14008 | + } |
c1c6733f | 14009 | +} |
c783755a | 14010 | + |
c1c6733f AM |
14011 | diff -urN linux-orig/cluster/dlm/recoverd.h linux-patched/cluster/dlm/recoverd.h |
14012 | --- linux-orig/cluster/dlm/recoverd.h 1970-01-01 07:30:00.000000000 +0730 | |
bb1d8b11 | 14013 | +++ linux-patched/cluster/dlm/recoverd.h 2004-11-03 11:31:56.000000000 +0800 |
c783755a | 14014 | @@ -0,0 +1,21 @@ |
c1c6733f AM |
14015 | +/****************************************************************************** |
14016 | +******************************************************************************* | |
14017 | +** | |
14018 | +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | |
14019 | +** Copyright (C) 2004 Red Hat, Inc. All rights reserved. | |
14020 | +** | |
14021 | +** This copyrighted material is made available to anyone wishing to use, | |
14022 | +** modify, copy, or redistribute it subject to the terms and conditions | |
14023 | +** of the GNU General Public License v.2. | |
14024 | +** | |
14025 | +******************************************************************************* | |
14026 | +******************************************************************************/ | |
14027 | + | |
14028 | +#ifndef __RECOVERD_DOT_H__ | |
14029 | +#define __RECOVERD_DOT_H__ | |
14030 | + | |
b7b72b66 AM |
14031 | +int dlm_recoverd(void *arg); |
14032 | +void dlm_recoverd_kick(struct dlm_ls *ls); | |
c783755a | 14033 | +void dlm_recoverd_stop(struct dlm_ls *ls); |
c1c6733f AM |
14034 | + |
14035 | +#endif /* __RECOVERD_DOT_H__ */ | |
14036 | diff -urN linux-orig/cluster/dlm/rsb.c linux-patched/cluster/dlm/rsb.c | |
14037 | --- linux-orig/cluster/dlm/rsb.c 1970-01-01 07:30:00.000000000 +0730 | |
bb1d8b11 | 14038 | +++ linux-patched/cluster/dlm/rsb.c 2004-11-03 11:31:56.000000000 +0800 |
b7b72b66 | 14039 | @@ -0,0 +1,329 @@ |
c1c6733f AM |
14040 | +/****************************************************************************** |
14041 | +******************************************************************************* | |
14042 | +** | |
14043 | +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | |
14044 | +** Copyright (C) 2004 Red Hat, Inc. All rights reserved. | |
14045 | +** | |
14046 | +** This copyrighted material is made available to anyone wishing to use, | |
14047 | +** modify, copy, or redistribute it subject to the terms and conditions | |
14048 | +** of the GNU General Public License v.2. | |
14049 | +** | |
14050 | +******************************************************************************* | |
14051 | +******************************************************************************/ | |
14052 | + | |
14053 | +#include "dlm_internal.h" | |
14054 | +#include "locking.h" | |
14055 | +#include "memory.h" | |
14056 | +#include "lockqueue.h" | |
14057 | +#include "nodes.h" | |
14058 | +#include "dir.h" | |
14059 | +#include "util.h" | |
b7b72b66 | 14060 | +#include "rsb.h" |
c1c6733f | 14061 | + |
b7b72b66 AM |
14062 | +static struct dlm_rsb *search_hashchain(struct list_head *head, |
14063 | + struct dlm_rsb *parent, | |
14064 | + char *name, int namelen) | |
c1c6733f | 14065 | +{ |
b7b72b66 | 14066 | + struct dlm_rsb *r; |
c1c6733f AM |
14067 | + |
14068 | + list_for_each_entry(r, head, res_hashchain) { | |
14069 | + if ((parent == r->res_parent) && (namelen == r->res_length) && | |
14070 | + (memcmp(name, r->res_name, namelen) == 0)) { | |
c1c6733f AM |
14071 | + return r; |
14072 | + } | |
14073 | + } | |
14074 | + | |
14075 | + return NULL; | |
14076 | +} | |
14077 | + | |
14078 | +/* | |
14079 | + * A way to arbitrarily hold onto an rsb which we already have a reference to | |
14080 | + * to make sure it doesn't go away. Opposite of release_rsb(). | |
14081 | + */ | |
14082 | + | |
b7b72b66 | 14083 | +void hold_rsb(struct dlm_rsb *r) |
c1c6733f AM |
14084 | +{ |
14085 | + atomic_inc(&r->res_ref); | |
14086 | +} | |
14087 | + | |
14088 | +/* | |
14089 | + * release_rsb() - Decrement reference count on rsb struct. Free the rsb | |
14090 | + * struct when there are zero references. Every lkb for the rsb adds a | |
14091 | + * reference. When ref is zero there can be no more lkbs for the rsb, on the | |
14092 | + * queues or anywhere else. | |
14093 | + */ | |
14094 | + | |
b7b72b66 | 14095 | +static void _release_rsb(struct dlm_rsb *r, int locked) |
c1c6733f | 14096 | +{ |
b7b72b66 AM |
14097 | + struct dlm_ls *ls = r->res_ls; |
14098 | + uint32_t nodeid; | |
c1c6733f AM |
14099 | + int removed = FALSE; |
14100 | + | |
b7b72b66 AM |
14101 | + write_lock(&ls->ls_rsbtbl[r->res_bucket].lock); |
14102 | + if (atomic_dec_and_test(&r->res_ref)) { | |
14103 | + DLM_ASSERT(list_empty(&r->res_grantqueue), print_rsb(r);); | |
14104 | + DLM_ASSERT(list_empty(&r->res_waitqueue), print_rsb(r);); | |
14105 | + DLM_ASSERT(list_empty(&r->res_convertqueue), print_rsb(r);); | |
c1c6733f AM |
14106 | + removed = TRUE; |
14107 | + list_del(&r->res_hashchain); | |
14108 | + } | |
b7b72b66 | 14109 | + write_unlock(&ls->ls_rsbtbl[r->res_bucket].lock); |
c1c6733f | 14110 | + |
b7b72b66 AM |
14111 | + if (!removed) |
14112 | + return; | |
c1c6733f | 14113 | + |
b7b72b66 AM |
14114 | + if (!locked) |
14115 | + down_write(&ls->ls_root_lock); | |
14116 | + if (r->res_parent) | |
14117 | + list_del(&r->res_subreslist); | |
14118 | + else | |
14119 | + list_del(&r->res_rootlist); | |
14120 | + if (!locked) | |
14121 | + up_write(&ls->ls_root_lock); | |
14122 | + | |
14123 | + if (r->res_parent || !test_bit(RESFL_MASTER, &r->res_flags)) | |
14124 | + goto out; | |
c1c6733f | 14125 | + |
b7b72b66 | 14126 | + nodeid = get_directory_nodeid(r); |
c1c6733f | 14127 | + |
b7b72b66 AM |
14128 | + if (nodeid != our_nodeid()) |
14129 | + remote_remove_direntry(ls, nodeid, r->res_name, r->res_length); | |
14130 | + else | |
14131 | + dlm_dir_remove(ls, nodeid, r->res_name, r->res_length); | |
14132 | + out: | |
14133 | + if (r->res_lvbptr) | |
14134 | + free_lvb(r->res_lvbptr); | |
14135 | + | |
14136 | + free_rsb(r); | |
14137 | +} | |
14138 | + | |
14139 | +void release_rsb(struct dlm_rsb *r) | |
14140 | +{ | |
14141 | + _release_rsb(r, 0); | |
14142 | +} | |
14143 | + | |
14144 | +void release_rsb_locked(struct dlm_rsb *r) | |
14145 | +{ | |
14146 | + _release_rsb(r, 1); | |
14147 | +} | |
14148 | + | |
14149 | +struct dlm_rsb *find_rsb_to_unlock(struct dlm_ls *ls, struct dlm_lkb *lkb) | |
14150 | +{ | |
14151 | + struct dlm_rsb *r = lkb->lkb_resource; | |
14152 | + return r; | |
c1c6733f AM |
14153 | +} |
14154 | + | |
14155 | +/* | |
14156 | + * find_rsb() - Get an rsb struct, or create one if it doesn't exist (only if | |
14157 | + * CREATE is set or a parent is given). An existing rsb has its ref count | |
14158 | + * incremented by this function; a new one is created with a ref count of one. | |
14159 | + */ | |
14160 | + | |
b7b72b66 AM |
14161 | +int find_rsb(struct dlm_ls *ls, struct dlm_rsb *parent, char *name, int len, |
14162 | + int flags, struct dlm_rsb **rp) | |
c1c6733f | 14163 | +{ |
b7b72b66 AM |
14164 | + uint32_t bucket; |
14165 | + struct dlm_rsb *r, *tmp; | |
c1c6733f AM |
14166 | + int error = -ENOMEM; |
14167 | + | |
b7b72b66 | 14168 | + DLM_ASSERT(len <= DLM_RESNAME_MAXLEN,); |
c1c6733f | 14169 | + |
b7b72b66 AM |
14170 | + bucket = dlm_hash(name, len); |
14171 | + bucket &= (ls->ls_rsbtbl_size - 1); | |
c1c6733f | 14172 | + |
b7b72b66 AM |
14173 | + read_lock(&ls->ls_rsbtbl[bucket].lock); |
14174 | + r = search_hashchain(&ls->ls_rsbtbl[bucket].list, parent, name, len); | |
14175 | + if (r) { | |
14176 | + if (r->res_nodeid != 0 && (flags & MASTER)) | |
14177 | + r = NULL; | |
14178 | + else | |
14179 | + atomic_inc(&r->res_ref); | |
14180 | + } | |
14181 | + read_unlock(&ls->ls_rsbtbl[bucket].lock); | |
c1c6733f AM |
14182 | + |
14183 | + if (r) | |
14184 | + goto out_set; | |
b7b72b66 AM |
14185 | + |
14186 | + /* Always create sublocks */ | |
14187 | + if (!(flags & CREATE) && !parent) { | |
c1c6733f AM |
14188 | + *rp = NULL; |
14189 | + goto out; | |
14190 | + } | |
14191 | + | |
b7b72b66 | 14192 | + r = allocate_rsb(ls, len); |
c1c6733f AM |
14193 | + if (!r) |
14194 | + goto fail; | |
14195 | + | |
14196 | + INIT_LIST_HEAD(&r->res_subreslist); | |
14197 | + INIT_LIST_HEAD(&r->res_grantqueue); | |
14198 | + INIT_LIST_HEAD(&r->res_convertqueue); | |
14199 | + INIT_LIST_HEAD(&r->res_waitqueue); | |
14200 | + | |
b7b72b66 AM |
14201 | + memcpy(r->res_name, name, len); |
14202 | + r->res_length = len; | |
c1c6733f AM |
14203 | + r->res_ls = ls; |
14204 | + init_rwsem(&r->res_lock); | |
14205 | + atomic_set(&r->res_ref, 1); | |
b7b72b66 | 14206 | + r->res_bucket = bucket; |
c1c6733f AM |
14207 | + |
14208 | + if (parent) { | |
14209 | + r->res_parent = parent; | |
14210 | + r->res_depth = parent->res_depth + 1; | |
14211 | + r->res_root = parent->res_root; | |
14212 | + r->res_nodeid = parent->res_nodeid; | |
14213 | + } else { | |
14214 | + r->res_parent = NULL; | |
14215 | + r->res_depth = 1; | |
14216 | + r->res_root = r; | |
14217 | + r->res_nodeid = -1; | |
14218 | + } | |
14219 | + | |
b7b72b66 AM |
14220 | + write_lock(&ls->ls_rsbtbl[bucket].lock); |
14221 | + tmp = search_hashchain(&ls->ls_rsbtbl[bucket].list, parent, name, len); | |
c1c6733f | 14222 | + if (tmp) { |
b7b72b66 AM |
14223 | + atomic_inc(&tmp->res_ref); |
14224 | + write_unlock(&ls->ls_rsbtbl[bucket].lock); | |
c1c6733f AM |
14225 | + free_rsb(r); |
14226 | + r = tmp; | |
14227 | + } else { | |
b7b72b66 AM |
14228 | + list_add(&r->res_hashchain, &ls->ls_rsbtbl[bucket].list); |
14229 | + write_unlock(&ls->ls_rsbtbl[bucket].lock); | |
c1c6733f | 14230 | + |
b7b72b66 | 14231 | + down_write(&ls->ls_root_lock); |
c1c6733f AM |
14232 | + if (parent) |
14233 | + list_add_tail(&r->res_subreslist, | |
14234 | + &r->res_root->res_subreslist); | |
14235 | + else | |
14236 | + list_add(&r->res_rootlist, &ls->ls_rootres); | |
b7b72b66 | 14237 | + up_write(&ls->ls_root_lock); |
c1c6733f AM |
14238 | + } |
14239 | + | |
14240 | + out_set: | |
14241 | + *rp = r; | |
14242 | + | |
14243 | + out: | |
14244 | + error = 0; | |
14245 | + | |
14246 | + fail: | |
14247 | + return error; | |
14248 | +} | |
14249 | + | |
14250 | +/* | |
14251 | + * Add an LKB to a resource's grant/convert/wait queue, in order. | |
14252 | + */ | |
14253 | + | |
14254 | +void lkb_add_ordered(struct list_head *new, struct list_head *head, int mode) | |
14255 | +{ | |
b7b72b66 | 14256 | + struct dlm_lkb *lkb = NULL; |
c1c6733f AM |
14257 | + |
14258 | + list_for_each_entry(lkb, head, lkb_statequeue) { | |
14259 | + if (lkb->lkb_rqmode < mode) | |
14260 | + break; | |
14261 | + } | |
14262 | + | |
14263 | + if (!lkb) { | |
14264 | + /* No entries in the queue, we are alone */ | |
14265 | + list_add_tail(new, head); | |
14266 | + } else { | |
14267 | + __list_add(new, lkb->lkb_statequeue.prev, &lkb->lkb_statequeue); | |
14268 | + } | |
14269 | +} | |
14270 | + | |
14271 | +/* | |
14272 | + * The rsb res_lock must be held in write when this function is called. | |
14273 | + */ | |
14274 | + | |
b7b72b66 | 14275 | +void lkb_enqueue(struct dlm_rsb *r, struct dlm_lkb *lkb, int type) |
c1c6733f | 14276 | +{ |
b7b72b66 AM |
14277 | + DLM_ASSERT(!lkb->lkb_status, |
14278 | + print_lkb(lkb); | |
14279 | + print_rsb(r);); | |
c1c6733f AM |
14280 | + |
14281 | + lkb->lkb_status = type; | |
14282 | + | |
14283 | + switch (type) { | |
14284 | + case GDLM_LKSTS_WAITING: | |
b7b72b66 AM |
14285 | + if (lkb->lkb_lockqueue_flags & DLM_LKF_HEADQUE) |
14286 | + list_add(&lkb->lkb_statequeue, &r->res_waitqueue); | |
14287 | + else | |
14288 | + list_add_tail(&lkb->lkb_statequeue, &r->res_waitqueue); | |
c1c6733f AM |
14289 | + break; |
14290 | + | |
14291 | + case GDLM_LKSTS_GRANTED: | |
14292 | + lkb_add_ordered(&lkb->lkb_statequeue, &r->res_grantqueue, | |
14293 | + lkb->lkb_grmode); | |
14294 | + break; | |
14295 | + | |
14296 | + case GDLM_LKSTS_CONVERT: | |
b7b72b66 AM |
14297 | + if (lkb->lkb_lockqueue_flags & DLM_LKF_HEADQUE) |
14298 | + list_add(&lkb->lkb_statequeue, &r->res_convertqueue); | |
c1c6733f | 14299 | + else |
b7b72b66 AM |
14300 | + list_add_tail(&lkb->lkb_statequeue, |
14301 | + &r->res_convertqueue); | |
c1c6733f AM |
14302 | + break; |
14303 | + | |
14304 | + default: | |
b7b72b66 | 14305 | + DLM_ASSERT(0,); |
c1c6733f AM |
14306 | + } |
14307 | +} | |
14308 | + | |
b7b72b66 | 14309 | +void res_lkb_enqueue(struct dlm_rsb *r, struct dlm_lkb *lkb, int type) |
c1c6733f AM |
14310 | +{ |
14311 | + down_write(&r->res_lock); | |
14312 | + lkb_enqueue(r, lkb, type); | |
14313 | + up_write(&r->res_lock); | |
14314 | +} | |
14315 | + | |
14316 | +/* | |
14317 | + * The rsb res_lock must be held in write when this function is called. | |
14318 | + */ | |
14319 | + | |
b7b72b66 | 14320 | +int lkb_dequeue(struct dlm_lkb *lkb) |
c1c6733f AM |
14321 | +{ |
14322 | + int status = lkb->lkb_status; | |
14323 | + | |
14324 | + if (!status) | |
14325 | + goto out; | |
14326 | + | |
14327 | + lkb->lkb_status = 0; | |
14328 | + list_del(&lkb->lkb_statequeue); | |
14329 | + | |
14330 | + out: | |
14331 | + return status; | |
14332 | +} | |
14333 | + | |
b7b72b66 | 14334 | +int res_lkb_dequeue(struct dlm_lkb *lkb) |
c1c6733f AM |
14335 | +{ |
14336 | + int status; | |
14337 | + | |
14338 | + down_write(&lkb->lkb_resource->res_lock); | |
14339 | + status = lkb_dequeue(lkb); | |
14340 | + up_write(&lkb->lkb_resource->res_lock); | |
14341 | + | |
14342 | + return status; | |
14343 | +} | |
14344 | + | |
14345 | +/* | |
14346 | + * The rsb res_lock must be held in write when this function is called. | |
14347 | + */ | |
14348 | + | |
b7b72b66 | 14349 | +int lkb_swqueue(struct dlm_rsb *r, struct dlm_lkb *lkb, int type) |
c1c6733f AM |
14350 | +{ |
14351 | + int status; | |
14352 | + | |
14353 | + status = lkb_dequeue(lkb); | |
14354 | + lkb_enqueue(r, lkb, type); | |
14355 | + | |
14356 | + return status; | |
14357 | +} | |
14358 | + | |
b7b72b66 | 14359 | +int res_lkb_swqueue(struct dlm_rsb *r, struct dlm_lkb *lkb, int type) |
c1c6733f AM |
14360 | +{ |
14361 | + int status; | |
14362 | + | |
14363 | + down_write(&r->res_lock); | |
14364 | + status = lkb_swqueue(r, lkb, type); | |
14365 | + up_write(&r->res_lock); | |
14366 | + | |
14367 | + return status; | |
14368 | +} | |
14369 | diff -urN linux-orig/cluster/dlm/rsb.h linux-patched/cluster/dlm/rsb.h | |
14370 | --- linux-orig/cluster/dlm/rsb.h 1970-01-01 07:30:00.000000000 +0730 | |
bb1d8b11 | 14371 | +++ linux-patched/cluster/dlm/rsb.h 2004-11-03 11:31:56.000000000 +0800 |
b7b72b66 | 14372 | @@ -0,0 +1,34 @@ |
c1c6733f AM |
14373 | +/****************************************************************************** |
14374 | +******************************************************************************* | |
14375 | +** | |
14376 | +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | |
14377 | +** Copyright (C) 2004 Red Hat, Inc. All rights reserved. | |
14378 | +** | |
14379 | +** This copyrighted material is made available to anyone wishing to use, | |
14380 | +** modify, copy, or redistribute it subject to the terms and conditions | |
14381 | +** of the GNU General Public License v.2. | |
14382 | +** | |
14383 | +******************************************************************************* | |
14384 | +******************************************************************************/ | |
14385 | + | |
14386 | +#ifndef __RSB_DOT_H__ | |
14387 | +#define __RSB_DOT_H__ | |
14388 | + | |
b7b72b66 AM |
14389 | +#define CREATE 1 |
14390 | +#define MASTER 2 | |
14391 | + | |
c1c6733f | 14392 | +void lkb_add_ordered(struct list_head *new, struct list_head *head, int mode); |
b7b72b66 AM |
14393 | +void release_rsb(struct dlm_rsb *r); |
14394 | +void release_rsb_locked(struct dlm_rsb *r); | |
14395 | +void hold_rsb(struct dlm_rsb *r); | |
14396 | +int find_rsb(struct dlm_ls *ls, struct dlm_rsb *parent, char *name, | |
14397 | + int namelen, int flags, struct dlm_rsb **rp); | |
14398 | +struct dlm_rsb *find_rsb_to_unlock(struct dlm_ls *ls, struct dlm_lkb *lkb); | |
14399 | +void lkb_enqueue(struct dlm_rsb *r, struct dlm_lkb *lkb, int type); | |
14400 | +void res_lkb_enqueue(struct dlm_rsb *r, struct dlm_lkb *lkb, int type); | |
14401 | +int lkb_dequeue(struct dlm_lkb *lkb); | |
14402 | +int res_lkb_dequeue(struct dlm_lkb *lkb); | |
14403 | +int lkb_swqueue(struct dlm_rsb *r, struct dlm_lkb *lkb, int type); | |
14404 | +int res_lkb_swqueue(struct dlm_rsb *r, struct dlm_lkb *lkb, int type); | |
c1c6733f AM |
14405 | + |
14406 | +#endif /* __RSB_DOT_H__ */ | |
14407 | diff -urN linux-orig/cluster/dlm/util.c linux-patched/cluster/dlm/util.c | |
14408 | --- linux-orig/cluster/dlm/util.c 1970-01-01 07:30:00.000000000 +0730 | |
bb1d8b11 | 14409 | +++ linux-patched/cluster/dlm/util.c 2004-11-03 11:31:56.000000000 +0800 |
b7b72b66 | 14410 | @@ -0,0 +1,183 @@ |
c1c6733f AM |
14411 | +/****************************************************************************** |
14412 | +******************************************************************************* | |
14413 | +** | |
14414 | +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | |
14415 | +** Copyright (C) 2004 Red Hat, Inc. All rights reserved. | |
14416 | +** | |
14417 | +** This copyrighted material is made available to anyone wishing to use, | |
14418 | +** modify, copy, or redistribute it subject to the terms and conditions | |
14419 | +** of the GNU General Public License v.2. | |
14420 | +** | |
14421 | +******************************************************************************* | |
14422 | +******************************************************************************/ | |
14423 | + | |
14424 | +#include "dlm_internal.h" | |
14425 | + | |
14426 | +static const uint32_t crc_32_tab[] = { | |
14427 | + 0x00000000, 0x77073096, 0xee0e612c, 0x990951ba, 0x076dc419, 0x706af48f, | |
14428 | + 0xe963a535, 0x9e6495a3, | |
14429 | + 0x0edb8832, 0x79dcb8a4, 0xe0d5e91e, 0x97d2d988, 0x09b64c2b, 0x7eb17cbd, | |
14430 | + 0xe7b82d07, 0x90bf1d91, | |
14431 | + 0x1db71064, 0x6ab020f2, 0xf3b97148, 0x84be41de, 0x1adad47d, 0x6ddde4eb, | |
14432 | + 0xf4d4b551, 0x83d385c7, | |
14433 | + 0x136c9856, 0x646ba8c0, 0xfd62f97a, 0x8a65c9ec, 0x14015c4f, 0x63066cd9, | |
14434 | + 0xfa0f3d63, 0x8d080df5, | |
14435 | + 0x3b6e20c8, 0x4c69105e, 0xd56041e4, 0xa2677172, 0x3c03e4d1, 0x4b04d447, | |
14436 | + 0xd20d85fd, 0xa50ab56b, | |
14437 | + 0x35b5a8fa, 0x42b2986c, 0xdbbbc9d6, 0xacbcf940, 0x32d86ce3, 0x45df5c75, | |
14438 | + 0xdcd60dcf, 0xabd13d59, | |
14439 | + 0x26d930ac, 0x51de003a, 0xc8d75180, 0xbfd06116, 0x21b4f4b5, 0x56b3c423, | |
14440 | + 0xcfba9599, 0xb8bda50f, | |
14441 | + 0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924, 0x2f6f7c87, 0x58684c11, | |
14442 | + 0xc1611dab, 0xb6662d3d, | |
14443 | + 0x76dc4190, 0x01db7106, 0x98d220bc, 0xefd5102a, 0x71b18589, 0x06b6b51f, | |
14444 | + 0x9fbfe4a5, 0xe8b8d433, | |
14445 | + 0x7807c9a2, 0x0f00f934, 0x9609a88e, 0xe10e9818, 0x7f6a0dbb, 0x086d3d2d, | |
14446 | + 0x91646c97, 0xe6635c01, | |
14447 | + 0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e, 0x6c0695ed, 0x1b01a57b, | |
14448 | + 0x8208f4c1, 0xf50fc457, | |
14449 | + 0x65b0d9c6, 0x12b7e950, 0x8bbeb8ea, 0xfcb9887c, 0x62dd1ddf, 0x15da2d49, | |
14450 | + 0x8cd37cf3, 0xfbd44c65, | |
14451 | + 0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2, 0x4adfa541, 0x3dd895d7, | |
14452 | + 0xa4d1c46d, 0xd3d6f4fb, | |
14453 | + 0x4369e96a, 0x346ed9fc, 0xad678846, 0xda60b8d0, 0x44042d73, 0x33031de5, | |
14454 | + 0xaa0a4c5f, 0xdd0d7cc9, | |
14455 | + 0x5005713c, 0x270241aa, 0xbe0b1010, 0xc90c2086, 0x5768b525, 0x206f85b3, | |
14456 | + 0xb966d409, 0xce61e49f, | |
14457 | + 0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4, 0x59b33d17, 0x2eb40d81, | |
14458 | + 0xb7bd5c3b, 0xc0ba6cad, | |
14459 | + 0xedb88320, 0x9abfb3b6, 0x03b6e20c, 0x74b1d29a, 0xead54739, 0x9dd277af, | |
14460 | + 0x04db2615, 0x73dc1683, | |
14461 | + 0xe3630b12, 0x94643b84, 0x0d6d6a3e, 0x7a6a5aa8, 0xe40ecf0b, 0x9309ff9d, | |
14462 | + 0x0a00ae27, 0x7d079eb1, | |
14463 | + 0xf00f9344, 0x8708a3d2, 0x1e01f268, 0x6906c2fe, 0xf762575d, 0x806567cb, | |
14464 | + 0x196c3671, 0x6e6b06e7, | |
14465 | + 0xfed41b76, 0x89d32be0, 0x10da7a5a, 0x67dd4acc, 0xf9b9df6f, 0x8ebeeff9, | |
14466 | + 0x17b7be43, 0x60b08ed5, | |
14467 | + 0xd6d6a3e8, 0xa1d1937e, 0x38d8c2c4, 0x4fdff252, 0xd1bb67f1, 0xa6bc5767, | |
14468 | + 0x3fb506dd, 0x48b2364b, | |
14469 | + 0xd80d2bda, 0xaf0a1b4c, 0x36034af6, 0x41047a60, 0xdf60efc3, 0xa867df55, | |
14470 | + 0x316e8eef, 0x4669be79, | |
14471 | + 0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236, 0xcc0c7795, 0xbb0b4703, | |
14472 | + 0x220216b9, 0x5505262f, | |
14473 | + 0xc5ba3bbe, 0xb2bd0b28, 0x2bb45a92, 0x5cb36a04, 0xc2d7ffa7, 0xb5d0cf31, | |
14474 | + 0x2cd99e8b, 0x5bdeae1d, | |
14475 | + 0x9b64c2b0, 0xec63f226, 0x756aa39c, 0x026d930a, 0x9c0906a9, 0xeb0e363f, | |
14476 | + 0x72076785, 0x05005713, | |
14477 | + 0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38, 0x92d28e9b, 0xe5d5be0d, | |
14478 | + 0x7cdcefb7, 0x0bdbdf21, | |
14479 | + 0x86d3d2d4, 0xf1d4e242, 0x68ddb3f8, 0x1fda836e, 0x81be16cd, 0xf6b9265b, | |
14480 | + 0x6fb077e1, 0x18b74777, | |
14481 | + 0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c, 0x8f659eff, 0xf862ae69, | |
14482 | + 0x616bffd3, 0x166ccf45, | |
14483 | + 0xa00ae278, 0xd70dd2ee, 0x4e048354, 0x3903b3c2, 0xa7672661, 0xd06016f7, | |
14484 | + 0x4969474d, 0x3e6e77db, | |
14485 | + 0xaed16a4a, 0xd9d65adc, 0x40df0b66, 0x37d83bf0, 0xa9bcae53, 0xdebb9ec5, | |
14486 | + 0x47b2cf7f, 0x30b5ffe9, | |
14487 | + 0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6, 0xbad03605, 0xcdd70693, | |
14488 | + 0x54de5729, 0x23d967bf, | |
14489 | + 0xb3667a2e, 0xc4614ab8, 0x5d681b02, 0x2a6f2b94, 0xb40bbe37, 0xc30c8ea1, | |
14490 | + 0x5a05df1b, 0x2d02ef8d | |
14491 | +}; | |
14492 | + | |
14493 | +/** | |
b7b72b66 | 14494 | + * dlm_hash - hash an array of data |
c1c6733f AM |
14495 | + * @data: the data to be hashed |
14496 | + * @len: the length of data to be hashed | |
14497 | + * | |
14498 | + * Copied from GFS. | |
14499 | + * | |
14500 | + * Take some data and convert it to a 32-bit hash. | |
14501 | + * | |
14502 | + * The hash function is a 32-bit CRC of the data. The algorithm uses | |
14503 | + * the crc_32_tab table above. | |
14504 | + * | |
14505 | + * This may not be the fastest hash function, but it does a fair bit better | |
14506 | + * at providing uniform results than the others I've looked at. That's | |
14507 | + * really important for efficient directories. | |
14508 | + * | |
14509 | + * Returns: the hash | |
14510 | + */ | |
14511 | + | |
b7b72b66 | 14512 | +uint32_t dlm_hash(const char *data, int len) |
c1c6733f AM |
14513 | +{ |
14514 | + uint32_t hash = 0xFFFFFFFF; | |
14515 | + | |
14516 | + for (; len--; data++) | |
14517 | + hash = crc_32_tab[(hash ^ *data) & 0xFF] ^ (hash >> 8); | |
14518 | + | |
14519 | + hash = ~hash; | |
14520 | + | |
14521 | + return hash; | |
14522 | +} | |
14523 | + | |
b7b72b66 AM |
14524 | +void print_lkb(struct dlm_lkb *lkb) |
14525 | +{ | |
14526 | + printk("dlm: lkb\n" | |
14527 | + "id %x\n" | |
14528 | + "remid %x\n" | |
14529 | + "flags %x\n" | |
14530 | + "status %x\n" | |
14531 | + "rqmode %d\n" | |
14532 | + "grmode %d\n" | |
14533 | + "nodeid %d\n" | |
14534 | + "lqstate %x\n" | |
14535 | + "lqflags %x\n", | |
14536 | + lkb->lkb_id, | |
14537 | + lkb->lkb_remid, | |
14538 | + lkb->lkb_flags, | |
14539 | + lkb->lkb_status, | |
14540 | + lkb->lkb_rqmode, | |
14541 | + lkb->lkb_grmode, | |
14542 | + lkb->lkb_nodeid, | |
14543 | + lkb->lkb_lockqueue_state, | |
14544 | + lkb->lkb_lockqueue_flags); | |
14545 | +} | |
14546 | + | |
14547 | +void print_rsb(struct dlm_rsb *r) | |
14548 | +{ | |
14549 | + printk("dlm: rsb\n" | |
14550 | + "name \"%s\"\n" | |
14551 | + "nodeid %d\n" | |
14552 | + "flags %lx\n" | |
14553 | + "ref %u\n", | |
14554 | + r->res_name, | |
14555 | + r->res_nodeid, | |
14556 | + r->res_flags, | |
14557 | + atomic_read(&r->res_ref)); | |
14558 | +} | |
14559 | + | |
14560 | +void print_request(struct dlm_request *req) | |
14561 | +{ | |
14562 | + printk("dlm: request\n" | |
14563 | + "rh_cmd %u\n" | |
14564 | + "rh_lkid %x\n" | |
14565 | + "remlkid %x\n" | |
14566 | + "flags %x\n" | |
14567 | + "status %u\n" | |
14568 | + "rqmode %u\n", | |
14569 | + req->rr_header.rh_cmd, | |
14570 | + req->rr_header.rh_lkid, | |
14571 | + req->rr_remlkid, | |
14572 | + req->rr_flags, | |
14573 | + req->rr_status, | |
14574 | + req->rr_rqmode); | |
14575 | +} | |
14576 | + | |
14577 | +void print_reply(struct dlm_reply *rp) | |
14578 | +{ | |
14579 | + printk("dlm: reply\n" | |
14580 | + "rh_cmd %u\n" | |
14581 | + "rh_lkid %x\n" | |
14582 | + "lockstate %u\n" | |
14583 | + "nodeid %u\n" | |
14584 | + "status %u\n" | |
14585 | + "lkid %x\n", | |
14586 | + rp->rl_header.rh_cmd, | |
14587 | + rp->rl_header.rh_lkid, | |
14588 | + rp->rl_lockstate, | |
14589 | + rp->rl_nodeid, | |
14590 | + rp->rl_status, | |
14591 | + rp->rl_lkid); | |
c1c6733f AM |
14592 | +} |
14593 | + | |
c1c6733f AM |
14594 | diff -urN linux-orig/cluster/dlm/util.h linux-patched/cluster/dlm/util.h |
14595 | --- linux-orig/cluster/dlm/util.h 1970-01-01 07:30:00.000000000 +0730 | |
bb1d8b11 | 14596 | +++ linux-patched/cluster/dlm/util.h 2004-11-03 11:31:56.000000000 +0800 |
b7b72b66 | 14597 | @@ -0,0 +1,24 @@ |
c1c6733f AM |
14598 | +/****************************************************************************** |
14599 | +******************************************************************************* | |
14600 | +** | |
14601 | +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | |
14602 | +** Copyright (C) 2004 Red Hat, Inc. All rights reserved. | |
14603 | +** | |
14604 | +** This copyrighted material is made available to anyone wishing to use, | |
14605 | +** modify, copy, or redistribute it subject to the terms and conditions | |
14606 | +** of the GNU General Public License v.2. | |
14607 | +** | |
14608 | +******************************************************************************* | |
14609 | +******************************************************************************/ | |
14610 | + | |
14611 | +#ifndef __UTIL_DOT_H__ | |
14612 | +#define __UTIL_DOT_H__ | |
14613 | + | |
b7b72b66 | 14614 | +uint32_t dlm_hash(const char *data, int len); |
c1c6733f | 14615 | + |
b7b72b66 AM |
14616 | +void print_lkb(struct dlm_lkb *lkb); |
14617 | +void print_rsb(struct dlm_rsb *r); | |
14618 | +void print_request(struct dlm_request *req); | |
14619 | +void print_reply(struct dlm_reply *rp); | |
c1c6733f AM |
14620 | + |
14621 | +#endif | |
14622 | diff -urN linux-orig/include/cluster/dlm.h linux-patched/include/cluster/dlm.h | |
14623 | --- linux-orig/include/cluster/dlm.h 1970-01-01 07:30:00.000000000 +0730 | |
bb1d8b11 | 14624 | +++ linux-patched/include/cluster/dlm.h 2004-11-03 11:31:56.000000000 +0800 |
c783755a | 14625 | @@ -0,0 +1,416 @@ |
c1c6733f AM |
14626 | +/****************************************************************************** |
14627 | +******************************************************************************* | |
14628 | +** | |
14629 | +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | |
14630 | +** Copyright (C) 2004 Red Hat, Inc. All rights reserved. | |
14631 | +** | |
14632 | +** This copyrighted material is made available to anyone wishing to use, | |
14633 | +** modify, copy, or redistribute it subject to the terms and conditions | |
14634 | +** of the GNU General Public License v.2. | |
14635 | +** | |
14636 | +******************************************************************************* | |
14637 | +******************************************************************************/ | |
14638 | + | |
14639 | +#ifndef __DLM_DOT_H__ | |
14640 | +#define __DLM_DOT_H__ | |
14641 | + | |
14642 | +/* | |
14643 | + * Interface to DLM - routines and structures to use DLM lockspaces. | |
14644 | + */ | |
14645 | + | |
14646 | +/* | |
14647 | + * Lock Modes | |
14648 | + */ | |
14649 | + | |
14650 | +#define DLM_LOCK_IV (-1) /* invalid */ | |
14651 | +#define DLM_LOCK_NL (0) /* null */ | |
14652 | +#define DLM_LOCK_CR (1) /* concurrent read */ | |
14653 | +#define DLM_LOCK_CW (2) /* concurrent write */ | |
14654 | +#define DLM_LOCK_PR (3) /* protected read */ | |
14655 | +#define DLM_LOCK_PW (4) /* protected write */ | |
14656 | +#define DLM_LOCK_EX (5) /* exclusive */ | |
14657 | + | |
14658 | +/* | |
14659 | + * Maximum size in bytes of a dlm_lock name | |
14660 | + */ | |
14661 | + | |
14662 | +#define DLM_RESNAME_MAXLEN (64) | |
14663 | + | |
14664 | +/* | |
14665 | + * Size in bytes of Lock Value Block | |
14666 | + */ | |
14667 | + | |
14668 | +#define DLM_LVB_LEN (32) | |
14669 | + | |
14670 | +/* | |
14671 | + * Flags to dlm_new_lockspace | |
14672 | + * | |
14673 | + * DLM_LSF_NOTIMERS | |
14674 | + * | |
14675 | + * Do not subject locks in this lockspace to time-outs. | |
c1c6733f AM |
14676 | + */ |
14677 | + | |
14678 | +#define DLM_LSF_NOTIMERS (1) | |
14679 | + | |
14680 | +/* | |
14681 | + * Flags to dlm_lock | |
14682 | + * | |
14683 | + * DLM_LKF_NOQUEUE | |
14684 | + * | |
14685 | + * Do not queue the lock request on the wait queue if it cannot be granted | |
14686 | + * immediately. If the lock cannot be granted because of this flag, DLM will | |
14687 | + * either return -EAGAIN from the dlm_lock call or will return 0 from | |
14688 | + * dlm_lock and -EAGAIN in the lock status block when the AST is executed. | |
14689 | + * | |
14690 | + * DLM_LKF_CONVERT | |
14691 | + * | |
14692 | + * Indicates a lock conversion request. For conversions the name and namelen | |
14693 | + * are ignored and the lock ID in the LKSB is used to identify the lock. | |
14694 | + * | |
14695 | + * DLM_LKF_VALBLK | |
14696 | + * | |
14697 | + * Requests DLM to return the current contents of the lock value block in the | |
14698 | + * lock status block. When this flag is set in a lock conversion from PW or EX | |
14699 | + * modes, DLM assigns the value specified in the lock status block to the lock | |
14700 | + * value block of the lock resource. The LVB is a DLM_LVB_LEN size array | |
14701 | + * containing application-specific information. | |
14702 | + * | |
14703 | + * DLM_LKF_QUECVT | |
14704 | + * | |
b7b72b66 AM |
14705 | + * Force a conversion request to be queued, even if it is compatible with |
14706 | + * the granted modes of other locks on the same resource. | |
c1c6733f AM |
14707 | + * |
14708 | + * DLM_LKF_CANCEL | |
14709 | + * | |
14710 | + * Used to cancel a pending conversion (with dlm_unlock). Lock is returned to | |
14711 | + * previously granted mode. | |
14712 | + * | |
14713 | + * DLM_LKF_IVVALBLK | |
14714 | + * | |
14715 | + * Invalidate/clear the lock value block. | |
14716 | + * | |
14717 | + * DLM_LKF_CONVDEADLK | |
14718 | + * | |
14719 | + * The granted mode of a lock being converted (from a non-NL mode) can be | |
14720 | + * changed to NL in the process of acquiring the requested mode to avoid | |
14721 | + * conversion deadlock. | |
14722 | + * | |
14723 | + * DLM_LKF_PERSISTENT | |
14724 | + * | |
14725 | + * Only relevant to locks originating in userspace. Signals to the ioctl.c code | |
14726 | + * that this lock should not be unlocked when the process exits. | |
14727 | + * | |
14728 | + * DLM_LKF_NODLCKWT | |
14729 | + * | |
14730 | + * This lock is not to be checked for conversion deadlocks. | |
14731 | + * | |
14732 | + * DLM_LKF_NODLCKBLK | |
14733 | + * | |
14734 | + * not yet implemented | |
14735 | + * | |
14736 | + * DLM_LKF_EXPEDITE | |
14737 | + * | |
b7b72b66 AM |
14738 | + * Used only with new requests for NL mode locks. Tells the lock manager |
14739 | + * to grant the lock, ignoring other locks in convert and wait queues. | |
c1c6733f AM |
14740 | + * |
14741 | + * DLM_LKF_NOQUEUEBAST | |
14742 | + * | |
14743 | + * Send blocking AST's before returning -EAGAIN to the caller. It is only | |
14744 | + * used along with the NOQUEUE flag. Blocking AST's are not sent for failed | |
14745 | + * NOQUEUE requests otherwise. | |
14746 | + * | |
b7b72b66 AM |
14747 | + * DLM_LKF_HEADQUE |
14748 | + * | |
14749 | + * Add a lock to the head of the convert or wait queue rather than the tail. | |
14750 | + * | |
14751 | + * DLM_LKF_NOORDER | |
14752 | + * | |
14753 | + * Disregard the standard grant order rules and grant a lock as soon as it | |
14754 | + * is compatible with other granted locks. | |
c1c6733f AM |
14755 | + */ |
14756 | + | |
14757 | +#define DLM_LKF_NOQUEUE (0x00000001) | |
14758 | +#define DLM_LKF_CANCEL (0x00000002) | |
14759 | +#define DLM_LKF_CONVERT (0x00000004) | |
14760 | +#define DLM_LKF_VALBLK (0x00000008) | |
14761 | +#define DLM_LKF_QUECVT (0x00000010) | |
14762 | +#define DLM_LKF_IVVALBLK (0x00000020) | |
14763 | +#define DLM_LKF_CONVDEADLK (0x00000040) | |
14764 | +#define DLM_LKF_PERSISTENT (0x00000080) | |
14765 | +#define DLM_LKF_NODLCKWT (0x00000100) | |
14766 | +#define DLM_LKF_NODLCKBLK (0x00000200) | |
14767 | +#define DLM_LKF_EXPEDITE (0x00000400) | |
14768 | +#define DLM_LKF_NOQUEUEBAST (0x00000800) | |
b7b72b66 AM |
14769 | +#define DLM_LKF_HEADQUE (0x00001000) |
14770 | +#define DLM_LKF_NOORDER (0x00002000) | |
c783755a | 14771 | +#define DLM_LKF_ORPHAN (0x00004000) |
c1c6733f AM |
14772 | + |
14773 | +/* | |
b7b72b66 | 14774 | + * Some return codes that are not in errno.h |
c1c6733f AM |
14775 | + */ |
14776 | + | |
14777 | +#define DLM_ECANCEL (0x10001) | |
14778 | +#define DLM_EUNLOCK (0x10002) | |
14779 | + | |
14780 | +typedef void dlm_lockspace_t; | |
14781 | + | |
14782 | +/* | |
14783 | + * Lock range structure | |
14784 | + */ | |
14785 | + | |
14786 | +struct dlm_range { | |
14787 | + uint64_t ra_start; | |
14788 | + uint64_t ra_end; | |
14789 | +}; | |
14790 | + | |
14791 | +/* | |
14792 | + * Lock status block | |
14793 | + * | |
14794 | + * Use this structure to specify the contents of the lock value block. For a | |
14795 | + * conversion request, this structure is used to specify the lock ID of the | |
14796 | + * lock. DLM writes the status of the lock request and the lock ID assigned | |
14797 | + * to the request in the lock status block. | |
14798 | + * | |
14799 | + * sb_lkid: the returned lock ID. It is set on new (non-conversion) requests. | |
14800 | + * It is available when dlm_lock returns. | |
14801 | + * | |
14802 | + * sb_lvbptr: saves or returns the contents of the lock's LVB according to rules | |
14803 | + * shown for the DLM_LKF_VALBLK flag. | |
14804 | + * | |
14805 | + * sb_flags: DLM_SBF_DEMOTED is returned if in the process of promoting a lock, | |
14806 | + * it was first demoted to NL to avoid conversion deadlock. | |
14807 | + * | |
14808 | + * sb_status: the returned status of the lock request set prior to AST | |
14809 | + * execution. Possible return values: | |
14810 | + * | |
14811 | + * 0 if lock request was successful | |
14812 | + * -EAGAIN if request would block and is flagged DLM_LKF_NOQUEUE | |
14813 | + * -ENOMEM if there is no memory to process request | |
14814 | + * -EINVAL if there are invalid parameters | |
14815 | + * -DLM_EUNLOCK if unlock request was successful | |
14816 | + * -DLM_ECANCEL ? | |
14817 | + */ | |
14818 | + | |
14819 | +#define DLM_SBF_DEMOTED (0x01) | |
14820 | + | |
14821 | +struct dlm_lksb { | |
14822 | + int sb_status; | |
14823 | + uint32_t sb_lkid; | |
14824 | + char sb_flags; | |
14825 | + char * sb_lvbptr; | |
14826 | +}; | |
14827 | + | |
14828 | +/* | |
b7b72b66 | 14829 | + * These defines are the bits that make up the query code. |
c1c6733f AM |
14830 | + */ |
14831 | + | |
14832 | +/* Bits 0, 1, 2, the lock mode or DLM_LOCK_THIS, see DLM_LOCK_NL etc in | |
14833 | + * dlm.h. Ignored for DLM_QUERY_LOCKS_ALL */ | |
14834 | +#define DLM_LOCK_THIS 0x0007 | |
14835 | +#define DLM_QUERY_MODE_MASK 0x0007 | |
14836 | + | |
14837 | +/* Bits 3, 4, 5 bitmap of queue(s) to query */ | |
14838 | +#define DLM_QUERY_QUEUE_WAIT 0x0008 | |
14839 | +#define DLM_QUERY_QUEUE_CONVERT 0x0010 | |
14840 | +#define DLM_QUERY_QUEUE_GRANT 0x0020 | |
14841 | +#define DLM_QUERY_QUEUE_GRANTED 0x0030 /* Shorthand */ | |
14842 | +#define DLM_QUERY_QUEUE_ALL 0x0038 /* Shorthand */ | |
14843 | + | |
14844 | +/* Bit 6, Return only the information that can be established without a network | |
14845 | + * round-trip. The caller must be aware of the implications of this. Useful for | |
14846 | + * just getting the master node id or resource name. */ | |
14847 | +#define DLM_QUERY_LOCAL 0x0040 | |
14848 | + | |
14849 | +/* Bits 8 up, query type */ | |
14850 | +#define DLM_QUERY_LOCKS_HIGHER 0x0100 | |
14851 | +#define DLM_QUERY_LOCKS_LOWER 0x0200 | |
14852 | +#define DLM_QUERY_LOCKS_EQUAL 0x0300 | |
14853 | +#define DLM_QUERY_LOCKS_BLOCKING 0x0400 | |
14854 | +#define DLM_QUERY_LOCKS_NOTBLOCK 0x0500 | |
14855 | +#define DLM_QUERY_LOCKS_ALL 0x0600 | |
c783755a | 14856 | +#define DLM_QUERY_LOCKS_ORPHAN 0x0700 |
c1c6733f AM |
14857 | +#define DLM_QUERY_MASK 0x0F00 |
14858 | + | |
14859 | +/* GRMODE is the default for mode comparisons, | |
14860 | + RQMODE might also be handy */ | |
14861 | +#define DLM_QUERY_GRMODE 0x0000 | |
14862 | +#define DLM_QUERY_RQMODE 0x1000 | |
14863 | + | |
14864 | +/* Structures passed into and out of the query */ | |
14865 | + | |
14866 | +struct dlm_lockinfo { | |
14867 | + int lki_lkid; /* Lock ID on originating node */ | |
14868 | + int lki_mstlkid; /* Lock ID on master node */ | |
14869 | + int lki_parent; | |
14870 | + int lki_node; /* Originating node (not master) */ | |
b7b72b66 | 14871 | + int lki_ownpid; /* Owner pid on originating node */ |
c1c6733f AM |
14872 | + uint8_t lki_state; /* Queue the lock is on */ |
14873 | + uint8_t lki_grmode; /* Granted mode */ | |
14874 | + uint8_t lki_rqmode; /* Requested mode */ | |
14875 | + struct dlm_range lki_grrange; /* Granted range, if applicable */ | |
14876 | + struct dlm_range lki_rqrange; /* Requested range, if applicable */ | |
14877 | +}; | |
14878 | + | |
14879 | +struct dlm_resinfo { | |
14880 | + int rsi_length; | |
14881 | + int rsi_grantcount; /* No. of nodes on grant queue */ | |
14882 | + int rsi_convcount; /* No. of nodes on convert queue */ | |
14883 | + int rsi_waitcount; /* No. of nodes on wait queue */ | |
14884 | + int rsi_masternode; /* Master for this resource */ | |
14885 | + char rsi_name[DLM_RESNAME_MAXLEN]; /* Resource name */ | |
14886 | + char rsi_valblk[DLM_LVB_LEN]; /* Master's LVB contents, if applicable | |
14887 | + */ | |
14888 | +}; | |
14889 | + | |
14890 | +struct dlm_queryinfo { | |
14891 | + struct dlm_resinfo *gqi_resinfo; | |
14892 | + struct dlm_lockinfo *gqi_lockinfo; /* This points to an array | |
14893 | + * of structs */ | |
14894 | + int gqi_locksize; /* input */ | |
14895 | + int gqi_lockcount; /* output */ | |
14896 | +}; | |
14897 | + | |
14898 | +#ifdef __KERNEL__ | |
14899 | +/* | |
14900 | + * dlm_init | |
14901 | + * | |
14902 | + * Starts and initializes DLM threads and structures. Creation of the first | |
14903 | + * lockspace will call this if it has not been called already. | |
14904 | + * | |
14905 | + * Returns: 0 if successful, -EXXX on error | |
14906 | + */ | |
14907 | + | |
14908 | +int dlm_init(void); | |
14909 | + | |
14910 | +/* | |
14911 | + * dlm_release | |
14912 | + * | |
14913 | + * Stops DLM threads. | |
14914 | + * | |
14915 | + * Returns: 0 if successful, -EXXX on error | |
14916 | + */ | |
14917 | + | |
14918 | +int dlm_release(void); | |
14919 | + | |
14920 | +/* | |
14921 | + * dlm_new_lockspace | |
14922 | + * | |
14923 | + * Starts a lockspace with the given name. If the named lockspace exists in | |
14924 | + * the cluster, the calling node joins it. | |
14925 | + */ | |
14926 | + | |
14927 | +int dlm_new_lockspace(char *name, int namelen, dlm_lockspace_t **lockspace, | |
14928 | + int flags); | |
14929 | + | |
14930 | +/* | |
14931 | + * dlm_release_lockspace | |
14932 | + * | |
14933 | + * Stop a lockspace. | |
14934 | + */ | |
14935 | + | |
14936 | +int dlm_release_lockspace(dlm_lockspace_t *lockspace, int force); | |
14937 | + | |
14938 | +/* | |
14939 | + * dlm_lock | |
14940 | + * | |
14941 | + * Make an asynchronous request to acquire or convert a lock on a named | |
14942 | + * resource. | |
14943 | + * | |
14944 | + * lockspace: context for the request | |
14945 | + * mode: the requested mode of the lock (DLM_LOCK_) | |
14946 | + * lksb: lock status block for input and async return values | |
14947 | + * flags: input flags (DLM_LKF_) | |
14948 | + * name: name of the resource to lock, can be binary | |
14949 | + * namelen: the length in bytes of the resource name (DLM_RESNAME_MAXLEN) | |
14950 | + * parent: the lock ID of a parent lock or 0 if none | |
14951 | + * lockast: function DLM executes when it completes processing the request | |
14952 | + * astarg: argument passed to lockast and bast functions | |
14953 | + * bast: function DLM executes when this lock later blocks another request | |
14954 | + * | |
14955 | + * Returns: | |
14956 | + * 0 if request is successfully queued for processing | |
14957 | + * -EINVAL if any input parameters are invalid | |
14958 | + * -EAGAIN if request would block and is flagged DLM_LKF_NOQUEUE | |
14959 | + * -ENOMEM if there is no memory to process request | |
14960 | + * -ENOTCONN if there is a communication error | |
14961 | + * | |
14962 | + * If the call to dlm_lock returns an error then the operation has failed and | |
14963 | + * the AST routine will not be called. If dlm_lock returns 0 it is still | |
14964 | + * possible that the lock operation will fail. The AST routine will be called | |
14965 | + * when the locking is complete and the status is returned in the lksb. | |
14966 | + * | |
14967 | + * If the AST routines or parameter are passed to a conversion operation then | |
14968 | + * they will overwrite those values that were passed to a previous dlm_lock | |
14969 | + * call. | |
14970 | + * | |
14971 | + * AST routines should not block (at least not for long), but may make | |
14972 | + * any locking calls they please. | |
14973 | + */ | |
14974 | + | |
14975 | +int dlm_lock(dlm_lockspace_t *lockspace, | |
14976 | + uint32_t mode, | |
14977 | + struct dlm_lksb *lksb, | |
14978 | + uint32_t flags, | |
14979 | + void *name, | |
14980 | + unsigned int namelen, | |
14981 | + uint32_t parent, | |
14982 | + void (*lockast) (void *astarg), | |
14983 | + void *astarg, | |
14984 | + void (*bast) (void *astarg, int mode), | |
14985 | + struct dlm_range *range); | |
14986 | + | |
14987 | +/* | |
14988 | + * dlm_unlock | |
14989 | + * | |
14990 | + * Asynchronously release a lock on a resource. The AST routine is called | |
14991 | + * when the resource is successfully unlocked. | |
14992 | + * | |
14993 | + * lockspace: context for the request | |
14994 | + * lkid: the lock ID as returned in the lksb | |
14995 | + * flags: input flags (DLM_LKF_) | |
14996 | + * lksb: if NULL the lksb parameter passed to last lock request is used | |
c783755a | 14997 | + * astarg: the arg used with the completion ast for the unlock |
c1c6733f AM |
14998 | + * |
14999 | + * Returns: | |
15000 | + * 0 if request is successfully queued for processing | |
15001 | + * -EINVAL if any input parameters are invalid | |
15002 | + * -ENOTEMPTY if the lock still has sublocks | |
15003 | + * -EBUSY if the lock is waiting for a remote lock operation | |
15004 | + * -ENOTCONN if there is a communication error | |
15005 | + */ | |
15006 | + | |
15007 | +extern int dlm_unlock(dlm_lockspace_t *lockspace, | |
15008 | + uint32_t lkid, | |
15009 | + uint32_t flags, | |
15010 | + struct dlm_lksb *lksb, | |
15011 | + void *astarg); | |
15012 | + | |
15013 | +/* Query interface | |
15014 | + * | |
15015 | + * Query the other holders of a resource, given a known lock ID | |
15016 | + * | |
15017 | + * lockspace: context for the request | |
15018 | + * lksb: LKSB, sb_lkid contains the lock ID of a valid lock | |
15019 | + * on the resource. sb_status will contain the status | |
15020 | + * of the request on completion. | |
15021 | + * query: query bitmap see DLM_QUERY_* above | |
15022 | + * qinfo: pointer to dlm_queryinfo structure | |
15023 | + * ast_routine: AST routine to call on completion | |
15024 | + * astarg: argument to AST routine. It is "traditional" | |
15025 | + * to put the qinfo pointer into lksb->sb_lvbptr | |
15026 | + * and pass the lksb in here. | |
15027 | + */ | |
15028 | +extern int dlm_query(dlm_lockspace_t *lockspace, | |
15029 | + struct dlm_lksb *lksb, | |
15030 | + int query, | |
15031 | + struct dlm_queryinfo *qinfo, | |
15032 | + void (ast_routine(void *)), | |
15033 | + void *astarg); | |
15034 | + | |
b7b72b66 AM |
15035 | + |
15036 | +void dlm_debug_dump(void); | |
15037 | +void dlm_locks_dump(void); | |
15038 | + | |
c1c6733f AM |
15039 | +#endif /* __KERNEL__ */ |
15040 | + | |
15041 | +#endif /* __DLM_DOT_H__ */ | |
15042 | diff -urN linux-orig/include/cluster/dlm_device.h linux-patched/include/cluster/dlm_device.h | |
15043 | --- linux-orig/include/cluster/dlm_device.h 1970-01-01 07:30:00.000000000 +0730 | |
bb1d8b11 | 15044 | +++ linux-patched/include/cluster/dlm_device.h 2004-11-03 11:31:56.000000000 +0800 |
b7b72b66 | 15045 | @@ -0,0 +1,64 @@ |
c1c6733f AM |
15046 | +/****************************************************************************** |
15047 | +******************************************************************************* | |
15048 | +** | |
15049 | +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | |
15050 | +** Copyright (C) 2004 Red Hat, Inc. All rights reserved. | |
15051 | +** | |
15052 | +** This copyrighted material is made available to anyone wishing to use, | |
15053 | +** modify, copy, or redistribute it subject to the terms and conditions | |
15054 | +** of the GNU General Public License v.2. | |
15055 | +** | |
15056 | +******************************************************************************* | |
15057 | +******************************************************************************/ | |
15058 | + | |
15059 | +/* This is the device interface for dlm, most users will use a library | |
15060 | + * interface. | |
15061 | + */ | |
15062 | + | |
15063 | +/* Version of the device interface */ | |
15064 | +#define DLM_DEVICE_VERSION_MAJOR 2 | |
15065 | +#define DLM_DEVICE_VERSION_MINOR 0 | |
15066 | +#define DLM_DEVICE_VERSION_PATCH 0 | |
15067 | + | |
15068 | +/* struct passed to the lock write */ | |
15069 | +struct dlm_lock_params { | |
15070 | + uint32_t version[3]; | |
15071 | + uint8_t cmd; | |
15072 | + uint8_t mode; | |
15073 | + uint16_t flags; | |
15074 | + uint32_t lkid; | |
15075 | + uint32_t parent; | |
15076 | + struct dlm_range range; | |
15077 | + uint8_t namelen; | |
b7b72b66 AM |
15078 | + void *castparam; |
15079 | + void *castaddr; | |
15080 | + void *bastparam; | |
c1c6733f AM |
15081 | + void *bastaddr; |
15082 | + struct dlm_lksb *lksb; | |
15083 | + char name[1]; | |
15084 | +}; | |
15085 | + | |
15086 | + | |
15087 | +/* struct read from the "device" fd, | |
15088 | + consists mainly of userspace pointers for the library to use */ | |
15089 | +struct dlm_lock_result { | |
15090 | + uint8_t cmd; | |
15091 | + void *astparam; | |
15092 | + void (*astaddr)(void *astparam); | |
15093 | + struct dlm_lksb *user_lksb; | |
15094 | + struct dlm_lksb lksb; /* But this has real data in it */ | |
15095 | + uint8_t bast_mode; /* Not yet used */ | |
15096 | +}; | |
15097 | + | |
15098 | +/* commands passed to the device */ | |
15099 | +#define DLM_USER_LOCK 1 | |
15100 | +#define DLM_USER_UNLOCK 2 | |
15101 | +#define DLM_USER_QUERY 3 | |
15102 | + | |
15103 | +/* Arbitrary length restriction */ | |
15104 | +#define MAX_LS_NAME_LEN 64 | |
15105 | + | |
15106 | +/* ioctls on the device */ | |
15107 | +#define DLM_CREATE_LOCKSPACE _IOW('D', 0x01, char *) | |
15108 | +#define DLM_RELEASE_LOCKSPACE _IOW('D', 0x02, char *) | |
15109 | +#define DLM_FORCE_RELEASE_LOCKSPACE _IOW('D', 0x03, char *) |