diff -urN linux-2.4.24.org/arch/mips64/kernel/ioctl32.c linux-2.4.24/arch/mips64/kernel/ioctl32.c
--- linux-2.4.24.org/arch/mips64/kernel/ioctl32.c	2004-01-18 14:59:17.636181134 +0100
+++ linux-2.4.24/arch/mips64/kernel/ioctl32.c	2004-01-18 15:01:17.736881093 +0100
@@ -62,6 +62,7 @@
 
 #include <linux/mtd/mtd.h>
 #include <linux/serial.h>
+#include <linux/dm-ioctl.h>
 
 #ifdef CONFIG_SIBYTE_TBPROF
 #include <asm/sibyte/trace_prof.h>
@@ -2324,6 +2325,22 @@
 	IOCTL32_DEFAULT(RESTART_ARRAY_RW),
 #endif /* CONFIG_MD */
 
+#if defined(CONFIG_BLK_DEV_DM) || defined(CONFIG_BLK_DEV_DM_MODULE)
+	IOCTL32_DEFAULT(DM_VERSION),
+	IOCTL32_DEFAULT(DM_REMOVE_ALL),
+	IOCTL32_DEFAULT(DM_DEV_CREATE),
+	IOCTL32_DEFAULT(DM_DEV_REMOVE),
+	IOCTL32_DEFAULT(DM_TABLE_LOAD),
+	IOCTL32_DEFAULT(DM_DEV_SUSPEND),
+	IOCTL32_DEFAULT(DM_DEV_RENAME),
+	IOCTL32_DEFAULT(DM_TABLE_DEPS),
+	IOCTL32_DEFAULT(DM_DEV_STATUS),
+	IOCTL32_DEFAULT(DM_TABLE_STATUS),
+	IOCTL32_DEFAULT(DM_DEV_WAIT),
+	IOCTL32_DEFAULT(DM_LIST_DEVICES),
+	IOCTL32_DEFAULT(DM_TABLE_CLEAR),
+#endif /* CONFIG_BLK_DEV_DM */
+
 #ifdef CONFIG_SIBYTE_TBPROF
 	IOCTL32_DEFAULT(SBPROF_ZBSTART),
 	IOCTL32_DEFAULT(SBPROF_ZBSTOP),
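The device-mapper entries above (and the matching blocks in the other ioctl32
tables below) register each DM ioctl as a straight pass-through for 32-bit
callers. That works because the dm-ioctl argument block is built from
fixed-width types arranged so that field offsets and total size are identical
for 32-bit and 64-bit user space. A minimal sketch of the property being
relied on; the struct below is a hypothetical illustration, not the real
struct dm_ioctl:

	/*
	 * An argument block whose fields keep the same offsets and total
	 * size under both ABIs needs no translation thunk, so the native
	 * ioctl handler can be registered directly, as above.
	 */
	struct demo_ioctl_arg {			/* illustrative only */
		__u32 version[3];
		__u32 padding;		/* keeps the next field 8-byte aligned */
		__u64 data_size;	/* same offset (16) on both ABIs */
	};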
diff -urN linux-2.4.24.org/arch/parisc/kernel/ioctl32.c linux-2.4.24/arch/parisc/kernel/ioctl32.c
--- linux-2.4.24.org/arch/parisc/kernel/ioctl32.c	2004-01-18 14:59:20.929484849 +0100
+++ linux-2.4.24/arch/parisc/kernel/ioctl32.c	2004-01-18 15:01:17.742879834 +0100
@@ -55,6 +55,7 @@
 #define max max */
 #include <linux/lvm.h>
 #endif /* LVM */
+#include <linux/dm-ioctl.h>
 
 #include <scsi/scsi.h>
 /* Ugly hack. */
@@ -3423,6 +3424,22 @@
 COMPATIBLE_IOCTL(LV_BMAP)
 COMPATIBLE_IOCTL(LV_SNAPSHOT_USE_RATE)
 #endif /* LVM */
+/* Device-Mapper */
+#if defined(CONFIG_BLK_DEV_DM) || defined(CONFIG_BLK_DEV_DM_MODULE)
+COMPATIBLE_IOCTL(DM_VERSION)
+COMPATIBLE_IOCTL(DM_REMOVE_ALL)
+COMPATIBLE_IOCTL(DM_DEV_CREATE)
+COMPATIBLE_IOCTL(DM_DEV_REMOVE)
+COMPATIBLE_IOCTL(DM_TABLE_LOAD)
+COMPATIBLE_IOCTL(DM_DEV_SUSPEND)
+COMPATIBLE_IOCTL(DM_DEV_RENAME)
+COMPATIBLE_IOCTL(DM_TABLE_DEPS)
+COMPATIBLE_IOCTL(DM_DEV_STATUS)
+COMPATIBLE_IOCTL(DM_TABLE_STATUS)
+COMPATIBLE_IOCTL(DM_DEV_WAIT)
+COMPATIBLE_IOCTL(DM_LIST_DEVICES)
+COMPATIBLE_IOCTL(DM_TABLE_CLEAR)
+#endif /* CONFIG_BLK_DEV_DM */
 #if defined(CONFIG_DRM) || defined(CONFIG_DRM_MODULE)
 COMPATIBLE_IOCTL(DRM_IOCTL_GET_MAGIC)
 COMPATIBLE_IOCTL(DRM_IOCTL_IRQ_BUSID)
diff -urN linux-2.4.24.org/arch/ppc64/kernel/ioctl32.c linux-2.4.24/arch/ppc64/kernel/ioctl32.c
--- linux-2.4.24.org/arch/ppc64/kernel/ioctl32.c	2004-01-18 14:58:17.568907286 +0100
+++ linux-2.4.24/arch/ppc64/kernel/ioctl32.c	2004-01-18 15:01:17.754877316 +0100
@@ -66,6 +66,7 @@
 #if defined(CONFIG_BLK_DEV_LVM) || defined(CONFIG_BLK_DEV_LVM_MODULE)
 #include <linux/lvm.h>
 #endif /* LVM */
+#include <linux/dm-ioctl.h>
 
 #include <scsi/scsi.h>
 /* Ugly hack. */
@@ -4408,6 +4409,22 @@
 COMPATIBLE_IOCTL(NBD_PRINT_DEBUG),
 COMPATIBLE_IOCTL(NBD_SET_SIZE_BLOCKS),
 COMPATIBLE_IOCTL(NBD_DISCONNECT),
+/* device-mapper */
+#if defined(CONFIG_BLK_DEV_DM) || defined(CONFIG_BLK_DEV_DM_MODULE)
+COMPATIBLE_IOCTL(DM_VERSION),
+COMPATIBLE_IOCTL(DM_REMOVE_ALL),
+COMPATIBLE_IOCTL(DM_DEV_CREATE),
+COMPATIBLE_IOCTL(DM_DEV_REMOVE),
+COMPATIBLE_IOCTL(DM_TABLE_LOAD),
+COMPATIBLE_IOCTL(DM_DEV_SUSPEND),
+COMPATIBLE_IOCTL(DM_DEV_RENAME),
+COMPATIBLE_IOCTL(DM_TABLE_DEPS),
+COMPATIBLE_IOCTL(DM_DEV_STATUS),
+COMPATIBLE_IOCTL(DM_TABLE_STATUS),
+COMPATIBLE_IOCTL(DM_DEV_WAIT),
+COMPATIBLE_IOCTL(DM_LIST_DEVICES),
+COMPATIBLE_IOCTL(DM_TABLE_CLEAR),
+#endif /* CONFIG_BLK_DEV_DM */
 /* Remove *PRIVATE in 2.5 */
 COMPATIBLE_IOCTL(SIOCDEVPRIVATE),
 COMPATIBLE_IOCTL(SIOCDEVPRIVATE+1),
diff -urN linux-2.4.24.org/arch/s390x/kernel/ioctl32.c linux-2.4.24/arch/s390x/kernel/ioctl32.c
--- linux-2.4.24.org/arch/s390x/kernel/ioctl32.c	2004-01-18 14:59:24.825661296 +0100
+++ linux-2.4.24/arch/s390x/kernel/ioctl32.c	2004-01-18 15:01:17.759876266 +0100
@@ -30,6 +30,7 @@
 #include <linux/blk.h>
 #include <linux/elevator.h>
 #include <linux/raw.h>
+#include <linux/dm-ioctl.h>
 #include <asm/types.h>
 #include <asm/uaccess.h>
 #include <asm/dasd.h>
@@ -627,6 +628,20 @@
 
 	IOCTL32_DEFAULT(SIOCGSTAMP),
 
+	IOCTL32_DEFAULT(DM_VERSION),
+	IOCTL32_DEFAULT(DM_REMOVE_ALL),
+	IOCTL32_DEFAULT(DM_DEV_CREATE),
+	IOCTL32_DEFAULT(DM_DEV_REMOVE),
+	IOCTL32_DEFAULT(DM_TABLE_LOAD),
+	IOCTL32_DEFAULT(DM_DEV_SUSPEND),
+	IOCTL32_DEFAULT(DM_DEV_RENAME),
+	IOCTL32_DEFAULT(DM_TABLE_DEPS),
+	IOCTL32_DEFAULT(DM_DEV_STATUS),
+	IOCTL32_DEFAULT(DM_TABLE_STATUS),
+	IOCTL32_DEFAULT(DM_DEV_WAIT),
+	IOCTL32_DEFAULT(DM_LIST_DEVICES),
+	IOCTL32_DEFAULT(DM_TABLE_CLEAR),
+
 	IOCTL32_DEFAULT(LOOP_SET_FD),
 	IOCTL32_DEFAULT(LOOP_CLR_FD),
 
diff -urN linux-2.4.24.org/arch/sparc64/kernel/ioctl32.c linux-2.4.24/arch/sparc64/kernel/ioctl32.c
--- linux-2.4.24.org/arch/sparc64/kernel/ioctl32.c	2004-01-18 14:58:59.210079599 +0100
+++ linux-2.4.24/arch/sparc64/kernel/ioctl32.c	2004-01-18 15:01:17.768874378 +0100
@@ -56,6 +56,7 @@
 #if defined(CONFIG_BLK_DEV_LVM) || defined(CONFIG_BLK_DEV_LVM_MODULE)
 #include <linux/lvm.h>
 #endif /* LVM */
+#include <linux/dm-ioctl.h>
 
 #include <scsi/scsi.h>
 /* Ugly hack. */
@@ -5086,6 +5087,22 @@
 COMPATIBLE_IOCTL(NBD_PRINT_DEBUG)
 COMPATIBLE_IOCTL(NBD_SET_SIZE_BLOCKS)
 COMPATIBLE_IOCTL(NBD_DISCONNECT)
+/* device-mapper */
+#if defined(CONFIG_BLK_DEV_DM) || defined(CONFIG_BLK_DEV_DM_MODULE)
+COMPATIBLE_IOCTL(DM_VERSION)
+COMPATIBLE_IOCTL(DM_REMOVE_ALL)
+COMPATIBLE_IOCTL(DM_DEV_CREATE)
+COMPATIBLE_IOCTL(DM_DEV_REMOVE)
+COMPATIBLE_IOCTL(DM_TABLE_LOAD)
+COMPATIBLE_IOCTL(DM_DEV_SUSPEND)
+COMPATIBLE_IOCTL(DM_DEV_RENAME)
+COMPATIBLE_IOCTL(DM_TABLE_DEPS)
+COMPATIBLE_IOCTL(DM_DEV_STATUS)
+COMPATIBLE_IOCTL(DM_TABLE_STATUS)
+COMPATIBLE_IOCTL(DM_DEV_WAIT)
+COMPATIBLE_IOCTL(DM_LIST_DEVICES)
+COMPATIBLE_IOCTL(DM_TABLE_CLEAR)
+#endif /* CONFIG_BLK_DEV_DM */
 /* Linux-1394 */
 #if defined(CONFIG_IEEE1394) || defined(CONFIG_IEEE1394_MODULE)
 COMPATIBLE_IOCTL(AMDTP_IOC_CHANNEL)
diff -urN linux-2.4.24.org/arch/x86_64/ia32/ia32_ioctl.c linux-2.4.24/arch/x86_64/ia32/ia32_ioctl.c
--- linux-2.4.24.org/arch/x86_64/ia32/ia32_ioctl.c	2004-01-18 14:58:15.119427333 +0100
+++ linux-2.4.24/arch/x86_64/ia32/ia32_ioctl.c	2004-01-18 15:01:17.778872279 +0100
@@ -67,6 +67,7 @@
 #define max max
 #include <linux/lvm.h>
 #endif /* LVM */
+#include <linux/dm-ioctl.h>
 
 #include <scsi/scsi.h>
 /* Ugly hack. */
@@ -4051,6 +4052,22 @@
 COMPATIBLE_IOCTL(LV_BMAP)
 COMPATIBLE_IOCTL(LV_SNAPSHOT_USE_RATE)
 #endif /* LVM */
+/* Device-Mapper */
+#if defined(CONFIG_BLK_DEV_DM) || defined(CONFIG_BLK_DEV_DM_MODULE)
+COMPATIBLE_IOCTL(DM_VERSION)
+COMPATIBLE_IOCTL(DM_REMOVE_ALL)
+COMPATIBLE_IOCTL(DM_DEV_CREATE)
+COMPATIBLE_IOCTL(DM_DEV_REMOVE)
+COMPATIBLE_IOCTL(DM_TABLE_LOAD)
+COMPATIBLE_IOCTL(DM_DEV_SUSPEND)
+COMPATIBLE_IOCTL(DM_DEV_RENAME)
+COMPATIBLE_IOCTL(DM_TABLE_DEPS)
+COMPATIBLE_IOCTL(DM_DEV_STATUS)
+COMPATIBLE_IOCTL(DM_TABLE_STATUS)
+COMPATIBLE_IOCTL(DM_DEV_WAIT)
+COMPATIBLE_IOCTL(DM_LIST_DEVICES)
+COMPATIBLE_IOCTL(DM_TABLE_CLEAR)
+#endif /* CONFIG_BLK_DEV_DM */
 #ifdef CONFIG_AUTOFS_FS
 COMPATIBLE_IOCTL(AUTOFS_IOC_READY)
 COMPATIBLE_IOCTL(AUTOFS_IOC_FAIL)
diff -urN linux-2.4.24.org/Documentation/Configure.help linux-2.4.24/Documentation/Configure.help
--- linux-2.4.24.org/Documentation/Configure.help	2004-01-18 14:59:47.177940541 +0100
+++ linux-2.4.24/Documentation/Configure.help	2004-01-18 15:01:13.758716197 +0100
@@ -1952,6 +1952,20 @@
   want), say M here and read <file:Documentation/modules.txt>. The
   module will be called lvm-mod.o.
 
+Device-mapper support
+CONFIG_BLK_DEV_DM
+  Device-mapper is a low-level volume manager. It works by allowing
+  people to specify mappings for ranges of logical sectors. Various
+  mapping types are available; in addition, people may write their
+  own modules containing custom mappings if they wish.
+
+  Higher level volume managers such as LVM2 use this driver.
+
+  If you want to compile this as a module, say M here and read
+  <file:Documentation/modules.txt>. The module will be called dm-mod.o.
+
+  If unsure, say N.
+
 Multiple devices driver support (RAID and LVM)
 CONFIG_MD
   Support multiple physical spindles through a single logical device.
diff -urN linux-2.4.24.org/drivers/md/Config.in linux-2.4.24/drivers/md/Config.in
--- linux-2.4.24.org/drivers/md/Config.in	2004-01-18 14:58:09.306661789 +0100
+++ linux-2.4.24/drivers/md/Config.in	2004-01-18 15:01:13.770713678 +0100
@@ -14,5 +14,6 @@
 dep_tristate ' Multipath I/O support' CONFIG_MD_MULTIPATH $CONFIG_BLK_DEV_MD
 
 dep_tristate ' Logical volume manager (LVM) support' CONFIG_BLK_DEV_LVM $CONFIG_MD
+dep_tristate ' Device-mapper support' CONFIG_BLK_DEV_DM $CONFIG_MD
 
 endmenu
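A note on the Config.in line: a 2.4 dep_tristate entry lets CONFIG_BLK_DEV_DM
be built-in (y), modular (m) or off (n), capped by $CONFIG_MD, and the build
system then defines CONFIG_BLK_DEV_DM for y builds or CONFIG_BLK_DEV_DM_MODULE
for m builds. That is why the ioctl32 hunks above test both symbols; the usual
C guard looks like this (sketch):

	/* Cover both built-in and modular device-mapper configurations. */
	#if defined(CONFIG_BLK_DEV_DM) || defined(CONFIG_BLK_DEV_DM_MODULE)
	/* ... code that needs device-mapper ... */
	#endif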
diff -urN linux-2.4.24.org/drivers/md/dm.c linux-2.4.24/drivers/md/dm.c
--- linux-2.4.24.org/drivers/md/dm.c	1970-01-01 01:00:00.000000000 +0100
+++ linux-2.4.24/drivers/md/dm.c	2004-01-18 15:01:29.214472770 +0100
@@ -0,0 +1,1115 @@
+/*
+ * Copyright (C) 2001, 2002 Sistina Software (UK) Limited.
+ *
+ * This file is released under the GPL.
+ */
+
+#include "dm.h"
+#include "kcopyd.h"
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/blk.h>
+#include <linux/blkpg.h>
+#include <linux/mempool.h>
+#include <linux/slab.h>
+#include <linux/major.h>
+#include <linux/kdev_t.h>
+#include <linux/lvm.h>
+
+#include <asm/uaccess.h>
+
+static const char *_name = DM_NAME;
+#define DEFAULT_READ_AHEAD 64
+
+struct dm_io {
+	struct mapped_device *md;
+
+	struct dm_target *ti;
+	int rw;
+	union map_info map_context;
+	void (*end_io) (struct buffer_head * bh, int uptodate);
+	void *context;
+};
+
+struct deferred_io {
+	int rw;
+	struct buffer_head *bh;
+	struct deferred_io *next;
+};
+
+/*
+ * Bits for the md->flags field.
+ */
+#define DMF_BLOCK_IO 0
+#define DMF_SUSPENDED 1
+
+struct mapped_device {
+	struct rw_semaphore lock;
+	atomic_t holders;
+
+	kdev_t dev;
+	unsigned long flags;
+
+	/*
+	 * A list of ios that arrived while we were suspended.
+	 */
+	atomic_t pending;
+	wait_queue_head_t wait;
+	struct deferred_io *deferred;
+
+	/*
+	 * The current mapping.
+	 */
+	struct dm_table *map;
+
+	/*
+	 * io objects are allocated from here.
+	 */
+	mempool_t *io_pool;
+
+	/*
+	 * Event handling.
+	 */
+	uint32_t event_nr;
+	wait_queue_head_t eventq;
+};
+
+#define MIN_IOS 256
+static kmem_cache_t *_io_cache;
+
+static struct mapped_device *get_kdev(kdev_t dev);
+static int dm_request(request_queue_t *q, int rw, struct buffer_head *bh);
+static int dm_user_bmap(struct inode *inode, struct lv_bmap *lvb);
+
+/*-----------------------------------------------------------------
+ * In order to avoid the 256 minor number limit we are going to
+ * register more major numbers as necessary.
+ *---------------------------------------------------------------*/
+#define MAX_MINORS (1 << MINORBITS)
+
+struct major_details {
+	unsigned int major;
+
+	int transient;
+	struct list_head transient_list;
+
+	unsigned int first_free_minor;
+	int nr_free_minors;
+
+	struct mapped_device *mds[MAX_MINORS];
+	int blk_size[MAX_MINORS];
+	int blksize_size[MAX_MINORS];
+	int hardsect_size[MAX_MINORS];
+};
+
+static struct rw_semaphore _dev_lock;
+static struct major_details *_majors[MAX_BLKDEV];
+
+/*
+ * This holds a list of majors that non-specified device numbers
+ * may be allocated from. Only majors with free minors appear on
+ * this list.
+ */
+static LIST_HEAD(_transients_free);
+
+static int __alloc_major(unsigned int major, struct major_details **result)
+{
+	int r;
+	unsigned int transient = !major;
+	struct major_details *maj;
+
+	/* Major already allocated? */
+	if (major && _majors[major])
+		return 0;
+
+	maj = kmalloc(sizeof(*maj), GFP_KERNEL);
+	if (!maj)
+		return -ENOMEM;
+
+	memset(maj, 0, sizeof(*maj));
+	INIT_LIST_HEAD(&maj->transient_list);
+
+	maj->nr_free_minors = MAX_MINORS;
+
+	r = register_blkdev(major, _name, &dm_blk_dops);
+	if (r < 0) {
+		DMERR("register_blkdev failed for %d", major);
+		kfree(maj);
+		return r;
+	}
+	if (r > 0)
+		major = r;
+
+	maj->major = major;
+
+	if (transient) {
+		maj->transient = transient;
+		list_add_tail(&maj->transient_list, &_transients_free);
+	}
+
+	_majors[major] = maj;
+
+	blk_size[major] = maj->blk_size;
+	blksize_size[major] = maj->blksize_size;
+	hardsect_size[major] = maj->hardsect_size;
+	read_ahead[major] = DEFAULT_READ_AHEAD;
+
+	blk_queue_make_request(BLK_DEFAULT_QUEUE(major), dm_request);
+
+	*result = maj;
+	return 0;
+}
+
+static void __free_major(struct major_details *maj)
+{
+	unsigned int major = maj->major;
+
+	list_del(&maj->transient_list);
+
+	read_ahead[major] = 0;
+	blk_size[major] = NULL;
+	blksize_size[major] = NULL;
+	hardsect_size[major] = NULL;
+
+	_majors[major] = NULL;
+	kfree(maj);
+
+	if (unregister_blkdev(major, _name) < 0)
419 | + DMERR("devfs_unregister_blkdev failed"); | |
+}
+
+static void free_all_majors(void)
+{
+	unsigned int major = ARRAY_SIZE(_majors);
+
+	down_write(&_dev_lock);
+
+	while (major--)
+		if (_majors[major])
+			__free_major(_majors[major]);
+
+	up_write(&_dev_lock);
+}
+
+static void free_dev(kdev_t dev)
+{
+	unsigned int major = major(dev);
+	unsigned int minor = minor(dev);
+	struct major_details *maj;
+
+	down_write(&_dev_lock);
+
+	maj = _majors[major];
+	if (!maj)
+		goto out;
+
+	maj->mds[minor] = NULL;
+	maj->nr_free_minors++;
+
+	if (maj->nr_free_minors == MAX_MINORS) {
+		__free_major(maj);
+		goto out;
+	}
+
+	if (!maj->transient)
+		goto out;
+
+	if (maj->nr_free_minors == 1)
+		list_add_tail(&maj->transient_list, &_transients_free);
+
+	if (minor < maj->first_free_minor)
+		maj->first_free_minor = minor;
+
+ out:
+	up_write(&_dev_lock);
+}
+
+static void __alloc_minor(struct major_details *maj, unsigned int minor,
+			  struct mapped_device *md)
+{
+	maj->mds[minor] = md;
+	md->dev = mk_kdev(maj->major, minor);
+	maj->nr_free_minors--;
+
+	if (maj->transient && !maj->nr_free_minors)
+		list_del_init(&maj->transient_list);
+}
+
+/*
+ * See if requested kdev_t is available.
+ */
+static int specific_dev(kdev_t dev, struct mapped_device *md)
+{
+	int r = 0;
+	unsigned int major = major(dev);
+	unsigned int minor = minor(dev);
+	struct major_details *maj;
+
+	if (!major || (major >= MAX_BLKDEV) || (minor >= MAX_MINORS)) {
+		DMWARN("device number requested out of range (%d, %d)",
+		       major, minor);
+		return -EINVAL;
+	}
+
+	down_write(&_dev_lock);
+	maj = _majors[major];
+
+	/* Register requested major? */
+	if (!maj) {
+		r = __alloc_major(major, &maj);
+		if (r)
+			goto out;
+
+		major = maj->major;
+	}
+
+	if (maj->mds[minor]) {
+		r = -EBUSY;
+		goto out;
+	}
+
+	__alloc_minor(maj, minor, md);
+
+ out:
+	up_write(&_dev_lock);
+
+	return r;
+}
+
+/*
+ * Find first unused device number, requesting a new major number if required.
+ */
+static int first_free_dev(struct mapped_device *md)
+{
+	int r = 0;
+	struct major_details *maj;
+
+	down_write(&_dev_lock);
+
+	if (list_empty(&_transients_free)) {
+		r = __alloc_major(0, &maj);
+		if (r)
+			goto out;
+	} else
+		maj = list_entry(_transients_free.next, struct major_details,
+				 transient_list);
+
+	while (maj->mds[maj->first_free_minor++])
+		;
+
+	__alloc_minor(maj, maj->first_free_minor - 1, md);
+
+ out:
+	up_write(&_dev_lock);
+
+	return r;
+}
+
+static struct mapped_device *get_kdev(kdev_t dev)
+{
+	struct mapped_device *md;
+	struct major_details *maj;
+
+	down_read(&_dev_lock);
+	maj = _majors[major(dev)];
+	if (!maj) {
+		md = NULL;
+		goto out;
+	}
+	md = maj->mds[minor(dev)];
+	if (md)
+		dm_get(md);
+ out:
+	up_read(&_dev_lock);
+
+	return md;
+}
+
+/*-----------------------------------------------------------------
+ * init/exit code
+ *---------------------------------------------------------------*/
+
+static __init int local_init(void)
+{
+	init_rwsem(&_dev_lock);
+
+	/* allocate a slab for the dm_ios */
+	_io_cache = kmem_cache_create("dm io",
+				      sizeof(struct dm_io), 0, 0, NULL, NULL);
+
+	if (!_io_cache)
+		return -ENOMEM;
+
+	return 0;
+}
+
+static void local_exit(void)
+{
+	kmem_cache_destroy(_io_cache);
+	free_all_majors();
+
+	DMINFO("cleaned up");
+}
+
+/*
+ * We have a lot of init/exit functions, so it seems easier to
+ * store them in an array. The disposable macro 'xx'
+ * expands a prefix into a pair of function names.
+ */
+static struct {
+	int (*init) (void);
+	void (*exit) (void);
+
+} _inits[] = {
+#define xx(n) {n ## _init, n ## _exit},
+	xx(local)
+	xx(dm_target)
+	xx(dm_linear)
+	xx(dm_stripe)
+	xx(dm_interface)
+	xx(kcopyd)
+	xx(dm_snapshot)
+#undef xx
+};
+
+static int __init dm_init(void)
+{
+	const int count = ARRAY_SIZE(_inits);
+
+	int r, i;
+
+	for (i = 0; i < count; i++) {
+		r = _inits[i].init();
+		if (r)
+			goto bad;
+	}
+
+	return 0;
+
+ bad:
+	while (i--)
+		_inits[i].exit();
+
+	return r;
+}
+
+static void __exit dm_exit(void)
+{
+	int i = ARRAY_SIZE(_inits);
+
+	while (i--)
+		_inits[i].exit();
+}
+
+/*
+ * Block device functions
+ */
+static int dm_blk_open(struct inode *inode, struct file *file)
+{
+	struct mapped_device *md;
+
+	md = get_kdev(inode->i_rdev);
+	if (!md)
+		return -ENXIO;
+
+	return 0;
+}
+
+static int dm_blk_close(struct inode *inode, struct file *file)
+{
+	struct mapped_device *md;
+
+	md = get_kdev(inode->i_rdev);
+	dm_put(md);		/* put the reference gained by dm_blk_open */
+	dm_put(md);
+	return 0;
+}
+
+static inline struct dm_io *alloc_io(struct mapped_device *md)
+{
+	return mempool_alloc(md->io_pool, GFP_NOIO);
+}
+
+static inline void free_io(struct mapped_device *md, struct dm_io *io)
+{
+	mempool_free(io, md->io_pool);
+}
+
+static inline struct deferred_io *alloc_deferred(void)
+{
+	return kmalloc(sizeof(struct deferred_io), GFP_NOIO);
+}
+
+static inline void free_deferred(struct deferred_io *di)
+{
+	kfree(di);
+}
+
+static inline sector_t volume_size(kdev_t dev)
+{
+	return blk_size[major(dev)][minor(dev)] << 1;
+}
+
+/* FIXME: check this */
+static int dm_blk_ioctl(struct inode *inode, struct file *file,
+			unsigned int command, unsigned long a)
+{
+	kdev_t dev = inode->i_rdev;
+	long size;
+
+	switch (command) {
+	case BLKROSET:
+	case BLKROGET:
+	case BLKRASET:
+	case BLKRAGET:
+	case BLKFLSBUF:
+	case BLKSSZGET:
+	//case BLKRRPART: /* Re-read partition tables */
+	//case BLKPG:
+	case BLKELVGET:
+	case BLKELVSET:
+	case BLKBSZGET:
+	case BLKBSZSET:
+		return blk_ioctl(dev, command, a);
+		break;
+
+	case BLKGETSIZE:
+		size = volume_size(dev);
+		if (copy_to_user((void *) a, &size, sizeof(long)))
+			return -EFAULT;
+		break;
+
+	case BLKGETSIZE64:
+		size = volume_size(dev);
+		if (put_user((u64) ((u64) size) << 9, (u64 *) a))
+			return -EFAULT;
+		break;
+
+	case BLKRRPART:
+		return -ENOTTY;
+
+	case LV_BMAP:
+		return dm_user_bmap(inode, (struct lv_bmap *) a);
+
+	default:
+		DMWARN("unknown block ioctl 0x%x", command);
+		return -ENOTTY;
+	}
+
+	return 0;
+}
+
+/*
+ * Add the buffer to the list of deferred io.
+ */
+static int queue_io(struct mapped_device *md, struct buffer_head *bh, int rw)
+{
+	struct deferred_io *di;
+
+	di = alloc_deferred();
+	if (!di)
+		return -ENOMEM;
+
+	down_write(&md->lock);
+
+	if (!test_bit(DMF_BLOCK_IO, &md->flags)) {
+		up_write(&md->lock);
+		free_deferred(di);
+		return 1;
+	}
+
+	di->bh = bh;
+	di->rw = rw;
+	di->next = md->deferred;
+	md->deferred = di;
+
+	up_write(&md->lock);
+	return 0;		/* deferred successfully */
+}
+
+/*
+ * bh->b_end_io routine that decrements the pending count
+ * and then calls the original bh->b_end_io fn.
+ */
+static void dec_pending(struct buffer_head *bh, int uptodate)
+{
+	int r;
+	struct dm_io *io = bh->b_private;
+	dm_endio_fn endio = io->ti->type->end_io;
+
+	if (endio) {
+		r = endio(io->ti, bh, io->rw, uptodate ? 0 : -EIO,
+			  &io->map_context);
+		if (r < 0)
+			uptodate = 0;
+
+		else if (r > 0)
+			/* the target wants another shot at the io */
+			return;
+	}
+
+	if (atomic_dec_and_test(&io->md->pending))
+		/* nudge anyone waiting on suspend queue */
+		wake_up(&io->md->wait);
+
+	bh->b_end_io = io->end_io;
+	bh->b_private = io->context;
+	free_io(io->md, io);
+
+	bh->b_end_io(bh, uptodate);
+}
+
+/*
+ * Do the bh mapping for a given leaf
+ */
+static inline int __map_buffer(struct mapped_device *md, int rw,
+			       struct buffer_head *bh, struct dm_io *io)
+{
+	struct dm_target *ti;
+
+	if (!md->map)
+		return -EINVAL;
+
+	ti = dm_table_find_target(md->map, bh->b_rsector);
+	if (!ti->type)
+		return -EINVAL;
+
+	/* hook the end io request fn */
+	atomic_inc(&md->pending);
+	io->md = md;
+	io->ti = ti;
+	io->rw = rw;
+	io->end_io = bh->b_end_io;
+	io->context = bh->b_private;
+	bh->b_end_io = dec_pending;
+	bh->b_private = io;
+
+	return ti->type->map(ti, bh, rw, &io->map_context);
+}
+
+/*
+ * Checks to see if we should be deferring io, if so it queues it
+ * and returns 1.
+ */
+static inline int __deferring(struct mapped_device *md, int rw,
+			      struct buffer_head *bh)
+{
+	int r;
+
+	/*
+	 * If we're suspended we have to queue this io for later.
+	 */
+	while (test_bit(DMF_BLOCK_IO, &md->flags)) {
+		up_read(&md->lock);
+
+		/*
+		 * There's no point deferring a read ahead
+		 * request, just drop it.
+		 */
+		if (rw == READA) {
+			down_read(&md->lock);
+			return -EIO;
+		}
+
+		r = queue_io(md, bh, rw);
+		down_read(&md->lock);
+
+		if (r < 0)
+			return r;
+
+		if (r == 0)
+			return 1;	/* deferred successfully */
+
+	}
+
+	return 0;
+}
+
+static int dm_request(request_queue_t *q, int rw, struct buffer_head *bh)
+{
+	int r;
+	struct dm_io *io;
+	struct mapped_device *md;
+
+	md = get_kdev(bh->b_rdev);
+	if (!md) {
+		buffer_IO_error(bh);
+		return 0;
+	}
+
+	io = alloc_io(md);
+	down_read(&md->lock);
+
+	r = __deferring(md, rw, bh);
+	if (r < 0)
+		goto bad;
+
+	else if (!r) {
+		/* not deferring */
+		r = __map_buffer(md, rw, bh, io);
+		if (r < 0)
+			goto bad;
+	} else
+		r = 0;
+
+	up_read(&md->lock);
+	dm_put(md);
+	return r;
+
+ bad:
+	buffer_IO_error(bh);
+	up_read(&md->lock);
+	dm_put(md);
+	return 0;
+}
+
+static int check_dev_size(kdev_t dev, unsigned long block)
+{
+	unsigned int major = major(dev);
+	unsigned int minor = minor(dev);
+
+	/* FIXME: check this */
+	unsigned long max_sector = (blk_size[major][minor] << 1) + 1;
+	unsigned long sector = (block + 1) * (blksize_size[major][minor] >> 9);
+
+	return (sector > max_sector) ? 0 : 1;
+}
+
+/*
+ * Creates a dummy buffer head and maps it (for lilo).
+ */
+static int __bmap(struct mapped_device *md, kdev_t dev, unsigned long block,
+		  kdev_t *r_dev, unsigned long *r_block)
+{
+	struct buffer_head bh;
+	struct dm_target *ti;
+	union map_info map_context;
+	int r;
+
+	if (test_bit(DMF_BLOCK_IO, &md->flags)) {
+		return -EPERM;
+	}
+
+	if (!check_dev_size(dev, block)) {
+		return -EINVAL;
+	}
+
+	if (!md->map)
+		return -EINVAL;
+
+	/* setup dummy bh */
+	memset(&bh, 0, sizeof(bh));
+	bh.b_blocknr = block;
+	bh.b_dev = bh.b_rdev = dev;
+	bh.b_size = blksize_size[major(dev)][minor(dev)];
+	bh.b_rsector = block * (bh.b_size >> 9);
+
+	/* find target */
+	ti = dm_table_find_target(md->map, bh.b_rsector);
+
+	/* do the mapping */
+	r = ti->type->map(ti, &bh, READ, &map_context);
+	ti->type->end_io(ti, &bh, READ, 0, &map_context);
+
+	if (!r) {
+		*r_dev = bh.b_rdev;
+		*r_block = bh.b_rsector / (bh.b_size >> 9);
+	}
+
+	return r;
+}
+
+/*
+ * Marshals arguments and results between user and kernel space.
+ */
+static int dm_user_bmap(struct inode *inode, struct lv_bmap *lvb)
+{
+	struct mapped_device *md;
+	unsigned long block, r_block;
+	kdev_t r_dev;
+	int r;
+
+	if (get_user(block, &lvb->lv_block))
+		return -EFAULT;
+
+	md = get_kdev(inode->i_rdev);
+	if (!md)
+		return -ENXIO;
+
+	down_read(&md->lock);
+	r = __bmap(md, inode->i_rdev, block, &r_dev, &r_block);
+	up_read(&md->lock);
+	dm_put(md);
+
+	if (!r && (put_user(kdev_t_to_nr(r_dev), &lvb->lv_dev) ||
+		   put_user(r_block, &lvb->lv_block)))
+		r = -EFAULT;
+
+	return r;
+}
+
+static void free_md(struct mapped_device *md)
+{
+	free_dev(md->dev);
+	mempool_destroy(md->io_pool);
+	kfree(md);
+}
+
+/*
+ * Allocate and initialise a blank device with a given minor.
+ */
+static struct mapped_device *alloc_md(kdev_t dev)
+{
+	int r;
+	struct mapped_device *md = kmalloc(sizeof(*md), GFP_KERNEL);
+
+	if (!md) {
+		DMWARN("unable to allocate device, out of memory.");
+		return NULL;
+	}
+
+	memset(md, 0, sizeof(*md));
+
+	/* Allocate suitable device number */
+	if (!dev)
+		r = first_free_dev(md);
+	else
+		r = specific_dev(dev, md);
+
+	if (r) {
+		kfree(md);
+		return NULL;
+	}
+
+	md->io_pool = mempool_create(MIN_IOS, mempool_alloc_slab,
+				     mempool_free_slab, _io_cache);
+	if (!md->io_pool) {
+		free_dev(md->dev);
+		kfree(md);
+		return NULL;
+	}
+
+	init_rwsem(&md->lock);
+	atomic_set(&md->holders, 1);
+	atomic_set(&md->pending, 0);
+	init_waitqueue_head(&md->wait);
+	init_waitqueue_head(&md->eventq);
+
+	return md;
+}
+
+/*
+ * The hardsect size for a mapped device is the largest hardsect size
+ * from the devices it maps onto.
+ */
+static int __find_hardsect_size(struct list_head *devices)
+{
+	int result = 512, size;
+	struct list_head *tmp;
+
+	list_for_each (tmp, devices) {
+		struct dm_dev *dd = list_entry(tmp, struct dm_dev, list);
+		size = get_hardsect_size(dd->dev);
+		if (size > result)
+			result = size;
+	}
+
+	return result;
+}
+
+/*
+ * Bind a table to the device.
+ */
+static void event_callback(void *context)
+{
+	struct mapped_device *md = (struct mapped_device *) context;
+
+	down_write(&md->lock);
+	md->event_nr++;
+	wake_up_interruptible(&md->eventq);
+	up_write(&md->lock);
+}
+
+static int __bind(struct mapped_device *md, struct dm_table *t)
+{
+	unsigned int minor = minor(md->dev);
+	unsigned int major = major(md->dev);
+	md->map = t;
+
+	/* in k */
+	blk_size[major][minor] = dm_table_get_size(t) >> 1;
+	blksize_size[major][minor] = BLOCK_SIZE;
+	hardsect_size[major][minor] =
+	    __find_hardsect_size(dm_table_get_devices(t));
+	register_disk(NULL, md->dev, 1, &dm_blk_dops, blk_size[major][minor]);
+
+	dm_table_event_callback(md->map, event_callback, md);
+	dm_table_get(t);
+	return 0;
+}
+
+static void __unbind(struct mapped_device *md)
+{
+	unsigned int minor = minor(md->dev);
+	unsigned int major = major(md->dev);
+
+	if (md->map) {
+		dm_table_event_callback(md->map, NULL, NULL);
+		dm_table_put(md->map);
+		md->map = NULL;
+
+	}
+
+	blk_size[major][minor] = 0;
+	blksize_size[major][minor] = 0;
+	hardsect_size[major][minor] = 0;
+}
+
+/*
+ * Constructor for a new device.
+ */
+int dm_create(kdev_t dev, struct mapped_device **result)
+{
+	struct mapped_device *md;
+
+	md = alloc_md(dev);
+	if (!md)
+		return -ENXIO;
+
+	__unbind(md);		/* Ensure zero device size */
+
+	*result = md;
+	return 0;
+}
+
+void dm_get(struct mapped_device *md)
+{
+	atomic_inc(&md->holders);
+}
+
+void dm_put(struct mapped_device *md)
+{
+	if (atomic_dec_and_test(&md->holders)) {
+		if (md->map)
+			dm_table_suspend_targets(md->map);
+		__unbind(md);
+		free_md(md);
+	}
+}
+
+/*
+ * Requeue the deferred io by calling generic_make_request.
+ */
+static void flush_deferred_io(struct deferred_io *c)
+{
+	struct deferred_io *n;
+
+	while (c) {
+		n = c->next;
+		generic_make_request(c->rw, c->bh);
+		free_deferred(c);
+		c = n;
+	}
+}
+
+/*
+ * Swap in a new table (destroying old one).
+ */
+int dm_swap_table(struct mapped_device *md, struct dm_table *table)
+{
+	int r;
+
+	down_write(&md->lock);
+
+	/*
+	 * The device must be suspended, or have no table bound yet.
+	 */
+	if (md->map && !test_bit(DMF_SUSPENDED, &md->flags)) {
+		up_write(&md->lock);
+		return -EPERM;
+	}
+
+	__unbind(md);
+	r = __bind(md, table);
+	up_write(&md->lock);
+
+	if (r)
+		return r;
+	return 0;
+}
+
+/*
+ * We need to be able to change a mapping table under a mounted
+ * filesystem. For example we might want to move some data in
+ * the background. Before the table can be swapped with
+ * dm_bind_table, dm_suspend must be called to flush any in
+ * flight io and ensure that any further io gets deferred.
+ */
+int dm_suspend(struct mapped_device *md)
+{
+	int r = 0;
+	DECLARE_WAITQUEUE(wait, current);
+
+	down_write(&md->lock);
+
+	/*
+	 * First we set the BLOCK_IO flag so no more ios will be
+	 * mapped.
+	 */
+	if (test_bit(DMF_BLOCK_IO, &md->flags)) {
+		up_write(&md->lock);
+		return -EINVAL;
+	}
+
+	set_bit(DMF_BLOCK_IO, &md->flags);
+	add_wait_queue(&md->wait, &wait);
+	up_write(&md->lock);
+
+	/*
+	 * Then we wait for the already mapped ios to
+	 * complete.
+	 */
+	run_task_queue(&tq_disk);
+	while (1) {
+		set_current_state(TASK_INTERRUPTIBLE);
+
+		if (!atomic_read(&md->pending) || signal_pending(current))
+			break;
+
+		schedule();
+	}
+	set_current_state(TASK_RUNNING);
+
+	down_write(&md->lock);
+	remove_wait_queue(&md->wait, &wait);
+
+	/* did we flush everything ? */
+	if (atomic_read(&md->pending)) {
+		clear_bit(DMF_BLOCK_IO, &md->flags);
+		r = -EINTR;
+	} else {
+		set_bit(DMF_SUSPENDED, &md->flags);
+		if (md->map)
+			dm_table_suspend_targets(md->map);
+	}
+	up_write(&md->lock);
+
+	return r;
+}
+
+int dm_resume(struct mapped_device *md)
+{
+	struct deferred_io *def;
+
+	down_write(&md->lock);
+	if (!test_bit(DMF_SUSPENDED, &md->flags)) {
+		up_write(&md->lock);
+		return -EINVAL;
+	}
+
+	if (md->map)
+		dm_table_resume_targets(md->map);
+
+	clear_bit(DMF_SUSPENDED, &md->flags);
+	clear_bit(DMF_BLOCK_IO, &md->flags);
+	def = md->deferred;
+	md->deferred = NULL;
+	up_write(&md->lock);
+
+	flush_deferred_io(def);
+	run_task_queue(&tq_disk);
+
+	return 0;
+}
+
+struct dm_table *dm_get_table(struct mapped_device *md)
+{
+	struct dm_table *t;
+
+	down_read(&md->lock);
+	t = md->map;
+	if (t)
+		dm_table_get(t);
+	up_read(&md->lock);
+
+	return t;
+}
+
+/*-----------------------------------------------------------------
+ * Event notification.
+ *---------------------------------------------------------------*/
+uint32_t dm_get_event_nr(struct mapped_device *md)
+{
+	uint32_t r;
+
+	down_read(&md->lock);
+	r = md->event_nr;
+	up_read(&md->lock);
+
+	return r;
+}
+
+int dm_add_wait_queue(struct mapped_device *md, wait_queue_t *wq,
+		      uint32_t event_nr)
+{
+	down_write(&md->lock);
+	if (event_nr != md->event_nr) {
+		up_write(&md->lock);
+		return 1;
+	}
+
+	add_wait_queue(&md->eventq, wq);
+	up_write(&md->lock);
+
+	return 0;
+}
+
+const char *dm_kdevname(kdev_t dev)
+{
+	static char buffer[32];
+	sprintf(buffer, "%03d:%03d", MAJOR(dev), MINOR(dev));
+	return buffer;
+}
+
+void dm_remove_wait_queue(struct mapped_device *md, wait_queue_t *wq)
+{
+	down_write(&md->lock);
+	remove_wait_queue(&md->eventq, wq);
+	up_write(&md->lock);
+}
+
+kdev_t dm_kdev(struct mapped_device *md)
+{
+	kdev_t dev;
+
+	down_read(&md->lock);
+	dev = md->dev;
+	up_read(&md->lock);
+
+	return dev;
+}
+
+int dm_suspended(struct mapped_device *md)
+{
+	return test_bit(DMF_SUSPENDED, &md->flags);
+}
+
+struct block_device_operations dm_blk_dops = {
+	.open = dm_blk_open,
+	.release = dm_blk_close,
+	.ioctl = dm_blk_ioctl,
+	.owner = THIS_MODULE
+};
+
+/*
+ * module hooks
+ */
+module_init(dm_init);
+module_exit(dm_exit);
+
+MODULE_DESCRIPTION(DM_NAME " driver");
+MODULE_AUTHOR("Joe Thornber <thornber@sistina.com>");
+MODULE_LICENSE("GPL");
+
+EXPORT_SYMBOL(dm_kdevname);
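For orientation, the suspend/resume machinery above is what makes online table
replacement safe: dm_suspend() sets DMF_BLOCK_IO so new io is deferred, waits
for md->pending to drain, then sets DMF_SUSPENDED; dm_swap_table() refuses to
run unless the device is suspended or has no table bound; dm_resume() rebinds
io and replays the deferred list. A hedged sketch of the ordering a caller
(e.g. the ioctl interface) is expected to follow; the helper name is
hypothetical:

	/* Hypothetical caller showing the required ordering. */
	static int demo_replace_table(struct mapped_device *md,
				      struct dm_table *t)
	{
		int r;

		r = dm_suspend(md);	/* block new io, drain in-flight io */
		if (r)
			return r;

		r = dm_swap_table(md, t);	/* only legal while suspended */
		if (r)
			return r;

		return dm_resume(md);	/* replay the deferred io list */
	}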
diff -urN linux-2.4.24.org/drivers/md/dm-daemon.c linux-2.4.24/drivers/md/dm-daemon.c
--- linux-2.4.24.org/drivers/md/dm-daemon.c	1970-01-01 01:00:00.000000000 +0100
+++ linux-2.4.24/drivers/md/dm-daemon.c	2004-01-18 15:01:21.977991002 +0100
@@ -0,0 +1,113 @@
+/*
+ * Copyright (C) 2003 Sistina Software
+ *
+ * This file is released under the LGPL.
+ */
+
+#include "dm.h"
+#include "dm-daemon.h"
+
+#include <linux/module.h>
+#include <linux/sched.h>
+
+static int daemon(void *arg)
+{
+	struct dm_daemon *dd = (struct dm_daemon *) arg;
+	DECLARE_WAITQUEUE(wq, current);
+
+	daemonize();
+	reparent_to_init();
+
+	/* block all signals */
+	spin_lock_irq(&current->sigmask_lock);
+	sigfillset(&current->blocked);
+	flush_signals(current);
+	spin_unlock_irq(&current->sigmask_lock);
+
+	strcpy(current->comm, dd->name);
+	atomic_set(&dd->please_die, 0);
+
+	add_wait_queue(&dd->job_queue, &wq);
+
+	down(&dd->run_lock);
+	up(&dd->start_lock);
+
+	/*
+	 * dd->fn() could do anything, very likely it will
+	 * suspend. So we can't set the state to
+	 * TASK_INTERRUPTIBLE before calling it. In order to
+	 * prevent a race with a waking thread we do this little
+	 * dance with the dd->woken variable.
+	 */
+	while (1) {
+		do {
+			set_current_state(TASK_RUNNING);
+
+			if (atomic_read(&dd->please_die))
+				goto out;
+
+			atomic_set(&dd->woken, 0);
+			dd->fn();
+			yield();
+
+			set_current_state(TASK_INTERRUPTIBLE);
+		} while (atomic_read(&dd->woken));
+
+		schedule();
+	}
+
+ out:
+	remove_wait_queue(&dd->job_queue, &wq);
+	up(&dd->run_lock);
+	return 0;
+}
+
+int dm_daemon_start(struct dm_daemon *dd, const char *name, void (*fn)(void))
+{
+	pid_t pid = 0;
+
+	/*
+	 * Initialise the dm_daemon.
+	 */
+	dd->fn = fn;
+	strncpy(dd->name, name, sizeof(dd->name) - 1);
+	sema_init(&dd->start_lock, 1);
+	sema_init(&dd->run_lock, 1);
+	init_waitqueue_head(&dd->job_queue);
+
+	/*
+	 * Start the new thread.
+	 */
+	down(&dd->start_lock);
+	pid = kernel_thread(daemon, dd, 0);
+	if (pid <= 0) {
+		DMERR("Failed to start %s thread", name);
+		return -EAGAIN;
+	}
+
+	/*
+	 * Wait for the daemon thread to up this semaphore.
+	 */
+	down(&dd->start_lock);
+	up(&dd->start_lock);
+
+	return 0;
+}
+
+void dm_daemon_stop(struct dm_daemon *dd)
+{
+	atomic_set(&dd->please_die, 1);
+	dm_daemon_wake(dd);
+	down(&dd->run_lock);
+	up(&dd->run_lock);
+}
+
+void dm_daemon_wake(struct dm_daemon *dd)
+{
+	atomic_set(&dd->woken, 1);
+	wake_up_interruptible(&dd->job_queue);
+}
+
+EXPORT_SYMBOL(dm_daemon_start);
+EXPORT_SYMBOL(dm_daemon_stop);
+EXPORT_SYMBOL(dm_daemon_wake);
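Usage implied by the API above (all names in this sketch are hypothetical): a
client embeds a struct dm_daemon, starts the kernel thread once, wakes it
whenever new work is queued, and stops it on shutdown. dm_daemon_stop() blocks
on run_lock until the thread has exited.

	static struct dm_daemon _demo_daemon;	/* hypothetical client */

	static void demo_do_work(void)
	{
		/* drain the client's job list; called with task running */
	}

	static int __init demo_init(void)
	{
		return dm_daemon_start(&_demo_daemon, "demod", demo_do_work);
	}

	static void __exit demo_exit(void)
	{
		dm_daemon_stop(&_demo_daemon);
	}

	/* producers queue a job, then call dm_daemon_wake(&_demo_daemon); */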
diff -urN linux-2.4.24.org/drivers/md/dm-daemon.h linux-2.4.24/drivers/md/dm-daemon.h
--- linux-2.4.24.org/drivers/md/dm-daemon.h	1970-01-01 01:00:00.000000000 +0100
+++ linux-2.4.24/drivers/md/dm-daemon.h	2004-01-18 15:01:21.980990372 +0100
@@ -0,0 +1,29 @@
+/*
+ * Copyright (C) 2003 Sistina Software
+ *
+ * This file is released under the LGPL.
+ */
+
+#ifndef DM_DAEMON_H
+#define DM_DAEMON_H
+
+#include <asm/atomic.h>
+#include <asm/semaphore.h>
+
+struct dm_daemon {
+	void (*fn)(void);
+	char name[16];
+	atomic_t please_die;
+	struct semaphore start_lock;
+	struct semaphore run_lock;
+
+	atomic_t woken;
+	wait_queue_head_t job_queue;
+};
+
+int dm_daemon_start(struct dm_daemon *dd, const char *name, void (*fn)(void));
+void dm_daemon_stop(struct dm_daemon *dd);
+void dm_daemon_wake(struct dm_daemon *dd);
+int dm_daemon_running(struct dm_daemon *dd);
+
+#endif
diff -urN linux-2.4.24.org/drivers/md/dm-exception-store.c linux-2.4.24/drivers/md/dm-exception-store.c
--- linux-2.4.24.org/drivers/md/dm-exception-store.c	1970-01-01 01:00:00.000000000 +0100
+++ linux-2.4.24/drivers/md/dm-exception-store.c	2004-01-18 15:01:29.225470463 +0100
@@ -0,0 +1,673 @@
+/*
+ * dm-snapshot.c
+ *
+ * Copyright (C) 2001-2002 Sistina Software (UK) Limited.
+ *
+ * This file is released under the GPL.
+ */
+
+#include "dm-snapshot.h"
+#include "dm-io.h"
+#include "kcopyd.h"
+
+#include <linux/mm.h>
+#include <linux/pagemap.h>
+#include <linux/vmalloc.h>
+#include <linux/slab.h>
+
+/*-----------------------------------------------------------------
+ * Persistent snapshots, by persistent we mean that the snapshot
+ * will survive a reboot.
+ *---------------------------------------------------------------*/
+
+/*
+ * We need to store a record of which parts of the origin have
+ * been copied to the snapshot device. The snapshot code
+ * requires that we copy exception chunks to chunk aligned areas
+ * of the COW store. It makes sense, therefore, to store the
+ * metadata in chunk size blocks.
+ *
+ * There is no backward or forward compatibility implemented,
+ * snapshots with different disk versions than the kernel will
+ * not be usable. It is expected that "lvcreate" will blank out
+ * the start of a fresh COW device before calling the snapshot
+ * constructor.
+ *
+ * The first chunk of the COW device just contains the header.
+ * After this there is a chunk filled with exception metadata,
+ * followed by as many exception chunks as can fit in the
+ * metadata areas.
+ *
+ * All on disk structures are in little-endian format. The end
+ * of the exceptions info is indicated by an exception with a
+ * new_chunk of 0, which is invalid since it would point to the
+ * header chunk.
+ */
+
+/*
+ * Magic for persistent snapshots: "SnAp" - Feeble isn't it.
+ */
+#define SNAP_MAGIC 0x70416e53
+
+/*
+ * The on-disk version of the metadata.
+ */
+#define SNAPSHOT_DISK_VERSION 1
+
+struct disk_header {
+	uint32_t magic;
+
+	/*
+	 * Is this snapshot valid? There is no way of recovering
+	 * an invalid snapshot.
+	 */
+	uint32_t valid;
+
+	/*
+	 * Simple, incrementing version. No backward
+	 * compatibility.
+	 */
+	uint32_t version;
+
+	/* In sectors */
+	uint32_t chunk_size;
+};
+
+struct disk_exception {
+	uint64_t old_chunk;
+	uint64_t new_chunk;
+};
+
+struct commit_callback {
+	void (*callback)(void *, int success);
+	void *context;
+};
+
+/*
+ * The top level structure for a persistent exception store.
+ */
+struct pstore {
+	struct dm_snapshot *snap;	/* up pointer to my snapshot */
+	int version;
+	int valid;
+	uint32_t chunk_size;
+	uint32_t exceptions_per_area;
+
+	/*
+	 * Now that we have an asynchronous kcopyd there is no
+	 * need for large chunk sizes, so it won't hurt to have a
+	 * whole chunk's worth of metadata in memory at once.
1609 | + */ | |
1610 | + void *area; | |
1611 | + | |
1612 | + /* | |
1613 | + * Used to keep track of which metadata area the data in | |
1614 | + * 'chunk' refers to. | |
1615 | + */ | |
1616 | + uint32_t current_area; | |
1617 | + | |
1618 | + /* | |
1619 | + * The next free chunk for an exception. | |
1620 | + */ | |
1621 | + uint32_t next_free; | |
1622 | + | |
1623 | + /* | |
1624 | + * The index of next free exception in the current | |
1625 | + * metadata area. | |
1626 | + */ | |
1627 | + uint32_t current_committed; | |
1628 | + | |
1629 | + atomic_t pending_count; | |
1630 | + uint32_t callback_count; | |
1631 | + struct commit_callback *callbacks; | |
1632 | +}; | |
1633 | + | |
1634 | +static inline unsigned int sectors_to_pages(unsigned int sectors) | |
1635 | +{ | |
1636 | + return sectors / (PAGE_SIZE / SECTOR_SIZE); | |
1637 | +} | |
1638 | + | |
1639 | +static int alloc_area(struct pstore *ps) | |
1640 | +{ | |
1641 | + int r = -ENOMEM; | |
1642 | + size_t i, len, nr_pages; | |
1643 | + struct page *page, *last = NULL; | |
1644 | + | |
1645 | + len = ps->chunk_size << SECTOR_SHIFT; | |
1646 | + | |
1647 | + /* | |
1648 | + * Allocate the chunk_size block of memory that will hold | |
1649 | + * a single metadata area. | |
1650 | + */ | |
1651 | + ps->area = vmalloc(len); | |
1652 | + if (!ps->area) | |
1653 | + return r; | |
1654 | + | |
1655 | + nr_pages = sectors_to_pages(ps->chunk_size); | |
1656 | + | |
1657 | + /* | |
1658 | + * We lock the pages for ps->area into memory since | |
1659 | + * they'll be doing a lot of io. We also chain them | |
1660 | + * together ready for dm-io. | |
1661 | + */ | |
1662 | + for (i = 0; i < nr_pages; i++) { | |
1663 | + page = vmalloc_to_page(ps->area + (i * PAGE_SIZE)); | |
1664 | + LockPage(page); | |
1665 | + if (last) | |
1666 | + last->list.next = &page->list; | |
1667 | + last = page; | |
1668 | + } | |
1669 | + | |
1670 | + return 0; | |
1671 | +} | |
1672 | + | |
1673 | +static void free_area(struct pstore *ps) | |
1674 | +{ | |
1675 | + size_t i, nr_pages; | |
1676 | + struct page *page; | |
1677 | + | |
1678 | + nr_pages = sectors_to_pages(ps->chunk_size); | |
1679 | + for (i = 0; i < nr_pages; i++) { | |
1680 | + page = vmalloc_to_page(ps->area + (i * PAGE_SIZE)); | |
1681 | + page->list.next = NULL; | |
1682 | + UnlockPage(page); | |
1683 | + } | |
1684 | + | |
1685 | + vfree(ps->area); | |
1686 | +} | |
1687 | + | |
1688 | +/* | |
1689 | + * Read or write a chunk aligned and sized block of data from a device. | |
1690 | + */ | |
1691 | +static int chunk_io(struct pstore *ps, uint32_t chunk, int rw) | |
1692 | +{ | |
1693 | + struct io_region where; | |
1694 | + unsigned int bits; | |
1695 | + | |
1696 | + where.dev = ps->snap->cow->dev; | |
1697 | + where.sector = ps->chunk_size * chunk; | |
1698 | + where.count = ps->chunk_size; | |
1699 | + | |
1700 | + return dm_io_sync(1, &where, rw, vmalloc_to_page(ps->area), 0, &bits); | |
1701 | +} | |
1702 | + | |
1703 | +/* | |
1704 | + * Read or write a metadata area. Remembering to skip the first | |
1705 | + * chunk which holds the header. | |
1706 | + */ | |
1707 | +static int area_io(struct pstore *ps, uint32_t area, int rw) | |
1708 | +{ | |
1709 | + int r; | |
1710 | + uint32_t chunk; | |
1711 | + | |
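| + /* | |
| +  * Layout: chunk 0 holds the header; each area then occupies | |
| +  * one metadata chunk followed by exceptions_per_area data | |
| +  * chunks (see persistent_prepare), so area N's metadata is | |
| +  * chunk 1 + N * (exceptions_per_area + 1). | |
| +  */ | |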
1712 | + /* convert a metadata area index to a chunk index */ | |
1713 | + chunk = 1 + ((ps->exceptions_per_area + 1) * area); | |
1714 | + | |
1715 | + r = chunk_io(ps, chunk, rw); | |
1716 | + if (r) | |
1717 | + return r; | |
1718 | + | |
1719 | + ps->current_area = area; | |
1720 | + return 0; | |
1721 | +} | |
1722 | + | |
1723 | +static int zero_area(struct pstore *ps, uint32_t area) | |
1724 | +{ | |
1725 | + memset(ps->area, 0, ps->chunk_size << SECTOR_SHIFT); | |
1726 | + return area_io(ps, area, WRITE); | |
1727 | +} | |
1728 | + | |
1729 | +static int read_header(struct pstore *ps, int *new_snapshot) | |
1730 | +{ | |
1731 | + int r; | |
1732 | + struct disk_header *dh; | |
1733 | + | |
1734 | + r = chunk_io(ps, 0, READ); | |
1735 | + if (r) | |
1736 | + return r; | |
1737 | + | |
1738 | + dh = (struct disk_header *) ps->area; | |
1739 | + | |
1740 | + if (le32_to_cpu(dh->magic) == 0) { | |
1741 | + *new_snapshot = 1; | |
1742 | + | |
1743 | + } else if (le32_to_cpu(dh->magic) == SNAP_MAGIC) { | |
1744 | + *new_snapshot = 0; | |
1745 | + ps->valid = le32_to_cpu(dh->valid); | |
1746 | + ps->version = le32_to_cpu(dh->version); | |
1747 | + ps->chunk_size = le32_to_cpu(dh->chunk_size); | |
1748 | + | |
1749 | + } else { | |
1750 | + DMWARN("Invalid/corrupt snapshot"); | |
1751 | + r = -ENXIO; | |
1752 | + } | |
1753 | + | |
1754 | + return r; | |
1755 | +} | |
1756 | + | |
1757 | +static int write_header(struct pstore *ps) | |
1758 | +{ | |
1759 | + struct disk_header *dh; | |
1760 | + | |
1761 | + memset(ps->area, 0, ps->chunk_size << SECTOR_SHIFT); | |
1762 | + | |
1763 | + dh = (struct disk_header *) ps->area; | |
1764 | + dh->magic = cpu_to_le32(SNAP_MAGIC); | |
1765 | + dh->valid = cpu_to_le32(ps->valid); | |
1766 | + dh->version = cpu_to_le32(ps->version); | |
1767 | + dh->chunk_size = cpu_to_le32(ps->chunk_size); | |
1768 | + | |
1769 | + return chunk_io(ps, 0, WRITE); | |
1770 | +} | |
1771 | + | |
1772 | +/* | |
1773 | + * Access functions for the disk exceptions, these do the endian conversions. | |
1774 | + */ | |
1775 | +static struct disk_exception *get_exception(struct pstore *ps, uint32_t index) | |
1776 | +{ | |
1777 | + if (index >= ps->exceptions_per_area) | |
1778 | + return NULL; | |
1779 | + | |
1780 | + return ((struct disk_exception *) ps->area) + index; | |
1781 | +} | |
1782 | + | |
1783 | +static int read_exception(struct pstore *ps, | |
1784 | + uint32_t index, struct disk_exception *result) | |
1785 | +{ | |
1786 | + struct disk_exception *e; | |
1787 | + | |
1788 | + e = get_exception(ps, index); | |
1789 | + if (!e) | |
1790 | + return -EINVAL; | |
1791 | + | |
1792 | + /* copy it */ | |
1793 | + result->old_chunk = le64_to_cpu(e->old_chunk); | |
1794 | + result->new_chunk = le64_to_cpu(e->new_chunk); | |
1795 | + | |
1796 | + return 0; | |
1797 | +} | |
1798 | + | |
1799 | +static int write_exception(struct pstore *ps, | |
1800 | + uint32_t index, struct disk_exception *de) | |
1801 | +{ | |
1802 | + struct disk_exception *e; | |
1803 | + | |
1804 | + e = get_exception(ps, index); | |
1805 | + if (!e) | |
1806 | + return -EINVAL; | |
1807 | + | |
1808 | + /* copy it */ | |
1809 | + e->old_chunk = cpu_to_le64(de->old_chunk); | |
1810 | + e->new_chunk = cpu_to_le64(de->new_chunk); | |
1811 | + | |
1812 | + return 0; | |
1813 | +} | |
1814 | + | |
1815 | +/* | |
1816 | + * Registers the exceptions that are present in the current area. | |
1817 | + * 'full' is set to indicate whether the area has been | |
1818 | + * completely filled. | |
1819 | + */ | |
1820 | +static int insert_exceptions(struct pstore *ps, int *full) | |
1821 | +{ | |
1822 | + int r; | |
1823 | + unsigned int i; | |
1824 | + struct disk_exception de; | |
1825 | + | |
1826 | + /* presume the area is full */ | |
1827 | + *full = 1; | |
1828 | + | |
1829 | + for (i = 0; i < ps->exceptions_per_area; i++) { | |
1830 | + r = read_exception(ps, i, &de); | |
1831 | + | |
1832 | + if (r) | |
1833 | + return r; | |
1834 | + | |
1835 | + /* | |
1836 | + * If the new_chunk is pointing at the start of | |
1837 | + * the COW device, where the first metadata area | |
1838 | + * is, we know that we've hit the end of the | |
1839 | + * exceptions. Therefore the area is not full. | |
1840 | + */ | |
1841 | + if (de.new_chunk == 0LL) { | |
1842 | + ps->current_committed = i; | |
1843 | + *full = 0; | |
1844 | + break; | |
1845 | + } | |
1846 | + | |
1847 | + /* | |
1848 | + * Keep track of the start of the free chunks. | |
1849 | + */ | |
1850 | + if (ps->next_free <= de.new_chunk) | |
1851 | + ps->next_free = de.new_chunk + 1; | |
1852 | + | |
1853 | + /* | |
1854 | + * Otherwise we add the exception to the snapshot. | |
1855 | + */ | |
1856 | + r = dm_add_exception(ps->snap, de.old_chunk, de.new_chunk); | |
1857 | + if (r) | |
1858 | + return r; | |
1859 | + } | |
1860 | + | |
1861 | + return 0; | |
1862 | +} | |
1863 | + | |
1864 | +static int read_exceptions(struct pstore *ps) | |
1865 | +{ | |
1866 | + uint32_t area; | |
1867 | + int r, full = 1; | |
1868 | + | |
1869 | + /* | |
1870 | + * Keep reading chunks and inserting exceptions until | |
1871 | + * we find a partially full area. | |
1872 | + */ | |
1873 | + for (area = 0; full; area++) { | |
1874 | + r = area_io(ps, area, READ); | |
1875 | + if (r) | |
1876 | + return r; | |
1877 | + | |
1878 | + r = insert_exceptions(ps, &full); | |
1879 | + if (r) | |
1880 | + return r; | |
1881 | + } | |
1882 | + | |
1883 | + return 0; | |
1884 | +} | |
1885 | + | |
1886 | +static inline struct pstore *get_info(struct exception_store *store) | |
1887 | +{ | |
1888 | + return (struct pstore *) store->context; | |
1889 | +} | |
1890 | + | |
1891 | +static void persistent_fraction_full(struct exception_store *store, | |
1892 | + sector_t *numerator, sector_t *denominator) | |
1893 | +{ | |
1894 | + *numerator = get_info(store)->next_free * store->snap->chunk_size; | |
1895 | + *denominator = get_dev_size(store->snap->cow->dev); | |
1896 | +} | |
1897 | + | |
1898 | +static void persistent_destroy(struct exception_store *store) | |
1899 | +{ | |
1900 | + struct pstore *ps = get_info(store); | |
1901 | + | |
1902 | + dm_io_put(sectors_to_pages(ps->chunk_size)); | |
1903 | + vfree(ps->callbacks); | |
1904 | + free_area(ps); | |
1905 | + kfree(ps); | |
1906 | +} | |
1907 | + | |
1908 | +static int persistent_read_metadata(struct exception_store *store) | |
1909 | +{ | |
1910 | + int r, new_snapshot; | |
1911 | + struct pstore *ps = get_info(store); | |
1912 | + | |
1913 | + /* | |
1914 | + * Read the snapshot header. | |
1915 | + */ | |
1916 | + r = read_header(ps, &new_snapshot); | |
1917 | + if (r) | |
1918 | + return r; | |
1919 | + | |
1920 | + /* | |
1921 | + * Do we need to set up a new snapshot ? | |
1922 | + */ | |
1923 | + if (new_snapshot) { | |
1924 | + r = write_header(ps); | |
1925 | + if (r) { | |
1926 | + DMWARN("write_header failed"); | |
1927 | + return r; | |
1928 | + } | |
1929 | + | |
1930 | + r = zero_area(ps, 0); | |
1931 | + if (r) { | |
1932 | + DMWARN("zero_area(0) failed"); | |
1933 | + return r; | |
1934 | + } | |
1935 | + | |
1936 | + } else { | |
1937 | + /* | |
1938 | + * Sanity checks. | |
1939 | + */ | |
1940 | + if (!ps->valid) { | |
1941 | + DMWARN("snapshot is marked invalid"); | |
1942 | + return -EINVAL; | |
1943 | + } | |
1944 | + | |
1945 | + if (ps->version != SNAPSHOT_DISK_VERSION) { | |
1946 | + DMWARN("unable to handle snapshot disk version %d", | |
1947 | + ps->version); | |
1948 | + return -EINVAL; | |
1949 | + } | |
1950 | + | |
1951 | + /* | |
1952 | + * Read the metadata. | |
1953 | + */ | |
1954 | + r = read_exceptions(ps); | |
1955 | + if (r) | |
1956 | + return r; | |
1957 | + } | |
1958 | + | |
1959 | + return 0; | |
1960 | +} | |
1961 | + | |
1962 | +static int persistent_prepare(struct exception_store *store, | |
1963 | + struct exception *e) | |
1964 | +{ | |
1965 | + struct pstore *ps = get_info(store); | |
1966 | + uint32_t stride; | |
1967 | + sector_t size = get_dev_size(store->snap->cow->dev); | |
1968 | + | |
1969 | + /* Is there enough room ? */ | |
1970 | + if (size < ((ps->next_free + 1) * store->snap->chunk_size)) | |
1971 | + return -ENOSPC; | |
1972 | + | |
1973 | + e->new_chunk = ps->next_free; | |
1974 | + | |
1975 | + /* | |
1976 | + * Move on to the next free chunk, making sure to take | |
1977 | + * into account the location of the metadata chunks. | |
1978 | + */ | |
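| + /* | |
| +  * Chunk indexes congruent to 1 modulo the stride hold area | |
| +  * metadata and must never be handed out as data chunks. | |
| +  */ | |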
1979 | + stride = (ps->exceptions_per_area + 1); | |
1980 | + if ((++ps->next_free % stride) == 1) | |
1981 | + ps->next_free++; | |
1982 | + | |
1983 | + atomic_inc(&ps->pending_count); | |
1984 | + return 0; | |
1985 | +} | |
1986 | + | |
1987 | +static void persistent_commit(struct exception_store *store, | |
1988 | + struct exception *e, | |
1989 | + void (*callback) (void *, int success), | |
1990 | + void *callback_context) | |
1991 | +{ | |
1992 | + int r; | |
1993 | + unsigned int i; | |
1994 | + struct pstore *ps = get_info(store); | |
1995 | + struct disk_exception de; | |
1996 | + struct commit_callback *cb; | |
1997 | + | |
1998 | + de.old_chunk = e->old_chunk; | |
1999 | + de.new_chunk = e->new_chunk; | |
2000 | + write_exception(ps, ps->current_committed++, &de); | |
2001 | + | |
2002 | + /* | |
2003 | + * Add the callback to the back of the array. This code | |
2004 | + * is the only place where the callback array is | |
2005 | + * manipulated, and we know that it will never be called | |
2006 | + * multiple times concurrently. | |
2007 | + */ | |
2008 | + cb = ps->callbacks + ps->callback_count++; | |
2009 | + cb->callback = callback; | |
2010 | + cb->context = callback_context; | |
2011 | + | |
2012 | + /* | |
2013 | + * If there are no more exceptions in flight, or we have | |
2014 | + * filled this metadata area we commit the exceptions to | |
2015 | + * disk. | |
2016 | + */ | |
2017 | + if (atomic_dec_and_test(&ps->pending_count) || | |
2018 | + (ps->current_committed == ps->exceptions_per_area)) { | |
2019 | + r = area_io(ps, ps->current_area, WRITE); | |
2020 | + if (r) | |
2021 | + ps->valid = 0; | |
2022 | + | |
2023 | + for (i = 0; i < ps->callback_count; i++) { | |
2024 | + cb = ps->callbacks + i; | |
2025 | + cb->callback(cb->context, r == 0 ? 1 : 0); | |
2026 | + } | |
2027 | + | |
2028 | + ps->callback_count = 0; | |
2029 | + } | |
2030 | + | |
2031 | + /* | |
2032 | + * Have we completely filled the current area ? | |
2033 | + */ | |
2034 | + if (ps->current_committed == ps->exceptions_per_area) { | |
2035 | + ps->current_committed = 0; | |
2036 | + r = zero_area(ps, ps->current_area + 1); | |
2037 | + if (r) | |
2038 | + ps->valid = 0; | |
2039 | + } | |
2040 | +} | |
2041 | + | |
2042 | +static void persistent_drop(struct exception_store *store) | |
2043 | +{ | |
2044 | + struct pstore *ps = get_info(store); | |
2045 | + | |
2046 | + ps->valid = 0; | |
2047 | + if (write_header(ps)) | |
2048 | + DMWARN("write header failed"); | |
2049 | +} | |
2050 | + | |
2051 | +int dm_create_persistent(struct exception_store *store, uint32_t chunk_size) | |
2052 | +{ | |
2053 | + int r; | |
2054 | + struct pstore *ps; | |
2055 | + | |
2056 | + r = dm_io_get(sectors_to_pages(chunk_size)); | |
2057 | + if (r) | |
2058 | + return r; | |
2059 | + | |
2060 | + /* allocate the pstore */ | |
2061 | + ps = kmalloc(sizeof(*ps), GFP_KERNEL); | |
2062 | + if (!ps) { | |
2063 | + r = -ENOMEM; | |
2064 | + goto bad; | |
2065 | + } | |
2066 | + | |
2067 | + ps->snap = store->snap; | |
2068 | + ps->valid = 1; | |
2069 | + ps->version = SNAPSHOT_DISK_VERSION; | |
2070 | + ps->chunk_size = chunk_size; | |
2071 | + ps->exceptions_per_area = (chunk_size << SECTOR_SHIFT) / | |
2072 | + sizeof(struct disk_exception); | |
2073 | + ps->next_free = 2; /* skipping the header and first area */ | |
2074 | + ps->current_committed = 0; | |
| + ps->callbacks = NULL; /* so the error path below can test it safely */ | |
2075 | + | |
2076 | + r = alloc_area(ps); | |
2077 | + if (r) | |
2078 | + goto bad; | |
2079 | + | |
2080 | + /* | |
2081 | + * Allocate space for all the callbacks. | |
2082 | + */ | |
2083 | + ps->callback_count = 0; | |
2084 | + atomic_set(&ps->pending_count, 0); | |
2085 | + ps->callbacks = dm_vcalloc(ps->exceptions_per_area, | |
2086 | + sizeof(*ps->callbacks)); | |
2087 | + | |
2088 | + if (!ps->callbacks) { | |
2089 | + r = -ENOMEM; | |
2090 | + goto bad; | |
2091 | + } | |
2092 | + | |
2093 | + store->destroy = persistent_destroy; | |
2094 | + store->read_metadata = persistent_read_metadata; | |
2095 | + store->prepare_exception = persistent_prepare; | |
2096 | + store->commit_exception = persistent_commit; | |
2097 | + store->drop_snapshot = persistent_drop; | |
2098 | + store->fraction_full = persistent_fraction_full; | |
2099 | + store->context = ps; | |
2100 | + | |
2101 | + return 0; | |
2102 | + | |
2103 | + bad: | |
2104 | + dm_io_put(sectors_to_pages(chunk_size)); | |
2105 | + if (ps) { | |
2106 | + if (ps->callbacks) | |
2107 | + vfree(ps->callbacks); | |
| + if (ps->area) | |
| + free_area(ps); | |
2108 | + | |
2109 | + kfree(ps); | |
2110 | + } | |
2111 | + return r; | |
2112 | +} | |
2113 | + | |
2114 | +/*----------------------------------------------------------------- | |
2115 | + * Implementation of the store for non-persistent snapshots. | |
2116 | + *---------------------------------------------------------------*/ | |
2117 | +struct transient_c { | |
2118 | + sector_t next_free; | |
2119 | +}; | |
2120 | + | |
2121 | +void transient_destroy(struct exception_store *store) | |
2122 | +{ | |
2123 | + kfree(store->context); | |
2124 | +} | |
2125 | + | |
2126 | +int transient_read_metadata(struct exception_store *store) | |
2127 | +{ | |
2128 | + return 0; | |
2129 | +} | |
2130 | + | |
2131 | +int transient_prepare(struct exception_store *store, struct exception *e) | |
2132 | +{ | |
2133 | + struct transient_c *tc = (struct transient_c *) store->context; | |
2134 | + sector_t size = get_dev_size(store->snap->cow->dev); | |
2135 | + | |
2136 | + if (size < (tc->next_free + store->snap->chunk_size)) | |
2137 | + return -1; | |
2138 | + | |
2139 | + e->new_chunk = sector_to_chunk(store->snap, tc->next_free); | |
2140 | + tc->next_free += store->snap->chunk_size; | |
2141 | + | |
2142 | + return 0; | |
2143 | +} | |
2144 | + | |
2145 | +void transient_commit(struct exception_store *store, | |
2146 | + struct exception *e, | |
2147 | + void (*callback) (void *, int success), | |
2148 | + void *callback_context) | |
2149 | +{ | |
2150 | + /* Just succeed */ | |
2151 | + callback(callback_context, 1); | |
2152 | +} | |
2153 | + | |
2154 | +static void transient_fraction_full(struct exception_store *store, | |
2155 | + sector_t *numerator, sector_t *denominator) | |
2156 | +{ | |
2157 | + *numerator = ((struct transient_c *) store->context)->next_free; | |
2158 | + *denominator = get_dev_size(store->snap->cow->dev); | |
2159 | +} | |
2160 | + | |
2161 | +int dm_create_transient(struct exception_store *store, | |
2162 | + struct dm_snapshot *s, int blocksize) | |
2163 | +{ | |
2164 | + struct transient_c *tc; | |
2165 | + | |
2166 | + memset(store, 0, sizeof(*store)); | |
2167 | + store->destroy = transient_destroy; | |
2168 | + store->read_metadata = transient_read_metadata; | |
2169 | + store->prepare_exception = transient_prepare; | |
2170 | + store->commit_exception = transient_commit; | |
2171 | + store->fraction_full = transient_fraction_full; | |
2172 | + store->snap = s; | |
2173 | + | |
2174 | + tc = kmalloc(sizeof(struct transient_c), GFP_KERNEL); | |
2175 | + if (!tc) | |
2176 | + return -ENOMEM; | |
2177 | + | |
2178 | + tc->next_free = 0; | |
2179 | + store->context = tc; | |
2180 | + | |
2181 | + return 0; | |
2182 | +} | |
2183 | diff -urN linux-2.4.24.org/drivers/md/dm.h linux-2.4.24/drivers/md/dm.h | |
2184 | --- linux-2.4.24.org/drivers/md/dm.h 1970-01-01 01:00:00.000000000 +0100 | |
2185 | +++ linux-2.4.24/drivers/md/dm.h 2004-01-18 15:01:29.219471722 +0100 | |
2186 | @@ -0,0 +1,176 @@ | |
2187 | +/* | |
2188 | + * Internal header file for device mapper | |
2189 | + * | |
2190 | + * Copyright (C) 2001, 2002 Sistina Software | |
2191 | + * | |
2192 | + * This file is released under the LGPL. | |
2193 | + */ | |
2194 | + | |
2195 | +#ifndef DM_INTERNAL_H | |
2196 | +#define DM_INTERNAL_H | |
2197 | + | |
2198 | +#include <linux/fs.h> | |
2199 | +#include <linux/device-mapper.h> | |
2200 | +#include <linux/list.h> | |
2201 | +#include <linux/blkdev.h> | |
2202 | + | |
2203 | +#define DM_NAME "device-mapper" | |
2204 | +#define DMWARN(f, x...) printk(KERN_WARNING DM_NAME ": " f "\n" , ## x) | |
2205 | +#define DMERR(f, x...) printk(KERN_ERR DM_NAME ": " f "\n" , ## x) | |
2206 | +#define DMINFO(f, x...) printk(KERN_INFO DM_NAME ": " f "\n" , ## x) | |
2207 | + | |
2208 | +/* | |
2209 | + * FIXME: I think this should be with the definition of sector_t | |
2210 | + * in types.h. | |
2211 | + */ | |
2212 | +#ifdef CONFIG_LBD | |
2213 | +#define SECTOR_FORMAT "%Lu" | |
2214 | +#else | |
2215 | +#define SECTOR_FORMAT "%lu" | |
2216 | +#endif | |
2217 | + | |
2218 | +#define SECTOR_SHIFT 9 | |
2219 | +#define SECTOR_SIZE (1 << SECTOR_SHIFT) | |
2220 | + | |
2221 | +extern struct block_device_operations dm_blk_dops; | |
2222 | + | |
2223 | +/* | |
2224 | + * List of devices that a metadevice uses and should open/close. | |
2225 | + */ | |
2226 | +struct dm_dev { | |
2227 | + struct list_head list; | |
2228 | + | |
2229 | + atomic_t count; | |
2230 | + int mode; | |
2231 | + kdev_t dev; | |
2232 | + struct block_device *bdev; | |
2233 | +}; | |
2234 | + | |
2235 | +struct dm_table; | |
2236 | +struct mapped_device; | |
2237 | + | |
2238 | +/*----------------------------------------------------------------- | |
2239 | + * Functions for manipulating a struct mapped_device. | |
2240 | + * Drop the reference with dm_put when you finish with the object. | |
2241 | + *---------------------------------------------------------------*/ | |
2242 | +int dm_create(kdev_t dev, struct mapped_device **md); | |
2243 | + | |
2244 | +/* | |
2245 | + * Reference counting for md. | |
2246 | + */ | |
2247 | +void dm_get(struct mapped_device *md); | |
2248 | +void dm_put(struct mapped_device *md); | |
2249 | + | |
2250 | +/* | |
2251 | + * A device can still be used while suspended, but I/O is deferred. | |
2252 | + */ | |
2253 | +int dm_suspend(struct mapped_device *md); | |
2254 | +int dm_resume(struct mapped_device *md); | |
2255 | + | |
2256 | +/* | |
2257 | + * The device must be suspended before calling this method. | |
2258 | + */ | |
2259 | +int dm_swap_table(struct mapped_device *md, struct dm_table *t); | |
2260 | + | |
2261 | +/* | |
2262 | + * Drop a reference on the table when you've finished with the | |
2263 | + * result. | |
2264 | + */ | |
2265 | +struct dm_table *dm_get_table(struct mapped_device *md); | |
2266 | + | |
2267 | +/* | |
2268 | + * Event functions. | |
2269 | + */ | |
2270 | +uint32_t dm_get_event_nr(struct mapped_device *md); | |
2271 | +int dm_add_wait_queue(struct mapped_device *md, wait_queue_t *wq, | |
2272 | + uint32_t event_nr); | |
2273 | +void dm_remove_wait_queue(struct mapped_device *md, wait_queue_t *wq); | |
2274 | + | |
2275 | +/* | |
2276 | + * Info functions. | |
2277 | + */ | |
2278 | +kdev_t dm_kdev(struct mapped_device *md); | |
2279 | +int dm_suspended(struct mapped_device *md); | |
2280 | + | |
2281 | +/*----------------------------------------------------------------- | |
2282 | + * Functions for manipulating a table. Tables are also reference | |
2283 | + * counted. | |
2284 | + *---------------------------------------------------------------*/ | |
2285 | +int dm_table_create(struct dm_table **result, int mode, unsigned num_targets); | |
2286 | + | |
2287 | +void dm_table_get(struct dm_table *t); | |
2288 | +void dm_table_put(struct dm_table *t); | |
2289 | + | |
2290 | +int dm_table_add_target(struct dm_table *t, const char *type, | |
2291 | + sector_t start, sector_t len, char *params); | |
2292 | +int dm_table_complete(struct dm_table *t); | |
2293 | +void dm_table_event_callback(struct dm_table *t, | |
2294 | + void (*fn)(void *), void *context); | |
2295 | +void dm_table_event(struct dm_table *t); | |
2296 | +sector_t dm_table_get_size(struct dm_table *t); | |
2297 | +struct dm_target *dm_table_get_target(struct dm_table *t, unsigned int index); | |
2298 | +struct dm_target *dm_table_find_target(struct dm_table *t, sector_t sector); | |
2299 | +unsigned int dm_table_get_num_targets(struct dm_table *t); | |
2300 | +struct list_head *dm_table_get_devices(struct dm_table *t); | |
2301 | +int dm_table_get_mode(struct dm_table *t); | |
2302 | +void dm_table_suspend_targets(struct dm_table *t); | |
2303 | +void dm_table_resume_targets(struct dm_table *t); | |
2304 | + | |
2305 | +/*----------------------------------------------------------------- | |
2306 | + * A registry of target types. | |
2307 | + *---------------------------------------------------------------*/ | |
2308 | +int dm_target_init(void); | |
2309 | +void dm_target_exit(void); | |
2310 | +struct target_type *dm_get_target_type(const char *name); | |
2311 | +void dm_put_target_type(struct target_type *t); | |
2312 | + | |
2313 | + | |
2314 | +/*----------------------------------------------------------------- | |
2315 | + * Useful inlines. | |
2316 | + *---------------------------------------------------------------*/ | |
2317 | +static inline int array_too_big(unsigned long fixed, unsigned long obj, | |
2318 | + unsigned long num) | |
2319 | +{ | |
2320 | + return (num > (ULONG_MAX - fixed) / obj); | |
2321 | +} | |
2322 | + | |
2323 | +/* | |
2324 | + * ceiling(n / size) * size | |
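| + * e.g. dm_round_up(10, 4) == 12, dm_round_up(8, 4) == 8 | |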
2325 | + */ | |
2326 | +static inline unsigned long dm_round_up(unsigned long n, unsigned long size) | |
2327 | +{ | |
2328 | + unsigned long r = n % size; | |
2329 | + return n + (r ? (size - r) : 0); | |
2330 | +} | |
2331 | + | |
2332 | +/* | |
2333 | + * Ceiling(n / size) | |
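| + * e.g. dm_div_up(10, 4) == 3 | |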
2334 | + */ | |
2335 | +static inline unsigned long dm_div_up(unsigned long n, unsigned long size) | |
2336 | +{ | |
2337 | + return dm_round_up(n, size) / size; | |
2338 | +} | |
2339 | + | |
2340 | +const char *dm_kdevname(kdev_t dev); | |
2341 | +void *dm_vcalloc(unsigned long nmemb, unsigned long elem_size); | |
2342 | + | |
2343 | +/* | |
2344 | + * The device-mapper can be driven through one of two interfaces: | |
2345 | + * ioctl or filesystem, depending on which patch you have applied. | |
2346 | + */ | |
2347 | +int dm_interface_init(void); | |
2348 | +void dm_interface_exit(void); | |
2349 | + | |
2350 | +/* | |
2351 | + * Targets for linear and striped mappings | |
2352 | + */ | |
2353 | +int dm_linear_init(void); | |
2354 | +void dm_linear_exit(void); | |
2355 | + | |
2356 | +int dm_stripe_init(void); | |
2357 | +void dm_stripe_exit(void); | |
2358 | + | |
2359 | +int dm_snapshot_init(void); | |
2360 | +void dm_snapshot_exit(void); | |
2361 | + | |
2362 | +#endif | |
2363 | diff -urN linux-2.4.24.org/drivers/md/dm-io.c linux-2.4.24/drivers/md/dm-io.c | |
2364 | --- linux-2.4.24.org/drivers/md/dm-io.c 1970-01-01 01:00:00.000000000 +0100 | |
2365 | +++ linux-2.4.24/drivers/md/dm-io.c 2004-01-18 15:01:25.790191115 +0100 | |
2366 | @@ -0,0 +1,361 @@ | |
2367 | +/* | |
2368 | + * Copyright (C) 2003 Sistina Software | |
2369 | + * | |
2370 | + * This file is released under the GPL. | |
2371 | + */ | |
2372 | + | |
2373 | +#include "dm-io.h" | |
2374 | + | |
2375 | +#include <linux/mempool.h> | |
2376 | +#include <linux/module.h> | |
2377 | +#include <linux/slab.h> | |
2378 | +#include <linux/sched.h> | |
2379 | +#include <linux/bitops.h> | |
2380 | + | |
2381 | +/* FIXME: can we shrink this ? */ | |
2382 | +struct io_context { | |
2383 | + int rw; | |
2384 | + unsigned long error; /* one bit per failed region; a long so set_bit() is safe */ | |
2385 | + atomic_t count; | |
2386 | + struct task_struct *sleeper; | |
2387 | + io_notify_fn callback; | |
2388 | + void *context; | |
2389 | +}; | |
2390 | + | |
2391 | +/* | |
2392 | + * We maintain a pool of buffer heads for dispatching the io. | |
2393 | + */ | |
2394 | +static unsigned int _num_bhs; | |
2395 | +static mempool_t *_buffer_pool; | |
2396 | + | |
2397 | +/* | |
2398 | + * io contexts are only dynamically allocated for asynchronous | |
2399 | + * io. Since async io is likely to be the majority of io, we'll | |
2400 | + * have the same number of io contexts as buffer heads ! (FIXME: | |
2401 | + * must reduce this). | |
2402 | + */ | |
2403 | +mempool_t *_io_pool; | |
2404 | + | |
2405 | +static void *alloc_bh(int gfp_mask, void *pool_data) | |
2406 | +{ | |
2407 | + struct buffer_head *bh; | |
2408 | + | |
2409 | + bh = kmem_cache_alloc(bh_cachep, gfp_mask); | |
2410 | + if (bh) { | |
2411 | + bh->b_reqnext = NULL; | |
2412 | + init_waitqueue_head(&bh->b_wait); | |
2413 | + INIT_LIST_HEAD(&bh->b_inode_buffers); | |
2414 | + } | |
2415 | + | |
2416 | + return bh; | |
2417 | +} | |
2418 | + | |
2419 | +static void *alloc_io(int gfp_mask, void *pool_data) | |
2420 | +{ | |
2421 | + return kmalloc(sizeof(struct io_context), gfp_mask); | |
2422 | +} | |
2423 | + | |
2424 | +static void free_io(void *element, void *pool_data) | |
2425 | +{ | |
2426 | + kfree(element); | |
2427 | +} | |
2428 | + | |
2429 | +static unsigned int pages_to_buffers(unsigned int pages) | |
2430 | +{ | |
2431 | + return 4 * pages; /* too many ? */ | |
2432 | +} | |
2433 | + | |
2434 | +static int resize_pool(unsigned int new_bhs) | |
2435 | +{ | |
2436 | + int r = 0; | |
2437 | + | |
2438 | + if (_buffer_pool) { | |
2439 | + if (new_bhs == 0) { | |
2440 | + /* free off the pools */ | |
2441 | + mempool_destroy(_buffer_pool); | |
2442 | + mempool_destroy(_io_pool); | |
2443 | + _buffer_pool = _io_pool = NULL; | |
2444 | + } else { | |
2445 | + /* resize the pools */ | |
2446 | + r = mempool_resize(_buffer_pool, new_bhs, GFP_KERNEL); | |
2447 | + if (!r) | |
2448 | + r = mempool_resize(_io_pool, | |
2449 | + new_bhs, GFP_KERNEL); | |
2450 | + } | |
2451 | + } else { | |
2452 | + /* create new pools */ | |
2453 | + _buffer_pool = mempool_create(new_bhs, alloc_bh, | |
2454 | + mempool_free_slab, bh_cachep); | |
2455 | + if (!_buffer_pool) | |
2456 | + r = -ENOMEM; | |
2457 | + | |
2458 | + _io_pool = mempool_create(new_bhs, alloc_io, free_io, NULL); | |
2459 | + if (!_io_pool) { | |
2460 | + mempool_destroy(_buffer_pool); | |
2461 | + _buffer_pool = NULL; | |
2462 | + r = -ENOMEM; | |
2463 | + } | |
2464 | + } | |
2465 | + | |
2466 | + if (!r) | |
2467 | + _num_bhs = new_bhs; | |
2468 | + | |
2469 | + return r; | |
2470 | +} | |
2471 | + | |
2472 | +int dm_io_get(unsigned int num_pages) | |
2473 | +{ | |
2474 | + return resize_pool(_num_bhs + pages_to_buffers(num_pages)); | |
2475 | +} | |
2476 | + | |
2477 | +void dm_io_put(unsigned int num_pages) | |
2478 | +{ | |
2479 | + resize_pool(_num_bhs - pages_to_buffers(num_pages)); | |
2480 | +} | |
2481 | + | |
2482 | +/*----------------------------------------------------------------- | |
2483 | + * We need to keep track of which region a buffer is doing io | |
2484 | + * for. In order to save a memory allocation we store this in an | |
2485 | + * unused field of the buffer head, and provide these access | |
2486 | + * functions. | |
2487 | + * | |
2488 | + * FIXME: add compile time check that an unsigned int can fit | |
2489 | + * into a pointer. | |
2490 | + * | |
2491 | + *---------------------------------------------------------------*/ | |
2492 | +static inline void bh_set_region(struct buffer_head *bh, unsigned int region) | |
2493 | +{ | |
2494 | + bh->b_journal_head = (void *) region; | |
2495 | +} | |
2496 | + | |
2497 | +static inline int bh_get_region(struct buffer_head *bh) | |
2498 | +{ | |
2499 | + return (unsigned int) bh->b_journal_head; | |
2500 | +} | |
2501 | + | |
2502 | +/*----------------------------------------------------------------- | |
2503 | + * We need an io object to keep track of the number of bhs that | |
2504 | + * have been dispatched for a particular io. | |
2505 | + *---------------------------------------------------------------*/ | |
2506 | +static void dec_count(struct io_context *io, unsigned int region, int error) | |
2507 | +{ | |
2508 | + if (error) | |
2509 | + set_bit(region, &io->error); | |
2510 | + | |
2511 | + if (atomic_dec_and_test(&io->count)) { | |
2512 | + if (io->sleeper) | |
2513 | + wake_up_process(io->sleeper); | |
2514 | + | |
2515 | + else { | |
2516 | + int r = io->error; | |
2517 | + io_notify_fn fn = io->callback; | |
2518 | + void *context = io->context; | |
2519 | + | |
2520 | + mempool_free(io, _io_pool); | |
2521 | + fn(r, context); | |
2522 | + } | |
2523 | + } | |
2524 | +} | |
2525 | + | |
2526 | +static void endio(struct buffer_head *bh, int uptodate) | |
2527 | +{ | |
2528 | + struct io_context *io = (struct io_context *) bh->b_private; | |
2529 | + | |
2530 | + if (!uptodate && io->rw != WRITE) { | |
2531 | + /* | |
2532 | + * We need to zero this region; otherwise people | |
2533 | + * like kcopyd may write the arbitrary contents | |
2534 | + * of the page. | |
2535 | + */ | |
2536 | + memset(bh->b_data, 0, bh->b_size); | |
2537 | + } | |
2538 | + | |
2539 | + dec_count((struct io_context *) bh->b_private, | |
2540 | + bh_get_region(bh), !uptodate); | |
2541 | + mempool_free(bh, _buffer_pool); | |
2542 | +} | |
2543 | + | |
2544 | +/* | |
2545 | + * Primitives for alignment calculations. | |
2546 | + */ | |
2547 | +int fls(unsigned n) | |
2548 | +{ | |
2549 | + return generic_fls32(n); | |
2550 | +} | |
2551 | + | |
2552 | +static inline int log2_floor(unsigned n) | |
2553 | +{ | |
2554 | + return ffs(n) - 1; | |
2555 | +} | |
2556 | + | |
2557 | +static inline int log2_align(unsigned n) | |
2558 | +{ | |
2559 | + return fls(n) - 1; | |
2560 | +} | |
2561 | + | |
2562 | +/* | |
2563 | + * Returns the next block for io. | |
2564 | + */ | |
2565 | +static int do_page(kdev_t dev, sector_t *block, sector_t end_block, | |
2566 | + unsigned int block_size, | |
2567 | + struct page *p, unsigned int offset, | |
2568 | + unsigned int region, struct io_context *io) | |
2569 | +{ | |
2570 | + struct buffer_head *bh; | |
2571 | + sector_t b = *block; | |
2572 | + sector_t blocks_per_page = PAGE_SIZE / block_size; | |
2573 | + unsigned int this_size; /* holds the size of the current io */ | |
2574 | + sector_t len; | |
2575 | + | |
2576 | + if (!blocks_per_page) { | |
2577 | + DMERR("dm-io: PAGE_SIZE (%lu) < block_size (%u) unsupported", | |
2578 | + PAGE_SIZE, block_size); | |
2579 | + return 0; | |
2580 | + } | |
2581 | + | |
2582 | + while ((offset < PAGE_SIZE) && (b != end_block)) { | |
2583 | + bh = mempool_alloc(_buffer_pool, GFP_NOIO); | |
2584 | + init_buffer(bh, endio, io); | |
2585 | + bh_set_region(bh, region); | |
2586 | + | |
2587 | + /* | |
2588 | + * Block size must be a power of 2 and aligned | |
2589 | + * correctly. | |
2590 | + */ | |
2591 | + | |
2592 | + len = min(end_block - b, blocks_per_page); | |
2593 | + len = min(len, blocks_per_page - offset / block_size); | |
2594 | + | |
2595 | + if (!len) { | |
2596 | + DMERR("dm-io: Invalid offset/block_size (%u/%u).", | |
2597 | + offset, block_size); | |
2598 | + return 0; | |
2599 | + } | |
2600 | + | |
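| + /* | |
| +  * this_size is the largest power-of-two number of blocks | |
| +  * that both fits in the remaining length and keeps the | |
| +  * start block b naturally aligned. | |
| +  */ | |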
2601 | + this_size = 1 << log2_align(len); | |
2602 | + if (b) | |
2603 | + this_size = min(this_size, | |
2604 | + (unsigned) 1 << log2_floor(b)); | |
2605 | + | |
2606 | + /* | |
2607 | + * Add in the job offset. | |
2608 | + */ | |
2609 | + bh->b_blocknr = (b / this_size); | |
2610 | + bh->b_size = block_size * this_size; | |
2611 | + set_bh_page(bh, p, offset); | |
2612 | + bh->b_this_page = bh; | |
2613 | + | |
2614 | + bh->b_dev = dev; | |
2615 | + atomic_set(&bh->b_count, 1); | |
2616 | + | |
2617 | + bh->b_state = ((1 << BH_Uptodate) | (1 << BH_Mapped) | | |
2618 | + (1 << BH_Lock)); | |
2619 | + | |
2620 | + if (io->rw == WRITE) | |
2621 | + clear_bit(BH_Dirty, &bh->b_state); | |
2622 | + | |
2623 | + atomic_inc(&io->count); | |
2624 | + submit_bh(io->rw, bh); | |
2625 | + | |
2626 | + b += this_size; | |
2627 | + offset += block_size * this_size; | |
2628 | + } | |
2629 | + | |
2630 | + *block = b; | |
2631 | + return (b == end_block); | |
2632 | +} | |
2633 | + | |
2634 | +static void do_region(unsigned int region, struct io_region *where, | |
2635 | + struct page *page, unsigned int offset, | |
2636 | + struct io_context *io) | |
2637 | +{ | |
2638 | + unsigned int block_size = get_hardsect_size(where->dev); | |
2639 | + unsigned int sblock_size = block_size >> 9; | |
2640 | + sector_t block = where->sector / sblock_size; | |
2641 | + sector_t end_block = (where->sector + where->count) / sblock_size; | |
2642 | + | |
2643 | + while (1) { | |
2644 | + if (do_page(where->dev, &block, end_block, block_size, | |
2645 | + page, offset, region, io)) | |
2646 | + break; | |
2647 | + | |
2648 | + offset = 0; /* only offset the first page */ | |
2649 | + | |
2650 | + page = list_entry(page->list.next, struct page, list); | |
2651 | + } | |
2652 | +} | |
2653 | + | |
2654 | +static void dispatch_io(unsigned int num_regions, struct io_region *where, | |
2655 | + struct page *pages, unsigned int offset, | |
2656 | + struct io_context *io) | |
2657 | +{ | |
2658 | + int i; | |
2659 | + | |
2660 | + for (i = 0; i < num_regions; i++) | |
2661 | + if (where[i].count) | |
2662 | + do_region(i, where + i, pages, offset, io); | |
2663 | + | |
2664 | + /* | |
2665 | + * Drop the extra reference that we were holding to avoid | |
2666 | + * the io being completed too early. | |
2667 | + */ | |
2668 | + dec_count(io, 0, 0); | |
2669 | +} | |
2670 | + | |
2671 | +/* | |
2672 | + * Synchronous io | |
2673 | + */ | |
2674 | +int dm_io_sync(unsigned int num_regions, struct io_region *where, | |
2675 | + int rw, struct page *pages, unsigned int offset, | |
2676 | + unsigned int *error_bits) | |
2677 | +{ | |
2678 | + struct io_context io; | |
2679 | + | |
2680 | + BUG_ON(num_regions > 1 && rw != WRITE); | |
2681 | + | |
2682 | + io.rw = rw; | |
2683 | + io.error = 0; | |
2684 | + atomic_set(&io.count, 1); /* see dispatch_io() */ | |
2685 | + io.sleeper = current; | |
2686 | + | |
2687 | + dispatch_io(num_regions, where, pages, offset, &io); | |
2688 | + run_task_queue(&tq_disk); | |
2689 | + | |
2690 | + while (1) { | |
2691 | + set_current_state(TASK_UNINTERRUPTIBLE); | |
2692 | + | |
2693 | + if (!atomic_read(&io.count)) | |
2694 | + break; | |
2695 | + | |
2696 | + schedule(); | |
2697 | + } | |
2698 | + set_current_state(TASK_RUNNING); | |
2699 | + | |
2700 | + *error_bits = io.error; | |
2701 | + return io.error ? -EIO : 0; | |
2702 | +} | |
2703 | + | |
2704 | +/* | |
2705 | + * Asynchronous io | |
2706 | + */ | |
2707 | +int dm_io_async(unsigned int num_regions, struct io_region *where, int rw, | |
2708 | + struct page *pages, unsigned int offset, | |
2709 | + io_notify_fn fn, void *context) | |
2710 | +{ | |
2711 | + struct io_context *io = mempool_alloc(_io_pool, GFP_NOIO); | |
2712 | + | |
2713 | + io->rw = rw; | |
2714 | + io->error = 0; | |
2715 | + atomic_set(&io->count, 1); /* see dispatch_io() */ | |
2716 | + io->sleeper = NULL; | |
2717 | + io->callback = fn; | |
2718 | + io->context = context; | |
2719 | + | |
2720 | + dispatch_io(num_regions, where, pages, offset, io); | |
2721 | + return 0; | |
2722 | +} | |
2723 | + | |
2724 | +EXPORT_SYMBOL(dm_io_get); | |
2725 | +EXPORT_SYMBOL(dm_io_put); | |
2726 | +EXPORT_SYMBOL(dm_io_sync); | |
2727 | +EXPORT_SYMBOL(dm_io_async); | |
2728 | diff -urN linux-2.4.24.org/drivers/md/dm-ioctl.c linux-2.4.24/drivers/md/dm-ioctl.c | |
2729 | --- linux-2.4.24.org/drivers/md/dm-ioctl.c 1970-01-01 01:00:00.000000000 +0100 | |
2730 | +++ linux-2.4.24/drivers/md/dm-ioctl.c 2004-01-18 15:01:17.790869761 +0100 | |
2731 | @@ -0,0 +1,1284 @@ | |
2732 | +/* | |
2733 | + * Copyright (C) 2001, 2002 Sistina Software (UK) Limited. | |
2734 | + * | |
2735 | + * This file is released under the GPL. | |
2736 | + */ | |
2737 | + | |
2738 | +#include "dm.h" | |
2739 | + | |
2740 | +#include <linux/module.h> | |
2741 | +#include <linux/vmalloc.h> | |
2742 | +#include <linux/miscdevice.h> | |
2743 | +#include <linux/dm-ioctl.h> | |
2744 | +#include <linux/init.h> | |
2745 | +#include <linux/wait.h> | |
2746 | +#include <linux/blk.h> | |
2747 | +#include <linux/slab.h> | |
2748 | + | |
2749 | +#include <asm/uaccess.h> | |
2750 | + | |
2751 | +#define DM_DRIVER_EMAIL "dm@uk.sistina.com" | |
2752 | + | |
2753 | +/*----------------------------------------------------------------- | |
2754 | + * The ioctl interface needs to be able to look up devices by | |
2755 | + * name or uuid. | |
2756 | + *---------------------------------------------------------------*/ | |
2757 | +struct hash_cell { | |
2758 | + struct list_head name_list; | |
2759 | + struct list_head uuid_list; | |
2760 | + | |
2761 | + char *name; | |
2762 | + char *uuid; | |
2763 | + struct mapped_device *md; | |
2764 | + struct dm_table *new_map; | |
2765 | + | |
2766 | + /* I hate devfs */ | |
2767 | + devfs_handle_t devfs_entry; | |
2768 | +}; | |
2769 | + | |
2770 | +#define NUM_BUCKETS 64 | |
2771 | +#define MASK_BUCKETS (NUM_BUCKETS - 1) | |
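| +/* NUM_BUCKETS must be a power of two for MASK_BUCKETS to work as a mask */ | |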
2772 | +static struct list_head _name_buckets[NUM_BUCKETS]; | |
2773 | +static struct list_head _uuid_buckets[NUM_BUCKETS]; | |
2774 | + | |
2775 | +static devfs_handle_t _dev_dir; | |
2776 | +void dm_hash_remove_all(void); | |
2777 | + | |
2778 | +/* | |
2779 | + * Guards access to both hash tables. | |
2780 | + */ | |
2781 | +static DECLARE_RWSEM(_hash_lock); | |
2782 | + | |
2783 | +static void init_buckets(struct list_head *buckets) | |
2784 | +{ | |
2785 | + unsigned int i; | |
2786 | + | |
2787 | + for (i = 0; i < NUM_BUCKETS; i++) | |
2788 | + INIT_LIST_HEAD(buckets + i); | |
2789 | +} | |
2790 | + | |
2791 | +int dm_hash_init(void) | |
2792 | +{ | |
2793 | + init_buckets(_name_buckets); | |
2794 | + init_buckets(_uuid_buckets); | |
2795 | + _dev_dir = devfs_mk_dir(0, DM_DIR, NULL); | |
2796 | + return 0; | |
2797 | +} | |
2798 | + | |
2799 | +void dm_hash_exit(void) | |
2800 | +{ | |
2801 | + dm_hash_remove_all(); | |
2802 | + devfs_unregister(_dev_dir); | |
2803 | +} | |
2804 | + | |
2805 | +/*----------------------------------------------------------------- | |
2806 | + * Hash function: | |
2807 | + * We're not really concerned with the str hash function being | |
2808 | + * fast since it's only used by the ioctl interface. | |
2809 | + *---------------------------------------------------------------*/ | |
2810 | +static unsigned int hash_str(const char *str) | |
2811 | +{ | |
2812 | + const unsigned int hash_mult = 2654435387U; | |
2813 | + unsigned int h = 0; | |
2814 | + | |
2815 | + while (*str) | |
2816 | + h = (h + (unsigned int) *str++) * hash_mult; | |
2817 | + | |
2818 | + return h & MASK_BUCKETS; | |
2819 | +} | |
2820 | + | |
2821 | +/*----------------------------------------------------------------- | |
2822 | + * Code for looking up a device by name | |
2823 | + *---------------------------------------------------------------*/ | |
2824 | +static struct hash_cell *__get_name_cell(const char *str) | |
2825 | +{ | |
2826 | + struct list_head *tmp; | |
2827 | + struct hash_cell *hc; | |
2828 | + unsigned int h = hash_str(str); | |
2829 | + | |
2830 | + list_for_each (tmp, _name_buckets + h) { | |
2831 | + hc = list_entry(tmp, struct hash_cell, name_list); | |
2832 | + if (!strcmp(hc->name, str)) | |
2833 | + return hc; | |
2834 | + } | |
2835 | + | |
2836 | + return NULL; | |
2837 | +} | |
2838 | + | |
2839 | +static struct hash_cell *__get_uuid_cell(const char *str) | |
2840 | +{ | |
2841 | + struct list_head *tmp; | |
2842 | + struct hash_cell *hc; | |
2843 | + unsigned int h = hash_str(str); | |
2844 | + | |
2845 | + list_for_each (tmp, _uuid_buckets + h) { | |
2846 | + hc = list_entry(tmp, struct hash_cell, uuid_list); | |
2847 | + if (!strcmp(hc->uuid, str)) | |
2848 | + return hc; | |
2849 | + } | |
2850 | + | |
2851 | + return NULL; | |
2852 | +} | |
2853 | + | |
2854 | +/*----------------------------------------------------------------- | |
2855 | + * Inserting, removing and renaming a device. | |
2856 | + *---------------------------------------------------------------*/ | |
2857 | +static inline char *kstrdup(const char *str) | |
2858 | +{ | |
2859 | + char *r = kmalloc(strlen(str) + 1, GFP_KERNEL); | |
2860 | + if (r) | |
2861 | + strcpy(r, str); | |
2862 | + return r; | |
2863 | +} | |
2864 | + | |
2865 | +static struct hash_cell *alloc_cell(const char *name, const char *uuid, | |
2866 | + struct mapped_device *md) | |
2867 | +{ | |
2868 | + struct hash_cell *hc; | |
2869 | + | |
2870 | + hc = kmalloc(sizeof(*hc), GFP_KERNEL); | |
2871 | + if (!hc) | |
2872 | + return NULL; | |
2873 | + | |
2874 | + hc->name = kstrdup(name); | |
2875 | + if (!hc->name) { | |
2876 | + kfree(hc); | |
2877 | + return NULL; | |
2878 | + } | |
2879 | + | |
2880 | + if (!uuid) | |
2881 | + hc->uuid = NULL; | |
2882 | + | |
2883 | + else { | |
2884 | + hc->uuid = kstrdup(uuid); | |
2885 | + if (!hc->uuid) { | |
2886 | + kfree(hc->name); | |
2887 | + kfree(hc); | |
2888 | + return NULL; | |
2889 | + } | |
2890 | + } | |
2891 | + | |
2892 | + INIT_LIST_HEAD(&hc->name_list); | |
2893 | + INIT_LIST_HEAD(&hc->uuid_list); | |
2894 | + hc->md = md; | |
2895 | + hc->new_map = NULL; | |
2896 | + return hc; | |
2897 | +} | |
2898 | + | |
2899 | +static void free_cell(struct hash_cell *hc) | |
2900 | +{ | |
2901 | + if (hc) { | |
2902 | + kfree(hc->name); | |
2903 | + kfree(hc->uuid); | |
2904 | + kfree(hc); | |
2905 | + } | |
2906 | +} | |
2907 | + | |
2908 | +/* | |
2909 | + * devfs stuff. | |
2910 | + */ | |
2911 | +static int register_with_devfs(struct hash_cell *hc) | |
2912 | +{ | |
2913 | + kdev_t dev = dm_kdev(hc->md); | |
2914 | + | |
2915 | + hc->devfs_entry = | |
2916 | + devfs_register(_dev_dir, hc->name, DEVFS_FL_CURRENT_OWNER, | |
2917 | + major(dev), minor(dev), | |
2918 | + S_IFBLK | S_IRUSR | S_IWUSR | S_IRGRP, | |
2919 | + &dm_blk_dops, NULL); | |
2920 | + | |
2921 | + return 0; | |
2922 | +} | |
2923 | + | |
2924 | +static int unregister_with_devfs(struct hash_cell *hc) | |
2925 | +{ | |
2926 | + devfs_unregister(hc->devfs_entry); | |
2927 | + return 0; | |
2928 | +} | |
2929 | + | |
2930 | +/* | |
2931 | + * The kdev_t and uuid of a device can never change once it is | |
2932 | + * initially inserted. | |
2933 | + */ | |
2934 | +int dm_hash_insert(const char *name, const char *uuid, struct mapped_device *md) | |
2935 | +{ | |
2936 | + struct hash_cell *cell; | |
2937 | + | |
2938 | + /* | |
2939 | + * Allocate the new cell. | |
2940 | + */ | |
2941 | + cell = alloc_cell(name, uuid, md); | |
2942 | + if (!cell) | |
2943 | + return -ENOMEM; | |
2944 | + | |
2945 | + /* | |
2946 | + * Insert the cell into both hash tables. | |
2947 | + */ | |
2948 | + down_write(&_hash_lock); | |
2949 | + if (__get_name_cell(name)) | |
2950 | + goto bad; | |
2951 | + | |
2952 | + list_add(&cell->name_list, _name_buckets + hash_str(name)); | |
2953 | + | |
2954 | + if (uuid) { | |
2955 | + if (__get_uuid_cell(uuid)) { | |
2956 | + list_del(&cell->name_list); | |
2957 | + goto bad; | |
2958 | + } | |
2959 | + list_add(&cell->uuid_list, _uuid_buckets + hash_str(uuid)); | |
2960 | + } | |
2961 | + register_with_devfs(cell); | |
2962 | + dm_get(md); | |
2963 | + up_write(&_hash_lock); | |
2964 | + | |
2965 | + return 0; | |
2966 | + | |
2967 | + bad: | |
2968 | + up_write(&_hash_lock); | |
2969 | + free_cell(cell); | |
2970 | + return -EBUSY; | |
2971 | +} | |
2972 | + | |
2973 | +void __hash_remove(struct hash_cell *hc) | |
2974 | +{ | |
2975 | + /* remove from the dev hash */ | |
2976 | + list_del(&hc->uuid_list); | |
2977 | + list_del(&hc->name_list); | |
2978 | + unregister_with_devfs(hc); | |
2979 | + dm_put(hc->md); | |
2980 | + if (hc->new_map) | |
2981 | + dm_table_put(hc->new_map); | |
2982 | + free_cell(hc); | |
2983 | +} | |
2984 | + | |
2985 | +void dm_hash_remove_all(void) | |
2986 | +{ | |
2987 | + int i; | |
2988 | + struct hash_cell *hc; | |
2989 | + struct list_head *tmp, *n; | |
2990 | + | |
2991 | + down_write(&_hash_lock); | |
2992 | + for (i = 0; i < NUM_BUCKETS; i++) { | |
2993 | + list_for_each_safe (tmp, n, _name_buckets + i) { | |
2994 | + hc = list_entry(tmp, struct hash_cell, name_list); | |
2995 | + __hash_remove(hc); | |
2996 | + } | |
2997 | + } | |
2998 | + up_write(&_hash_lock); | |
2999 | +} | |
3000 | + | |
3001 | +int dm_hash_rename(const char *old, const char *new) | |
3002 | +{ | |
3003 | + char *new_name, *old_name; | |
3004 | + struct hash_cell *hc; | |
3005 | + | |
3006 | + /* | |
3007 | + * duplicate new. | |
3008 | + */ | |
3009 | + new_name = kstrdup(new); | |
3010 | + if (!new_name) | |
3011 | + return -ENOMEM; | |
3012 | + | |
3013 | + down_write(&_hash_lock); | |
3014 | + | |
3015 | + /* | |
3016 | + * Is new free ? | |
3017 | + */ | |
3018 | + hc = __get_name_cell(new); | |
3019 | + if (hc) { | |
3020 | + DMWARN("asked to rename to an already existing name %s -> %s", | |
3021 | + old, new); | |
3022 | + up_write(&_hash_lock); | |
3023 | + kfree(new_name); | |
3024 | + return -EBUSY; | |
3025 | + } | |
3026 | + | |
3027 | + /* | |
3028 | + * Is there such a device as 'old' ? | |
3029 | + */ | |
3030 | + hc = __get_name_cell(old); | |
3031 | + if (!hc) { | |
3032 | + DMWARN("asked to rename a non existent device %s -> %s", | |
3033 | + old, new); | |
3034 | + up_write(&_hash_lock); | |
3035 | + kfree(new_name); | |
3036 | + return -ENXIO; | |
3037 | + } | |
3038 | + | |
3039 | + /* | |
3040 | + * rename and move the name cell. | |
3041 | + */ | |
3042 | + list_del(&hc->name_list); | |
3043 | + old_name = hc->name; | |
3044 | + hc->name = new_name; | |
3045 | + list_add(&hc->name_list, _name_buckets + hash_str(new_name)); | |
3046 | + | |
3047 | + /* rename the device node in devfs */ | |
3048 | + unregister_with_devfs(hc); | |
3049 | + register_with_devfs(hc); | |
3050 | + | |
3051 | + up_write(&_hash_lock); | |
3052 | + kfree(old_name); | |
3053 | + return 0; | |
3054 | +} | |
3055 | + | |
3056 | +/*----------------------------------------------------------------- | |
3057 | + * Implementation of the ioctl commands | |
3058 | + *---------------------------------------------------------------*/ | |
3059 | +/* | |
3060 | + * All the ioctl commands get dispatched to functions with this | |
3061 | + * prototype. | |
3062 | + */ | |
3063 | +typedef int (*ioctl_fn)(struct dm_ioctl *param, size_t param_size); | |
3064 | + | |
3065 | +static int remove_all(struct dm_ioctl *param, size_t param_size) | |
3066 | +{ | |
3067 | + dm_hash_remove_all(); | |
3068 | + param->data_size = 0; | |
3069 | + return 0; | |
3070 | +} | |
3071 | + | |
3072 | +/* | |
3073 | + * Round up the ptr to an 8-byte boundary. | |
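| + * e.g. 0x1003 is rounded up to 0x1008; 0x1008 is unchanged. | |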
3074 | + */ | |
3075 | +#define ALIGN_MASK 7 | |
3076 | +static inline void *align_ptr(void *ptr) | |
3077 | +{ | |
3078 | + return (void *) (((size_t) (ptr + ALIGN_MASK)) & ~ALIGN_MASK); | |
3079 | +} | |
3080 | + | |
3081 | +/* | |
3082 | + * Retrieves the data payload buffer from an already allocated | |
3083 | + * struct dm_ioctl. | |
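| + * The payload shares the user-sized buffer with the struct | |
| + * itself: data_start is the offset of the first 8-byte | |
| + * aligned address after the fixed-size header. | |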
3084 | + */ | |
3085 | +static void *get_result_buffer(struct dm_ioctl *param, size_t param_size, | |
3086 | + size_t *len) | |
3087 | +{ | |
3088 | + param->data_start = align_ptr(param + 1) - (void *) param; | |
3089 | + | |
3090 | + if (param->data_start < param_size) | |
3091 | + *len = param_size - param->data_start; | |
3092 | + else | |
3093 | + *len = 0; | |
3094 | + | |
3095 | + return ((void *) param) + param->data_start; | |
3096 | +} | |
3097 | + | |
3098 | +static int list_devices(struct dm_ioctl *param, size_t param_size) | |
3099 | +{ | |
3100 | + unsigned int i; | |
3101 | + struct hash_cell *hc; | |
3102 | + size_t len, needed = 0; | |
3103 | + struct dm_name_list *nl, *old_nl = NULL; | |
3104 | + | |
3105 | + down_write(&_hash_lock); | |
3106 | + | |
3107 | + /* | |
3108 | + * Loop through all the devices working out how much | |
3109 | + * space we need. | |
3110 | + */ | |
3111 | + for (i = 0; i < NUM_BUCKETS; i++) { | |
3112 | + list_for_each_entry (hc, _name_buckets + i, name_list) { | |
3113 | + needed += sizeof(struct dm_name_list); | |
3114 | + needed += strlen(hc->name); | |
3115 | + needed += ALIGN_MASK; | |
3116 | + } | |
3117 | + } | |
3118 | + | |
3119 | + /* | |
3120 | + * Grab our output buffer. | |
3121 | + */ | |
3122 | + nl = get_result_buffer(param, param_size, &len); | |
3123 | + if (len < needed) { | |
3124 | + param->flags |= DM_BUFFER_FULL_FLAG; | |
3125 | + goto out; | |
3126 | + } | |
3127 | + param->data_size = param->data_start + needed; | |
3128 | + | |
3129 | + nl->dev = 0; /* Flags no data */ | |
3130 | + | |
3131 | + /* | |
3132 | + * Now loop through filling out the names. | |
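| + * Each dm_name_list entry stores in 'next' the byte offset | |
| + * from itself to the following entry; the last entry has | |
| + * next == 0. | |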
3133 | + */ | |
3134 | + for (i = 0; i < NUM_BUCKETS; i++) { | |
3135 | + list_for_each_entry (hc, _name_buckets + i, name_list) { | |
3136 | + if (old_nl) | |
3137 | + old_nl->next = (uint32_t) ((void *) nl - | |
3138 | + (void *) old_nl); | |
3139 | + | |
3140 | + nl->dev = dm_kdev(hc->md); | |
3141 | + nl->next = 0; | |
3142 | + strcpy(nl->name, hc->name); | |
3143 | + | |
3144 | + old_nl = nl; | |
3145 | + nl = align_ptr(((void *) ++nl) + strlen(hc->name) + 1); | |
3146 | + } | |
3147 | + } | |
3148 | + | |
3149 | + out: | |
3150 | + up_write(&_hash_lock); | |
3151 | + return 0; | |
3152 | +} | |
3153 | + | |
3154 | +static int check_name(const char *name) | |
3155 | +{ | |
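| + /* the name becomes a devfs entry (see register_with_devfs), so no slashes */ | |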
3156 | + if (strchr(name, '/')) { | |
3157 | + DMWARN("invalid device name"); | |
3158 | + return -EINVAL; | |
3159 | + } | |
3160 | + | |
3161 | + return 0; | |
3162 | +} | |
3163 | + | |
3164 | +/* | |
3165 | + * Fills in a dm_ioctl structure, ready for sending back to | |
3166 | + * userland. | |
3167 | + */ | |
3168 | +static int __dev_status(struct mapped_device *md, struct dm_ioctl *param) | |
3169 | +{ | |
3170 | + kdev_t dev = dm_kdev(md); | |
3171 | + struct dm_table *table; | |
3172 | + struct block_device *bdev; | |
3173 | + | |
3174 | + param->flags &= ~(DM_SUSPEND_FLAG | DM_READONLY_FLAG | | |
3175 | + DM_ACTIVE_PRESENT_FLAG); | |
3176 | + | |
3177 | + if (dm_suspended(md)) | |
3178 | + param->flags |= DM_SUSPEND_FLAG; | |
3179 | + | |
3180 | + param->dev = kdev_t_to_nr(dev); | |
3181 | + | |
3182 | + if (is_read_only(dev)) | |
3183 | + param->flags |= DM_READONLY_FLAG; | |
3184 | + | |
3185 | + param->event_nr = dm_get_event_nr(md); | |
3186 | + | |
3187 | + table = dm_get_table(md); | |
3188 | + if (table) { | |
3189 | + param->flags |= DM_ACTIVE_PRESENT_FLAG; | |
3190 | + param->target_count = dm_table_get_num_targets(table); | |
3191 | + dm_table_put(table); | |
3192 | + } else | |
3193 | + param->target_count = 0; | |
3194 | + | |
3195 | + bdev = bdget(param->dev); | |
3196 | + if (!bdev) | |
3197 | + return -ENXIO; | |
3198 | + param->open_count = bdev->bd_openers; | |
3199 | + bdput(bdev); | |
3200 | + | |
3201 | + return 0; | |
3202 | +} | |
3203 | + | |
3204 | +static int dev_create(struct dm_ioctl *param, size_t param_size) | |
3205 | +{ | |
3206 | + int r; | |
3207 | + kdev_t dev = 0; | |
3208 | + struct mapped_device *md; | |
3209 | + | |
3210 | + r = check_name(param->name); | |
3211 | + if (r) | |
3212 | + return r; | |
3213 | + | |
3214 | + if (param->flags & DM_PERSISTENT_DEV_FLAG) | |
3215 | + dev = to_kdev_t(param->dev); | |
3216 | + | |
3217 | + r = dm_create(dev, &md); | |
3218 | + if (r) | |
3219 | + return r; | |
3220 | + | |
3221 | + r = dm_hash_insert(param->name, *param->uuid ? param->uuid : NULL, md); | |
3222 | + if (r) { | |
3223 | + dm_put(md); | |
3224 | + return r; | |
3225 | + } | |
3226 | + | |
3227 | + param->flags &= ~DM_INACTIVE_PRESENT_FLAG; | |
3228 | + | |
3229 | + r = __dev_status(md, param); | |
3230 | + dm_put(md); | |
3231 | + | |
3232 | + return r; | |
3233 | +} | |
3234 | + | |
3235 | +/* | |
3236 | + * Always use UUID for lookups if it's present, otherwise use name. | |
3237 | + */ | |
3238 | +static inline struct hash_cell *__find_device_hash_cell(struct dm_ioctl *param) | |
3239 | +{ | |
3240 | + return *param->uuid ? | |
3241 | + __get_uuid_cell(param->uuid) : __get_name_cell(param->name); | |
3242 | +} | |
3243 | + | |
3244 | +static inline struct mapped_device *find_device(struct dm_ioctl *param) | |
3245 | +{ | |
3246 | + struct hash_cell *hc; | |
3247 | + struct mapped_device *md = NULL; | |
3248 | + | |
3249 | + down_read(&_hash_lock); | |
3250 | + hc = __find_device_hash_cell(param); | |
3251 | + if (hc) { | |
3252 | + md = hc->md; | |
3253 | + | |
3254 | + /* | |
3255 | + * Sneakily write in both the name and the uuid | |
3256 | + * while we have the cell. | |
3257 | + */ | |
3258 | + strncpy(param->name, hc->name, sizeof(param->name)); | |
3259 | + if (hc->uuid) | |
3260 | + strncpy(param->uuid, hc->uuid, sizeof(param->uuid) - 1); | |
3261 | + else | |
3262 | + param->uuid[0] = '\0'; | |
3263 | + | |
3264 | + if (hc->new_map) | |
3265 | + param->flags |= DM_INACTIVE_PRESENT_FLAG; | |
3266 | + else | |
3267 | + param->flags &= ~DM_INACTIVE_PRESENT_FLAG; | |
3268 | + | |
3269 | + dm_get(md); | |
3270 | + } | |
3271 | + up_read(&_hash_lock); | |
3272 | + | |
3273 | + return md; | |
3274 | +} | |
3275 | + | |
3276 | +static int dev_remove(struct dm_ioctl *param, size_t param_size) | |
3277 | +{ | |
3278 | + struct hash_cell *hc; | |
3279 | + | |
3280 | + down_write(&_hash_lock); | |
3281 | + hc = __find_device_hash_cell(param); | |
3282 | + | |
3283 | + if (!hc) { | |
3284 | + DMWARN("device doesn't appear to be in the dev hash table."); | |
3285 | + up_write(&_hash_lock); | |
3286 | + return -ENXIO; | |
3287 | + } | |
3288 | + | |
3289 | + __hash_remove(hc); | |
3290 | + up_write(&_hash_lock); | |
3291 | + param->data_size = 0; | |
3292 | + return 0; | |
3293 | +} | |
3294 | + | |
3295 | +/* | |
3296 | + * Check a string doesn't overrun the chunk of | |
3297 | + * memory we copied from userland. | |
3298 | + */ | |
3299 | +static int invalid_str(char *str, void *end) | |
3300 | +{ | |
3301 | + while ((void *) str < end) | |
3302 | + if (!*str++) | |
3303 | + return 0; | |
3304 | + | |
3305 | + return -EINVAL; | |
3306 | +} | |
3307 | + | |
3308 | +static int dev_rename(struct dm_ioctl *param, size_t param_size) | |
3309 | +{ | |
3310 | + int r; | |
3311 | + char *new_name = (char *) param + param->data_start; | |
3312 | + | |
3313 | + if (new_name < (char *) (param + 1) || | |
3314 | + invalid_str(new_name, (void *) param + param_size)) { | |
3315 | + DMWARN("Invalid new logical volume name supplied."); | |
3316 | + return -EINVAL; | |
3317 | + } | |
3318 | + | |
3319 | + r = check_name(new_name); | |
3320 | + if (r) | |
3321 | + return r; | |
3322 | + | |
3323 | + param->data_size = 0; | |
3324 | + return dm_hash_rename(param->name, new_name); | |
3325 | +} | |
3326 | + | |
3327 | +static int do_suspend(struct dm_ioctl *param) | |
3328 | +{ | |
3329 | + int r = 0; | |
3330 | + struct mapped_device *md; | |
3331 | + | |
3332 | + md = find_device(param); | |
3333 | + if (!md) | |
3334 | + return -ENXIO; | |
3335 | + | |
3336 | + if (!dm_suspended(md)) | |
3337 | + r = dm_suspend(md); | |
3338 | + | |
3339 | + if (!r) | |
3340 | + r = __dev_status(md, param); | |
3341 | + | |
3342 | + dm_put(md); | |
3343 | + return r; | |
3344 | +} | |
3345 | + | |
3346 | +static int do_resume(struct dm_ioctl *param) | |
3347 | +{ | |
3348 | + int r = 0; | |
3349 | + struct hash_cell *hc; | |
3350 | + struct mapped_device *md; | |
3351 | + struct dm_table *new_map; | |
3352 | + | |
3353 | + down_write(&_hash_lock); | |
3354 | + | |
3355 | + hc = __find_device_hash_cell(param); | |
3356 | + if (!hc) { | |
3357 | + DMWARN("device doesn't appear to be in the dev hash table."); | |
3358 | + up_write(&_hash_lock); | |
3359 | + return -ENXIO; | |
3360 | + } | |
3361 | + | |
3362 | + md = hc->md; | |
3363 | + dm_get(md); | |
3364 | + | |
3365 | + new_map = hc->new_map; | |
3366 | + hc->new_map = NULL; | |
3367 | + param->flags &= ~DM_INACTIVE_PRESENT_FLAG; | |
3368 | + | |
3369 | + up_write(&_hash_lock); | |
3370 | + | |
3371 | + /* Do we need to load a new map ? */ | |
3372 | + if (new_map) { | |
3373 | + /* Suspend if it isn't already suspended */ | |
3374 | + if (!dm_suspended(md)) | |
3375 | + dm_suspend(md); | |
3376 | + | |
3377 | + r = dm_swap_table(md, new_map); | |
3378 | + if (r) { | |
3379 | + dm_put(md); | |
3380 | + dm_table_put(new_map); | |
3381 | + return r; | |
3382 | + } | |
3383 | + | |
3384 | + if (dm_table_get_mode(new_map) & FMODE_WRITE) | |
3385 | + set_device_ro(dm_kdev(md), 0); | |
3386 | + else | |
3387 | + set_device_ro(dm_kdev(md), 1); | |
3388 | + | |
3389 | + dm_table_put(new_map); | |
3390 | + } | |
3391 | + | |
3392 | + if (dm_suspended(md)) | |
3393 | + r = dm_resume(md); | |
3394 | + | |
3395 | + if (!r) | |
3396 | + r = __dev_status(md, param); | |
3397 | + | |
3398 | + dm_put(md); | |
3399 | + return r; | |
3400 | +} | |
3401 | + | |
3402 | +/* | |
3403 | + * Set or unset the suspension state of a device. | |
3404 | + * If the device is already in the requested state, we just return its status. | |
3405 | + */ | |
3406 | +static int dev_suspend(struct dm_ioctl *param, size_t param_size) | |
3407 | +{ | |
3408 | + if (param->flags & DM_SUSPEND_FLAG) | |
3409 | + return do_suspend(param); | |
3410 | + | |
3411 | + return do_resume(param); | |
3412 | +} | |
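Both directions travel through the single DM_DEV_SUSPEND command: DM_SUSPEND_FLAG selects suspend, its absence selects resume. A minimal userspace sketch under that assumption (error handling omitted; `fd` is assumed to be an open /dev/mapper/control):

#include <string.h>
#include <sys/ioctl.h>
#include <linux/dm-ioctl.h>

/* Suspend (suspend != 0) or resume (suspend == 0) a device by name. */
static int set_suspended(int fd, const char *name, int suspend)
{
	struct dm_ioctl dmi;

	memset(&dmi, 0, sizeof(dmi));
	dmi.version[0] = DM_VERSION_MAJOR;
	dmi.version[1] = DM_VERSION_MINOR;
	dmi.version[2] = DM_VERSION_PATCHLEVEL;
	dmi.data_size = sizeof(dmi);	/* no payload beyond the header */
	strncpy(dmi.name, name, sizeof(dmi.name) - 1);
	if (suspend)
		dmi.flags |= DM_SUSPEND_FLAG;

	return ioctl(fd, DM_DEV_SUSPEND, &dmi);
}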
3413 | + | |
3414 | +/* | |
3415 | + * Copies device info back to user space, used by | |
3416 | + * the create and info ioctls. | |
3417 | + */ | |
3418 | +static int dev_status(struct dm_ioctl *param, size_t param_size) | |
3419 | +{ | |
3420 | + int r; | |
3421 | + struct mapped_device *md; | |
3422 | + | |
3423 | + md = find_device(param); | |
3424 | + if (!md) | |
3425 | + return -ENXIO; | |
3426 | + | |
3427 | + r = __dev_status(md, param); | |
3428 | + dm_put(md); | |
3429 | + return r; | |
3430 | +} | |
3431 | + | |
3432 | +/* | |
3433 | + * Build up the status struct for each target | |
3434 | + */ | |
3435 | +static void retrieve_status(struct dm_table *table, struct dm_ioctl *param, | |
3436 | + size_t param_size) | |
3437 | +{ | |
3438 | + unsigned int i, num_targets; | |
3439 | + struct dm_target_spec *spec; | |
3440 | + char *outbuf, *outptr; | |
3441 | + status_type_t type; | |
3442 | + size_t remaining, len, used = 0; | |
3443 | + | |
3444 | + outptr = outbuf = get_result_buffer(param, param_size, &len); | |
3445 | + | |
3446 | + if (param->flags & DM_STATUS_TABLE_FLAG) | |
3447 | + type = STATUSTYPE_TABLE; | |
3448 | + else | |
3449 | + type = STATUSTYPE_INFO; | |
3450 | + | |
3451 | + /* Get all the target info */ | |
3452 | + num_targets = dm_table_get_num_targets(table); | |
3453 | + for (i = 0; i < num_targets; i++) { | |
3454 | + struct dm_target *ti = dm_table_get_target(table, i); | |
3455 | + | |
3456 | + remaining = len - (outptr - outbuf); | |
3457 | + if (remaining < sizeof(struct dm_target_spec)) { | |
3458 | + param->flags |= DM_BUFFER_FULL_FLAG; | |
3459 | + break; | |
3460 | + } | |
3461 | + | |
3462 | + spec = (struct dm_target_spec *) outptr; | |
3463 | + | |
3464 | + spec->status = 0; | |
3465 | + spec->sector_start = ti->begin; | |
3466 | + spec->length = ti->len; | |
3467 | + strncpy(spec->target_type, ti->type->name, | |
3468 | + sizeof(spec->target_type)); | |
3469 | + | |
3470 | + outptr += sizeof(struct dm_target_spec); | |
3471 | + remaining = len - (outptr - outbuf); | |
3472 | + | |
3473 | + /* Get the status/table string from the target driver */ | |
3474 | + if (ti->type->status) { | |
3475 | + if (ti->type->status(ti, type, outptr, remaining)) { | |
3476 | + param->flags |= DM_BUFFER_FULL_FLAG; | |
3477 | + break; | |
3478 | + } | |
3479 | + } else | |
3480 | + outptr[0] = '\0'; | |
3481 | + | |
3482 | + outptr += strlen(outptr) + 1; | |
3483 | + used = param->data_start + (outptr - outbuf); | |
3484 | + | |
3485 | + align_ptr(outptr); | |
3486 | + spec->next = outptr - outbuf; | |
3487 | + } | |
3488 | + | |
3489 | + if (used) | |
3490 | + param->data_size = used; | |
3491 | + | |
3492 | + param->target_count = num_targets; | |
3493 | +} | |
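The packed result is a chain of dm_target_spec headers, each followed by its NUL-terminated status string, and here spec->next holds the offset of the following record from the start of the data area. A userspace decode sketch under that assumption (real code should check DM_BUFFER_FULL_FLAG before trusting the data):

#include <stdio.h>
#include <linux/dm-ioctl.h>

/* Walk the records packed by the kernel into a DM_TABLE_STATUS reply. */
static void walk_status(struct dm_ioctl *param)
{
	char *outbuf = (char *) param + param->data_start;
	struct dm_target_spec *spec = (struct dm_target_spec *) outbuf;
	unsigned int i;

	for (i = 0; i < param->target_count; i++) {
		char *status = (char *) (spec + 1);	/* string follows header */

		printf("%llu %llu %s %s\n",
		       (unsigned long long) spec->sector_start,
		       (unsigned long long) spec->length,
		       spec->target_type, status);
		/* 'next' is relative to the start of the data area here */
		spec = (struct dm_target_spec *) (outbuf + spec->next);
	}
}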
3494 | + | |
3495 | +/* | |
3496 | + * Wait for a device to report an event | |
3497 | + */ | |
3498 | +static int dev_wait(struct dm_ioctl *param, size_t param_size) | |
3499 | +{ | |
3500 | + int r; | |
3501 | + struct mapped_device *md; | |
3502 | + struct dm_table *table; | |
3503 | + DECLARE_WAITQUEUE(wq, current); | |
3504 | + | |
3505 | + md = find_device(param); | |
3506 | + if (!md) | |
3507 | + return -ENXIO; | |
3508 | + | |
3509 | + /* | |
3510 | + * Wait for a notification event | |
3511 | + */ | |
3512 | + set_current_state(TASK_INTERRUPTIBLE); | |
3513 | + if (!dm_add_wait_queue(md, &wq, param->event_nr)) { | |
3514 | + schedule(); | |
3515 | + dm_remove_wait_queue(md, &wq); | |
3516 | + } | |
3517 | + set_current_state(TASK_RUNNING); | |
3518 | + | |
3519 | + /* | |
3520 | + * The userland program is going to want to know what | |
3521 | + * changed to trigger the event, so we may as well tell | |
3522 | + * it and save an ioctl. | |
3523 | + */ | |
3524 | + r = __dev_status(md, param); | |
3525 | + if (r) | |
3526 | + goto out; | |
3527 | + | |
3528 | + table = dm_get_table(md); | |
3529 | + if (table) { | |
3530 | + retrieve_status(table, param, param_size); | |
3531 | + dm_table_put(table); | |
3532 | + } | |
3533 | + | |
3534 | + out: | |
3535 | + dm_put(md); | |
3536 | + return r; | |
3537 | +} | |
3538 | + | |
3539 | +static inline int get_mode(struct dm_ioctl *param) | |
3540 | +{ | |
3541 | + int mode = FMODE_READ | FMODE_WRITE; | |
3542 | + | |
3543 | + if (param->flags & DM_READONLY_FLAG) | |
3544 | + mode = FMODE_READ; | |
3545 | + | |
3546 | + return mode; | |
3547 | +} | |
3548 | + | |
3549 | +static int next_target(struct dm_target_spec *last, uint32_t next, void *end, | |
3550 | + struct dm_target_spec **spec, char **target_params) | |
3551 | +{ | |
3552 | + *spec = (struct dm_target_spec *) ((unsigned char *) last + next); | |
3553 | + *target_params = (char *) (*spec + 1); | |
3554 | + | |
3555 | + if (*spec < (last + 1)) | |
3556 | + return -EINVAL; | |
3557 | + | |
3558 | + return invalid_str(*target_params, end); | |
3559 | +} | |
3560 | + | |
3561 | +static int populate_table(struct dm_table *table, struct dm_ioctl *param, | |
3562 | + size_t param_size) | |
3563 | +{ | |
3564 | + int r; | |
3565 | + unsigned int i = 0; | |
3566 | + struct dm_target_spec *spec = (struct dm_target_spec *) param; | |
3567 | + uint32_t next = param->data_start; | |
3568 | + void *end = (void *) param + param_size; | |
3569 | + char *target_params; | |
3570 | + | |
3571 | + if (!param->target_count) { | |
3572 | + DMWARN("populate_table: no targets specified"); | |
3573 | + return -EINVAL; | |
3574 | + } | |
3575 | + | |
3576 | + for (i = 0; i < param->target_count; i++) { | |
3577 | + | |
3578 | + r = next_target(spec, next, end, &spec, &target_params); | |
3579 | + if (r) { | |
3580 | + DMWARN("unable to find target"); | |
3581 | + return r; | |
3582 | + } | |
3583 | + | |
3584 | + r = dm_table_add_target(table, spec->target_type, | |
3585 | + (sector_t) spec->sector_start, | |
3586 | + (sector_t) spec->length, | |
3587 | + target_params); | |
3588 | + if (r) { | |
3589 | + DMWARN("error adding target to table"); | |
3590 | + return r; | |
3591 | + } | |
3592 | + | |
3593 | + next = spec->next; | |
3594 | + } | |
3595 | + | |
3596 | + return dm_table_complete(table); | |
3597 | +} | |
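Note the asymmetry with the status path: on load, next_target() adds spec->next to the address of the current spec, so each record's `next` is relative to that record, and the first record sits at param->data_start. A sketch of how userspace could pack one target under those assumptions (bounds checks omitted; the buffer is assumed zero-initialized and 8-byte record alignment is assumed here):

#include <stdint.h>
#include <string.h>
#include <linux/dm-ioctl.h>

/* Append one target spec plus its parameter string at 'offset' into
 * the ioctl buffer 'buf'; returns the offset of the next record. */
static size_t pack_target(char *buf, size_t offset, uint64_t start,
			  uint64_t len, const char *type, const char *params)
{
	struct dm_target_spec *spec = (struct dm_target_spec *) (buf + offset);
	size_t end = offset + sizeof(*spec) + strlen(params) + 1;

	end = (end + 7) & ~(size_t) 7;		/* align the next record */
	spec->sector_start = start;
	spec->length = len;
	spec->status = 0;
	strncpy(spec->target_type, type, sizeof(spec->target_type) - 1);
	strcpy((char *) (spec + 1), params);	/* params follow the header */
	spec->next = end - offset;		/* relative to this spec */
	return end;
}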
3598 | + | |
3599 | +static int table_load(struct dm_ioctl *param, size_t param_size) | |
3600 | +{ | |
3601 | + int r; | |
3602 | + struct hash_cell *hc; | |
3603 | + struct dm_table *t; | |
3604 | + | |
3605 | + r = dm_table_create(&t, get_mode(param), param->target_count); | |
3606 | + if (r) | |
3607 | + return r; | |
3608 | + | |
3609 | + r = populate_table(t, param, param_size); | |
3610 | + if (r) { | |
3611 | + dm_table_put(t); | |
3612 | + return r; | |
3613 | + } | |
3614 | + | |
3615 | + down_write(&_hash_lock); | |
3616 | + hc = __find_device_hash_cell(param); | |
3617 | + if (!hc) { | |
3618 | + DMWARN("device doesn't appear to be in the dev hash table."); | |
3619 | + up_write(&_hash_lock); | |
3620 | + return -ENXIO; | |
3621 | + } | |
3622 | + | |
3623 | + if (hc->new_map) | |
3624 | + dm_table_put(hc->new_map); | |
3625 | + hc->new_map = t; | |
3626 | + param->flags |= DM_INACTIVE_PRESENT_FLAG; | |
3627 | + | |
3628 | + r = __dev_status(hc->md, param); | |
3629 | + up_write(&_hash_lock); | |
3630 | + return r; | |
3631 | +} | |
3632 | + | |
3633 | +static int table_clear(struct dm_ioctl *param, size_t param_size) | |
3634 | +{ | |
3635 | + int r; | |
3636 | + struct hash_cell *hc; | |
3637 | + | |
3638 | + down_write(&_hash_lock); | |
3639 | + | |
3640 | + hc = __find_device_hash_cell(param); | |
3641 | + if (!hc) { | |
3642 | + DMWARN("device doesn't appear to be in the dev hash table."); | |
3643 | + up_write(&_hash_lock); | |
3644 | + return -ENXIO; | |
3645 | + } | |
3646 | + | |
3647 | + if (hc->new_map) { | |
3648 | + dm_table_put(hc->new_map); | |
3649 | + hc->new_map = NULL; | |
3650 | + } | |
3651 | + | |
3652 | + param->flags &= ~DM_INACTIVE_PRESENT_FLAG; | |
3653 | + | |
3654 | + r = __dev_status(hc->md, param); | |
3655 | + up_write(&_hash_lock); | |
3656 | + return r; | |
3657 | +} | |
3658 | + | |
3659 | +/* | |
3660 | + * Retrieves a list of devices used by a particular dm device. | |
3661 | + */ | |
3662 | +static void retrieve_deps(struct dm_table *table, struct dm_ioctl *param, | |
3663 | + size_t param_size) | |
3664 | +{ | |
3665 | + unsigned int count = 0; | |
3666 | + struct list_head *tmp; | |
3667 | + size_t len, needed; | |
3668 | + struct dm_target_deps *deps; | |
3669 | + | |
3670 | + deps = get_result_buffer(param, param_size, &len); | |
3671 | + | |
3672 | + /* | |
3673 | + * Count the devices. | |
3674 | + */ | |
3675 | + list_for_each(tmp, dm_table_get_devices(table)) | |
3676 | + count++; | |
3677 | + | |
3678 | + /* | |
3679 | + * Check we have enough space. | |
3680 | + */ | |
3681 | + needed = sizeof(*deps) + (sizeof(*deps->dev) * count); | |
3682 | + if (len < needed) { | |
3683 | + param->flags |= DM_BUFFER_FULL_FLAG; | |
3684 | + return; | |
3685 | + } | |
3686 | + | |
3687 | + /* | |
3688 | + * Fill in the devices. | |
3689 | + */ | |
3690 | + deps->count = count; | |
3691 | + count = 0; | |
3692 | + list_for_each(tmp, dm_table_get_devices(table)) { | |
3693 | + struct dm_dev *dd = list_entry(tmp, struct dm_dev, list); | |
3694 | + deps->dev[count++] = dd->bdev->bd_dev; | |
3695 | + } | |
3696 | + | |
3697 | + param->data_size = param->data_start + needed; | |
3698 | +} | |
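The reply layout is a dm_target_deps header followed by deps->count device numbers in its trailing array. A decode sketch on that assumption:

#include <stdio.h>
#include <linux/dm-ioctl.h>

/* Print the raw device numbers returned by DM_TABLE_DEPS. */
static void print_deps(struct dm_ioctl *param)
{
	struct dm_target_deps *deps =
	    (struct dm_target_deps *) ((char *) param + param->data_start);
	unsigned int i;

	for (i = 0; i < deps->count; i++)
		printf("dep: %llx\n", (unsigned long long) deps->dev[i]);
}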
3699 | + | |
3700 | +static int table_deps(struct dm_ioctl *param, size_t param_size) | |
3701 | +{ | |
3702 | + int r; | |
3703 | + struct mapped_device *md; | |
3704 | + struct dm_table *table; | |
3705 | + | |
3706 | + md = find_device(param); | |
3707 | + if (!md) | |
3708 | + return -ENXIO; | |
3709 | + | |
3710 | + r = __dev_status(md, param); | |
3711 | + if (r) | |
3712 | + goto out; | |
3713 | + | |
3714 | + table = dm_get_table(md); | |
3715 | + if (table) { | |
3716 | + retrieve_deps(table, param, param_size); | |
3717 | + dm_table_put(table); | |
3718 | + } | |
3719 | + | |
3720 | + out: | |
3721 | + dm_put(md); | |
3722 | + return r; | |
3723 | +} | |
3724 | + | |
3725 | +/* | |
3726 | + * Return the status of a device as a text string for each | |
3727 | + * target. | |
3728 | + */ | |
3729 | +static int table_status(struct dm_ioctl *param, size_t param_size) | |
3730 | +{ | |
3731 | + int r; | |
3732 | + struct mapped_device *md; | |
3733 | + struct dm_table *table; | |
3734 | + | |
3735 | + md = find_device(param); | |
3736 | + if (!md) | |
3737 | + return -ENXIO; | |
3738 | + | |
3739 | + r = __dev_status(md, param); | |
3740 | + if (r) | |
3741 | + goto out; | |
3742 | + | |
3743 | + table = dm_get_table(md); | |
3744 | + if (table) { | |
3745 | + retrieve_status(table, param, param_size); | |
3746 | + dm_table_put(table); | |
3747 | + } | |
3748 | + | |
3749 | + out: | |
3750 | + dm_put(md); | |
3751 | + return r; | |
3752 | +} | |
3753 | + | |
3754 | +/*----------------------------------------------------------------- | |
3755 | + * Implementation of open/close/ioctl on the special char | |
3756 | + * device. | |
3757 | + *---------------------------------------------------------------*/ | |
3758 | +static ioctl_fn lookup_ioctl(unsigned int cmd) | |
3759 | +{ | |
3760 | + static struct { | |
3761 | + int cmd; | |
3762 | + ioctl_fn fn; | |
3763 | + } _ioctls[] = { | |
3764 | + {DM_VERSION_CMD, NULL}, /* version is dealt with elsewhere */ | |
3765 | + {DM_REMOVE_ALL_CMD, remove_all}, | |
3766 | + {DM_LIST_DEVICES_CMD, list_devices}, | |
3767 | + | |
3768 | + {DM_DEV_CREATE_CMD, dev_create}, | |
3769 | + {DM_DEV_REMOVE_CMD, dev_remove}, | |
3770 | + {DM_DEV_RENAME_CMD, dev_rename}, | |
3771 | + {DM_DEV_SUSPEND_CMD, dev_suspend}, | |
3772 | + {DM_DEV_STATUS_CMD, dev_status}, | |
3773 | + {DM_DEV_WAIT_CMD, dev_wait}, | |
3774 | + | |
3775 | + {DM_TABLE_LOAD_CMD, table_load}, | |
3776 | + {DM_TABLE_CLEAR_CMD, table_clear}, | |
3777 | + {DM_TABLE_DEPS_CMD, table_deps}, | |
3778 | + {DM_TABLE_STATUS_CMD, table_status} | |
3779 | + }; | |
3780 | + | |
3781 | + return (cmd >= ARRAY_SIZE(_ioctls)) ? NULL : _ioctls[cmd].fn; | |
3782 | +} | |
3783 | + | |
3784 | +/* | |
3785 | + * As well as checking version compatibility, this always | |
3786 | + * copies the kernel's interface version out. | |
3787 | + */ | |
3788 | +static int check_version(unsigned int cmd, struct dm_ioctl *user) | |
3789 | +{ | |
3790 | + uint32_t version[3]; | |
3791 | + int r = 0; | |
3792 | + | |
3793 | + if (copy_from_user(version, user->version, sizeof(version))) | |
3794 | + return -EFAULT; | |
3795 | + | |
3796 | + if ((DM_VERSION_MAJOR != version[0]) || | |
3797 | + (DM_VERSION_MINOR < version[1])) { | |
3798 | + DMWARN("ioctl interface mismatch: " | |
3799 | + "kernel(%u.%u.%u), user(%u.%u.%u), cmd(%d)", | |
3800 | + DM_VERSION_MAJOR, DM_VERSION_MINOR, | |
3801 | + DM_VERSION_PATCHLEVEL, | |
3802 | + version[0], version[1], version[2], cmd); | |
3803 | + r = -EINVAL; | |
3804 | + } | |
3805 | + | |
3806 | + /* | |
3807 | + * Fill in the kernel version. | |
3808 | + */ | |
3809 | + version[0] = DM_VERSION_MAJOR; | |
3810 | + version[1] = DM_VERSION_MINOR; | |
3811 | + version[2] = DM_VERSION_PATCHLEVEL; | |
3812 | + if (copy_to_user(user->version, version, sizeof(version))) | |
3813 | + return -EFAULT; | |
3814 | + | |
3815 | + return r; | |
3816 | +} | |
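Because check_version() runs before every command and unconditionally writes the kernel's version back, a DM_VERSION round trip is the natural probe. A minimal userspace sketch (error handling omitted):

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/dm-ioctl.h>

int main(void)
{
	struct dm_ioctl dmi;
	int fd = open("/dev/mapper/control", O_RDWR);

	memset(&dmi, 0, sizeof(dmi));
	dmi.version[0] = DM_VERSION_MAJOR;	/* the version we speak */
	dmi.version[1] = DM_VERSION_MINOR;
	dmi.version[2] = DM_VERSION_PATCHLEVEL;
	dmi.data_size = sizeof(dmi);		/* must be >= header size */

	if (fd >= 0 && ioctl(fd, DM_VERSION, &dmi) == 0)
		printf("kernel dm interface %u.%u.%u\n",
		       dmi.version[0], dmi.version[1], dmi.version[2]);
	return 0;
}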
3817 | + | |
3818 | +static void free_params(struct dm_ioctl *param) | |
3819 | +{ | |
3820 | + vfree(param); | |
3821 | +} | |
3822 | + | |
3823 | +static int copy_params(struct dm_ioctl *user, struct dm_ioctl **param) | |
3824 | +{ | |
3825 | + struct dm_ioctl tmp, *dmi; | |
3826 | + | |
3827 | + if (copy_from_user(&tmp, user, sizeof(tmp))) | |
3828 | + return -EFAULT; | |
3829 | + | |
3830 | + if (tmp.data_size < sizeof(tmp)) | |
3831 | + return -EINVAL; | |
3832 | + | |
3833 | + dmi = (struct dm_ioctl *) vmalloc(tmp.data_size); | |
3834 | + if (!dmi) | |
3835 | + return -ENOMEM; | |
3836 | + | |
3837 | + if (copy_from_user(dmi, user, tmp.data_size)) { | |
3838 | + vfree(dmi); | |
3839 | + return -EFAULT; | |
3840 | + } | |
3841 | + | |
3842 | + *param = dmi; | |
3843 | + return 0; | |
3844 | +} | |
3845 | + | |
3846 | +static int validate_params(uint cmd, struct dm_ioctl *param) | |
3847 | +{ | |
3848 | + /* Always clear this flag */ | |
3849 | + param->flags &= ~DM_BUFFER_FULL_FLAG; | |
3850 | + | |
3851 | + /* Ignores parameters */ | |
3852 | + if (cmd == DM_REMOVE_ALL_CMD || cmd == DM_LIST_DEVICES_CMD) | |
3853 | + return 0; | |
3854 | + | |
3855 | + /* Unless creating, either name or uuid but not both */ | |
3856 | + if (cmd != DM_DEV_CREATE_CMD) { | |
3857 | + if ((!*param->uuid && !*param->name) || | |
3858 | + (*param->uuid && *param->name)) { | |
3859 | + DMWARN("one of name or uuid must be supplied, cmd(%u)", | |
3860 | + cmd); | |
3861 | + return -EINVAL; | |
3862 | + } | |
3863 | + } | |
3864 | + | |
3865 | + /* Ensure strings are terminated */ | |
3866 | + param->name[DM_NAME_LEN - 1] = '\0'; | |
3867 | + param->uuid[DM_UUID_LEN - 1] = '\0'; | |
3868 | + | |
3869 | + return 0; | |
3870 | +} | |
3871 | + | |
3872 | +static int ctl_ioctl(struct inode *inode, struct file *file, | |
3873 | + uint command, ulong u) | |
3874 | +{ | |
3875 | + int r = 0; | |
3876 | + unsigned int cmd; | |
3877 | + struct dm_ioctl *param; | |
3878 | + struct dm_ioctl *user = (struct dm_ioctl *) u; | |
3879 | + ioctl_fn fn = NULL; | |
3880 | + size_t param_size; | |
3881 | + | |
3882 | + /* only root can play with this */ | |
3883 | + if (!capable(CAP_SYS_ADMIN)) | |
3884 | + return -EACCES; | |
3885 | + | |
3886 | + if (_IOC_TYPE(command) != DM_IOCTL) | |
3887 | + return -ENOTTY; | |
3888 | + | |
3889 | + cmd = _IOC_NR(command); | |
3890 | + | |
3891 | + /* | |
3892 | + * Check the interface version passed in. This also | |
3893 | + * writes out the kernel's interface version. | |
3894 | + */ | |
3895 | + r = check_version(cmd, user); | |
3896 | + if (r) | |
3897 | + return r; | |
3898 | + | |
3899 | + /* | |
3900 | + * Nothing more to do for the version command. | |
3901 | + */ | |
3902 | + if (cmd == DM_VERSION_CMD) | |
3903 | + return 0; | |
3904 | + | |
3905 | + fn = lookup_ioctl(cmd); | |
3906 | + if (!fn) { | |
3907 | + DMWARN("dm_ctl_ioctl: unknown command 0x%x", command); | |
3908 | + return -ENOTTY; | |
3909 | + } | |
3910 | + | |
3911 | + /* | |
3912 | + * FIXME: I don't like this, we're trying to avoid low | |
3913 | + * memory issues when a device is suspended. | |
3914 | + */ | |
3915 | + current->flags |= PF_MEMALLOC; | |
3916 | + | |
3917 | + /* | |
3918 | + * Copy the parameters into kernel space. | |
3919 | + */ | |
3920 | + r = copy_params(user, ¶m); | |
3921 | + if (r) { | |
3922 | + current->flags &= ~PF_MEMALLOC; | |
3923 | + return r; | |
3924 | + } | |
3925 | + | |
3926 | + r = validate_params(cmd, param); | |
3927 | + if (r) | |
3928 | + goto out; | |
3929 | + | |
3930 | + param_size = param->data_size; | |
3931 | + param->data_size = sizeof(*param); | |
3932 | + r = fn(param, param_size); | |
3933 | + | |
3934 | + /* | |
3935 | + * Copy the results back to userland. | |
3936 | + */ | |
3937 | + if (!r && copy_to_user(user, param, param->data_size)) | |
3938 | + r = -EFAULT; | |
3939 | + | |
3940 | + out: | |
3941 | + free_params(param); | |
3942 | + current->flags &= ~PF_MEMALLOC; | |
3943 | + return r; | |
3944 | +} | |
3945 | + | |
3946 | +static struct file_operations _ctl_fops = { | |
3947 | + .ioctl = ctl_ioctl, | |
3948 | + .owner = THIS_MODULE, | |
3949 | +}; | |
3950 | + | |
3951 | +static devfs_handle_t _ctl_handle; | |
3952 | + | |
3953 | +static struct miscdevice _dm_misc = { | |
3954 | + .minor = MISC_DYNAMIC_MINOR, | |
3955 | + .name = DM_NAME, | |
3956 | + .fops = &_ctl_fops | |
3957 | +}; | |
3958 | + | |
3959 | +/* | |
3960 | + * Create misc character device and link to DM_DIR/control. | |
3961 | + */ | |
3962 | +int __init dm_interface_init(void) | |
3963 | +{ | |
3964 | + int r; | |
3965 | + char rname[64]; | |
3966 | + | |
3967 | + r = dm_hash_init(); | |
3968 | + if (r) | |
3969 | + return r; | |
3970 | + | |
3971 | + r = misc_register(&_dm_misc); | |
3972 | + if (r) { | |
3973 | + DMERR("misc_register failed for control device"); | |
3974 | + dm_hash_exit(); | |
3975 | + return r; | |
3976 | + } | |
3977 | + | |
3978 | + r = devfs_generate_path(_dm_misc.devfs_handle, rname + 3, | |
3979 | + sizeof rname - 3); | |
3980 | + if (r == -ENOSYS) | |
3981 | + goto done; /* devfs not present */ | |
3982 | + | |
3983 | + if (r < 0) { | |
3984 | + DMERR("devfs_generate_path failed for control device"); | |
3985 | + goto failed; | |
3986 | + } | |
3987 | + | |
3988 | + strncpy(rname + r, "../", 3); | |
3989 | + r = devfs_mk_symlink(NULL, DM_DIR "/control", | |
3990 | + DEVFS_FL_DEFAULT, rname + r, &_ctl_handle, NULL); | |
3991 | + if (r) { | |
3992 | + DMERR("devfs_mk_symlink failed for control device"); | |
3993 | + goto failed; | |
3994 | + } | |
3995 | + devfs_auto_unregister(_dm_misc.devfs_handle, _ctl_handle); | |
3996 | + | |
3997 | + done: | |
3998 | + DMINFO("%d.%d.%d%s initialised: %s", DM_VERSION_MAJOR, | |
3999 | + DM_VERSION_MINOR, DM_VERSION_PATCHLEVEL, DM_VERSION_EXTRA, | |
4000 | + DM_DRIVER_EMAIL); | |
4001 | + return 0; | |
4002 | + | |
4003 | + failed: | |
4004 | + misc_deregister(&_dm_misc); | |
4005 | + dm_hash_exit(); | |
4006 | + return r; | |
4007 | +} | |
4008 | + | |
4009 | +void dm_interface_exit(void) | |
4010 | +{ | |
4011 | + if (misc_deregister(&_dm_misc) < 0) | |
4012 | + DMERR("misc_deregister failed for control device"); | |
4013 | + | |
4014 | + dm_hash_exit(); | |
4015 | +} | |
4016 | diff -urN linux-2.4.24.org/drivers/md/dm-io.h linux-2.4.24/drivers/md/dm-io.h | |
4017 | --- linux-2.4.24.org/drivers/md/dm-io.h 1970-01-01 01:00:00.000000000 +0100 | |
4018 | +++ linux-2.4.24/drivers/md/dm-io.h 2004-01-18 15:01:25.794190275 +0100 | |
4019 | @@ -0,0 +1,86 @@ | |
4020 | +/* | |
4021 | + * Copyright (C) 2003 Sistina Software | |
4022 | + * | |
4023 | + * This file is released under the GPL. | |
4024 | + */ | |
4025 | + | |
4026 | +#ifndef _DM_IO_H | |
4027 | +#define _DM_IO_H | |
4028 | + | |
4029 | +#include "dm.h" | |
4030 | + | |
4031 | +#include <linux/list.h> | |
4032 | + | |
4033 | +/* Move these to bitops.h eventually */ | |
4034 | +/* Improved generic_fls algorithm (in 2.4 there is no generic_fls so far) */ | |
4035 | +/* (c) 2002, D.Phillips and Sistina Software */ | |
4036 | +/* Licensed under Version 2 of the GPL */ | |
4037 | + | |
4038 | +static unsigned generic_fls8(unsigned n) | |
4039 | +{ | |
4040 | + return n & 0xf0 ? | |
4041 | + n & 0xc0 ? (n >> 7) + 7 : (n >> 5) + 5: | |
4042 | + n & 0x0c ? (n >> 3) + 3 : n - ((n + 1) >> 2); | |
4043 | +} | |
4044 | + | |
4045 | +static inline unsigned generic_fls16(unsigned n) | |
4046 | +{ | |
4047 | + return n & 0xff00? generic_fls8(n >> 8) + 8 : generic_fls8(n); | |
4048 | +} | |
4049 | + | |
4050 | +static inline unsigned generic_fls32(unsigned n) | |
4051 | +{ | |
4052 | + return n & 0xffff0000 ? generic_fls16(n >> 16) + 16 : generic_fls16(n); | |
4053 | +} | |
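These helpers compute fls(): the 1-based index of the highest set bit, with 0 for an all-zero input. A few worked values as a self-checking snippet, assuming the three functions above are in scope:

#include <assert.h>

int main(void)
{
	assert(generic_fls32(0) == 0);		/* no bit set */
	assert(generic_fls32(1) == 1);		/* bit 0 */
	assert(generic_fls32(0x90) == 8);	/* highest bit (7) wins */
	assert(generic_fls32(0x8000) == 16);
	assert(generic_fls32(0x80000000u) == 32);
	return 0;
}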
4054 | + | |
4055 | +/* FIXME make this configurable */ | |
4056 | +#define DM_MAX_IO_REGIONS 8 | |
4057 | + | |
4058 | +struct io_region { | |
4059 | + kdev_t dev; | |
4060 | + sector_t sector; | |
4061 | + sector_t count; | |
4062 | +}; | |
4063 | + | |
4064 | + | |
4065 | +/* | |
4066 | + * 'error' is a bitset, with each bit indicating whether an error | |
4067 | + * occurred doing io to the corresponding region. | |
4068 | + */ | |
4069 | +typedef void (*io_notify_fn)(unsigned int error, void *context); | |
4070 | + | |
4071 | + | |
4072 | +/* | |
4073 | + * Before anyone uses the IO interface they should call | |
4074 | + * dm_io_get(), specifying roughly how many pages they are | |
4075 | + * expecting to perform io on concurrently. | |
4076 | + * | |
4077 | + * This function may block. | |
4078 | + */ | |
4079 | +int dm_io_get(unsigned int num_pages); | |
4080 | +void dm_io_put(unsigned int num_pages); | |
4081 | + | |
4082 | + | |
4083 | +/* | |
4084 | + * Synchronous IO. | |
4085 | + * | |
4086 | + * Please ensure that the rw flag in the next two functions is | |
4087 | + * either READ or WRITE, i.e. we don't take READA. Any | |
4088 | + * regions with a zero count field will be ignored. | |
4089 | + */ | |
4090 | +int dm_io_sync(unsigned int num_regions, struct io_region *where, int rw, | |
4091 | + struct page *pages, unsigned int offset, | |
4092 | + unsigned int *error_bits); | |
4093 | + | |
4094 | + | |
4095 | +/* | |
4096 | + * Asynchronous IO. | |
4097 | + * | |
4098 | + * The 'where' array may be safely allocated on the stack since | |
4099 | + * the function takes a copy. | |
4100 | + */ | |
4101 | +int dm_io_async(unsigned int num_regions, struct io_region *where, int rw, | |
4102 | + struct page *pages, unsigned int offset, | |
4103 | + io_notify_fn fn, void *context); | |
4104 | + | |
4105 | +#endif | |
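A sketch of how an in-kernel client would use this interface as documented above: reserve pages, issue one synchronous write mirrored to two regions, then release the reservation. Illustrative only; the two devices and the page are assumed to come from the caller.

#include "dm-io.h"

/* Synchronously write one page to the start of two devices. */
static int mirror_one_page(kdev_t dev_a, kdev_t dev_b, struct page *page)
{
	struct io_region where[2];
	unsigned int error_bits;
	int r;

	r = dm_io_get(1);		/* expect io on ~1 page at a time */
	if (r)
		return r;

	where[0].dev = dev_a;
	where[0].sector = 0;
	where[0].count = PAGE_SIZE >> 9;	/* sectors in one page */
	where[1] = where[0];
	where[1].dev = dev_b;

	r = dm_io_sync(2, where, WRITE, page, 0, &error_bits);
	if (!r && error_bits)
		r = -EIO;		/* one bit per failed region */

	dm_io_put(1);
	return r;
}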
4106 | diff -urN linux-2.4.24.org/drivers/md/dm-linear.c linux-2.4.24/drivers/md/dm-linear.c | |
4107 | --- linux-2.4.24.org/drivers/md/dm-linear.c 1970-01-01 01:00:00.000000000 +0100 | |
4108 | +++ linux-2.4.24/drivers/md/dm-linear.c 2004-01-18 15:01:13.777712209 +0100 | |
4109 | @@ -0,0 +1,123 @@ | |
4110 | +/* | |
4111 | + * Copyright (C) 2001 Sistina Software (UK) Limited. | |
4112 | + * | |
4113 | + * This file is released under the GPL. | |
4114 | + */ | |
4115 | + | |
4116 | +#include "dm.h" | |
4117 | + | |
4118 | +#include <linux/module.h> | |
4119 | +#include <linux/init.h> | |
4120 | +#include <linux/blkdev.h> | |
4121 | +#include <linux/slab.h> | |
4122 | + | |
4123 | +/* | |
4124 | + * Linear: maps a linear range of a device. | |
4125 | + */ | |
4126 | +struct linear_c { | |
4127 | + struct dm_dev *dev; | |
4128 | + sector_t start; | |
4129 | +}; | |
4130 | + | |
4131 | +/* | |
4132 | + * Construct a linear mapping: <dev_path> <offset> | |
4133 | + */ | |
4134 | +static int linear_ctr(struct dm_target *ti, unsigned int argc, char **argv) | |
4135 | +{ | |
4136 | + struct linear_c *lc; | |
4137 | + | |
4138 | + if (argc != 2) { | |
4139 | + ti->error = "dm-linear: Invalid argument count"; | |
4140 | + return -EINVAL; | |
4141 | + } | |
4142 | + | |
4143 | + lc = kmalloc(sizeof(*lc), GFP_KERNEL); | |
4144 | + if (lc == NULL) { | |
4145 | + ti->error = "dm-linear: Cannot allocate linear context"; | |
4146 | + return -ENOMEM; | |
4147 | + } | |
4148 | + | |
4149 | + if (sscanf(argv[1], SECTOR_FORMAT, &lc->start) != 1) { | |
4150 | + ti->error = "dm-linear: Invalid device sector"; | |
4151 | + goto bad; | |
4152 | + } | |
4153 | + | |
4154 | + if (dm_get_device(ti, argv[0], lc->start, ti->len, | |
4155 | + dm_table_get_mode(ti->table), &lc->dev)) { | |
4156 | + ti->error = "dm-linear: Device lookup failed"; | |
4157 | + goto bad; | |
4158 | + } | |
4159 | + | |
4160 | + ti->private = lc; | |
4161 | + return 0; | |
4162 | + | |
4163 | + bad: | |
4164 | + kfree(lc); | |
4165 | + return -EINVAL; | |
4166 | +} | |
4167 | + | |
4168 | +static void linear_dtr(struct dm_target *ti) | |
4169 | +{ | |
4170 | + struct linear_c *lc = (struct linear_c *) ti->private; | |
4171 | + | |
4172 | + dm_put_device(ti, lc->dev); | |
4173 | + kfree(lc); | |
4174 | +} | |
4175 | + | |
4176 | +static int linear_map(struct dm_target *ti, struct buffer_head *bh, int rw, | |
4177 | + union map_info *map_context) | |
4178 | +{ | |
4179 | + struct linear_c *lc = (struct linear_c *) ti->private; | |
4180 | + | |
4181 | + bh->b_rdev = lc->dev->dev; | |
4182 | + bh->b_rsector = lc->start + (bh->b_rsector - ti->begin); | |
4183 | + | |
4184 | + return 1; | |
4185 | +} | |
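For illustration with made-up numbers: a linear target covering sectors 1024..2047 of the dm device (ti->begin = 1024), built with offset 2048, rewrites a request for sector 1100 to sector 2048 + (1100 - 1024) = 2124 on the underlying device; the return value of 1 tells the dm core the buffer_head was remapped and should be resubmitted.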
4186 | + | |
4187 | +static int linear_status(struct dm_target *ti, status_type_t type, | |
4188 | + char *result, unsigned int maxlen) | |
4189 | +{ | |
4190 | + struct linear_c *lc = (struct linear_c *) ti->private; | |
4191 | + kdev_t kdev; | |
4192 | + | |
4193 | + switch (type) { | |
4194 | + case STATUSTYPE_INFO: | |
4195 | + result[0] = '\0'; | |
4196 | + break; | |
4197 | + | |
4198 | + case STATUSTYPE_TABLE: | |
4199 | + kdev = to_kdev_t(lc->dev->bdev->bd_dev); | |
4200 | + snprintf(result, maxlen, "%s " SECTOR_FORMAT, | |
4201 | + dm_kdevname(kdev), lc->start); | |
4202 | + break; | |
4203 | + } | |
4204 | + return 0; | |
4205 | +} | |
4206 | + | |
4207 | +static struct target_type linear_target = { | |
4208 | + .name = "linear", | |
4209 | + .module = THIS_MODULE, | |
4210 | + .ctr = linear_ctr, | |
4211 | + .dtr = linear_dtr, | |
4212 | + .map = linear_map, | |
4213 | + .status = linear_status, | |
4214 | +}; | |
4215 | + | |
4216 | +int __init dm_linear_init(void) | |
4217 | +{ | |
4218 | + int r = dm_register_target(&linear_target); | |
4219 | + | |
4220 | + if (r < 0) | |
4221 | + DMERR("linear: register failed %d", r); | |
4222 | + | |
4223 | + return r; | |
4224 | +} | |
4225 | + | |
4226 | +void dm_linear_exit(void) | |
4227 | +{ | |
4228 | + int r = dm_unregister_target(&linear_target); | |
4229 | + | |
4230 | + if (r < 0) | |
4231 | + DMERR("linear: unregister failed %d", r); | |
4232 | +} | |
4233 | diff -urN linux-2.4.24.org/drivers/md/dm-snapshot.c linux-2.4.24/drivers/md/dm-snapshot.c | |
4234 | --- linux-2.4.24.org/drivers/md/dm-snapshot.c 1970-01-01 01:00:00.000000000 +0100 | |
4235 | +++ linux-2.4.24/drivers/md/dm-snapshot.c 2004-01-18 15:01:29.247465850 +0100 | |
4236 | @@ -0,0 +1,1235 @@ | |
4237 | +/* | |
4238 | + * dm-snapshot.c | |
4239 | + * | |
4240 | + * Copyright (C) 2001-2002 Sistina Software (UK) Limited. | |
4241 | + * | |
4242 | + * This file is released under the GPL. | |
4243 | + */ | |
4244 | + | |
4245 | +#include <linux/config.h> | |
4246 | +#include <linux/ctype.h> | |
4247 | +#include <linux/module.h> | |
4248 | +#include <linux/init.h> | |
4249 | +#include <linux/slab.h> | |
4250 | +#include <linux/list.h> | |
4251 | +#include <linux/fs.h> | |
4252 | +#include <linux/blkdev.h> | |
4253 | +#include <linux/mempool.h> | |
4254 | +#include <linux/device-mapper.h> | |
4255 | +#include <linux/vmalloc.h> | |
4256 | + | |
4257 | +#include "dm-snapshot.h" | |
4258 | +#include "kcopyd.h" | |
4259 | + | |
4260 | +/* | |
4261 | + * FIXME: Remove this before release. | |
4262 | + */ | |
4263 | +#if 0 | |
4264 | +#define DMDEBUG(x...) DMWARN( ## x) | |
4265 | +#else | |
4266 | +#define DMDEBUG(x...) | |
4267 | +#endif | |
4268 | + | |
4269 | +/* | |
4270 | + * The percentage increment we will wake up users at | |
4271 | + */ | |
4272 | +#define WAKE_UP_PERCENT 5 | |
4273 | + | |
4274 | +/* | |
4275 | + * kcopyd priority of snapshot operations | |
4276 | + */ | |
4277 | +#define SNAPSHOT_COPY_PRIORITY 2 | |
4278 | + | |
4279 | +/* | |
4280 | + * Each snapshot reserves this many pages for io | |
4281 | + * FIXME: calculate this | |
4282 | + */ | |
4283 | +#define SNAPSHOT_PAGES 256 | |
4284 | + | |
4285 | +struct pending_exception { | |
4286 | + struct exception e; | |
4287 | + | |
4288 | + /* | |
4289 | + * Origin buffers waiting for this to complete are held | |
4290 | + * in a list (using b_reqnext). | |
4291 | + */ | |
4292 | + struct buffer_head *origin_bhs; | |
4293 | + struct buffer_head *snapshot_bhs; | |
4294 | + | |
4295 | + /* | |
4296 | + * Other pending_exceptions that are processing this | |
4297 | + * chunk. When this list is empty, we know we can | |
4298 | + * complete the origins. | |
4299 | + */ | |
4300 | + struct list_head siblings; | |
4301 | + | |
4302 | + /* Pointer back to snapshot context */ | |
4303 | + struct dm_snapshot *snap; | |
4304 | + | |
4305 | + /* | |
4306 | + * 1 indicates the exception has already been sent to | |
4307 | + * kcopyd. | |
4308 | + */ | |
4309 | + int started; | |
4310 | +}; | |
4311 | + | |
4312 | +/* | |
4313 | + * Hash table mapping origin volumes to lists of snapshots and | |
4314 | + * a lock to protect it | |
4315 | + */ | |
4316 | +static kmem_cache_t *exception_cache; | |
4317 | +static kmem_cache_t *pending_cache; | |
4318 | +static mempool_t *pending_pool; | |
4319 | + | |
4320 | +/* | |
4321 | + * One of these per registered origin, held in the snapshot_origins hash | |
4322 | + */ | |
4323 | +struct origin { | |
4324 | + /* The origin device */ | |
4325 | + kdev_t dev; | |
4326 | + | |
4327 | + struct list_head hash_list; | |
4328 | + | |
4329 | + /* List of snapshots for this origin */ | |
4330 | + struct list_head snapshots; | |
4331 | +}; | |
4332 | + | |
4333 | +/* | |
4334 | + * Size of the hash table for origin volumes. If we make this | |
4335 | + * the size of the minors list then it should be nearly perfect | |
4336 | + */ | |
4337 | +#define ORIGIN_HASH_SIZE 256 | |
4338 | +#define ORIGIN_MASK 0xFF | |
4339 | +static struct list_head *_origins; | |
4340 | +static struct rw_semaphore _origins_lock; | |
4341 | + | |
4342 | +static int init_origin_hash(void) | |
4343 | +{ | |
4344 | + int i; | |
4345 | + | |
4346 | + _origins = kmalloc(ORIGIN_HASH_SIZE * sizeof(struct list_head), | |
4347 | + GFP_KERNEL); | |
4348 | + if (!_origins) { | |
4349 | + DMERR("Device mapper: Snapshot: unable to allocate memory"); | |
4350 | + return -ENOMEM; | |
4351 | + } | |
4352 | + | |
4353 | + for (i = 0; i < ORIGIN_HASH_SIZE; i++) | |
4354 | + INIT_LIST_HEAD(_origins + i); | |
4355 | + init_rwsem(&_origins_lock); | |
4356 | + | |
4357 | + return 0; | |
4358 | +} | |
4359 | + | |
4360 | +static void exit_origin_hash(void) | |
4361 | +{ | |
4362 | + kfree(_origins); | |
4363 | +} | |
4364 | + | |
4365 | +static inline unsigned int origin_hash(kdev_t dev) | |
4366 | +{ | |
4367 | + return MINOR(dev) & ORIGIN_MASK; | |
4368 | +} | |
4369 | + | |
4370 | +static struct origin *__lookup_origin(kdev_t origin) | |
4371 | +{ | |
4372 | + struct list_head *slist; | |
4373 | + struct list_head *ol; | |
4374 | + struct origin *o; | |
4375 | + | |
4376 | + ol = &_origins[origin_hash(origin)]; | |
4377 | + list_for_each(slist, ol) { | |
4378 | + o = list_entry(slist, struct origin, hash_list); | |
4379 | + | |
4380 | + if (o->dev == origin) | |
4381 | + return o; | |
4382 | + } | |
4383 | + | |
4384 | + return NULL; | |
4385 | +} | |
4386 | + | |
4387 | +static void __insert_origin(struct origin *o) | |
4388 | +{ | |
4389 | + struct list_head *sl = &_origins[origin_hash(o->dev)]; | |
4390 | + list_add_tail(&o->hash_list, sl); | |
4391 | +} | |
4392 | + | |
4393 | +/* | |
4394 | + * Make a note of the snapshot and its origin so we can look it | |
4395 | + * up when the origin has a write on it. | |
4396 | + */ | |
4397 | +static int register_snapshot(struct dm_snapshot *snap) | |
4398 | +{ | |
4399 | + struct origin *o; | |
4400 | + kdev_t dev = snap->origin->dev; | |
4401 | + | |
4402 | + down_write(&_origins_lock); | |
4403 | + o = __lookup_origin(dev); | |
4404 | + | |
4405 | + if (!o) { | |
4406 | + /* New origin */ | |
4407 | + o = kmalloc(sizeof(*o), GFP_KERNEL); | |
4408 | + if (!o) { | |
4409 | + up_write(&_origins_lock); | |
4410 | + return -ENOMEM; | |
4411 | + } | |
4412 | + | |
4413 | + /* Initialise the struct */ | |
4414 | + INIT_LIST_HEAD(&o->snapshots); | |
4415 | + o->dev = dev; | |
4416 | + | |
4417 | + __insert_origin(o); | |
4418 | + } | |
4419 | + | |
4420 | + list_add_tail(&snap->list, &o->snapshots); | |
4421 | + | |
4422 | + up_write(&_origins_lock); | |
4423 | + return 0; | |
4424 | +} | |
4425 | + | |
4426 | +static void unregister_snapshot(struct dm_snapshot *s) | |
4427 | +{ | |
4428 | + struct origin *o; | |
4429 | + | |
4430 | + down_write(&_origins_lock); | |
4431 | + o = __lookup_origin(s->origin->dev); | |
4432 | + | |
4433 | + list_del(&s->list); | |
4434 | + if (list_empty(&o->snapshots)) { | |
4435 | + list_del(&o->hash_list); | |
4436 | + kfree(o); | |
4437 | + } | |
4438 | + | |
4439 | + up_write(&_origins_lock); | |
4440 | +} | |
4441 | + | |
4442 | +/* | |
4443 | + * Implementation of the exception hash tables. | |
4444 | + */ | |
4445 | +static int init_exception_table(struct exception_table *et, uint32_t size) | |
4446 | +{ | |
4447 | + unsigned int i; | |
4448 | + | |
4449 | + et->hash_mask = size - 1; | |
4450 | + et->table = dm_vcalloc(size, sizeof(struct list_head)); | |
4451 | + if (!et->table) | |
4452 | + return -ENOMEM; | |
4453 | + | |
4454 | + for (i = 0; i < size; i++) | |
4455 | + INIT_LIST_HEAD(et->table + i); | |
4456 | + | |
4457 | + return 0; | |
4458 | +} | |
4459 | + | |
4460 | +static void exit_exception_table(struct exception_table *et, kmem_cache_t *mem) | |
4461 | +{ | |
4462 | + struct list_head *slot, *entry, *temp; | |
4463 | + struct exception *ex; | |
4464 | + int i, size; | |
4465 | + | |
4466 | + size = et->hash_mask + 1; | |
4467 | + for (i = 0; i < size; i++) { | |
4468 | + slot = et->table + i; | |
4469 | + | |
4470 | + list_for_each_safe(entry, temp, slot) { | |
4471 | + ex = list_entry(entry, struct exception, hash_list); | |
4472 | + kmem_cache_free(mem, ex); | |
4473 | + } | |
4474 | + } | |
4475 | + | |
4476 | + vfree(et->table); | |
4477 | +} | |
4478 | + | |
4479 | +/* | |
4480 | + * FIXME: check how this hash fn is performing. | |
4481 | + */ | |
4482 | +static inline uint32_t exception_hash(struct exception_table *et, chunk_t chunk) | |
4483 | +{ | |
4484 | + return chunk & et->hash_mask; | |
4485 | +} | |
4486 | + | |
4487 | +static void insert_exception(struct exception_table *eh, struct exception *e) | |
4488 | +{ | |
4489 | + struct list_head *l = &eh->table[exception_hash(eh, e->old_chunk)]; | |
4490 | + list_add(&e->hash_list, l); | |
4491 | +} | |
4492 | + | |
4493 | +static inline void remove_exception(struct exception *e) | |
4494 | +{ | |
4495 | + list_del(&e->hash_list); | |
4496 | +} | |
4497 | + | |
4498 | +/* | |
4499 | + * Return the exception data for a sector, or NULL if not | |
4500 | + * remapped. | |
4501 | + */ | |
4502 | +static struct exception *lookup_exception(struct exception_table *et, | |
4503 | + chunk_t chunk) | |
4504 | +{ | |
4505 | + struct list_head *slot, *el; | |
4506 | + struct exception *e; | |
4507 | + | |
4508 | + slot = &et->table[exception_hash(et, chunk)]; | |
4509 | + list_for_each(el, slot) { | |
4510 | + e = list_entry(el, struct exception, hash_list); | |
4511 | + if (e->old_chunk == chunk) | |
4512 | + return e; | |
4513 | + } | |
4514 | + | |
4515 | + return NULL; | |
4516 | +} | |
4517 | + | |
4518 | +static inline struct exception *alloc_exception(void) | |
4519 | +{ | |
4520 | + struct exception *e; | |
4521 | + | |
4522 | + e = kmem_cache_alloc(exception_cache, GFP_NOIO); | |
4523 | + if (!e) | |
4524 | + e = kmem_cache_alloc(exception_cache, GFP_ATOMIC); | |
4525 | + | |
4526 | + return e; | |
4527 | +} | |
4528 | + | |
4529 | +static inline void free_exception(struct exception *e) | |
4530 | +{ | |
4531 | + kmem_cache_free(exception_cache, e); | |
4532 | +} | |
4533 | + | |
4534 | +static inline struct pending_exception *alloc_pending_exception(void) | |
4535 | +{ | |
4536 | + return mempool_alloc(pending_pool, GFP_NOIO); | |
4537 | +} | |
4538 | + | |
4539 | +static inline void free_pending_exception(struct pending_exception *pe) | |
4540 | +{ | |
4541 | + mempool_free(pe, pending_pool); | |
4542 | +} | |
4543 | + | |
4544 | +int dm_add_exception(struct dm_snapshot *s, chunk_t old, chunk_t new) | |
4545 | +{ | |
4546 | + struct exception *e; | |
4547 | + | |
4548 | + e = alloc_exception(); | |
4549 | + if (!e) | |
4550 | + return -ENOMEM; | |
4551 | + | |
4552 | + e->old_chunk = old; | |
4553 | + e->new_chunk = new; | |
4554 | + insert_exception(&s->complete, e); | |
4555 | + return 0; | |
4556 | +} | |
4557 | + | |
4558 | +/* | |
4559 | + * Hard coded magic. | |
4560 | + */ | |
4561 | +static int calc_max_buckets(void) | |
4562 | +{ | |
4563 | + unsigned long mem; | |
4564 | + | |
4565 | + mem = num_physpages << PAGE_SHIFT; | |
4566 | + mem /= 50; | |
4567 | + mem /= sizeof(struct list_head); | |
4568 | + | |
4569 | + return mem; | |
4570 | +} | |
4571 | + | |
4572 | +/* | |
4573 | + * Rounds a number down to a power of 2. | |
4574 | + */ | |
4575 | +static inline uint32_t round_down(uint32_t n) | |
4576 | +{ | |
4577 | + while (n & (n - 1)) | |
4578 | + n &= (n - 1); | |
4579 | + return n; | |
4580 | +} | |
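Worked example: each `n &= n - 1` pass clears the lowest set bit, so round_down(12) goes 1100b -> 1000b and returns 8, while a power of two such as 8 already satisfies n & (n - 1) == 0 and is returned unchanged.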
4581 | + | |
4582 | +/* | |
4583 | + * Allocate room for a suitable hash table. | |
4584 | + */ | |
4585 | +static int init_hash_tables(struct dm_snapshot *s) | |
4586 | +{ | |
4587 | + sector_t hash_size, cow_dev_size, origin_dev_size, max_buckets; | |
4588 | + | |
4589 | + /* | |
4590 | + * Calculate based on the size of the original volume or | |
4591 | + * the COW volume... | |
4592 | + */ | |
4593 | + cow_dev_size = get_dev_size(s->cow->dev); | |
4594 | + origin_dev_size = get_dev_size(s->origin->dev); | |
4595 | + max_buckets = calc_max_buckets(); | |
4596 | + | |
4597 | + hash_size = min(origin_dev_size, cow_dev_size) / s->chunk_size; | |
4598 | + hash_size = min(hash_size, max_buckets); | |
4599 | + | |
4600 | + /* Round it down to a power of 2 */ | |
4601 | + hash_size = round_down(hash_size); | |
4602 | + if (init_exception_table(&s->complete, hash_size)) | |
4603 | + return -ENOMEM; | |
4604 | + | |
4605 | + /* | |
4606 | + * Allocate hash table for in-flight exceptions | |
4607 | + * Make this smaller than the real hash table | |
4608 | + */ | |
4609 | + hash_size >>= 3; | |
4610 | + if (!hash_size) | |
4611 | + hash_size = 64; | |
4612 | + | |
4613 | + if (init_exception_table(&s->pending, hash_size)) { | |
4614 | + exit_exception_table(&s->complete, exception_cache); | |
4615 | + return -ENOMEM; | |
4616 | + } | |
4617 | + | |
4618 | + return 0; | |
4619 | +} | |
4620 | + | |
4621 | +/* | |
4622 | + * Round a number up to the nearest 'size' boundary. size must | |
4623 | + * be a power of 2. | |
4624 | + */ | |
4625 | +static inline ulong round_up(ulong n, ulong size) | |
4626 | +{ | |
4627 | + size--; | |
4628 | + return (n + size) & ~size; | |
4629 | +} | |
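Worked example: round_up(5, 4) computes (5 + 3) & ~3 = 8, while round_up(8, 4) stays 8; the mask trick only works because `size` is a power of 2.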
4630 | + | |
4631 | +/* | |
4632 | + * Construct a snapshot mapping: <origin_dev> <COW-dev> <p/n> <chunk-size> | |
4633 | + */ | |
4634 | +static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv) | |
4635 | +{ | |
4636 | + struct dm_snapshot *s; | |
4637 | + unsigned long chunk_size; | |
4638 | + int r = -EINVAL; | |
4639 | + char persistent; | |
4640 | + char *origin_path; | |
4641 | + char *cow_path; | |
4642 | + char *value; | |
4643 | + int blocksize; | |
4644 | + | |
4645 | + if (argc < 4) { | |
4646 | + ti->error = "dm-snapshot: requires exactly 4 arguments"; | |
4647 | + r = -EINVAL; | |
4648 | + goto bad1; | |
4649 | + } | |
4650 | + | |
4651 | + origin_path = argv[0]; | |
4652 | + cow_path = argv[1]; | |
4653 | + persistent = toupper(*argv[2]); | |
4654 | + | |
4655 | + if (persistent != 'P' && persistent != 'N') { | |
4656 | + ti->error = "Persistent flag is not P or N"; | |
4657 | + r = -EINVAL; | |
4658 | + goto bad1; | |
4659 | + } | |
4660 | + | |
4661 | + chunk_size = simple_strtoul(argv[3], &value, 10); | |
4662 | + if (chunk_size == 0 || value == NULL) { | |
4663 | + ti->error = "Invalid chunk size"; | |
4664 | + r = -EINVAL; | |
4665 | + goto bad1; | |
4666 | + } | |
4667 | + | |
4668 | + s = kmalloc(sizeof(*s), GFP_KERNEL); | |
4669 | + if (s == NULL) { | |
4670 | + ti->error = "Cannot allocate snapshot context private " | |
4671 | + "structure"; | |
4672 | + r = -ENOMEM; | |
4673 | + goto bad1; | |
4674 | + } | |
4675 | + | |
4676 | + r = dm_get_device(ti, origin_path, 0, ti->len, FMODE_READ, &s->origin); | |
4677 | + if (r) { | |
4678 | + ti->error = "Cannot get origin device"; | |
4679 | + goto bad2; | |
4680 | + } | |
4681 | + | |
4682 | + /* FIXME: get cow length */ | |
4683 | + r = dm_get_device(ti, cow_path, 0, 0, | |
4684 | + FMODE_READ | FMODE_WRITE, &s->cow); | |
4685 | + if (r) { | |
4686 | + dm_put_device(ti, s->origin); | |
4687 | + ti->error = "Cannot get COW device"; | |
4688 | + goto bad2; | |
4689 | + } | |
4690 | + | |
4691 | + /* | |
4692 | + * Chunk size must be a multiple of page size. Silently | |
4693 | + * round up if it's not. | |
4694 | + */ | |
4695 | + chunk_size = round_up(chunk_size, PAGE_SIZE / SECTOR_SIZE); | |
4696 | + | |
4697 | + /* Validate the chunk size against the device block size */ | |
4698 | + blocksize = get_hardsect_size(s->cow->dev); | |
4699 | + if (chunk_size % (blocksize / SECTOR_SIZE)) { | |
4700 | + ti->error = "Chunk size is not a multiple of device blocksize"; | |
4701 | + r = -EINVAL; | |
4702 | + goto bad3; | |
4703 | + } | |
4704 | + | |
4705 | + /* Check the sizes are small enough to fit in one kiovec */ | |
4706 | + if (chunk_size > KIO_MAX_SECTORS) { | |
4707 | + ti->error = "Chunk size is too big"; | |
4708 | + r = -EINVAL; | |
4709 | + goto bad3; | |
4710 | + } | |
4711 | + | |
4712 | + /* Check chunk_size is a power of 2 */ | |
4713 | + if (chunk_size & (chunk_size - 1)) { | |
4714 | + ti->error = "Chunk size is not a power of 2"; | |
4715 | + r = -EINVAL; | |
4716 | + goto bad3; | |
4717 | + } | |
4718 | + | |
4719 | + s->chunk_size = chunk_size; | |
4720 | + s->chunk_mask = chunk_size - 1; | |
4721 | + s->type = persistent; | |
4722 | + for (s->chunk_shift = 0; chunk_size; | |
4723 | + s->chunk_shift++, chunk_size >>= 1) | |
4724 | + ; | |
4725 | + s->chunk_shift--; | |
4726 | + | |
4727 | + s->valid = 1; | |
4728 | + s->have_metadata = 0; | |
4729 | + s->last_percent = 0; | |
4730 | + init_rwsem(&s->lock); | |
4731 | + s->table = ti->table; | |
4732 | + | |
4733 | + /* Allocate hash table for COW data */ | |
4734 | + if (init_hash_tables(s)) { | |
4735 | + ti->error = "Unable to allocate hash table space"; | |
4736 | + r = -ENOMEM; | |
4737 | + goto bad3; | |
4738 | + } | |
4739 | + | |
4740 | + /* | |
4741 | + * Check the persistent flag - done here because we need the iobuf | |
4742 | + * to check the LV header | |
4743 | + */ | |
4744 | + s->store.snap = s; | |
4745 | + | |
4746 | + if (persistent == 'P') | |
4747 | + r = dm_create_persistent(&s->store, s->chunk_size); | |
4748 | + else | |
4749 | + r = dm_create_transient(&s->store, s, blocksize); | |
4750 | + | |
4751 | + if (r) { | |
4752 | + ti->error = "Couldn't create exception store"; | |
4753 | + r = -EINVAL; | |
4754 | + goto bad4; | |
4755 | + } | |
4756 | + | |
4757 | + r = kcopyd_client_create(SNAPSHOT_PAGES, &s->kcopyd_client); | |
4758 | + if (r) { | |
4759 | + ti->error = "Could not create kcopyd client"; | |
4760 | + goto bad5; | |
4761 | + } | |
4762 | + | |
4763 | + /* Flush IO to the origin device */ | |
4764 | + fsync_dev(s->origin->dev); | |
4765 | + | |
4766 | + /* Add snapshot to the list of snapshots for this origin */ | |
4767 | + if (register_snapshot(s)) { | |
4768 | + r = -EINVAL; | |
4769 | + ti->error = "Cannot register snapshot origin"; | |
4770 | + goto bad6; | |
4771 | + } | |
4772 | + | |
4773 | + ti->private = s; | |
4774 | + return 0; | |
4775 | + | |
4776 | + bad6: | |
4777 | + kcopyd_client_destroy(s->kcopyd_client); | |
4778 | + | |
4779 | + bad5: | |
4780 | + s->store.destroy(&s->store); | |
4781 | + | |
4782 | + bad4: | |
4783 | + exit_exception_table(&s->pending, pending_cache); | |
4784 | + exit_exception_table(&s->complete, exception_cache); | |
4785 | + | |
4786 | + bad3: | |
4787 | + dm_put_device(ti, s->cow); | |
4788 | + dm_put_device(ti, s->origin); | |
4789 | + | |
4790 | + bad2: | |
4791 | + kfree(s); | |
4792 | + | |
4793 | + bad1: | |
4794 | + return r; | |
4795 | +} | |
4796 | + | |
4797 | +static void snapshot_dtr(struct dm_target *ti) | |
4798 | +{ | |
4799 | + struct dm_snapshot *s = (struct dm_snapshot *) ti->private; | |
4800 | + | |
4801 | + dm_table_event(ti->table); | |
4802 | + | |
4803 | + unregister_snapshot(s); | |
4804 | + | |
4805 | + exit_exception_table(&s->pending, pending_cache); | |
4806 | + exit_exception_table(&s->complete, exception_cache); | |
4807 | + | |
4808 | + /* Deallocate memory used */ | |
4809 | + s->store.destroy(&s->store); | |
4810 | + | |
4811 | + dm_put_device(ti, s->origin); | |
4812 | + dm_put_device(ti, s->cow); | |
4813 | + kcopyd_client_destroy(s->kcopyd_client); | |
4814 | + kfree(s); | |
4815 | +} | |
4816 | + | |
4817 | +/* | |
4818 | + * We hold lists of buffer_heads, using the b_reqnext field. | |
4819 | + */ | |
4820 | +static void queue_buffer(struct buffer_head **queue, struct buffer_head *bh) | |
4821 | +{ | |
4822 | + bh->b_reqnext = *queue; | |
4823 | + *queue = bh; | |
4824 | +} | |
4825 | + | |
4826 | +/* | |
4827 | + * FIXME: inefficient. | |
4828 | + */ | |
4829 | +static void queue_buffers(struct buffer_head **queue, struct buffer_head *bhs) | |
4830 | +{ | |
4831 | + while (*queue) | |
4832 | + queue = &((*queue)->b_reqnext); | |
4833 | + | |
4834 | + *queue = bhs; | |
4835 | +} | |
4836 | + | |
4837 | +/* | |
4838 | + * Flush a list of buffers. | |
4839 | + */ | |
4840 | +static void flush_buffers(struct buffer_head *bh) | |
4841 | +{ | |
4842 | + struct buffer_head *n; | |
4843 | + | |
4844 | + DMDEBUG("begin flush"); | |
4845 | + while (bh) { | |
4846 | + n = bh->b_reqnext; | |
4847 | + bh->b_reqnext = NULL; | |
4848 | + DMDEBUG("flushing %p", bh); | |
4849 | + generic_make_request(WRITE, bh); | |
4850 | + bh = n; | |
4851 | + } | |
4852 | + | |
4853 | + run_task_queue(&tq_disk); | |
4854 | +} | |
4855 | + | |
4856 | +/* | |
4857 | + * Error a list of buffers. | |
4858 | + */ | |
4859 | +static void error_buffers(struct buffer_head *bh) | |
4860 | +{ | |
4861 | + struct buffer_head *n; | |
4862 | + | |
4863 | + while (bh) { | |
4864 | + n = bh->b_reqnext; | |
4865 | + bh->b_reqnext = NULL; | |
4866 | + buffer_IO_error(bh); | |
4867 | + bh = n; | |
4868 | + } | |
4869 | +} | |
4870 | + | |
4871 | +static struct buffer_head *__flush_bhs(struct pending_exception *pe) | |
4872 | +{ | |
4873 | + struct pending_exception *sibling; | |
4874 | + | |
4875 | + if (list_empty(&pe->siblings)) | |
4876 | + return pe->origin_bhs; | |
4877 | + | |
4878 | + sibling = list_entry(pe->siblings.next, | |
4879 | + struct pending_exception, siblings); | |
4880 | + | |
4881 | + list_del(&pe->siblings); | |
4882 | + | |
4883 | + /* FIXME: I think there's a race on SMP machines here, add spin lock */ | |
4884 | + queue_buffers(&sibling->origin_bhs, pe->origin_bhs); | |
4885 | + | |
4886 | + return NULL; | |
4887 | +} | |
4888 | + | |
4889 | +static void pending_complete(struct pending_exception *pe, int success) | |
4890 | +{ | |
4891 | + struct exception *e; | |
4892 | + struct dm_snapshot *s = pe->snap; | |
4893 | + struct buffer_head *flush = NULL; | |
4894 | + | |
4895 | + if (success) { | |
4896 | + e = alloc_exception(); | |
4897 | + if (!e) { | |
4898 | + DMWARN("Unable to allocate exception."); | |
4899 | + down_write(&s->lock); | |
4900 | + s->store.drop_snapshot(&s->store); | |
4901 | + s->valid = 0; | |
4902 | + flush = __flush_bhs(pe); | |
4903 | + up_write(&s->lock); | |
4904 | + | |
4905 | + error_buffers(pe->snapshot_bhs); | |
4906 | + goto out; | |
4907 | + } | |
4908 | + | |
4909 | + /* | |
4910 | + * Add a proper exception, and remove the | |
4911 | + * in-flight exception from the list. | |
4912 | + */ | |
4913 | + down_write(&s->lock); | |
4914 | + | |
4915 | + memcpy(e, &pe->e, sizeof(*e)); | |
4916 | + insert_exception(&s->complete, e); | |
4917 | + remove_exception(&pe->e); | |
4918 | + flush = __flush_bhs(pe); | |
4919 | + | |
4920 | + /* Submit any pending write BHs */ | |
4921 | + up_write(&s->lock); | |
4922 | + | |
4923 | + flush_buffers(pe->snapshot_bhs); | |
4924 | + DMDEBUG("Exception completed successfully."); | |
4925 | + | |
4926 | + /* Notify any interested parties */ | |
4927 | + if (s->store.fraction_full) { | |
4928 | + sector_t numerator, denominator; | |
4929 | + int pc; | |
4930 | + | |
4931 | + s->store.fraction_full(&s->store, &numerator, | |
4932 | + &denominator); | |
4933 | + pc = numerator * 100 / denominator; | |
4934 | + | |
4935 | + if (pc >= s->last_percent + WAKE_UP_PERCENT) { | |
4936 | + dm_table_event(s->table); | |
4937 | + s->last_percent = pc - pc % WAKE_UP_PERCENT; | |
4938 | + } | |
4939 | + } | |
4940 | + | |
4941 | + } else { | |
4942 | + /* Read/write error - snapshot is unusable */ | |
4943 | + down_write(&s->lock); | |
4944 | + if (s->valid) | |
4945 | + DMERR("Error reading/writing snapshot"); | |
4946 | + s->store.drop_snapshot(&s->store); | |
4947 | + s->valid = 0; | |
4948 | + remove_exception(&pe->e); | |
4949 | + flush = __flush_bhs(pe); | |
4950 | + up_write(&s->lock); | |
4951 | + | |
4952 | + error_buffers(pe->snapshot_bhs); | |
4953 | + | |
4954 | + dm_table_event(s->table); | |
4955 | + DMDEBUG("Exception failed."); | |
4956 | + } | |
4957 | + | |
4958 | + out: | |
4959 | + if (flush) | |
4960 | + flush_buffers(flush); | |
4961 | + | |
4962 | + free_pending_exception(pe); | |
4963 | +} | |
4964 | + | |
4965 | +static void commit_callback(void *context, int success) | |
4966 | +{ | |
4967 | + struct pending_exception *pe = (struct pending_exception *) context; | |
4968 | + pending_complete(pe, success); | |
4969 | +} | |
4970 | + | |
4971 | +/* | |
4972 | + * Called when the copy I/O has finished. kcopyd actually runs | |
4973 | + * this code so don't block. | |
4974 | + */ | |
4975 | +static void copy_callback(int read_err, unsigned int write_err, void *context) | |
4976 | +{ | |
4977 | + struct pending_exception *pe = (struct pending_exception *) context; | |
4978 | + struct dm_snapshot *s = pe->snap; | |
4979 | + | |
4980 | + if (read_err || write_err) | |
4981 | + pending_complete(pe, 0); | |
4982 | + | |
4983 | + else | |
4984 | + /* Update the metadata if we are persistent */ | |
4985 | + s->store.commit_exception(&s->store, &pe->e, commit_callback, | |
4986 | + pe); | |
4987 | +} | |
4988 | + | |
4989 | +/* | |
4990 | + * Dispatches the copy operation to kcopyd. | |
4991 | + */ | |
4992 | +static inline void start_copy(struct pending_exception *pe) | |
4993 | +{ | |
4994 | + struct dm_snapshot *s = pe->snap; | |
4995 | + struct io_region src, dest; | |
4996 | + kdev_t dev = s->origin->dev; | |
4997 | + int *sizes = blk_size[major(dev)]; | |
4998 | + sector_t dev_size = (sector_t) -1; | |
4999 | + | |
5000 | + if (pe->started) | |
5001 | + return; | |
5002 | + | |
5003 | + /* this is protected by snap->lock */ | |
5004 | + pe->started = 1; | |
5005 | + | |
5006 | + if (sizes && sizes[minor(dev)]) | |
5007 | + dev_size = sizes[minor(dev)] << 1; | |
5008 | + | |
5009 | + src.dev = dev; | |
5010 | + src.sector = chunk_to_sector(s, pe->e.old_chunk); | |
5011 | + src.count = min(s->chunk_size, dev_size - src.sector); | |
5012 | + | |
5013 | + dest.dev = s->cow->dev; | |
5014 | + dest.sector = chunk_to_sector(s, pe->e.new_chunk); | |
5015 | + dest.count = src.count; | |
5016 | + | |
5017 | + /* Hand over to kcopyd */ | |
5018 | + kcopyd_copy(s->kcopyd_client, | |
5019 | + &src, 1, &dest, 0, copy_callback, pe); | |
5020 | +} | |
5021 | + | |
5022 | +/* | |
5023 | + * Looks to see if this snapshot already has a pending exception | |
5024 | + * for this chunk, otherwise it allocates a new one and inserts | |
5025 | + * it into the pending table. | |
5026 | + */ | |
5027 | +static struct pending_exception *find_pending_exception(struct dm_snapshot *s, | |
5028 | + struct buffer_head *bh) | |
5029 | +{ | |
5030 | + struct exception *e; | |
5031 | + struct pending_exception *pe; | |
5032 | + chunk_t chunk = sector_to_chunk(s, bh->b_rsector); | |
5033 | + | |
5034 | + /* | |
5035 | + * Is there a pending exception for this already ? | |
5036 | + */ | |
5037 | + e = lookup_exception(&s->pending, chunk); | |
5038 | + if (e) { | |
5039 | + /* cast the exception to a pending exception */ | |
5040 | + pe = list_entry(e, struct pending_exception, e); | |
5041 | + | |
5042 | + } else { | |
5043 | + /* Create a new pending exception */ | |
5044 | + pe = alloc_pending_exception(); | |
5045 | + pe->e.old_chunk = chunk; | |
5046 | + pe->origin_bhs = pe->snapshot_bhs = NULL; | |
5047 | + INIT_LIST_HEAD(&pe->siblings); | |
5048 | + pe->snap = s; | |
5049 | + pe->started = 0; | |
5050 | + | |
5051 | + if (s->store.prepare_exception(&s->store, &pe->e)) { | |
5052 | + free_pending_exception(pe); | |
5053 | + s->valid = 0; | |
5054 | + return NULL; | |
5055 | + } | |
5056 | + | |
5057 | + insert_exception(&s->pending, &pe->e); | |
5058 | + } | |
5059 | + | |
5060 | + return pe; | |
5061 | +} | |
5062 | + | |
5063 | +static inline void remap_exception(struct dm_snapshot *s, struct exception *e, | |
5064 | + struct buffer_head *bh) | |
5065 | +{ | |
5066 | + bh->b_rdev = s->cow->dev; | |
5067 | + bh->b_rsector = chunk_to_sector(s, e->new_chunk) + | |
5068 | + (bh->b_rsector & s->chunk_mask); | |
5069 | +} | |
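For illustration with made-up numbers: with 16-sector chunks (chunk_mask = 15), a request for origin sector 37 falls in chunk 2 (37 >> 4); if that chunk's exception lives at new_chunk 5, the buffer is redirected to COW sector 5 * 16 + (37 & 15) = 85, preserving the sector's offset within the chunk.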
5070 | + | |
5071 | +static int snapshot_map(struct dm_target *ti, struct buffer_head *bh, int rw, | |
5072 | + union map_info *map_context) | |
5073 | +{ | |
5074 | + struct exception *e; | |
5075 | + struct dm_snapshot *s = (struct dm_snapshot *) ti->private; | |
5076 | + int r = 1; | |
5077 | + chunk_t chunk; | |
5078 | + struct pending_exception *pe; | |
5079 | + | |
5080 | + chunk = sector_to_chunk(s, bh->b_rsector); | |
5081 | + | |
5082 | + /* Full snapshots are not usable */ | |
5083 | + if (!s->valid) | |
5084 | + return -1; | |
5085 | + | |
5086 | + /* | |
5087 | + * Write to snapshot - higher level takes care of RW/RO | |
5088 | + * flags so we should only get this if we are | |
5089 | + * writeable. | |
5090 | + */ | |
5091 | + if (rw == WRITE) { | |
5092 | + | |
5093 | + down_write(&s->lock); | |
5094 | + | |
5095 | + /* If the block is already remapped - use that, else remap it */ | |
5096 | + e = lookup_exception(&s->complete, chunk); | |
5097 | + if (e) | |
5098 | + remap_exception(s, e, bh); | |
5099 | + | |
5100 | + else { | |
5101 | + pe = find_pending_exception(s, bh); | |
5102 | + | |
5103 | + if (!pe) { | |
5104 | + s->store.drop_snapshot(&s->store); | |
5105 | + s->valid = 0; | |
5106 | + r = -EIO; | |
5107 | + } else { | |
5108 | + remap_exception(s, &pe->e, bh); | |
5109 | + queue_buffer(&pe->snapshot_bhs, bh); | |
5110 | + start_copy(pe); | |
5111 | + r = 0; | |
5112 | + } | |
5113 | + } | |
5114 | + | |
5115 | + up_write(&s->lock); | |
5116 | + | |
5117 | + } else { | |
5118 | + /* | |
5119 | + * FIXME: this read path scares me because we | |
5120 | + * always use the origin when we have a pending | |
5121 | + * exception. However I can't think of a | |
5122 | + * situation where this is wrong - ejt. | |
5123 | + */ | |
5124 | + | |
5125 | + /* Do reads */ | |
5126 | + down_read(&s->lock); | |
5127 | + | |
5128 | + /* See if it has been remapped */ | |
5129 | + e = lookup_exception(&s->complete, chunk); | |
5130 | + if (e) | |
5131 | + remap_exception(s, e, bh); | |
5132 | + else | |
5133 | + bh->b_rdev = s->origin->dev; | |
5134 | + | |
5135 | + up_read(&s->lock); | |
5136 | + } | |
5137 | + | |
5138 | + return r; | |
5139 | +} | |
5140 | + | |
5141 | +void snapshot_resume(struct dm_target *ti) | |
5142 | +{ | |
5143 | + struct dm_snapshot *s = (struct dm_snapshot *) ti->private; | |
5144 | + | |
5145 | + if (s->have_metadata) | |
5146 | + return; | |
5147 | + | |
5148 | + if (s->store.read_metadata(&s->store)) { | |
5149 | + down_write(&s->lock); | |
5150 | + s->valid = 0; | |
5151 | + up_write(&s->lock); | |
5152 | + } | |
5153 | + | |
5154 | + s->have_metadata = 1; | |
5155 | +} | |
5156 | + | |
5157 | +static int snapshot_status(struct dm_target *ti, status_type_t type, | |
5158 | + char *result, unsigned int maxlen) | |
5159 | +{ | |
5160 | + struct dm_snapshot *snap = (struct dm_snapshot *) ti->private; | |
5161 | + char cow[16]; | |
5162 | + char org[16]; | |
5163 | + | |
5164 | + switch (type) { | |
5165 | + case STATUSTYPE_INFO: | |
5166 | + if (!snap->valid) | |
5167 | + snprintf(result, maxlen, "Invalid"); | |
5168 | + else { | |
5169 | + if (snap->store.fraction_full) { | |
5170 | + sector_t numerator, denominator; | |
5171 | + snap->store.fraction_full(&snap->store, | |
5172 | + &numerator, | |
5173 | + &denominator); | |
5174 | + snprintf(result, maxlen, | |
5175 | + SECTOR_FORMAT "/" SECTOR_FORMAT, | |
5176 | + numerator, denominator); | |
5177 | + } | |
5178 | + else | |
5179 | + snprintf(result, maxlen, "Unknown"); | |
5180 | + } | |
5181 | + break; | |
5182 | + | |
5183 | + case STATUSTYPE_TABLE: | |
5184 | + /* | |
5185 | + * kdevname returns a static pointer so we need | |
5186 | + * to make private copies if the output is to | |
5187 | + * make sense. | |
5188 | + */ | |
5189 | + strncpy(cow, dm_kdevname(snap->cow->dev), sizeof(cow)); | |
5190 | + strncpy(org, dm_kdevname(snap->origin->dev), sizeof(org)); | |
5191 | + snprintf(result, maxlen, "%s %s %c %ld", org, cow, | |
5192 | + snap->type, snap->chunk_size); | |
5193 | + break; | |
5194 | + } | |
5195 | + | |
5196 | + return 0; | |
5197 | +} | |
5198 | + | |
5199 | +/*----------------------------------------------------------------- | |
5200 | + * Origin methods | |
5201 | + *---------------------------------------------------------------*/ | |
5202 | +static void list_merge(struct list_head *l1, struct list_head *l2) | |
5203 | +{ | |
5204 | + struct list_head *l1_n, *l2_p; | |
5205 | + | |
5206 | + l1_n = l1->next; | |
5207 | + l2_p = l2->prev; | |
5208 | + | |
5209 | + l1->next = l2; | |
5210 | + l2->prev = l1; | |
5211 | + | |
5212 | + l2_p->next = l1_n; | |
5213 | + l1_n->prev = l2_p; | |
5214 | +} | |
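list_merge() splices two circular list_head rings into a single ring by cross-linking the neighbours of l1 and l2; __origin_write() below uses it to chain the pending exceptions of all snapshots sharing an origin into one siblings ring. A user-space sketch with a minimal clone of the kernel list type (node names are illustrative):

    /* Demonstrates list_merge(): after the call, every node of the
     * second ring is reachable from the first. */
    #include <stdio.h>

    struct list_head { struct list_head *next, *prev; };

    static void init_node(struct list_head *n) { n->next = n->prev = n; }

    static void list_merge(struct list_head *l1, struct list_head *l2)
    {
            struct list_head *l1_n = l1->next, *l2_p = l2->prev;

            l1->next = l2;
            l2->prev = l1;
            l2_p->next = l1_n;
            l1_n->prev = l2_p;
    }

    int main(void)
    {
            struct list_head a, b, c, *p;

            init_node(&a);                  /* ring 1: {a} */
            init_node(&b);
            init_node(&c);
            b.next = &c; c.prev = &b;       /* ring 2: {b, c} */
            c.next = &b; b.prev = &c;

            list_merge(&a, &b);             /* one ring: a -> b -> c -> a */

            for (p = a.next; p != &a; p = p->next)
                    printf("visited %s\n", p == &b ? "b" : "c");
            return 0;
    }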
5215 | + | |
5216 | +static int __origin_write(struct list_head *snapshots, struct buffer_head *bh) | |
5217 | +{ | |
5218 | + int r = 1, first = 1; | |
5219 | + struct list_head *sl; | |
5220 | + struct dm_snapshot *snap; | |
5221 | + struct exception *e; | |
5222 | + struct pending_exception *pe, *last = NULL; | |
5223 | + chunk_t chunk; | |
5224 | + | |
5225 | + /* Do all the snapshots on this origin */ | |
5226 | + list_for_each(sl, snapshots) { | |
5227 | + snap = list_entry(sl, struct dm_snapshot, list); | |
5228 | + | |
5229 | + /* Only deal with valid snapshots */ | |
5230 | + if (!snap->valid) | |
5231 | + continue; | |
5232 | + | |
5233 | + down_write(&snap->lock); | |
5234 | + | |
5235 | + /* | |
5236 | + * Remember, different snapshots can have | |
5237 | + * different chunk sizes. | |
5238 | + */ | |
5239 | + chunk = sector_to_chunk(snap, bh->b_rsector); | |
5240 | + | |
5241 | + /* | |
5242 | + * Check exception table to see if block | |
5243 | + * is already remapped in this snapshot | |
5244 | + * and trigger an exception if not. | |
5245 | + */ | |
5246 | + e = lookup_exception(&snap->complete, chunk); | |
5247 | + if (!e) { | |
5248 | + pe = find_pending_exception(snap, bh); | |
5249 | + if (!pe) { | |
5250 | + snap->store.drop_snapshot(&snap->store); | |
5251 | + snap->valid = 0; | |
5252 | + | |
5253 | + } else { | |
5254 | + if (last) | |
5255 | + list_merge(&pe->siblings, | |
5256 | + &last->siblings); | |
5257 | + | |
5258 | + last = pe; | |
5259 | + r = 0; | |
5260 | + } | |
5261 | + } | |
5262 | + | |
5263 | + up_write(&snap->lock); | |
5264 | + } | |
5265 | + | |
5266 | + /* | |
5267 | + * Now that we have a complete pe list we can start the copying. | |
5268 | + */ | |
5269 | + if (last) { | |
5270 | + pe = last; | |
5271 | + do { | |
5272 | + down_write(&pe->snap->lock); | |
5273 | + if (first) | |
5274 | + queue_buffer(&pe->origin_bhs, bh); | |
5275 | + start_copy(pe); | |
5276 | + up_write(&pe->snap->lock); | |
5277 | + first = 0; | |
5278 | + pe = list_entry(pe->siblings.next, | |
5279 | + struct pending_exception, siblings); | |
5280 | + | |
5281 | + } while (pe != last); | |
5282 | + } | |
5283 | + | |
5284 | + return r; | |
5285 | +} | |
5286 | + | |
5287 | +/* | |
5288 | + * Called on a write from the origin driver. | |
5289 | + */ | |
5290 | +int do_origin(struct dm_dev *origin, struct buffer_head *bh) | |
5291 | +{ | |
5292 | + struct origin *o; | |
5293 | + int r; | |
5294 | + | |
5295 | + down_read(&_origins_lock); | |
5296 | + o = __lookup_origin(origin->dev); | |
5297 | + if (!o) | |
5298 | + BUG(); | |
5299 | + | |
5300 | + r = __origin_write(&o->snapshots, bh); | |
5301 | + up_read(&_origins_lock); | |
5302 | + | |
5303 | + return r; | |
5304 | +} | |
5305 | + | |
5306 | +/* | |
5307 | + * Origin: maps a linear range of a device, with hooks for snapshotting. | |
5308 | + */ | |
5309 | + | |
5310 | +/* | |
5311 | + * Construct an origin mapping: <dev_path> | |
5312 | + * The context for an origin is merely a 'struct dm_dev *' | |
5313 | + * pointing to the real device. | |
5314 | + */ | |
5315 | +static int origin_ctr(struct dm_target *ti, unsigned int argc, char **argv) | |
5316 | +{ | |
5317 | + int r; | |
5318 | + struct dm_dev *dev; | |
5319 | + | |
5320 | + if (argc != 1) { | |
5321 | + ti->error = "dm-origin: incorrect number of arguments"; | |
5322 | + return -EINVAL; | |
5323 | + } | |
5324 | + | |
5325 | + r = dm_get_device(ti, argv[0], 0, ti->len, | |
5326 | + dm_table_get_mode(ti->table), &dev); | |
5327 | + if (r) { | |
5328 | + ti->error = "Cannot get target device"; | |
5329 | + return r; | |
5330 | + } | |
5331 | + | |
5332 | + ti->private = dev; | |
5333 | + return 0; | |
5334 | +} | |
5335 | + | |
5336 | +static void origin_dtr(struct dm_target *ti) | |
5337 | +{ | |
5338 | + struct dm_dev *dev = (struct dm_dev *) ti->private; | |
5339 | + dm_put_device(ti, dev); | |
5340 | +} | |
5341 | + | |
5342 | +static int origin_map(struct dm_target *ti, struct buffer_head *bh, int rw, | |
5343 | + union map_info *map_context) | |
5344 | +{ | |
5345 | + struct dm_dev *dev = (struct dm_dev *) ti->private; | |
5346 | + bh->b_rdev = dev->dev; | |
5347 | + | |
5348 | + /* Only tell snapshots if this is a write */ | |
5349 | + return (rw == WRITE) ? do_origin(dev, bh) : 1; | |
5350 | +} | |
5351 | + | |
5352 | +static int origin_status(struct dm_target *ti, status_type_t type, char *result, | |
5353 | + unsigned int maxlen) | |
5354 | +{ | |
5355 | + struct dm_dev *dev = (struct dm_dev *) ti->private; | |
5356 | + | |
5357 | + switch (type) { | |
5358 | + case STATUSTYPE_INFO: | |
5359 | + result[0] = '\0'; | |
5360 | + break; | |
5361 | + | |
5362 | + case STATUSTYPE_TABLE: | |
5363 | + snprintf(result, maxlen, "%s", dm_kdevname(dev->dev)); | |
5364 | + break; | |
5365 | + } | |
5366 | + | |
5367 | + return 0; | |
5368 | +} | |
5369 | + | |
5370 | +static struct target_type origin_target = { | |
5371 | + name: "snapshot-origin", | |
5372 | + module: THIS_MODULE, | |
5373 | + ctr: origin_ctr, | |
5374 | + dtr: origin_dtr, | |
5375 | + map: origin_map, | |
5376 | + status: origin_status, | |
5377 | +}; | |
5378 | + | |
5379 | +static struct target_type snapshot_target = { | |
5380 | + name: "snapshot", | |
5381 | + module: THIS_MODULE, | |
5382 | + ctr: snapshot_ctr, | |
5383 | + dtr: snapshot_dtr, | |
5384 | + map: snapshot_map, | |
5385 | + resume: snapshot_resume, | |
5386 | + status: snapshot_status, | |
5387 | +}; | |
5388 | + | |
5389 | +int __init dm_snapshot_init(void) | |
5390 | +{ | |
5391 | + int r; | |
5392 | + | |
5393 | + r = dm_register_target(&snapshot_target); | |
5394 | + if (r) { | |
5395 | + DMERR("snapshot target register failed %d", r); | |
5396 | + return r; | |
5397 | + } | |
5398 | + | |
5399 | + r = dm_register_target(&origin_target); | |
5400 | + if (r < 0) { | |
5401 | + DMERR("origin target register failed %d", r); | |
5402 | + goto bad1; | |
5403 | + } | |
5404 | + | |
5405 | + r = init_origin_hash(); | |
5406 | + if (r) { | |
5407 | + DMERR("init_origin_hash failed."); | |
5408 | + goto bad2; | |
5409 | + } | |
5410 | + | |
5411 | + exception_cache = kmem_cache_create("dm-snapshot-ex", | |
5412 | + sizeof(struct exception), | |
5413 | + __alignof__(struct exception), | |
5414 | + 0, NULL, NULL); | |
5415 | + if (!exception_cache) { | |
5416 | + DMERR("Couldn't create exception cache."); | |
5417 | + r = -ENOMEM; | |
5418 | + goto bad3; | |
5419 | + } | |
5420 | + | |
5421 | + pending_cache = | |
5422 | + kmem_cache_create("dm-snapshot-in", | |
5423 | + sizeof(struct pending_exception), | |
5424 | + __alignof__(struct pending_exception), | |
5425 | + 0, NULL, NULL); | |
5426 | + if (!pending_cache) { | |
5427 | + DMERR("Couldn't create pending cache."); | |
5428 | + r = -ENOMEM; | |
5429 | + goto bad4; | |
5430 | + } | |
5431 | + | |
5432 | + pending_pool = mempool_create(128, mempool_alloc_slab, | |
5433 | + mempool_free_slab, pending_cache); | |
5434 | + if (!pending_pool) { | |
5435 | + DMERR("Couldn't create pending pool."); | |
5436 | + r = -ENOMEM; | |
5437 | + goto bad5; | |
5438 | + } | |
5439 | + | |
5440 | + return 0; | |
5441 | + | |
5442 | + bad5: | |
5443 | + kmem_cache_destroy(pending_cache); | |
5444 | + bad4: | |
5445 | + kmem_cache_destroy(exception_cache); | |
5446 | + bad3: | |
5447 | + exit_origin_hash(); | |
5448 | + bad2: | |
5449 | + dm_unregister_target(&origin_target); | |
5450 | + bad1: | |
5451 | + dm_unregister_target(&snapshot_target); | |
5452 | + return r; | |
5453 | +} | |
5454 | + | |
5455 | +void dm_snapshot_exit(void) | |
5456 | +{ | |
5457 | + int r; | |
5458 | + | |
5459 | + r = dm_unregister_target(&snapshot_target); | |
5460 | + if (r) | |
5461 | + DMERR("snapshot unregister failed %d", r); | |
5462 | + | |
5463 | + r = dm_unregister_target(&origin_target); | |
5464 | + if (r) | |
5465 | + DMERR("origin unregister failed %d", r); | |
5466 | + | |
5467 | + exit_origin_hash(); | |
5468 | + mempool_destroy(pending_pool); | |
5469 | + kmem_cache_destroy(pending_cache); | |
5470 | + kmem_cache_destroy(exception_cache); | |
5471 | +} | |
5472 | diff -urN linux-2.4.24.org/drivers/md/dm-snapshot.h linux-2.4.24/drivers/md/dm-snapshot.h | |
5473 | --- linux-2.4.24.org/drivers/md/dm-snapshot.h 1970-01-01 01:00:00.000000000 +0100 | |
5474 | +++ linux-2.4.24/drivers/md/dm-snapshot.h 2004-01-18 15:01:29.250465221 +0100 | |
5475 | @@ -0,0 +1,158 @@ | |
5476 | +/* | |
5477 | + * dm-snapshot.c | |
5478 | + * | |
5479 | + * Copyright (C) 2001-2002 Sistina Software (UK) Limited. | |
5480 | + * | |
5481 | + * This file is released under the GPL. | |
5482 | + */ | |
5483 | + | |
5484 | +#ifndef DM_SNAPSHOT_H | |
5485 | +#define DM_SNAPSHOT_H | |
5486 | + | |
5487 | +#include "dm.h" | |
5488 | +#include <linux/blkdev.h> | |
5489 | + | |
5490 | +struct exception_table { | |
5491 | + uint32_t hash_mask; | |
5492 | + struct list_head *table; | |
5493 | +}; | |
5494 | + | |
5495 | +/* | |
5496 | + * The snapshot code deals with largish chunks of the disk at a | |
5497 | + * time. Typically 64k - 256k. | |
5498 | + */ | |
5499 | +/* FIXME: can we get away with limiting these to a uint32_t ? */ | |
5500 | +typedef sector_t chunk_t; | |
5501 | + | |
5502 | +/* | |
5503 | + * An exception is used where an old chunk of data has been | |
5504 | + * replaced by a new one. | |
5505 | + */ | |
5506 | +struct exception { | |
5507 | + struct list_head hash_list; | |
5508 | + | |
5509 | + chunk_t old_chunk; | |
5510 | + chunk_t new_chunk; | |
5511 | +}; | |
5512 | + | |
5513 | +/* | |
5514 | + * Abstraction to handle the metadata layout of exception stores | |
5515 | + * (the COW device). | |
5516 | + */ | |
5517 | +struct exception_store { | |
5518 | + | |
5519 | + /* | |
5520 | + * Destroys this object when you've finished with it. | |
5521 | + */ | |
5522 | + void (*destroy) (struct exception_store *store); | |
5523 | + | |
5524 | + /* | |
5525 | + * The target shouldn't read the COW device until this is | |
5526 | + * called. | |
5527 | + */ | |
5528 | + int (*read_metadata) (struct exception_store *store); | |
5529 | + | |
5530 | + /* | |
5531 | + * Find somewhere to store the next exception. | |
5532 | + */ | |
5533 | + int (*prepare_exception) (struct exception_store *store, | |
5534 | + struct exception *e); | |
5535 | + | |
5536 | + /* | |
5537 | + * Update the metadata with this exception. | |
5538 | + */ | |
5539 | + void (*commit_exception) (struct exception_store *store, | |
5540 | + struct exception *e, | |
5541 | + void (*callback) (void *, int success), | |
5542 | + void *callback_context); | |
5543 | + | |
5544 | + /* | |
5545 | + * The snapshot is invalid, note this in the metadata. | |
5546 | + */ | |
5547 | + void (*drop_snapshot) (struct exception_store *store); | |
5548 | + | |
5549 | + /* | |
5550 | + * Return how full the snapshot is. | |
5551 | + */ | |
5552 | + void (*fraction_full) (struct exception_store *store, | |
5553 | + sector_t *numerator, | |
5554 | + sector_t *denominator); | |
5555 | + | |
5556 | + struct dm_snapshot *snap; | |
5557 | + void *context; | |
5558 | +}; | |
5559 | + | |
5560 | +struct dm_snapshot { | |
5561 | + struct rw_semaphore lock; | |
5562 | + struct dm_table *table; | |
5563 | + | |
5564 | + struct dm_dev *origin; | |
5565 | + struct dm_dev *cow; | |
5566 | + | |
5567 | + /* List of snapshots per Origin */ | |
5568 | + struct list_head list; | |
5569 | + | |
5570 | + /* Size of data blocks saved - must be a power of 2 */ | |
5571 | + chunk_t chunk_size; | |
5572 | + chunk_t chunk_mask; | |
5573 | + chunk_t chunk_shift; | |
5574 | + | |
5575 | + /* You can't use a snapshot if this is 0 (e.g. if full) */ | |
5576 | + int valid; | |
5577 | + int have_metadata; | |
5578 | + | |
5579 | + /* Used for display of table */ | |
5580 | + char type; | |
5581 | + | |
5582 | + /* The last percentage we notified */ | |
5583 | + int last_percent; | |
5584 | + | |
5585 | + struct exception_table pending; | |
5586 | + struct exception_table complete; | |
5587 | + | |
5588 | + /* The on disk metadata handler */ | |
5589 | + struct exception_store store; | |
5590 | + | |
5591 | + struct kcopyd_client *kcopyd_client; | |
5592 | +}; | |
5593 | + | |
5594 | +/* | |
5595 | + * Used by the exception stores to load exceptions when | |
5596 | + * initialising. | |
5597 | + */ | |
5598 | +int dm_add_exception(struct dm_snapshot *s, chunk_t old, chunk_t new); | |
5599 | + | |
5600 | +/* | |
5601 | + * Constructor and destructor for the default persistent | |
5602 | + * store. | |
5603 | + */ | |
5604 | +int dm_create_persistent(struct exception_store *store, uint32_t chunk_size); | |
5605 | + | |
5606 | +int dm_create_transient(struct exception_store *store, | |
5607 | + struct dm_snapshot *s, int blocksize); | |
5608 | + | |
5609 | +/* | |
5610 | + * Return the number of sectors in the device. | |
5611 | + */ | |
5612 | +static inline sector_t get_dev_size(kdev_t dev) | |
5613 | +{ | |
5614 | + int *sizes; | |
5615 | + | |
5616 | + sizes = blk_size[MAJOR(dev)]; | |
5617 | + if (sizes) | |
5618 | + return sizes[MINOR(dev)] << 1; | |
5619 | + | |
5620 | + return 0; | |
5621 | +} | |
5622 | + | |
5623 | +static inline chunk_t sector_to_chunk(struct dm_snapshot *s, sector_t sector) | |
5624 | +{ | |
5625 | + return (sector & ~s->chunk_mask) >> s->chunk_shift; | |
5626 | +} | |
5627 | + | |
5628 | +static inline sector_t chunk_to_sector(struct dm_snapshot *s, chunk_t chunk) | |
5629 | +{ | |
5630 | + return chunk << s->chunk_shift; | |
5631 | +} | |
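Since chunk_size is a power of two, sector_to_chunk() reduces to a right shift (the mask only clears bits the shift discards anyway), and chunk_to_sector() returns the first sector of a chunk. A quick user-space check with an assumed chunk size of 8 sectors (chunk_shift 3, chunk_mask 7):

    /* User-space check of the helpers above; values are illustrative. */
    #include <stdio.h>

    int main(void)
    {
            unsigned long long chunk_shift = 3, chunk_mask = 7, sector = 21;

            /* masking the low bits first is redundant but harmless: 21 -> chunk 2 */
            unsigned long long chunk = (sector & ~chunk_mask) >> chunk_shift;

            /* chunk_to_sector() gives the first sector of the chunk: 2 -> 16 */
            printf("sector %llu -> chunk %llu -> base sector %llu\n",
                   sector, chunk, chunk << chunk_shift);
            return 0;
    }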
5632 | + | |
5633 | +#endif | |
5634 | diff -urN linux-2.4.24.org/drivers/md/dm-stripe.c linux-2.4.24/drivers/md/dm-stripe.c | |
5635 | --- linux-2.4.24.org/drivers/md/dm-stripe.c 1970-01-01 01:00:00.000000000 +0100 | |
5636 | +++ linux-2.4.24/drivers/md/dm-stripe.c 2004-01-18 15:01:13.781711369 +0100 | |
5637 | @@ -0,0 +1,258 @@ | |
5638 | +/* | |
5639 | + * Copyright (C) 2001 Sistina Software (UK) Limited. | |
5640 | + * | |
5641 | + * This file is released under the GPL. | |
5642 | + */ | |
5643 | + | |
5644 | +#include "dm.h" | |
5645 | + | |
5646 | +#include <linux/module.h> | |
5647 | +#include <linux/init.h> | |
5648 | +#include <linux/blkdev.h> | |
5649 | +#include <linux/slab.h> | |
5650 | + | |
5651 | +struct stripe { | |
5652 | + struct dm_dev *dev; | |
5653 | + sector_t physical_start; | |
5654 | +}; | |
5655 | + | |
5656 | +struct stripe_c { | |
5657 | + uint32_t stripes; | |
5658 | + | |
5659 | + /* The size of this target / num. stripes */ | |
5660 | + uint32_t stripe_width; | |
5661 | + | |
5662 | + /* stripe chunk size */ | |
5663 | + uint32_t chunk_shift; | |
5664 | + sector_t chunk_mask; | |
5665 | + | |
5666 | + struct stripe stripe[0]; | |
5667 | +}; | |
5668 | + | |
5669 | +static inline struct stripe_c *alloc_context(unsigned int stripes) | |
5670 | +{ | |
5671 | + size_t len; | |
5672 | + | |
5673 | + if (array_too_big(sizeof(struct stripe_c), sizeof(struct stripe), | |
5674 | + stripes)) | |
5675 | + return NULL; | |
5676 | + | |
5677 | + len = sizeof(struct stripe_c) + (sizeof(struct stripe) * stripes); | |
5678 | + | |
5679 | + return kmalloc(len, GFP_KERNEL); | |
5680 | +} | |
5681 | + | |
5682 | +/* | |
5683 | + * Parse a single <dev> <sector> pair | |
5684 | + */ | |
5685 | +static int get_stripe(struct dm_target *ti, struct stripe_c *sc, | |
5686 | + unsigned int stripe, char **argv) | |
5687 | +{ | |
5688 | + sector_t start; | |
5689 | + | |
5690 | + if (sscanf(argv[1], SECTOR_FORMAT, &start) != 1) | |
5691 | + return -EINVAL; | |
5692 | + | |
5693 | + if (dm_get_device(ti, argv[0], start, sc->stripe_width, | |
5694 | + dm_table_get_mode(ti->table), | |
5695 | + &sc->stripe[stripe].dev)) | |
5696 | + return -ENXIO; | |
5697 | + | |
5698 | + sc->stripe[stripe].physical_start = start; | |
5699 | + return 0; | |
5700 | +} | |
5701 | + | |
5702 | +/* | |
5703 | + * FIXME: Nasty function, only present because we can't link | |
5704 | + * against __moddi3 and __divdi3. | |
5705 | + * | |
5706 | + * returns a == b * n | |
5707 | + */ | |
5708 | +static int multiple(sector_t a, sector_t b, sector_t *n) | |
5709 | +{ | |
5710 | + sector_t acc, prev, i; | |
5711 | + | |
5712 | + *n = 0; | |
5713 | + while (a >= b) { | |
5714 | + for (acc = b, prev = 0, i = 1; | |
5715 | + acc <= a; | |
5716 | + prev = acc, acc <<= 1, i <<= 1) | |
5717 | + ; | |
5718 | + | |
5719 | + a -= prev; | |
5720 | + *n += i >> 1; | |
5721 | + } | |
5722 | + | |
5723 | + return a == 0; | |
5724 | +} | |
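multiple() divides without emitting a libgcc __divdi3 call: each outer pass subtracts the largest power-of-two multiple of b that still fits in a, and accumulates the corresponding quotient bits in *n. A user-space restatement with illustrative inputs:

    /* User-space check of multiple(): 64-bit division done with shifts
     * and subtraction only. */
    #include <stdio.h>

    typedef unsigned long long sector_t;

    static int multiple(sector_t a, sector_t b, sector_t *n)
    {
            sector_t acc, prev, i;

            *n = 0;
            while (a >= b) {
                    /* find the largest b << k that still fits in a */
                    for (acc = b, prev = 0, i = 1; acc <= a;
                         prev = acc, acc <<= 1, i <<= 1)
                            ;
                    a -= prev;
                    *n += i >> 1;
            }
            return a == 0;          /* true only if b divides a exactly */
    }

    int main(void)
    {
            sector_t n;

            printf("%d n=%llu\n", multiple(1024, 4, &n), n);  /* 1 n=256 */
            printf("%d n=%llu\n", multiple(1000, 24, &n), n); /* 0 n=41  */
            return 0;
    }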
5725 | + | |
5726 | +/* | |
5727 | + * Construct a striped mapping. | |
5728 | + * <number of stripes> <chunk size (2^n)> [<dev_path> <offset>]+ | |
5729 | + */ | |
5730 | +static int stripe_ctr(struct dm_target *ti, unsigned int argc, char **argv) | |
5731 | +{ | |
5732 | + struct stripe_c *sc; | |
5733 | + sector_t width; | |
5734 | + uint32_t stripes; | |
5735 | + uint32_t chunk_size; | |
5736 | + char *end; | |
5737 | + int r; | |
5738 | + unsigned int i; | |
5739 | + | |
5740 | + if (argc < 2) { | |
5741 | + ti->error = "dm-stripe: Not enough arguments"; | |
5742 | + return -EINVAL; | |
5743 | + } | |
5744 | + | |
5745 | + stripes = simple_strtoul(argv[0], &end, 10); | |
5746 | + if (*end) { | |
5747 | + ti->error = "dm-stripe: Invalid stripe count"; | |
5748 | + return -EINVAL; | |
5749 | + } | |
5750 | + | |
5751 | + chunk_size = simple_strtoul(argv[1], &end, 10); | |
5752 | + if (*end) { | |
5753 | + ti->error = "dm-stripe: Invalid chunk_size"; | |
5754 | + return -EINVAL; | |
5755 | + } | |
5756 | + | |
5757 | + /* | |
5758 | + * chunk_size is a power of two | |
5759 | + */ | |
5760 | + if (!chunk_size || (chunk_size & (chunk_size - 1))) { | |
5761 | + ti->error = "dm-stripe: Invalid chunk size"; | |
5762 | + return -EINVAL; | |
5763 | + } | |
5764 | + | |
5765 | + if (!multiple(ti->len, stripes, &width)) { | |
5766 | + ti->error = "dm-stripe: Target length not divisible by " | |
5767 | + "number of stripes"; | |
5768 | + return -EINVAL; | |
5769 | + } | |
5770 | + | |
5771 | + /* | |
5772 | + * Do we have enough arguments for that many stripes ? | |
5773 | + */ | |
5774 | + if (argc != (2 + 2 * stripes)) { | |
5775 | + ti->error = "dm-stripe: Not enough destinations specified"; | |
5776 | + return -EINVAL; | |
5777 | + } | |
5778 | + | |
5779 | + sc = alloc_context(stripes); | |
5780 | + if (!sc) { | |
5781 | + ti->error = "dm-stripe: Memory allocation for striped context " | |
5782 | + "failed"; | |
5783 | + return -ENOMEM; | |
5784 | + } | |
5785 | + | |
5786 | + sc->stripes = stripes; | |
5787 | + sc->stripe_width = width; | |
5788 | + | |
5789 | + sc->chunk_mask = ((sector_t) chunk_size) - 1; | |
5790 | + for (sc->chunk_shift = 0; chunk_size; sc->chunk_shift++) | |
5791 | + chunk_size >>= 1; | |
5792 | + sc->chunk_shift--; | |
5793 | + | |
5794 | + /* | |
5795 | + * Get the stripe destinations. | |
5796 | + */ | |
5797 | + for (i = 0; i < stripes; i++) { | |
5798 | + argv += 2; | |
5799 | + | |
5800 | + r = get_stripe(ti, sc, i, argv); | |
5801 | + if (r < 0) { | |
5802 | + ti->error = "dm-stripe: Couldn't parse stripe " | |
5803 | + "destination"; | |
5804 | + while (i--) | |
5805 | + dm_put_device(ti, sc->stripe[i].dev); | |
5806 | + kfree(sc); | |
5807 | + return r; | |
5808 | + } | |
5809 | + } | |
5810 | + | |
5811 | + ti->private = sc; | |
5812 | + return 0; | |
5813 | +} | |
5814 | + | |
5815 | +static void stripe_dtr(struct dm_target *ti) | |
5816 | +{ | |
5817 | + unsigned int i; | |
5818 | + struct stripe_c *sc = (struct stripe_c *) ti->private; | |
5819 | + | |
5820 | + for (i = 0; i < sc->stripes; i++) | |
5821 | + dm_put_device(ti, sc->stripe[i].dev); | |
5822 | + | |
5823 | + kfree(sc); | |
5824 | +} | |
5825 | + | |
5826 | +static int stripe_map(struct dm_target *ti, struct buffer_head *bh, int rw, | |
5827 | + union map_info *context) | |
5828 | +{ | |
5829 | + struct stripe_c *sc = (struct stripe_c *) ti->private; | |
5830 | + | |
5831 | + sector_t offset = bh->b_rsector - ti->begin; | |
5832 | + uint32_t chunk = (uint32_t) (offset >> sc->chunk_shift); | |
5833 | + uint32_t stripe = chunk % sc->stripes; /* 32bit modulus */ | |
5834 | + chunk = chunk / sc->stripes; | |
5835 | + | |
5836 | + bh->b_rdev = sc->stripe[stripe].dev->dev; | |
5837 | + bh->b_rsector = sc->stripe[stripe].physical_start + | |
5838 | + (chunk << sc->chunk_shift) + (offset & sc->chunk_mask); | |
5839 | + return 1; | |
5840 | +} | |
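stripe_map() turns the target-relative offset into a chunk number, deals chunks round-robin across the stripe devices (stripe = chunk % stripes), and rebases the surviving chunk index onto the chosen device. A worked example in user-space C, assuming 3 stripes and an 8-sector chunk (shift 3, mask 7):

    /* Sketch of the stripe_map() address arithmetic; all values are
     * illustrative. */
    #include <stdio.h>

    int main(void)
    {
            unsigned stripes = 3, chunk_shift = 3;
            unsigned long long chunk_mask = 7, offset = 50;

            unsigned long long chunk = offset >> chunk_shift;   /* 6th chunk */
            unsigned stripe = chunk % stripes;                  /* device 0 */
            chunk /= stripes;                                   /* 2nd chunk there */

            /* sector on that stripe device, relative to its physical_start */
            printf("offset %llu -> stripe %u, device sector %llu\n",
                   offset, stripe,
                   (chunk << chunk_shift) + (offset & chunk_mask)); /* 18 */
            return 0;
    }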
5841 | + | |
5842 | +static int stripe_status(struct dm_target *ti, status_type_t type, | |
5843 | + char *result, unsigned int maxlen) | |
5844 | +{ | |
5845 | + struct stripe_c *sc = (struct stripe_c *) ti->private; | |
5846 | + int offset; | |
5847 | + unsigned int i; | |
5848 | + | |
5849 | + switch (type) { | |
5850 | + case STATUSTYPE_INFO: | |
5851 | + result[0] = '\0'; | |
5852 | + break; | |
5853 | + | |
5854 | + case STATUSTYPE_TABLE: | |
5855 | + offset = snprintf(result, maxlen, "%d " SECTOR_FORMAT, | |
5856 | + sc->stripes, sc->chunk_mask + 1); | |
5857 | + for (i = 0; i < sc->stripes; i++) { | |
5858 | + offset += | |
5859 | + snprintf(result + offset, maxlen - offset, | |
5860 | + " %s " SECTOR_FORMAT, | |
5861 | + dm_kdevname(to_kdev_t(sc->stripe[i].dev->bdev->bd_dev)), | |
5862 | + sc->stripe[i].physical_start); | |
5863 | + } | |
5864 | + break; | |
5865 | + } | |
5866 | + return 0; | |
5867 | +} | |
5868 | + | |
5869 | +static struct target_type stripe_target = { | |
5870 | + .name = "striped", | |
5871 | + .module = THIS_MODULE, | |
5872 | + .ctr = stripe_ctr, | |
5873 | + .dtr = stripe_dtr, | |
5874 | + .map = stripe_map, | |
5875 | + .status = stripe_status, | |
5876 | +}; | |
5877 | + | |
5878 | +int __init dm_stripe_init(void) | |
5879 | +{ | |
5880 | + int r; | |
5881 | + | |
5882 | + r = dm_register_target(&stripe_target); | |
5883 | + if (r < 0) | |
5884 | + DMWARN("striped target registration failed"); | |
5885 | + | |
5886 | + return r; | |
5887 | +} | |
5888 | + | |
5889 | +void dm_stripe_exit(void) | |
5890 | +{ | |
5891 | + if (dm_unregister_target(&stripe_target)) | |
5892 | + DMWARN("striped target unregistration failed"); | |
5893 | + | |
5894 | + return; | |
5895 | +} | |
5896 | diff -urN linux-2.4.24.org/drivers/md/dm-table.c linux-2.4.24/drivers/md/dm-table.c | |
5897 | --- linux-2.4.24.org/drivers/md/dm-table.c 1970-01-01 01:00:00.000000000 +0100 | |
5898 | +++ linux-2.4.24/drivers/md/dm-table.c 2004-01-18 15:01:13.786710320 +0100 | |
5899 | @@ -0,0 +1,696 @@ | |
5900 | +/* | |
5901 | + * Copyright (C) 2001 Sistina Software (UK) Limited. | |
5902 | + * | |
5903 | + * This file is released under the GPL. | |
5904 | + */ | |
5905 | + | |
5906 | +#include "dm.h" | |
5907 | + | |
5908 | +#include <linux/module.h> | |
5909 | +#include <linux/vmalloc.h> | |
5910 | +#include <linux/blkdev.h> | |
5911 | +#include <linux/ctype.h> | |
5912 | +#include <linux/slab.h> | |
5913 | +#include <asm/atomic.h> | |
5914 | + | |
5915 | +#define MAX_DEPTH 16 | |
5916 | +#define NODE_SIZE L1_CACHE_BYTES | |
5917 | +#define KEYS_PER_NODE (NODE_SIZE / sizeof(sector_t)) | |
5918 | +#define CHILDREN_PER_NODE (KEYS_PER_NODE + 1) | |
5919 | + | |
5920 | +struct dm_table { | |
5921 | + atomic_t holders; | |
5922 | + | |
5923 | + /* btree table */ | |
5924 | + unsigned int depth; | |
5925 | + unsigned int counts[MAX_DEPTH]; /* in nodes */ | |
5926 | + sector_t *index[MAX_DEPTH]; | |
5927 | + | |
5928 | + unsigned int num_targets; | |
5929 | + unsigned int num_allocated; | |
5930 | + sector_t *highs; | |
5931 | + struct dm_target *targets; | |
5932 | + | |
5933 | + /* | |
5934 | + * Indicates the rw permissions for the new logical | |
5935 | + * device. This should be a combination of FMODE_READ | |
5936 | + * and FMODE_WRITE. | |
5937 | + */ | |
5938 | + int mode; | |
5939 | + | |
5940 | + /* a list of devices used by this table */ | |
5941 | + struct list_head devices; | |
5942 | + | |
5943 | + /* events get handed up using this callback */ | |
5944 | + void (*event_fn)(void *); | |
5945 | + void *event_context; | |
5946 | +}; | |
5947 | + | |
5948 | +/* | |
5949 | + * Similar to ceiling(log_base(n)) | |
5950 | + */ | |
5951 | +static unsigned int int_log(unsigned long n, unsigned long base) | |
5952 | +{ | |
5953 | + int result = 0; | |
5954 | + | |
5955 | + while (n > 1) { | |
5956 | + n = dm_div_up(n, base); | |
5957 | + result++; | |
5958 | + } | |
5959 | + | |
5960 | + return result; | |
5961 | +} | |
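int_log(n, base) is ceil(log_base(n)) computed by repeated round-up division; dm_table_complete() below uses it to size the btree depth as 1 + int_log(leaf_nodes, CHILDREN_PER_NODE). A quick user-space check (div_up stands in for the kernel's dm_div_up):

    /* User-space check of int_log(). */
    #include <stdio.h>

    #define div_up(n, d) (((n) + (d) - 1) / (d))

    static unsigned int int_log(unsigned long n, unsigned long base)
    {
            unsigned int result = 0;

            while (n > 1) {
                    n = div_up(n, base);
                    result++;
            }
            return result;
    }

    int main(void)
    {
            /* 1000 -> 100 -> 10 -> 1: three divisions, so depth 3 */
            printf("%u %u\n", int_log(1000, 10), int_log(1, 10)); /* 3 0 */
            return 0;
    }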
5962 | + | |
5963 | +/* | |
5964 | + * Calculate the index of the child node for the n'th node's k'th key. | |
5965 | + */ | |
5966 | +static inline unsigned int get_child(unsigned int n, unsigned int k) | |
5967 | +{ | |
5968 | + return (n * CHILDREN_PER_NODE) + k; | |
5969 | +} | |
5970 | + | |
5971 | +/* | |
5972 | + * Return the n'th node of level l from table t. | |
5973 | + */ | |
5974 | +static inline sector_t *get_node(struct dm_table *t, unsigned int l, | |
5975 | + unsigned int n) | |
5976 | +{ | |
5977 | + return t->index[l] + (n * KEYS_PER_NODE); | |
5978 | +} | |
5979 | + | |
5980 | +/* | |
5981 | + * Return the highest key that you could look up from the n'th | |
5982 | + * node on level l of the btree. | |
5983 | + */ | |
5984 | +static sector_t high(struct dm_table *t, unsigned int l, unsigned int n) | |
5985 | +{ | |
5986 | + for (; l < t->depth - 1; l++) | |
5987 | + n = get_child(n, CHILDREN_PER_NODE - 1); | |
5988 | + | |
5989 | + if (n >= t->counts[l]) | |
5990 | + return (sector_t) - 1; | |
5991 | + | |
5992 | + return get_node(t, l, n)[KEYS_PER_NODE - 1]; | |
5993 | +} | |
5994 | + | |
5995 | +/* | |
5996 | + * Fills in a level of the btree based on the highs of the level | |
5997 | + * below it. | |
5998 | + */ | |
5999 | +static int setup_btree_index(unsigned int l, struct dm_table *t) | |
6000 | +{ | |
6001 | + unsigned int n, k; | |
6002 | + sector_t *node; | |
6003 | + | |
6004 | + for (n = 0U; n < t->counts[l]; n++) { | |
6005 | + node = get_node(t, l, n); | |
6006 | + | |
6007 | + for (k = 0U; k < KEYS_PER_NODE; k++) | |
6008 | + node[k] = high(t, l + 1, get_child(n, k)); | |
6009 | + } | |
6010 | + | |
6011 | + return 0; | |
6012 | +} | |
6013 | + | |
6014 | +void *dm_vcalloc(unsigned long nmemb, unsigned long elem_size) | |
6015 | +{ | |
6016 | + unsigned long size; | |
6017 | + void *addr; | |
6018 | + | |
6019 | + /* | |
6020 | + * Check that we're not going to overflow. | |
6021 | + */ | |
6022 | + if (nmemb > (ULONG_MAX / elem_size)) | |
6023 | + return NULL; | |
6024 | + | |
6025 | + size = nmemb * elem_size; | |
6026 | + addr = vmalloc(size); | |
6027 | + if (addr) | |
6028 | + memset(addr, 0, size); | |
6029 | + | |
6030 | + return addr; | |
6031 | +} | |
6032 | + | |
6033 | +int dm_table_create(struct dm_table **result, int mode, unsigned num_targets) | |
6034 | +{ | |
6035 | + struct dm_table *t = kmalloc(sizeof(*t), GFP_KERNEL); | |
6036 | + | |
6037 | + if (!t) | |
6038 | + return -ENOMEM; | |
6039 | + | |
6040 | + memset(t, 0, sizeof(*t)); | |
6041 | + INIT_LIST_HEAD(&t->devices); | |
6042 | + atomic_set(&t->holders, 1); | |
6043 | + | |
6044 | + num_targets = dm_round_up(num_targets, KEYS_PER_NODE); | |
6045 | + | |
6046 | + /* Allocate both the target array and offset array at once. */ | |
6047 | + t->highs = (sector_t *) dm_vcalloc(sizeof(struct dm_target) + | |
6048 | + sizeof(sector_t), num_targets); | |
6049 | + if (!t->highs) { | |
6050 | + kfree(t); | |
6051 | + return -ENOMEM; | |
6052 | + } | |
6053 | + | |
6054 | + memset(t->highs, -1, sizeof(*t->highs) * num_targets); | |
6055 | + | |
6056 | + t->targets = (struct dm_target *) (t->highs + num_targets); | |
6057 | + t->num_allocated = num_targets; | |
6058 | + t->mode = mode; | |
6059 | + *result = t; | |
6060 | + return 0; | |
6061 | +} | |
6062 | + | |
6063 | +static void free_devices(struct list_head *devices) | |
6064 | +{ | |
6065 | + struct list_head *tmp, *next; | |
6066 | + | |
6067 | + for (tmp = devices->next; tmp != devices; tmp = next) { | |
6068 | + struct dm_dev *dd = list_entry(tmp, struct dm_dev, list); | |
6069 | + next = tmp->next; | |
6070 | + kfree(dd); | |
6071 | + } | |
6072 | +} | |
6073 | + | |
6074 | +void table_destroy(struct dm_table *t) | |
6075 | +{ | |
6076 | + unsigned int i; | |
6077 | + | |
6078 | + /* free the indexes (see dm_table_complete) */ | |
6079 | + if (t->depth >= 2) | |
6080 | + vfree(t->index[t->depth - 2]); | |
6081 | + | |
6082 | + /* free the targets */ | |
6083 | + for (i = 0; i < t->num_targets; i++) { | |
6084 | + struct dm_target *tgt = t->targets + i; | |
6085 | + | |
6086 | + if (tgt->type->dtr) | |
6087 | + tgt->type->dtr(tgt); | |
6088 | + | |
6089 | + dm_put_target_type(tgt->type); | |
6090 | + } | |
6091 | + | |
6092 | + vfree(t->highs); | |
6093 | + | |
6094 | + /* free the device list */ | |
6095 | + if (t->devices.next != &t->devices) { | |
6096 | + DMWARN("devices still present during destroy: " | |
6097 | + "dm_table_remove_device calls missing"); | |
6098 | + | |
6099 | + free_devices(&t->devices); | |
6100 | + } | |
6101 | + | |
6102 | + kfree(t); | |
6103 | +} | |
6104 | + | |
6105 | +void dm_table_get(struct dm_table *t) | |
6106 | +{ | |
6107 | + atomic_inc(&t->holders); | |
6108 | +} | |
6109 | + | |
6110 | +void dm_table_put(struct dm_table *t) | |
6111 | +{ | |
6112 | + if (atomic_dec_and_test(&t->holders)) | |
6113 | + table_destroy(t); | |
6114 | +} | |
6115 | + | |
6116 | +/* | |
6117 | + * Convert a device path to a kdev_t. | |
6118 | + */ | |
6119 | +static int lookup_device(const char *path, kdev_t *dev) | |
6120 | +{ | |
6121 | + int r; | |
6122 | + struct nameidata nd; | |
6123 | + struct inode *inode; | |
6124 | + | |
6125 | + if (!path_init(path, LOOKUP_FOLLOW, &nd)) | |
6126 | + return 0; | |
6127 | + | |
6128 | + if ((r = path_walk(path, &nd))) | |
6129 | + goto out; | |
6130 | + | |
6131 | + inode = nd.dentry->d_inode; | |
6132 | + if (!inode) { | |
6133 | + r = -ENOENT; | |
6134 | + goto out; | |
6135 | + } | |
6136 | + | |
6137 | + if (!S_ISBLK(inode->i_mode)) { | |
6138 | + r = -ENOTBLK; | |
6139 | + goto out; | |
6140 | + } | |
6141 | + | |
6142 | + *dev = inode->i_rdev; | |
6143 | + | |
6144 | + out: | |
6145 | + path_release(&nd); | |
6146 | + return r; | |
6147 | +} | |
6148 | + | |
6149 | +/* | |
6150 | + * See if we've already got a device in the list. | |
6151 | + */ | |
6152 | +static struct dm_dev *find_device(struct list_head *l, kdev_t dev) | |
6153 | +{ | |
6154 | + struct list_head *tmp; | |
6155 | + | |
6156 | + list_for_each(tmp, l) { | |
6157 | + struct dm_dev *dd = list_entry(tmp, struct dm_dev, list); | |
6158 | + if (kdev_same(dd->dev, dev)) | |
6159 | + return dd; | |
6160 | + } | |
6161 | + | |
6162 | + return NULL; | |
6163 | +} | |
6164 | + | |
6165 | +/* | |
6166 | + * Open a device so we can use it as a map destination. | |
6167 | + */ | |
6168 | +static int open_dev(struct dm_dev *dd) | |
6169 | +{ | |
6170 | + if (dd->bdev) | |
6171 | + BUG(); | |
6172 | + | |
6173 | + dd->bdev = bdget(kdev_t_to_nr(dd->dev)); | |
6174 | + if (!dd->bdev) | |
6175 | + return -ENOMEM; | |
6176 | + | |
6177 | + return blkdev_get(dd->bdev, dd->mode, 0, BDEV_RAW); | |
6178 | +} | |
6179 | + | |
6180 | +/* | |
6181 | + * Close a device that we've been using. | |
6182 | + */ | |
6183 | +static void close_dev(struct dm_dev *dd) | |
6184 | +{ | |
6185 | + if (!dd->bdev) | |
6186 | + return; | |
6187 | + | |
6188 | + blkdev_put(dd->bdev, BDEV_RAW); | |
6189 | + dd->bdev = NULL; | |
6190 | +} | |
6191 | + | |
6192 | +/* | |
6193 | + * If possible (ie. blk_size[major] is set), this checks an area | |
6194 | + * If possible (i.e. blk_size[major] is set), this checks that an | |
6195 | + * area of a destination device is valid. | |
6196 | +static int check_device_area(kdev_t dev, sector_t start, sector_t len) | |
6197 | +{ | |
6198 | + int *sizes; | |
6199 | + sector_t dev_size; | |
6200 | + | |
6201 | + if (!(sizes = blk_size[major(dev)]) || !(dev_size = sizes[minor(dev)])) | |
6202 | + /* we don't know the device details, | |
6203 | + * so give the benefit of the doubt */ | |
6204 | + return 1; | |
6205 | + | |
6206 | + /* convert to 512-byte sectors */ | |
6207 | + dev_size <<= 1; | |
6208 | + | |
6209 | + return ((start < dev_size) && (len <= (dev_size - start))); | |
6210 | +} | |
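blk_size[] entries are recorded in 1 KiB blocks, hence the shift by one to reach 512-byte sectors, and the bounds test is phrased as len <= dev_size - start so that start + len cannot overflow. A user-space restatement with illustrative numbers:

    /* Restates check_device_area()'s bounds test; values are made up. */
    #include <stdio.h>

    int main(void)
    {
            unsigned long long dev_kblocks = 1024;          /* 1 MiB device */
            unsigned long long dev_size = dev_kblocks << 1; /* 2048 sectors */
            unsigned long long start = 2000, len = 100;

            /* len <= dev_size - start avoids overflow of start + len */
            printf("valid: %d\n",
                   start < dev_size && len <= dev_size - start); /* 0 */
            return 0;
    }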
6211 | + | |
6212 | +/* | |
6213 | + * This upgrades the mode on an already open dm_dev. Being | |
6214 | + * This upgrades the mode on an already open dm_dev, being | |
6215 | + * device. | |
6216 | + */ | |
6217 | +static int upgrade_mode(struct dm_dev *dd, int new_mode) | |
6218 | +{ | |
6219 | + int r; | |
6220 | + struct dm_dev dd_copy; | |
6221 | + | |
6222 | + memcpy(&dd_copy, dd, sizeof(dd_copy)); | |
6223 | + | |
6224 | + dd->mode |= new_mode; | |
6225 | + dd->bdev = NULL; | |
6226 | + r = open_dev(dd); | |
6227 | + if (!r) | |
6228 | + close_dev(&dd_copy); | |
6229 | + else | |
6230 | + memcpy(dd, &dd_copy, sizeof(dd_copy)); | |
6231 | + | |
6232 | + return r; | |
6233 | +} | |
6234 | + | |
6235 | +/* | |
6236 | + * Add a device to the list, or just increment the usage count if | |
6237 | + * it's already present. | |
6238 | + */ | |
6239 | +int dm_get_device(struct dm_target *ti, const char *path, sector_t start, | |
6240 | + sector_t len, int mode, struct dm_dev **result) | |
6241 | +{ | |
6242 | + int r; | |
6243 | + kdev_t dev; | |
6244 | + struct dm_dev *dd; | |
6245 | + unsigned major, minor; | |
6246 | + struct dm_table *t = ti->table; | |
6247 | + | |
6248 | + if (!t) | |
6249 | + BUG(); | |
6250 | + | |
6251 | + if (sscanf(path, "%u:%u", &major, &minor) == 2) { | |
6252 | + /* Extract the major/minor numbers */ | |
6253 | + dev = mk_kdev(major, minor); | |
6254 | + } else { | |
6255 | + /* convert the path to a device */ | |
6256 | + if ((r = lookup_device(path, &dev))) | |
6257 | + return r; | |
6258 | + } | |
6259 | + | |
6260 | + dd = find_device(&t->devices, dev); | |
6261 | + if (!dd) { | |
6262 | + dd = kmalloc(sizeof(*dd), GFP_KERNEL); | |
6263 | + if (!dd) | |
6264 | + return -ENOMEM; | |
6265 | + | |
6266 | + dd->dev = dev; | |
6267 | + dd->mode = mode; | |
6268 | + dd->bdev = NULL; | |
6269 | + | |
6270 | + if ((r = open_dev(dd))) { | |
6271 | + kfree(dd); | |
6272 | + return r; | |
6273 | + } | |
6274 | + | |
6275 | + atomic_set(&dd->count, 0); | |
6276 | + list_add(&dd->list, &t->devices); | |
6277 | + | |
6278 | + } else if (dd->mode != (mode | dd->mode)) { | |
6279 | + r = upgrade_mode(dd, mode); | |
6280 | + if (r) | |
6281 | + return r; | |
6282 | + } | |
6283 | + atomic_inc(&dd->count); | |
6284 | + | |
6285 | + if (!check_device_area(dd->dev, start, len)) { | |
6286 | + DMWARN("device %s too small for target", path); | |
6287 | + dm_put_device(ti, dd); | |
6288 | + return -EINVAL; | |
6289 | + } | |
6290 | + | |
6291 | + *result = dd; | |
6292 | + | |
6293 | + return 0; | |
6294 | +} | |
6295 | + | |
6296 | +/* | |
6297 | + * Decrement a device's use count and remove it if necessary. | |
6298 | + */ | |
6299 | +void dm_put_device(struct dm_target *ti, struct dm_dev *dd) | |
6300 | +{ | |
6301 | + if (atomic_dec_and_test(&dd->count)) { | |
6302 | + close_dev(dd); | |
6303 | + list_del(&dd->list); | |
6304 | + kfree(dd); | |
6305 | + } | |
6306 | +} | |
6307 | + | |
6308 | +/* | |
6309 | + * Checks to see if the target joins onto the end of the table. | |
6310 | + */ | |
6311 | +static int adjoin(struct dm_table *table, struct dm_target *ti) | |
6312 | +{ | |
6313 | + struct dm_target *prev; | |
6314 | + | |
6315 | + if (!table->num_targets) | |
6316 | + return !ti->begin; | |
6317 | + | |
6318 | + prev = &table->targets[table->num_targets - 1]; | |
6319 | + return (ti->begin == (prev->begin + prev->len)); | |
6320 | +} | |
6321 | + | |
6322 | +/* | |
6323 | + * Used to dynamically allocate the arg array. | |
6324 | + */ | |
6325 | +static char **realloc_argv(unsigned *array_size, char **old_argv) | |
6326 | +{ | |
6327 | + char **argv; | |
6328 | + unsigned new_size; | |
6329 | + | |
6330 | + new_size = *array_size ? *array_size * 2 : 64; | |
6331 | + argv = kmalloc(new_size * sizeof(*argv), GFP_KERNEL); | |
6332 | + if (argv) { | |
6333 | + memcpy(argv, old_argv, *array_size * sizeof(*argv)); | |
6334 | + *array_size = new_size; | |
6335 | + } | |
6336 | + | |
6337 | + kfree(old_argv); | |
6338 | + return argv; | |
6339 | +} | |
6340 | + | |
6341 | +/* | |
6342 | + * Destructively splits up the argument list to pass to ctr. | |
6343 | + */ | |
6344 | +static int split_args(int *argc, char ***argvp, char *input) | |
6345 | +{ | |
6346 | + char *start, *end = input, *out, **argv = NULL; | |
6347 | + unsigned array_size = 0; | |
6348 | + | |
6349 | + *argc = 0; | |
6350 | + argv = realloc_argv(&array_size, argv); | |
6351 | + if (!argv) | |
6352 | + return -ENOMEM; | |
6353 | + | |
6354 | + while (1) { | |
6355 | + start = end; | |
6356 | + | |
6357 | + /* Skip whitespace */ | |
6358 | + while (*start && isspace(*start)) | |
6359 | + start++; | |
6360 | + | |
6361 | + if (!*start) | |
6362 | + break; /* success, we hit the end */ | |
6363 | + | |
6364 | + /* 'out' is used to remove any backslash quoting */ | |
6365 | + end = out = start; | |
6366 | + while (*end) { | |
6367 | + /* Everything apart from '\0' can be quoted */ | |
6368 | + if (*end == '\\' && *(end + 1)) { | |
6369 | + *out++ = *(end + 1); | |
6370 | + end += 2; | |
6371 | + continue; | |
6372 | + } | |
6373 | + | |
6374 | + if (isspace(*end)) | |
6375 | + break; /* end of token */ | |
6376 | + | |
6377 | + *out++ = *end++; | |
6378 | + } | |
6379 | + | |
6380 | + /* have we already filled the array ? */ | |
6381 | + if ((*argc + 1) > array_size) { | |
6382 | + argv = realloc_argv(&array_size, argv); | |
6383 | + if (!argv) | |
6384 | + return -ENOMEM; | |
6385 | + } | |
6386 | + | |
6387 | + /* we know this is whitespace */ | |
6388 | + if (*end) | |
6389 | + end++; | |
6390 | + | |
6391 | + /* terminate the string and put it in the array */ | |
6392 | + *out = '\0'; | |
6393 | + argv[*argc] = start; | |
6394 | + (*argc)++; | |
6395 | + } | |
6396 | + | |
6397 | + *argvp = argv; | |
6398 | + return 0; | |
6399 | +} | |
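The tokenizer splits the line in place: whitespace separates arguments and a backslash quotes the following character, which is why out trails end, compacting each token as quoting characters are dropped. A user-space rerun of the same loop on an illustrative input:

    /* Demonstrates split_args()'s tokenizing rules without the kernel
     * memory management around it. */
    #include <ctype.h>
    #include <stdio.h>

    int main(void)
    {
            char input[] = "/dev/hda1 0 a\\ b";  /* third token holds a quoted space */
            char *argv[8];
            char *start, *end = input, *out;
            int argc = 0, i;

            while (1) {
                    start = end;
                    while (*start && isspace(*start))       /* skip separators */
                            start++;
                    if (!*start)
                            break;

                    end = out = start;
                    while (*end) {
                            if (*end == '\\' && *(end + 1)) {  /* quoted char */
                                    *out++ = *(end + 1);
                                    end += 2;
                                    continue;
                            }
                            if (isspace(*end))
                                    break;                     /* end of token */
                            *out++ = *end++;
                    }
                    if (*end)
                            end++;
                    *out = '\0';
                    argv[argc++] = start;
            }

            for (i = 0; i < argc; i++)
                    printf("argv[%d] = \"%s\"\n", i, argv[i]);
            return 0;   /* prints "/dev/hda1", "0" and "a b" */
    }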
6400 | + | |
6401 | +int dm_table_add_target(struct dm_table *t, const char *type, | |
6402 | + sector_t start, sector_t len, char *params) | |
6403 | +{ | |
6404 | + int r = -EINVAL, argc; | |
6405 | + char **argv; | |
6406 | + struct dm_target *tgt; | |
6407 | + | |
6408 | + if (t->num_targets >= t->num_allocated) | |
6409 | + return -ENOMEM; | |
6410 | + | |
6411 | + tgt = t->targets + t->num_targets; | |
6412 | + memset(tgt, 0, sizeof(*tgt)); | |
6413 | + | |
6414 | + tgt->type = dm_get_target_type(type); | |
6415 | + if (!tgt->type) { | |
6416 | + tgt->error = "unknown target type"; | |
6417 | + return -EINVAL; | |
6418 | + } | |
6419 | + | |
6420 | + tgt->table = t; | |
6421 | + tgt->begin = start; | |
6422 | + tgt->len = len; | |
6423 | + tgt->error = "Unknown error"; | |
6424 | + | |
6425 | + /* | |
6426 | + * Does this target adjoin the previous one ? | |
6427 | + */ | |
6428 | + if (!adjoin(t, tgt)) { | |
6429 | + tgt->error = "Gap in table"; | |
6430 | + r = -EINVAL; | |
6431 | + goto bad; | |
6432 | + } | |
6433 | + | |
6434 | + r = split_args(&argc, &argv, params); | |
6435 | + if (r) { | |
6436 | + tgt->error = "couldn't split parameters (insufficient memory)"; | |
6437 | + goto bad; | |
6438 | + } | |
6439 | + | |
6440 | + r = tgt->type->ctr(tgt, argc, argv); | |
6441 | + kfree(argv); | |
6442 | + if (r) | |
6443 | + goto bad; | |
6444 | + | |
6445 | + t->highs[t->num_targets++] = tgt->begin + tgt->len - 1; | |
6446 | + return 0; | |
6447 | + | |
6448 | + bad: | |
6449 | + printk(KERN_ERR DM_NAME ": %s\n", tgt->error); | |
6450 | + dm_put_target_type(tgt->type); | |
6451 | + return r; | |
6452 | +} | |
6453 | + | |
6454 | +static int setup_indexes(struct dm_table *t) | |
6455 | +{ | |
6456 | + int i; | |
6457 | + unsigned int total = 0; | |
6458 | + sector_t *indexes; | |
6459 | + | |
6460 | + /* allocate the space for *all* the indexes */ | |
6461 | + for (i = t->depth - 2; i >= 0; i--) { | |
6462 | + t->counts[i] = dm_div_up(t->counts[i + 1], CHILDREN_PER_NODE); | |
6463 | + total += t->counts[i]; | |
6464 | + } | |
6465 | + | |
6466 | + indexes = (sector_t *) dm_vcalloc(total, (unsigned long) NODE_SIZE); | |
6467 | + if (!indexes) | |
6468 | + return -ENOMEM; | |
6469 | + | |
6470 | + /* set up internal nodes, bottom-up */ | |
6471 | + for (i = t->depth - 2, total = 0; i >= 0; i--) { | |
6472 | + t->index[i] = indexes; | |
6473 | + indexes += (KEYS_PER_NODE * t->counts[i]); | |
6474 | + setup_btree_index(i, t); | |
6475 | + } | |
6476 | + | |
6477 | + return 0; | |
6478 | +} | |
6479 | + | |
6480 | +/* | |
6481 | + * Builds the btree to index the map. | |
6482 | + */ | |
6483 | +int dm_table_complete(struct dm_table *t) | |
6484 | +{ | |
6485 | + int r = 0; | |
6486 | + unsigned int leaf_nodes; | |
6487 | + | |
6488 | + /* how many indexes will the btree have ? */ | |
6489 | + leaf_nodes = dm_div_up(t->num_targets, KEYS_PER_NODE); | |
6490 | + t->depth = 1 + int_log(leaf_nodes, CHILDREN_PER_NODE); | |
6491 | + | |
6492 | + /* leaf layer has already been set up */ | |
6493 | + t->counts[t->depth - 1] = leaf_nodes; | |
6494 | + t->index[t->depth - 1] = t->highs; | |
6495 | + | |
6496 | + if (t->depth >= 2) | |
6497 | + r = setup_indexes(t); | |
6498 | + | |
6499 | + return r; | |
6500 | +} | |
6501 | + | |
6502 | +static spinlock_t _event_lock = SPIN_LOCK_UNLOCKED; | |
6503 | +void dm_table_event_callback(struct dm_table *t, | |
6504 | + void (*fn)(void *), void *context) | |
6505 | +{ | |
6506 | + spin_lock_irq(&_event_lock); | |
6507 | + t->event_fn = fn; | |
6508 | + t->event_context = context; | |
6509 | + spin_unlock_irq(&_event_lock); | |
6510 | +} | |
6511 | + | |
6512 | +void dm_table_event(struct dm_table *t) | |
6513 | +{ | |
6514 | + spin_lock(&_event_lock); | |
6515 | + if (t->event_fn) | |
6516 | + t->event_fn(t->event_context); | |
6517 | + spin_unlock(&_event_lock); | |
6518 | +} | |
6519 | + | |
6520 | +sector_t dm_table_get_size(struct dm_table *t) | |
6521 | +{ | |
6522 | + return t->num_targets ? (t->highs[t->num_targets - 1] + 1) : 0; | |
6523 | +} | |
6524 | + | |
6525 | +struct dm_target *dm_table_get_target(struct dm_table *t, unsigned int index) | |
6526 | +{ | |
6527 | + if (index > t->num_targets) | |
6528 | + return NULL; | |
6529 | + | |
6530 | + return t->targets + index; | |
6531 | +} | |
6532 | + | |
6533 | +/* | |
6534 | + * Search the btree for the correct target. | |
6535 | + */ | |
6536 | +struct dm_target *dm_table_find_target(struct dm_table *t, sector_t sector) | |
6537 | +{ | |
6538 | + unsigned int l, n = 0, k = 0; | |
6539 | + sector_t *node; | |
6540 | + | |
6541 | + for (l = 0; l < t->depth; l++) { | |
6542 | + n = get_child(n, k); | |
6543 | + node = get_node(t, l, n); | |
6544 | + | |
6545 | + for (k = 0; k < KEYS_PER_NODE; k++) | |
6546 | + if (node[k] >= sector) | |
6547 | + break; | |
6548 | + } | |
6549 | + | |
6550 | + return &t->targets[(KEYS_PER_NODE * n) + k]; | |
6551 | +} | |
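The walk visits one node per level: at each node it scans the KEYS_PER_NODE sorted keys for the first one >= sector and descends into the matching child. At the leaf level the keys are the targets' highs values, so the stopping index names the target whose range spans the sector. A one-level user-space illustration:

    /* Leaf-level view of the lookup: the first high >= sector picks the
     * target. Ranges below are illustrative. */
    #include <stdio.h>

    int main(void)
    {
            /* three targets covering sectors 0-99, 100-499, 500-999 */
            unsigned long long highs[] = { 99, 499, 999 };
            unsigned long long sector = 250;
            unsigned k;

            for (k = 0; k < 3; k++)
                    if (highs[k] >= sector)
                            break;

            printf("sector %llu -> target %u\n", sector, k);  /* target 1 */
            return 0;
    }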
6552 | + | |
6553 | +unsigned int dm_table_get_num_targets(struct dm_table *t) | |
6554 | +{ | |
6555 | + return t->num_targets; | |
6556 | +} | |
6557 | + | |
6558 | +struct list_head *dm_table_get_devices(struct dm_table *t) | |
6559 | +{ | |
6560 | + return &t->devices; | |
6561 | +} | |
6562 | + | |
6563 | +int dm_table_get_mode(struct dm_table *t) | |
6564 | +{ | |
6565 | + return t->mode; | |
6566 | +} | |
6567 | + | |
6568 | +void dm_table_suspend_targets(struct dm_table *t) | |
6569 | +{ | |
6570 | + int i; | |
6571 | + | |
6572 | + for (i = 0; i < t->num_targets; i++) { | |
6573 | + struct dm_target *ti = t->targets + i; | |
6574 | + | |
6575 | + if (ti->type->suspend) | |
6576 | + ti->type->suspend(ti); | |
6577 | + } | |
6578 | +} | |
6579 | + | |
6580 | +void dm_table_resume_targets(struct dm_table *t) | |
6581 | +{ | |
6582 | + int i; | |
6583 | + | |
6584 | + for (i = 0; i < t->num_targets; i++) { | |
6585 | + struct dm_target *ti = t->targets + i; | |
6586 | + | |
6587 | + if (ti->type->resume) | |
6588 | + ti->type->resume(ti); | |
6589 | + } | |
6590 | +} | |
6591 | + | |
6592 | +EXPORT_SYMBOL(dm_get_device); | |
6593 | +EXPORT_SYMBOL(dm_put_device); | |
6594 | +EXPORT_SYMBOL(dm_table_event); | |
6595 | +EXPORT_SYMBOL(dm_table_get_mode); | |
6596 | diff -urN linux-2.4.24.org/drivers/md/dm-target.c linux-2.4.24/drivers/md/dm-target.c | |
6597 | --- linux-2.4.24.org/drivers/md/dm-target.c 1970-01-01 01:00:00.000000000 +0100 | |
6598 | +++ linux-2.4.24/drivers/md/dm-target.c 2004-01-18 15:01:13.789709690 +0100 | |
6599 | @@ -0,0 +1,188 @@ | |
6600 | +/* | |
6601 | + * Copyright (C) 2001 Sistina Software (UK) Limited | |
6602 | + * | |
6603 | + * This file is released under the GPL. | |
6604 | + */ | |
6605 | + | |
6606 | +#include "dm.h" | |
6607 | + | |
6608 | +#include <linux/module.h> | |
6609 | +#include <linux/kmod.h> | |
6610 | +#include <linux/slab.h> | |
6611 | + | |
6612 | +struct tt_internal { | |
6613 | + struct target_type tt; | |
6614 | + | |
6615 | + struct list_head list; | |
6616 | + long use; | |
6617 | +}; | |
6618 | + | |
6619 | +static LIST_HEAD(_targets); | |
6620 | +static DECLARE_RWSEM(_lock); | |
6621 | + | |
6622 | +#define DM_MOD_NAME_SIZE 32 | |
6623 | + | |
6624 | +static inline struct tt_internal *__find_target_type(const char *name) | |
6625 | +{ | |
6626 | + struct list_head *tih; | |
6627 | + struct tt_internal *ti; | |
6628 | + | |
6629 | + list_for_each(tih, &_targets) { | |
6630 | + ti = list_entry(tih, struct tt_internal, list); | |
6631 | + | |
6632 | + if (!strcmp(name, ti->tt.name)) | |
6633 | + return ti; | |
6634 | + } | |
6635 | + | |
6636 | + return NULL; | |
6637 | +} | |
6638 | + | |
6639 | +static struct tt_internal *get_target_type(const char *name) | |
6640 | +{ | |
6641 | + struct tt_internal *ti; | |
6642 | + | |
6643 | + down_read(&_lock); | |
6644 | + ti = __find_target_type(name); | |
6645 | + | |
6646 | + if (ti) { | |
6647 | + if (ti->use == 0 && ti->tt.module) | |
6648 | + __MOD_INC_USE_COUNT(ti->tt.module); | |
6649 | + ti->use++; | |
6650 | + } | |
6651 | + up_read(&_lock); | |
6652 | + | |
6653 | + return ti; | |
6654 | +} | |
6655 | + | |
6656 | +static void load_module(const char *name) | |
6657 | +{ | |
6658 | + char module_name[DM_MOD_NAME_SIZE] = "dm-"; | |
6659 | + | |
6660 | + /* Length check for strcat() below */ | |
6661 | + if (strlen(name) > (DM_MOD_NAME_SIZE - 4)) | |
6662 | + return; | |
6663 | + | |
6664 | + strcat(module_name, name); | |
6665 | + request_module(module_name); | |
6666 | +} | |
6667 | + | |
6668 | +struct target_type *dm_get_target_type(const char *name) | |
6669 | +{ | |
6670 | + struct tt_internal *ti = get_target_type(name); | |
6671 | + | |
6672 | + if (!ti) { | |
6673 | + load_module(name); | |
6674 | + ti = get_target_type(name); | |
6675 | + } | |
6676 | + | |
6677 | + return ti ? &ti->tt : NULL; | |
6678 | +} | |
6679 | + | |
6680 | +void dm_put_target_type(struct target_type *t) | |
6681 | +{ | |
6682 | + struct tt_internal *ti = (struct tt_internal *) t; | |
6683 | + | |
6684 | + down_read(&_lock); | |
6685 | + if (--ti->use == 0 && ti->tt.module) | |
6686 | + __MOD_DEC_USE_COUNT(ti->tt.module); | |
6687 | + | |
6688 | + if (ti->use < 0) | |
6689 | + BUG(); | |
6690 | + up_read(&_lock); | |
6691 | + | |
6692 | + return; | |
6693 | +} | |
6694 | + | |
6695 | +static struct tt_internal *alloc_target(struct target_type *t) | |
6696 | +{ | |
6697 | + struct tt_internal *ti = kmalloc(sizeof(*ti), GFP_KERNEL); | |
6698 | + | |
6699 | + if (ti) { | |
6700 | + memset(ti, 0, sizeof(*ti)); | |
6701 | + ti->tt = *t; | |
6702 | + } | |
6703 | + | |
6704 | + return ti; | |
6705 | +} | |
6706 | + | |
6707 | +int dm_register_target(struct target_type *t) | |
6708 | +{ | |
6709 | + int rv = 0; | |
6710 | + struct tt_internal *ti = alloc_target(t); | |
6711 | + | |
6712 | + if (!ti) | |
6713 | + return -ENOMEM; | |
6714 | + | |
6715 | + down_write(&_lock); | |
6716 | + if (__find_target_type(t->name)) { | |
6717 | + kfree(ti); | |
6718 | + rv = -EEXIST; | |
6719 | + } else | |
6720 | + list_add(&ti->list, &_targets); | |
6721 | + | |
6722 | + up_write(&_lock); | |
6723 | + return rv; | |
6724 | +} | |
6725 | + | |
6726 | +int dm_unregister_target(struct target_type *t) | |
6727 | +{ | |
6728 | + struct tt_internal *ti; | |
6729 | + | |
6730 | + down_write(&_lock); | |
6731 | + if (!(ti = __find_target_type(t->name))) { | |
6732 | + up_write(&_lock); | |
6733 | + return -EINVAL; | |
6734 | + } | |
6735 | + | |
6736 | + if (ti->use) { | |
6737 | + up_write(&_lock); | |
6738 | + return -ETXTBSY; | |
6739 | + } | |
6740 | + | |
6741 | + list_del(&ti->list); | |
6742 | + kfree(ti); | |
6743 | + | |
6744 | + up_write(&_lock); | |
6745 | + return 0; | |
6746 | +} | |
6747 | + | |
6748 | +/* | |
6749 | + * io-err: always fails an io, useful for bringing | |
6750 | + * up LVs that have holes in them. | |
6751 | + */ | |
6752 | +static int io_err_ctr(struct dm_target *ti, unsigned int argc, char **args) | |
6753 | +{ | |
6754 | + return 0; | |
6755 | +} | |
6756 | + | |
6757 | +static void io_err_dtr(struct dm_target *ti) | |
6758 | +{ | |
6759 | + /* empty */ | |
6760 | +} | |
6761 | + | |
6762 | +static int io_err_map(struct dm_target *ti, struct buffer_head *bh, int rw, | |
6763 | + union map_info *map_context) | |
6764 | +{ | |
6765 | + return -EIO; | |
6766 | +} | |
6767 | + | |
6768 | +static struct target_type error_target = { | |
6769 | + .name = "error", | |
6770 | + .ctr = io_err_ctr, | |
6771 | + .dtr = io_err_dtr, | |
6772 | + .map = io_err_map, | |
6773 | +}; | |
6774 | + | |
6775 | +int dm_target_init(void) | |
6776 | +{ | |
6777 | + return dm_register_target(&error_target); | |
6778 | +} | |
6779 | + | |
6780 | +void dm_target_exit(void) | |
6781 | +{ | |
6782 | + if (dm_unregister_target(&error_target)) | |
6783 | + DMWARN("error target unregistration failed"); | |
6784 | +} | |
6785 | + | |
6786 | +EXPORT_SYMBOL(dm_register_target); | |
6787 | +EXPORT_SYMBOL(dm_unregister_target); | |
6788 | diff -urN linux-2.4.24.org/drivers/md/kcopyd.c linux-2.4.24/drivers/md/kcopyd.c | |
6789 | --- linux-2.4.24.org/drivers/md/kcopyd.c 1970-01-01 01:00:00.000000000 +0100 | |
6790 | +++ linux-2.4.24/drivers/md/kcopyd.c 2004-01-18 15:01:25.797189646 +0100 | |
6791 | @@ -0,0 +1,666 @@ | |
6792 | +/* | |
6793 | + * Copyright (C) 2002 Sistina Software (UK) Limited. | |
6794 | + * | |
6795 | + * This file is released under the GPL. | |
6796 | + */ | |
6797 | + | |
6798 | +#include <asm/atomic.h> | |
6799 | + | |
6800 | +#include <linux/blkdev.h> | |
6801 | +#include <linux/config.h> | |
6802 | +#include <linux/device-mapper.h> | |
6803 | +#include <linux/fs.h> | |
6804 | +#include <linux/init.h> | |
6805 | +#include <linux/list.h> | |
6806 | +#include <linux/locks.h> | |
6807 | +#include <linux/mempool.h> | |
6808 | +#include <linux/module.h> | |
6809 | +#include <linux/pagemap.h> | |
6810 | +#include <linux/slab.h> | |
6811 | +#include <linux/vmalloc.h> | |
6812 | + | |
6813 | +#include "kcopyd.h" | |
6814 | +#include "dm-daemon.h" | |
6815 | + | |
6816 | +/* FIXME: this is only needed for the DMERR macros */ | |
6817 | +#include "dm.h" | |
6818 | + | |
6819 | +static struct dm_daemon _kcopyd; | |
6820 | + | |
6821 | +#define SECTORS_PER_PAGE (PAGE_SIZE / SECTOR_SIZE) | |
6822 | +#define SUB_JOB_SIZE 128 | |
6823 | +#define PAGES_PER_SUB_JOB (SUB_JOB_SIZE / SECTORS_PER_PAGE) | |
6824 | +#define SUB_JOB_COUNT 8 | |
6825 | + | |
6826 | +/*----------------------------------------------------------------- | |
6827 | + * Each kcopyd client has its own little pool of preallocated | |
6828 | + * pages for kcopyd io. | |
6829 | + *---------------------------------------------------------------*/ | |
6830 | +struct kcopyd_client { | |
6831 | + struct list_head list; | |
6832 | + | |
6833 | + spinlock_t lock; | |
6834 | + struct list_head pages; | |
6835 | + unsigned int nr_pages; | |
6836 | + unsigned int nr_free_pages; | |
6837 | + unsigned int max_split; | |
6838 | +}; | |
6839 | + | |
6840 | +static inline void __push_page(struct kcopyd_client *kc, struct page *p) | |
6841 | +{ | |
6842 | + list_add(&p->list, &kc->pages); | |
6843 | + kc->nr_free_pages++; | |
6844 | +} | |
6845 | + | |
6846 | +static inline struct page *__pop_page(struct kcopyd_client *kc) | |
6847 | +{ | |
6848 | + struct page *p; | |
6849 | + | |
6850 | + p = list_entry(kc->pages.next, struct page, list); | |
6851 | + list_del(&p->list); | |
6852 | + kc->nr_free_pages--; | |
6853 | + | |
6854 | + return p; | |
6855 | +} | |
6856 | + | |
6857 | +static int kcopyd_get_pages(struct kcopyd_client *kc, | |
6858 | + unsigned int nr, struct list_head *pages) | |
6859 | +{ | |
6860 | + struct page *p; | |
6861 | + INIT_LIST_HEAD(pages); | |
6862 | + | |
6863 | + spin_lock(&kc->lock); | |
6864 | + if (kc->nr_free_pages < nr) { | |
6865 | + spin_unlock(&kc->lock); | |
6866 | + return -ENOMEM; | |
6867 | + } | |
6868 | + | |
6869 | + while (nr--) { | |
6870 | + p = __pop_page(kc); | |
6871 | + list_add(&p->list, pages); | |
6872 | + } | |
6873 | + spin_unlock(&kc->lock); | |
6874 | + | |
6875 | + return 0; | |
6876 | +} | |
6877 | + | |
6878 | +static void kcopyd_put_pages(struct kcopyd_client *kc, struct list_head *pages) | |
6879 | +{ | |
6880 | + struct list_head *tmp, *tmp2; | |
6881 | + | |
6882 | + spin_lock(&kc->lock); | |
6883 | + list_for_each_safe (tmp, tmp2, pages) | |
6884 | + __push_page(kc, list_entry(tmp, struct page, list)); | |
6885 | + spin_unlock(&kc->lock); | |
6886 | +} | |
6887 | + | |
6888 | +/* | |
6889 | + * These three functions resize the page pool. | |
6890 | + */ | |
6891 | +static void release_pages(struct list_head *pages) | |
6892 | +{ | |
6893 | + struct page *p; | |
6894 | + struct list_head *tmp, *tmp2; | |
6895 | + | |
6896 | + list_for_each_safe (tmp, tmp2, pages) { | |
6897 | + p = list_entry(tmp, struct page, list); | |
6898 | + UnlockPage(p); | |
6899 | + __free_page(p); | |
6900 | + } | |
6901 | +} | |
6902 | + | |
6903 | +static int client_alloc_pages(struct kcopyd_client *kc, unsigned int nr) | |
6904 | +{ | |
6905 | + unsigned int i; | |
6906 | + struct page *p; | |
6907 | + LIST_HEAD(new); | |
6908 | + | |
6909 | + for (i = 0; i < nr; i++) { | |
6910 | + p = alloc_page(GFP_KERNEL); | |
6911 | + if (!p) { | |
6912 | + release_pages(&new); | |
6913 | + return -ENOMEM; | |
6914 | + } | |
6915 | + | |
6916 | + LockPage(p); | |
6917 | + list_add(&p->list, &new); | |
6918 | + } | |
6919 | + | |
6920 | + kcopyd_put_pages(kc, &new); | |
6921 | + kc->nr_pages += nr; | |
6922 | + kc->max_split = kc->nr_pages / PAGES_PER_SUB_JOB; | |
6923 | + if (kc->max_split > SUB_JOB_COUNT) | |
6924 | + kc->max_split = SUB_JOB_COUNT; | |
6925 | + | |
6926 | + return 0; | |
6927 | +} | |
6928 | + | |
6929 | +static void client_free_pages(struct kcopyd_client *kc) | |
6930 | +{ | |
6931 | + BUG_ON(kc->nr_free_pages != kc->nr_pages); | |
6932 | + release_pages(&kc->pages); | |
6933 | + kc->nr_free_pages = kc->nr_pages = 0; | |
6934 | +} | |
6935 | + | |
6936 | +/*----------------------------------------------------------------- | |
6937 | + * kcopyd_jobs need to be allocated by the *clients* of kcopyd; | |
6938 | + * for this reason we use a mempool to prevent the client from | |
6939 | + * ever having to do io (which could cause a deadlock). | |
6940 | + *---------------------------------------------------------------*/ | |
6941 | +struct kcopyd_job { | |
6942 | + struct kcopyd_client *kc; | |
6943 | + struct list_head list; | |
6944 | + unsigned int flags; | |
6945 | + | |
6946 | + /* | |
6947 | + * Error state of the job. | |
6948 | + */ | |
6949 | + int read_err; | |
6950 | + unsigned int write_err; | |
6951 | + | |
6952 | + /* | |
6953 | + * Either READ or WRITE | |
6954 | + */ | |
6955 | + int rw; | |
6956 | + struct io_region source; | |
6957 | + | |
6958 | + /* | |
6959 | + * The destinations for the transfer. | |
6960 | + */ | |
6961 | + unsigned int num_dests; | |
6962 | + struct io_region dests[KCOPYD_MAX_REGIONS]; | |
6963 | + | |
6964 | + sector_t offset; | |
6965 | + unsigned int nr_pages; | |
6966 | + struct list_head pages; | |
6967 | + | |
6968 | + /* | |
6969 | + * Set this to ensure you are notified when the job has | |
6970 | + * completed. 'context' is for callback to use. | |
6971 | + */ | |
6972 | + kcopyd_notify_fn fn; | |
6973 | + void *context; | |
6974 | + | |
6975 | + /* | |
6976 | + * These fields are only used if the job has been split | |
6977 | + * into more manageable parts. | |
6978 | + */ | |
6979 | + struct semaphore lock; | |
6980 | + atomic_t sub_jobs; | |
6981 | + sector_t progress; | |
6982 | +}; | |
6983 | + | |
6984 | +/* FIXME: this should scale with the number of pages */ | |
6985 | +#define MIN_JOBS 512 | |
6986 | + | |
6987 | +static kmem_cache_t *_job_cache; | |
6988 | +static mempool_t *_job_pool; | |
6989 | + | |
6990 | +/* | |
6991 | + * We maintain three lists of jobs: | |
6992 | + * | |
6993 | + * i) jobs waiting for pages | |
6994 | + * ii) jobs that have pages, and are waiting for the io to be issued. | |
6995 | + * iii) jobs that have completed. | |
6996 | + * | |
6997 | + * All three of these are protected by _job_lock. | |
6998 | + */ | |
6999 | +static spinlock_t _job_lock = SPIN_LOCK_UNLOCKED; | |
7000 | + | |
7001 | +static LIST_HEAD(_complete_jobs); | |
7002 | +static LIST_HEAD(_io_jobs); | |
7003 | +static LIST_HEAD(_pages_jobs); | |
7004 | + | |
7005 | +static int jobs_init(void) | |
7006 | +{ | |
7007 | + INIT_LIST_HEAD(&_complete_jobs); | |
7008 | + INIT_LIST_HEAD(&_io_jobs); | |
7009 | + INIT_LIST_HEAD(&_pages_jobs); | |
7010 | + | |
7011 | + _job_cache = kmem_cache_create("kcopyd-jobs", | |
7012 | + sizeof(struct kcopyd_job), | |
7013 | + __alignof__(struct kcopyd_job), | |
7014 | + 0, NULL, NULL); | |
7015 | + if (!_job_cache) | |
7016 | + return -ENOMEM; | |
7017 | + | |
7018 | + _job_pool = mempool_create(MIN_JOBS, mempool_alloc_slab, | |
7019 | + mempool_free_slab, _job_cache); | |
7020 | + if (!_job_pool) { | |
7021 | + kmem_cache_destroy(_job_cache); | |
7022 | + return -ENOMEM; | |
7023 | + } | |
7024 | + | |
7025 | + return 0; | |
7026 | +} | |
7027 | + | |
7028 | +static void jobs_exit(void) | |
7029 | +{ | |
7030 | + BUG_ON(!list_empty(&_complete_jobs)); | |
7031 | + BUG_ON(!list_empty(&_io_jobs)); | |
7032 | + BUG_ON(!list_empty(&_pages_jobs)); | |
7033 | + | |
7034 | + mempool_destroy(_job_pool); | |
7035 | + kmem_cache_destroy(_job_cache); | |
7036 | +} | |
7037 | + | |
7038 | +/* | |
7039 | + * Functions to push a job onto the tail of a given job list, | |
7040 | + * and to pop one off the head. | |
7041 | + */ | |
7042 | +static inline struct kcopyd_job *pop(struct list_head *jobs) | |
7043 | +{ | |
7044 | + struct kcopyd_job *job = NULL; | |
7045 | + unsigned long flags; | |
7046 | + | |
7047 | + spin_lock_irqsave(&_job_lock, flags); | |
7048 | + | |
7049 | + if (!list_empty(jobs)) { | |
7050 | + job = list_entry(jobs->next, struct kcopyd_job, list); | |
7051 | + list_del(&job->list); | |
7052 | + } | |
7053 | + spin_unlock_irqrestore(&_job_lock, flags); | |
7054 | + | |
7055 | + return job; | |
7056 | +} | |
7057 | + | |
7058 | +static inline void push(struct list_head *jobs, struct kcopyd_job *job) | |
7059 | +{ | |
7060 | + unsigned long flags; | |
7061 | + | |
7062 | + spin_lock_irqsave(&_job_lock, flags); | |
7063 | + list_add_tail(&job->list, jobs); | |
7064 | + spin_unlock_irqrestore(&_job_lock, flags); | |
7065 | +} | |
7066 | + | |
7067 | +/* | |
7068 | + * These three functions process 1 item from the corresponding | |
7069 | + * job list. | |
7070 | + * | |
7071 | + * They return: | |
7072 | + * < 0: error | |
7073 | + * 0: success | |
7074 | + * > 0: can't process yet. | |
7075 | + */ | |
7076 | +static int run_complete_job(struct kcopyd_job *job) | |
7077 | +{ | |
7078 | + void *context = job->context; | |
7079 | + int read_err = job->read_err; | |
7080 | + unsigned int write_err = job->write_err; | |
7081 | + kcopyd_notify_fn fn = job->fn; | |
7082 | + | |
7083 | + kcopyd_put_pages(job->kc, &job->pages); | |
7084 | + mempool_free(job, _job_pool); | |
7085 | + fn(read_err, write_err, context); | |
7086 | + return 0; | |
7087 | +} | |
7088 | + | |
7089 | +static void complete_io(unsigned int error, void *context) | |
7090 | +{ | |
7091 | + struct kcopyd_job *job = (struct kcopyd_job *) context; | |
7092 | + | |
7093 | + if (error) { | |
7094 | + if (job->rw == WRITE) | |
7095 | +			job->write_err |= error; | |
7096 | + else | |
7097 | + job->read_err = 1; | |
7098 | + | |
7099 | + if (!test_bit(KCOPYD_IGNORE_ERROR, &job->flags)) { | |
7100 | + push(&_complete_jobs, job); | |
7101 | + dm_daemon_wake(&_kcopyd); | |
7102 | + return; | |
7103 | + } | |
7104 | + } | |
7105 | + | |
7106 | + if (job->rw == WRITE) | |
7107 | + push(&_complete_jobs, job); | |
7108 | + | |
7109 | + else { | |
7110 | + job->rw = WRITE; | |
7111 | + push(&_io_jobs, job); | |
7112 | + } | |
7113 | + | |
7114 | + dm_daemon_wake(&_kcopyd); | |
7115 | +} | |
7116 | + | |
7117 | +/* | |
7118 | + * Issue the io for a particular job, using the pages it has | |
7119 | + * already acquired. | |
7120 | + */ | |
7121 | +static int run_io_job(struct kcopyd_job *job) | |
7122 | +{ | |
7123 | + int r; | |
7124 | + | |
7125 | + if (job->rw == READ) | |
7126 | + r = dm_io_async(1, &job->source, job->rw, | |
7127 | + list_entry(job->pages.next, struct page, list), | |
7128 | + job->offset, complete_io, job); | |
7129 | + | |
7130 | + else | |
7131 | + r = dm_io_async(job->num_dests, job->dests, job->rw, | |
7132 | + list_entry(job->pages.next, struct page, list), | |
7133 | + job->offset, complete_io, job); | |
7134 | + | |
7135 | + return r; | |
7136 | +} | |
7137 | + | |
7138 | +static int run_pages_job(struct kcopyd_job *job) | |
7139 | +{ | |
7140 | + int r; | |
7141 | + | |
7142 | + job->nr_pages = dm_div_up(job->dests[0].count + job->offset, | |
7143 | + SECTORS_PER_PAGE); | |
7144 | + r = kcopyd_get_pages(job->kc, job->nr_pages, &job->pages); | |
7145 | + if (!r) { | |
7146 | + /* this job is ready for io */ | |
7147 | + push(&_io_jobs, job); | |
7148 | + return 0; | |
7149 | + } | |
7150 | + | |
7151 | + if (r == -ENOMEM) | |
7152 | + /* can't complete now */ | |
7153 | + return 1; | |
7154 | + | |
7155 | + return r; | |
7156 | +} | |
7157 | + | |
7158 | +/* | |
7159 | + * Run through a list for as long as possible. Returns the count | |
7160 | + * of successful jobs. | |
7161 | + */ | |
7162 | +static int process_jobs(struct list_head *jobs, int (*fn) (struct kcopyd_job *)) | |
7163 | +{ | |
7164 | + struct kcopyd_job *job; | |
7165 | + int r, count = 0; | |
7166 | + | |
7167 | + while ((job = pop(jobs))) { | |
7168 | + | |
7169 | + r = fn(job); | |
7170 | + | |
7171 | + if (r < 0) { | |
7172 | + /* error this rogue job */ | |
7173 | + if (job->rw == WRITE) | |
7174 | + job->write_err = (unsigned int) -1; | |
7175 | + else | |
7176 | + job->read_err = 1; | |
7177 | + push(&_complete_jobs, job); | |
7178 | + break; | |
7179 | + } | |
7180 | + | |
7181 | + if (r > 0) { | |
7182 | + /* | |
7183 | + * We couldn't service this job ATM, so | |
7184 | + * push this job back onto the list. | |
7185 | + */ | |
7186 | + push(jobs, job); | |
7187 | + break; | |
7188 | + } | |
7189 | + | |
7190 | + count++; | |
7191 | + } | |
7192 | + | |
7193 | + return count; | |
7194 | +} | |
7195 | + | |
7196 | +/* | |
7197 | + * kcopyd does this every time it's woken up. | |
7198 | + */ | |
7199 | +static void do_work(void) | |
7200 | +{ | |
7201 | + /* | |
7202 | + * The order that these are called is *very* important. | |
7203 | +	 * Completed jobs can free some pages for pages jobs. | |
7204 | + * Pages jobs when successful will jump onto the io jobs | |
7205 | + * list. io jobs call wake when they complete and it all | |
7206 | + * starts again. | |
7207 | + */ | |
7208 | + process_jobs(&_complete_jobs, run_complete_job); | |
7209 | + process_jobs(&_pages_jobs, run_pages_job); | |
7210 | + process_jobs(&_io_jobs, run_io_job); | |
7211 | + run_task_queue(&tq_disk); | |
7212 | +} | |
7213 | + | |
7214 | +/* | |
7215 | + * If we are copying a small region we just dispatch a single job | |
7216 | + * to do the copy, otherwise the io has to be split up into many | |
7217 | + * jobs. | |
7218 | + */ | |
7219 | +static void dispatch_job(struct kcopyd_job *job) | |
7220 | +{ | |
7221 | + push(&_pages_jobs, job); | |
7222 | + dm_daemon_wake(&_kcopyd); | |
7223 | +} | |
7224 | + | |
7225 | +static void segment_complete(int read_err, | |
7226 | + unsigned int write_err, void *context) | |
7227 | +{ | |
7228 | + /* FIXME: tidy this function */ | |
7229 | + sector_t progress = 0; | |
7230 | + sector_t count = 0; | |
7231 | + struct kcopyd_job *job = (struct kcopyd_job *) context; | |
7232 | + | |
7233 | + down(&job->lock); | |
7234 | + | |
7235 | + /* update the error */ | |
7236 | + if (read_err) | |
7237 | + job->read_err = 1; | |
7238 | + | |
7239 | + if (write_err) | |
7240 | +		job->write_err |= write_err; | |
7241 | + | |
7242 | + /* | |
7243 | + * Only dispatch more work if there hasn't been an error. | |
7244 | + */ | |
7245 | + if ((!job->read_err && !job->write_err) || | |
7246 | + test_bit(KCOPYD_IGNORE_ERROR, &job->flags)) { | |
7247 | + /* get the next chunk of work */ | |
7248 | + progress = job->progress; | |
7249 | + count = job->source.count - progress; | |
7250 | + if (count) { | |
7251 | + if (count > SUB_JOB_SIZE) | |
7252 | + count = SUB_JOB_SIZE; | |
7253 | + | |
7254 | + job->progress += count; | |
7255 | + } | |
7256 | + } | |
7257 | + up(&job->lock); | |
7258 | + | |
7259 | + if (count) { | |
7260 | + int i; | |
7261 | + struct kcopyd_job *sub_job = mempool_alloc(_job_pool, GFP_NOIO); | |
7262 | + | |
7263 | + memcpy(sub_job, job, sizeof(*job)); | |
7264 | + sub_job->source.sector += progress; | |
7265 | + sub_job->source.count = count; | |
7266 | + | |
7267 | + for (i = 0; i < job->num_dests; i++) { | |
7268 | + sub_job->dests[i].sector += progress; | |
7269 | + sub_job->dests[i].count = count; | |
7270 | + } | |
7271 | + | |
7272 | + sub_job->fn = segment_complete; | |
7273 | + sub_job->context = job; | |
7274 | + dispatch_job(sub_job); | |
7275 | + | |
7276 | + } else if (atomic_dec_and_test(&job->sub_jobs)) { | |
7277 | + | |
7278 | + /* | |
7279 | + * To avoid a race we must keep the job around | |
7280 | + * until after the notify function has completed. | |
7281 | + * Otherwise the client may try and stop the job | |
7282 | + * after we've completed. | |
7283 | + */ | |
7284 | + job->fn(read_err, write_err, job->context); | |
7285 | + mempool_free(job, _job_pool); | |
7286 | + } | |
7287 | +} | |
7288 | + | |
7289 | +/* | |
7290 | + * Create some little sub-jobs that between them perform the | |
7291 | + * whole copy. | |
7292 | + */ | |
7293 | +static void split_job(struct kcopyd_job *job) | |
7294 | +{ | |
7295 | + int nr; | |
7296 | + | |
7297 | + nr = dm_div_up(job->source.count, SUB_JOB_SIZE); | |
7298 | + if (nr > job->kc->max_split) | |
7299 | + nr = job->kc->max_split; | |
7300 | + | |
7301 | + atomic_set(&job->sub_jobs, nr); | |
7302 | + while (nr--) | |
7303 | + segment_complete(0, 0u, job); | |
7304 | +} | |
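A worked example, again assuming 4 KiB pages: a client created with 64 pages gets max_split = 64 / PAGES_PER_SUB_JOB = 4. Splitting a 4096-sector copy wants dm_div_up(4096, SUB_JOB_SIZE) = 32 sub-jobs but is clamped to 4 in flight; split_job seeds those four by calling segment_complete() with no error, and each completing segment claims the next 128-sector chunk from job->progress until the region is exhausted and the final decrement of sub_jobs fires the caller's notify fn.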
7305 | + | |
7306 | +int kcopyd_copy(struct kcopyd_client *kc, struct io_region *from, | |
7307 | + unsigned int num_dests, struct io_region *dests, | |
7308 | + unsigned int flags, kcopyd_notify_fn fn, void *context) | |
7309 | +{ | |
7310 | + struct kcopyd_job *job; | |
7311 | + | |
7312 | + /* | |
7313 | + * Allocate a new job. | |
7314 | + */ | |
7315 | + job = mempool_alloc(_job_pool, GFP_NOIO); | |
7316 | + | |
7317 | + /* | |
7318 | + * set up for the read. | |
7319 | + */ | |
7320 | + job->kc = kc; | |
7321 | + job->flags = flags; | |
7322 | + job->read_err = 0; | |
7323 | + job->write_err = 0; | |
7324 | + job->rw = READ; | |
7325 | + | |
7326 | + memcpy(&job->source, from, sizeof(*from)); | |
7327 | + | |
7328 | + job->num_dests = num_dests; | |
7329 | + memcpy(&job->dests, dests, sizeof(*dests) * num_dests); | |
7330 | + | |
7331 | + job->offset = 0; | |
7332 | + job->nr_pages = 0; | |
7333 | + INIT_LIST_HEAD(&job->pages); | |
7334 | + | |
7335 | + job->fn = fn; | |
7336 | + job->context = context; | |
7337 | + | |
7338 | + if (job->source.count < SUB_JOB_SIZE) | |
7339 | + dispatch_job(job); | |
7340 | + | |
7341 | + else { | |
7342 | + init_MUTEX(&job->lock); | |
7343 | + job->progress = 0; | |
7344 | + split_job(job); | |
7345 | + } | |
7346 | + | |
7347 | + return 0; | |
7348 | +} | |
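A minimal sketch of how a client might drive this interface (names are illustrative, not part of the patch; struct io_region's dev/sector/count members are assumed from dm-io.h, and the example waits synchronously on a completion):

	#include <linux/completion.h>
	#include "kcopyd.h"

	static void copy_done(int read_err, unsigned int write_err,
			      void *context)
	{
		/* read_err is a boolean, write_err has one bit per dest */
		if (read_err || write_err)
			printk(KERN_ERR "example: copy failed\n");
		complete((struct completion *) context);
	}

	static int copy_extent(struct kcopyd_client *kc, kdev_t src,
			       kdev_t dst, sector_t nr_sectors)
	{
		struct io_region from, to;
		DECLARE_COMPLETION(done);

		from.dev = src;
		from.sector = 0;
		from.count = nr_sectors;

		to.dev = dst;
		to.sector = 0;
		to.count = nr_sectors;

		kcopyd_copy(kc, &from, 1, &to, 0, copy_done, &done);
		wait_for_completion(&done);
		return 0;
	}

A client would typically be created once with kcopyd_client_create(nr_pages, &kc), sized to at least SUB_JOB_SIZE / SECTORS_PER_PAGE pages, and torn down with kcopyd_client_destroy() when the target goes away.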
7349 | + | |
7350 | +/* | |
7351 | + * Cancels a kcopyd job, eg. someone might be deactivating a | |
7352 | + * mirror. | |
7353 | + */ | |
7354 | +int kcopyd_cancel(struct kcopyd_job *job, int block) | |
7355 | +{ | |
7356 | + /* FIXME: finish */ | |
7357 | + return -1; | |
7358 | +} | |
7359 | + | |
7360 | +/*----------------------------------------------------------------- | |
7361 | + * Unit setup | |
7362 | + *---------------------------------------------------------------*/ | |
7363 | +static DECLARE_MUTEX(_client_lock); | |
7364 | +static LIST_HEAD(_clients); | |
7365 | + | |
7366 | +static int client_add(struct kcopyd_client *kc) | |
7367 | +{ | |
7368 | + down(&_client_lock); | |
7369 | + list_add(&kc->list, &_clients); | |
7370 | + up(&_client_lock); | |
7371 | + return 0; | |
7372 | +} | |
7373 | + | |
7374 | +static void client_del(struct kcopyd_client *kc) | |
7375 | +{ | |
7376 | + down(&_client_lock); | |
7377 | + list_del(&kc->list); | |
7378 | + up(&_client_lock); | |
7379 | +} | |
7380 | + | |
7381 | +int kcopyd_client_create(unsigned int nr_pages, struct kcopyd_client **result) | |
7382 | +{ | |
7383 | + int r = 0; | |
7384 | + struct kcopyd_client *kc; | |
7385 | + | |
7386 | + if (nr_pages * SECTORS_PER_PAGE < SUB_JOB_SIZE) { | |
7387 | + DMERR("kcopyd client requested %u pages: minimum is %lu", | |
7388 | + nr_pages, SUB_JOB_SIZE / SECTORS_PER_PAGE); | |
7389 | + return -ENOMEM; | |
7390 | + } | |
7391 | + | |
7392 | + kc = kmalloc(sizeof(*kc), GFP_KERNEL); | |
7393 | + if (!kc) | |
7394 | + return -ENOMEM; | |
7395 | + | |
7396 | + kc->lock = SPIN_LOCK_UNLOCKED; | |
7397 | + INIT_LIST_HEAD(&kc->pages); | |
7398 | + kc->nr_pages = kc->nr_free_pages = 0; | |
7399 | + r = client_alloc_pages(kc, nr_pages); | |
7400 | + if (r) { | |
7401 | + kfree(kc); | |
7402 | + return r; | |
7403 | + } | |
7404 | + | |
7405 | + r = dm_io_get(nr_pages); | |
7406 | + if (r) { | |
7407 | + client_free_pages(kc); | |
7408 | + kfree(kc); | |
7409 | + return r; | |
7410 | + } | |
7411 | + | |
7412 | + r = client_add(kc); | |
7413 | + if (r) { | |
7414 | + dm_io_put(nr_pages); | |
7415 | + client_free_pages(kc); | |
7416 | + kfree(kc); | |
7417 | + return r; | |
7418 | + } | |
7419 | + | |
7420 | + *result = kc; | |
7421 | + return 0; | |
7422 | +} | |
7423 | + | |
7424 | +void kcopyd_client_destroy(struct kcopyd_client *kc) | |
7425 | +{ | |
7426 | + dm_io_put(kc->nr_pages); | |
7427 | + client_free_pages(kc); | |
7428 | + client_del(kc); | |
7429 | + kfree(kc); | |
7430 | +} | |
7431 | + | |
7432 | + | |
7433 | +int __init kcopyd_init(void) | |
7434 | +{ | |
7435 | + int r; | |
7436 | + | |
7437 | + r = jobs_init(); | |
7438 | + if (r) | |
7439 | + return r; | |
7440 | + | |
7441 | + r = dm_daemon_start(&_kcopyd, "kcopyd", do_work); | |
7442 | + if (r) | |
7443 | + jobs_exit(); | |
7444 | + | |
7445 | + return r; | |
7446 | +} | |
7447 | + | |
7448 | +void kcopyd_exit(void) | |
7449 | +{ | |
7450 | +	dm_daemon_stop(&_kcopyd); | |
7451 | +	jobs_exit(); | |
7452 | +} | |
7453 | + | |
7454 | +EXPORT_SYMBOL(kcopyd_client_create); | |
7455 | +EXPORT_SYMBOL(kcopyd_client_destroy); | |
7456 | +EXPORT_SYMBOL(kcopyd_copy); | |
7457 | +EXPORT_SYMBOL(kcopyd_cancel); | |
7458 | diff -urN linux-2.4.24.org/drivers/md/kcopyd.h linux-2.4.24/drivers/md/kcopyd.h | |
7459 | --- linux-2.4.24.org/drivers/md/kcopyd.h 1970-01-01 01:00:00.000000000 +0100 | |
7460 | +++ linux-2.4.24/drivers/md/kcopyd.h 2004-01-18 15:01:25.800189017 +0100 | |
7461 | @@ -0,0 +1,47 @@ | |
7462 | +/* | |
7463 | + * Copyright (C) 2001 Sistina Software | |
7464 | + * | |
7465 | + * This file is released under the GPL. | |
7466 | + */ | |
7467 | + | |
7468 | +#ifndef DM_KCOPYD_H | |
7469 | +#define DM_KCOPYD_H | |
7470 | + | |
7471 | +/* | |
7472 | + * Needed for the definition of sector_t. | |
7473 | + */ | |
7474 | +#include <linux/device-mapper.h> | |
7475 | +#include <linux/iobuf.h> | |
7476 | + | |
7477 | +#include "dm-io.h" | |
7478 | + | |
7479 | +int kcopyd_init(void); | |
7480 | +void kcopyd_exit(void); | |
7481 | + | |
7482 | +/* FIXME: make this configurable */ | |
7483 | +#define KCOPYD_MAX_REGIONS 8 | |
7484 | + | |
7485 | +#define KCOPYD_IGNORE_ERROR 1 | |
7486 | + | |
7487 | +/* | |
7488 | + * To use kcopyd you must first create a kcopyd client object. | |
7489 | + */ | |
7490 | +struct kcopyd_client; | |
7491 | +int kcopyd_client_create(unsigned int num_pages, struct kcopyd_client **result); | |
7492 | +void kcopyd_client_destroy(struct kcopyd_client *kc); | |
7493 | + | |
7494 | +/* | |
7495 | + * Submit a copy job to kcopyd. This is built on top of the | |
7496 | + * client interface above. | |
7497 | + * | |
7498 | + * read_err is a boolean, | |
7499 | + * write_err is a bitset, with 1 bit for each destination region | |
7500 | + */ | |
7501 | +typedef void (*kcopyd_notify_fn)(int read_err, | |
7502 | + unsigned int write_err, void *context); | |
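For instance, with three destinations a write_err of 0x5 (binary 101) reports that the writes to destinations 0 and 2 failed while destination 1 succeeded.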
7503 | + | |
7504 | +int kcopyd_copy(struct kcopyd_client *kc, struct io_region *from, | |
7505 | + unsigned int num_dests, struct io_region *dests, | |
7506 | + unsigned int flags, kcopyd_notify_fn fn, void *context); | |
7507 | + | |
7508 | +#endif | |
7509 | diff -urN linux-2.4.24.org/drivers/md/Makefile linux-2.4.24/drivers/md/Makefile | |
7510 | --- linux-2.4.24.org/drivers/md/Makefile 2004-01-18 14:58:09.300663064 +0100 | |
7511 | +++ linux-2.4.24/drivers/md/Makefile 2004-01-18 15:01:29.209473819 +0100 | |
7512 | @@ -4,24 +4,35 @@ | |
7513 | ||
7514 | O_TARGET := mddev.o | |
7515 | ||
7516 | -export-objs := md.o xor.o | |
7517 | -list-multi := lvm-mod.o | |
7518 | +export-objs := md.o xor.o dm-table.o dm-target.o dm.o dm-daemon.o \ | |
7519 | + kcopyd.o dm-io.o | |
7520 | + | |
7521 | +list-multi := lvm-mod.o dm-mod.o dm-mirror-mod.o | |
7522 | lvm-mod-objs := lvm.o lvm-snap.o lvm-fs.o | |
7523 | +dm-mod-objs := dm.o dm-table.o dm-target.o dm-linear.o dm-stripe.o \ | |
7524 | + dm-ioctl.o dm-daemon.o kcopyd.o dm-io.o dm-snapshot.o \ | |
7525 | + dm-exception-store.o | |
7526 | ||
7527 | # Note: link order is important. All raid personalities | |
7528 | # and xor.o must come before md.o, as they each initialise | |
7529 | # themselves, and md.o may use the personalities when it | |
7530 | # auto-initialised. | |
7531 | ||
7532 | -obj-$(CONFIG_MD_LINEAR) += linear.o | |
7533 | -obj-$(CONFIG_MD_RAID0) += raid0.o | |
7534 | -obj-$(CONFIG_MD_RAID1) += raid1.o | |
7535 | -obj-$(CONFIG_MD_RAID5) += raid5.o xor.o | |
7536 | -obj-$(CONFIG_MD_MULTIPATH) += multipath.o | |
7537 | -obj-$(CONFIG_BLK_DEV_MD) += md.o | |
7538 | -obj-$(CONFIG_BLK_DEV_LVM) += lvm-mod.o | |
7539 | +obj-$(CONFIG_MD_LINEAR) += linear.o | |
7540 | +obj-$(CONFIG_MD_RAID0) += raid0.o | |
7541 | +obj-$(CONFIG_MD_RAID1) += raid1.o | |
7542 | +obj-$(CONFIG_MD_RAID5) += raid5.o xor.o | |
7543 | +obj-$(CONFIG_MD_MULTIPATH) += multipath.o | |
7544 | +obj-$(CONFIG_BLK_DEV_MD) += md.o | |
7545 | + | |
7546 | +obj-$(CONFIG_BLK_DEV_LVM) += lvm-mod.o | |
7547 | + | |
7548 | +obj-$(CONFIG_BLK_DEV_DM) += dm-mod.o | |
7549 | ||
7550 | include $(TOPDIR)/Rules.make | |
7551 | ||
7552 | lvm-mod.o: $(lvm-mod-objs) | |
7553 | $(LD) -r -o $@ $(lvm-mod-objs) | |
7554 | + | |
7555 | +dm-mod.o: $(dm-mod-objs) | |
7556 | + $(LD) -r -o $@ $(dm-mod-objs) | |
7557 | diff -urN linux-2.4.24.org/include/linux/device-mapper.h linux-2.4.24/include/linux/device-mapper.h | |
7558 | --- linux-2.4.24.org/include/linux/device-mapper.h 1970-01-01 01:00:00.000000000 +0100 | |
7559 | +++ linux-2.4.24/include/linux/device-mapper.h 2004-01-18 15:01:13.800707381 +0100 | |
7560 | @@ -0,0 +1,104 @@ | |
7561 | +/* | |
7562 | + * Copyright (C) 2001 Sistina Software (UK) Limited. | |
7563 | + * | |
7564 | + * This file is released under the LGPL. | |
7565 | + */ | |
7566 | + | |
7567 | +#ifndef _LINUX_DEVICE_MAPPER_H | |
7568 | +#define _LINUX_DEVICE_MAPPER_H | |
7569 | + | |
7570 | +typedef unsigned long sector_t; | |
7571 | + | |
7572 | +struct dm_target; | |
7573 | +struct dm_table; | |
7574 | +struct dm_dev; | |
7575 | + | |
7576 | +typedef enum { STATUSTYPE_INFO, STATUSTYPE_TABLE } status_type_t; | |
7577 | + | |
7578 | +union map_info { | |
7579 | + void *ptr; | |
7580 | + unsigned long long ll; | |
7581 | +}; | |
7582 | + | |
7583 | +/* | |
7584 | + * In the constructor the target parameter will already have the | |
7585 | + * table, type, begin and len fields filled in. | |
7586 | + */ | |
7587 | +typedef int (*dm_ctr_fn) (struct dm_target * target, unsigned int argc, | |
7588 | + char **argv); | |
7589 | + | |
7590 | +/* | |
7591 | + * The destructor doesn't need to free the dm_target, just | |
7592 | + * anything hidden in ti->private. | |
7593 | + */ | |
7594 | +typedef void (*dm_dtr_fn) (struct dm_target * ti); | |
7595 | + | |
7596 | +/* | |
7597 | + * The map function must return: | |
7598 | + * < 0: error | |
7599 | + * = 0: The target will handle the io by resubmitting it later | |
7600 | + * > 0: simple remap complete | |
7601 | + */ | |
7602 | +typedef int (*dm_map_fn) (struct dm_target * ti, struct buffer_head * bh, | |
7603 | + int rw, union map_info *map_context); | |
7604 | + | |
7605 | +/* | |
7606 | + * Returns: | |
7607 | + * < 0 : error (currently ignored) | |
7608 | + * 0 : ended successfully | |
7609 | + * 1 : for some reason the io has still not completed (eg, | |
7610 | + * multipath target might want to requeue a failed io). | |
7611 | + */ | |
7612 | +typedef int (*dm_endio_fn) (struct dm_target * ti, | |
7613 | + struct buffer_head * bh, int rw, int error, | |
7614 | + union map_info *map_context); | |
7615 | +typedef void (*dm_suspend_fn) (struct dm_target *ti); | |
7616 | +typedef void (*dm_resume_fn) (struct dm_target *ti); | |
7617 | +typedef int (*dm_status_fn) (struct dm_target * ti, status_type_t status_type, | |
7618 | + char *result, unsigned int maxlen); | |
7619 | + | |
7620 | +void dm_error(const char *message); | |
7621 | + | |
7622 | +/* | |
7623 | + * Constructors should call these functions to ensure destination devices | |
7624 | + * are opened/closed correctly. | |
7625 | + * FIXME: too many arguments. | |
7626 | + */ | |
7627 | +int dm_get_device(struct dm_target *ti, const char *path, sector_t start, | |
7628 | + sector_t len, int mode, struct dm_dev **result); | |
7629 | +void dm_put_device(struct dm_target *ti, struct dm_dev *d); | |
7630 | + | |
7631 | +/* | |
7632 | + * Information about a target type | |
7633 | + */ | |
7634 | +struct target_type { | |
7635 | + const char *name; | |
7636 | + struct module *module; | |
7637 | + dm_ctr_fn ctr; | |
7638 | + dm_dtr_fn dtr; | |
7639 | + dm_map_fn map; | |
7640 | + dm_endio_fn end_io; | |
7641 | + dm_suspend_fn suspend; | |
7642 | + dm_resume_fn resume; | |
7643 | + dm_status_fn status; | |
7644 | +}; | |
7645 | + | |
7646 | +struct dm_target { | |
7647 | + struct dm_table *table; | |
7648 | + struct target_type *type; | |
7649 | + | |
7650 | + /* target limits */ | |
7651 | + sector_t begin; | |
7652 | + sector_t len; | |
7653 | + | |
7654 | + /* target specific data */ | |
7655 | + void *private; | |
7656 | + | |
7657 | + /* Used to provide an error string from the ctr */ | |
7658 | + char *error; | |
7659 | +}; | |
7660 | + | |
7661 | +int dm_register_target(struct target_type *t); | |
7662 | +int dm_unregister_target(struct target_type *t); | |
7663 | + | |
7664 | +#endif /* _LINUX_DEVICE_MAPPER_H */ | |
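To show how the pieces fit, a sketch of a trivial remapping target in the style of dm-linear (the b_rdev/b_rsector remap and the dm_dev 'dev' member are assumed from the rest of the 2.4 patch; FMODE_READ | FMODE_WRITE is assumed for the open mode, and error handling is abbreviated):

	#include <linux/module.h>
	#include <linux/device-mapper.h>
	#include <linux/fs.h>
	#include <linux/slab.h>

	struct shift_c {		/* hypothetical per-target state */
		struct dm_dev *dev;
		sector_t start;
	};

	/* Construct: <dev_path> <offset> */
	static int shift_ctr(struct dm_target *ti, unsigned int argc,
			     char **argv)
	{
		struct shift_c *sc;

		if (argc != 2) {
			ti->error = "shift: <dev> <offset> required";
			return -EINVAL;
		}

		sc = kmalloc(sizeof(*sc), GFP_KERNEL);
		if (!sc) {
			ti->error = "shift: out of memory";
			return -ENOMEM;
		}
		sc->start = simple_strtoul(argv[1], NULL, 10);

		if (dm_get_device(ti, argv[0], sc->start, ti->len,
				  FMODE_READ | FMODE_WRITE, &sc->dev)) {
			ti->error = "shift: device lookup failed";
			kfree(sc);
			return -ENXIO;
		}

		ti->private = sc;
		return 0;
	}

	static void shift_dtr(struct dm_target *ti)
	{
		struct shift_c *sc = (struct shift_c *) ti->private;

		dm_put_device(ti, sc->dev);
		kfree(sc);
	}

	static int shift_map(struct dm_target *ti, struct buffer_head *bh,
			     int rw, union map_info *map_context)
	{
		struct shift_c *sc = (struct shift_c *) ti->private;

		bh->b_rdev = sc->dev->dev;	/* 'dev' member assumed */
		bh->b_rsector = sc->start + (bh->b_rsector - ti->begin);
		return 1;			/* >0: simple remap complete */
	}

	static struct target_type shift_target = {
		.name   = "shift",
		.module = THIS_MODULE,
		.ctr    = shift_ctr,
		.dtr    = shift_dtr,
		.map    = shift_map,
	};

Registration would go through dm_register_target(&shift_target) at module init and dm_unregister_target() on exit.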
7665 | diff -urN linux-2.4.24.org/include/linux/dm-ioctl.h linux-2.4.24/include/linux/dm-ioctl.h | |
7666 | --- linux-2.4.24.org/include/linux/dm-ioctl.h 1970-01-01 01:00:00.000000000 +0100 | |
7667 | +++ linux-2.4.24/include/linux/dm-ioctl.h 2004-01-18 15:01:17.793869131 +0100 | |
7668 | @@ -0,0 +1,237 @@ | |
7669 | +/* | |
7670 | + * Copyright (C) 2001 - 2003 Sistina Software (UK) Limited. | |
7671 | + * | |
7672 | + * This file is released under the LGPL. | |
7673 | + */ | |
7674 | + | |
7675 | +#ifndef _LINUX_DM_IOCTL_H | |
7676 | +#define _LINUX_DM_IOCTL_H | |
7677 | + | |
7678 | +#include <linux/types.h> | |
7679 | + | |
7680 | +#define DM_DIR "mapper" /* Slashes not supported */ | |
7681 | +#define DM_MAX_TYPE_NAME 16 | |
7682 | +#define DM_NAME_LEN 128 | |
7683 | +#define DM_UUID_LEN 129 | |
7684 | + | |
7685 | +/* | |
7686 | + * A traditional ioctl interface for the device mapper. | |
7687 | + * | |
7688 | + * Each device can have two tables associated with it, an | |
7689 | + * 'active' table which is the one currently used by io passing | |
7690 | + * through the device, and an 'inactive' one which is a table | |
7691 | + * that is being prepared as a replacement for the 'active' one. | |
7692 | + * | |
7693 | + * DM_VERSION: | |
7694 | + * Just get the version information for the ioctl interface. | |
7695 | + * | |
7696 | + * DM_REMOVE_ALL: | |
7697 | + * Remove all dm devices, destroy all tables. Only really used | |
7698 | + * for debug. | |
7699 | + * | |
7700 | + * DM_LIST_DEVICES: | |
7701 | + * Get a list of all the dm device names. | |
7702 | + * | |
7703 | + * DM_DEV_CREATE: | |
7704 | + * Create a new device; neither the 'active' nor the 'inactive' table | |
7705 | + * slots will be filled. The device will be in suspended state | |
7706 | + * after creation, however any io to the device will get errored | |
7707 | + * since it will be out-of-bounds. | |
7708 | + * | |
7709 | + * DM_DEV_REMOVE: | |
7710 | + * Remove a device, destroy any tables. | |
7711 | + * | |
7712 | + * DM_DEV_RENAME: | |
7713 | + * Rename a device. | |
7714 | + * | |
7715 | + * DM_DEV_SUSPEND: | |
7716 | + * This performs both suspend and resume, depending which flag is | |
7717 | + * passed in. | |
7718 | + * Suspend: This command will not return until all pending io to | |
7719 | + * the device has completed. Further io will be deferred until | |
7720 | + * the device is resumed. | |
7721 | + * Resume: It is no longer an error to issue this command on an | |
7722 | + * unsuspended device. If a table is present in the 'inactive' | |
7723 | + * slot, it will be moved to the active slot, then the old table | |
7724 | + * from the active slot will be _destroyed_. Finally the device | |
7725 | + * is resumed. | |
7726 | + * | |
7727 | + * DM_DEV_STATUS: | |
7728 | + * Retrieves the status for the table in the 'active' slot. | |
7729 | + * | |
7730 | + * DM_DEV_WAIT: | |
7731 | + * Wait for a significant event to occur to the device. This | |
7732 | + * could either be caused by an event triggered by one of the | |
7733 | + * targets of the table in the 'active' slot, or a table change. | |
7734 | + * | |
7735 | + * DM_TABLE_LOAD: | |
7736 | + * Load a table into the 'inactive' slot for the device. The | |
7737 | + * device does _not_ need to be suspended prior to this command. | |
7738 | + * | |
7739 | + * DM_TABLE_CLEAR: | |
7740 | + * Destroy any table in the 'inactive' slot (ie. abort). | |
7741 | + * | |
7742 | + * DM_TABLE_DEPS: | |
7743 | + * Return a set of device dependencies for the 'active' table. | |
7744 | + * | |
7745 | + * DM_TABLE_STATUS: | |
7746 | + * Return the targets' status for the 'active' table. | |
7747 | + */ | |
7748 | + | |
7749 | +/* | |
7750 | + * All ioctl arguments consist of a single chunk of memory, with | |
7751 | + * this structure at the start. If a uuid is specified any | |
7752 | + * lookup (eg. for a DM_INFO) will be done on that, *not* the | |
7753 | + * name. | |
7754 | + */ | |
7755 | +struct dm_ioctl { | |
7756 | + /* | |
7757 | + * The version number is made up of three parts: | |
7758 | + * major - no backward or forward compatibility, | |
7759 | + * minor - only backwards compatible, | |
7760 | + * patch - both backwards and forwards compatible. | |
7761 | + * | |
7762 | + * All clients of the ioctl interface should fill in the | |
7763 | + * version number of the interface that they were | |
7764 | + * compiled with. | |
7765 | + * | |
7766 | + * All recognised ioctl commands (ie. those that don't | |
7767 | + * return -ENOTTY) fill out this field, even if the | |
7768 | + * command failed. | |
7769 | + */ | |
7770 | + uint32_t version[3]; /* in/out */ | |
7771 | + uint32_t data_size; /* total size of data passed in | |
7772 | + * including this struct */ | |
7773 | + | |
7774 | + uint32_t data_start; /* offset to start of data | |
7775 | + * relative to start of this struct */ | |
7776 | + | |
7777 | + uint32_t target_count; /* in/out */ | |
7778 | + int32_t open_count; /* out */ | |
7779 | + uint32_t flags; /* in/out */ | |
7780 | + uint32_t event_nr; /* in/out */ | |
7781 | + uint32_t padding; | |
7782 | + | |
7783 | + uint64_t dev; /* in/out */ | |
7784 | + | |
7785 | + char name[DM_NAME_LEN]; /* device name */ | |
7786 | + char uuid[DM_UUID_LEN]; /* unique identifier for | |
7787 | + * the block device */ | |
7788 | +}; | |
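For a rough picture of the calling convention from user space (the control-node path is an assumption, not part of this header, and error handling is minimal):

	#include <fcntl.h>
	#include <string.h>
	#include <unistd.h>
	#include <sys/ioctl.h>
	#include <linux/dm-ioctl.h>

	int dm_get_version(uint32_t ver[3])
	{
		struct dm_ioctl dmi;
		int r, fd = open("/dev/mapper/control", O_RDWR); /* path assumed */

		if (fd < 0)
			return -1;

		memset(&dmi, 0, sizeof(dmi));
		dmi.version[0] = DM_VERSION_MAJOR;   /* version compiled against */
		dmi.version[1] = DM_VERSION_MINOR;
		dmi.version[2] = DM_VERSION_PATCHLEVEL;
		dmi.data_size = sizeof(dmi);         /* header only, no payload */

		r = ioctl(fd, DM_VERSION, &dmi);
		close(fd);
		if (r < 0)
			return -1;

		memcpy(ver, dmi.version, sizeof(dmi.version));
		return 0;
	}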
7789 | + | |
7790 | +/* | |
7791 | + * Used to specify tables. These structures appear after the | |
7792 | + * dm_ioctl. | |
7793 | + */ | |
7794 | +struct dm_target_spec { | |
7795 | + uint64_t sector_start; | |
7796 | + uint64_t length; | |
7797 | + int32_t status; /* used when reading from kernel only */ | |
7798 | + | |
7799 | + /* | |
7800 | + * Offset in bytes (from the start of this struct) to | |
7801 | + * next target_spec. | |
7802 | + */ | |
7803 | + uint32_t next; | |
7804 | + | |
7805 | + char target_type[DM_MAX_TYPE_NAME]; | |
7806 | + | |
7807 | + /* | |
7808 | + * Parameter string starts immediately after this object. | |
7809 | + * Be careful to add padding after string to ensure correct | |
7810 | + * alignment of subsequent dm_target_spec. | |
7811 | + */ | |
7812 | +}; | |
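To make the layout rules concrete, a hypothetical user-space helper that appends one spec plus its NUL-terminated parameter string (the 8-byte alignment granularity used for the padding is an assumption):

	#include <stdint.h>
	#include <string.h>
	#include <linux/dm-ioctl.h>

	/* Append one target spec + params at offset 'off' in the ioctl
	 * buffer; returns the offset where the next spec should start. */
	static size_t add_target(char *buf, size_t off, uint64_t start,
				 uint64_t len, const char *type,
				 const char *params)
	{
		struct dm_target_spec *spec =
			(struct dm_target_spec *) (buf + off);
		size_t plen = strlen(params) + 1;

		spec->sector_start = start;
		spec->length = len;
		spec->status = 0;
		strncpy(spec->target_type, type, DM_MAX_TYPE_NAME);
		memcpy(buf + off + sizeof(*spec), params, plen);

		/* pad so a following dm_target_spec stays 8-byte aligned */
		spec->next = (sizeof(*spec) + plen + 7) & ~(size_t) 7;
		return off + spec->next;
	}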
7813 | + | |
7814 | +/* | |
7815 | + * Used to retrieve the target dependencies. | |
7816 | + */ | |
7817 | +struct dm_target_deps { | |
7818 | + uint32_t count; /* Array size */ | |
7819 | + uint32_t padding; /* unused */ | |
7820 | + uint64_t dev[0]; /* out */ | |
7821 | +}; | |
7822 | + | |
7823 | +/* | |
7824 | + * Used to get a list of all dm devices. | |
7825 | + */ | |
7826 | +struct dm_name_list { | |
7827 | + uint64_t dev; | |
7828 | + uint32_t next; /* offset to the next record from | |
7829 | + the _start_ of this */ | |
7830 | + char name[0]; | |
7831 | +}; | |
7832 | + | |
7833 | +/* | |
7834 | + * If you change this make sure you make the corresponding change | |
7835 | + * to dm-ioctl.c:lookup_ioctl() | |
7836 | + */ | |
7837 | +enum { | |
7838 | + /* Top level cmds */ | |
7839 | + DM_VERSION_CMD = 0, | |
7840 | + DM_REMOVE_ALL_CMD, | |
7841 | + DM_LIST_DEVICES_CMD, | |
7842 | + | |
7843 | + /* device level cmds */ | |
7844 | + DM_DEV_CREATE_CMD, | |
7845 | + DM_DEV_REMOVE_CMD, | |
7846 | + DM_DEV_RENAME_CMD, | |
7847 | + DM_DEV_SUSPEND_CMD, | |
7848 | + DM_DEV_STATUS_CMD, | |
7849 | + DM_DEV_WAIT_CMD, | |
7850 | + | |
7851 | + /* Table level cmds */ | |
7852 | + DM_TABLE_LOAD_CMD, | |
7853 | + DM_TABLE_CLEAR_CMD, | |
7854 | + DM_TABLE_DEPS_CMD, | |
7855 | + DM_TABLE_STATUS_CMD, | |
7856 | +}; | |
7857 | + | |
7858 | +#define DM_IOCTL 0xfd | |
7859 | + | |
7860 | +#define DM_VERSION _IOWR(DM_IOCTL, DM_VERSION_CMD, struct dm_ioctl) | |
7861 | +#define DM_REMOVE_ALL _IOWR(DM_IOCTL, DM_REMOVE_ALL_CMD, struct dm_ioctl) | |
7862 | +#define DM_LIST_DEVICES _IOWR(DM_IOCTL, DM_LIST_DEVICES_CMD, struct dm_ioctl) | |
7863 | + | |
7864 | +#define DM_DEV_CREATE _IOWR(DM_IOCTL, DM_DEV_CREATE_CMD, struct dm_ioctl) | |
7865 | +#define DM_DEV_REMOVE _IOWR(DM_IOCTL, DM_DEV_REMOVE_CMD, struct dm_ioctl) | |
7866 | +#define DM_DEV_RENAME _IOWR(DM_IOCTL, DM_DEV_RENAME_CMD, struct dm_ioctl) | |
7867 | +#define DM_DEV_SUSPEND _IOWR(DM_IOCTL, DM_DEV_SUSPEND_CMD, struct dm_ioctl) | |
7868 | +#define DM_DEV_STATUS _IOWR(DM_IOCTL, DM_DEV_STATUS_CMD, struct dm_ioctl) | |
7869 | +#define DM_DEV_WAIT _IOWR(DM_IOCTL, DM_DEV_WAIT_CMD, struct dm_ioctl) | |
7870 | + | |
7871 | +#define DM_TABLE_LOAD _IOWR(DM_IOCTL, DM_TABLE_LOAD_CMD, struct dm_ioctl) | |
7872 | +#define DM_TABLE_CLEAR _IOWR(DM_IOCTL, DM_TABLE_CLEAR_CMD, struct dm_ioctl) | |
7873 | +#define DM_TABLE_DEPS _IOWR(DM_IOCTL, DM_TABLE_DEPS_CMD, struct dm_ioctl) | |
7874 | +#define DM_TABLE_STATUS _IOWR(DM_IOCTL, DM_TABLE_STATUS_CMD, struct dm_ioctl) | |
7875 | + | |
7876 | +#define DM_VERSION_MAJOR 4 | |
7877 | +#define DM_VERSION_MINOR 0 | |
7878 | +#define DM_VERSION_PATCHLEVEL 5 | |
7879 | +#define DM_VERSION_EXTRA "-ioctl (2003-11-18)" | |
7880 | + | |
7881 | +/* Status bits */ | |
7882 | +#define DM_READONLY_FLAG (1 << 0) /* In/Out */ | |
7883 | +#define DM_SUSPEND_FLAG (1 << 1) /* In/Out */ | |
7884 | +#define DM_PERSISTENT_DEV_FLAG (1 << 3) /* In */ | |
7885 | + | |
7886 | +/* | |
7887 | + * Flag passed into ioctl STATUS command to get table information | |
7888 | + * rather than current status. | |
7889 | + */ | |
7890 | +#define DM_STATUS_TABLE_FLAG (1 << 4) /* In */ | |
7891 | + | |
7892 | +/* | |
7893 | + * Flags that indicate whether a table is present in either of | |
7894 | + * the two table slots that a device has. | |
7895 | + */ | |
7896 | +#define DM_ACTIVE_PRESENT_FLAG (1 << 5) /* Out */ | |
7897 | +#define DM_INACTIVE_PRESENT_FLAG (1 << 6) /* Out */ | |
7898 | + | |
7899 | +/* | |
7900 | + * Indicates that the buffer passed in wasn't big enough for the | |
7901 | + * results. | |
7902 | + */ | |
7903 | +#define DM_BUFFER_FULL_FLAG (1 << 8) /* Out */ | |
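User space is expected to treat this flag as a soft failure: enlarge the buffer, update data_size accordingly, and repeat the ioctl until the flag comes back clear.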
7904 | + | |
7905 | +#endif /* _LINUX_DM_IOCTL_H */ | |
7906 | diff -urN linux-2.4.24.org/include/linux/mempool.h linux-2.4.24/include/linux/mempool.h | |
7907 | --- linux-2.4.24.org/include/linux/mempool.h 1970-01-01 01:00:00.000000000 +0100 | |
7908 | +++ linux-2.4.24/include/linux/mempool.h 2004-01-18 15:01:09.522605662 +0100 | |
7909 | @@ -0,0 +1,31 @@ | |
7910 | +/* | |
7911 | + * memory buffer pool support | |
7912 | + */ | |
7913 | +#ifndef _LINUX_MEMPOOL_H | |
7914 | +#define _LINUX_MEMPOOL_H | |
7915 | + | |
7916 | +#include <linux/list.h> | |
7917 | +#include <linux/wait.h> | |
7918 | + | |
7919 | +struct mempool_s; | |
7920 | +typedef struct mempool_s mempool_t; | |
7921 | + | |
7922 | +typedef void * (mempool_alloc_t)(int gfp_mask, void *pool_data); | |
7923 | +typedef void (mempool_free_t)(void *element, void *pool_data); | |
7924 | + | |
7925 | +extern mempool_t * mempool_create(int min_nr, mempool_alloc_t *alloc_fn, | |
7926 | + mempool_free_t *free_fn, void *pool_data); | |
7927 | +extern int mempool_resize(mempool_t *pool, int new_min_nr, int gfp_mask); | |
7928 | +extern void mempool_destroy(mempool_t *pool); | |
7929 | +extern void * mempool_alloc(mempool_t *pool, int gfp_mask); | |
7930 | +extern void mempool_free(void *element, mempool_t *pool); | |
7931 | + | |
7932 | +/* | |
7933 | + * A mempool_alloc_t and mempool_free_t that get the memory from | |
7934 | + * a slab that is passed in through pool_data. | |
7935 | + */ | |
7936 | +void *mempool_alloc_slab(int gfp_mask, void *pool_data); | |
7937 | +void mempool_free_slab(void *element, void *pool_data); | |
7938 | + | |
7939 | + | |
7940 | +#endif /* _LINUX_MEMPOOL_H */ | |
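A minimal sketch of the intended usage pattern, with a caller-owned slab cache (all names hypothetical):

	#include <linux/slab.h>
	#include <linux/mempool.h>

	struct my_io { int dummy; };	/* placeholder element type */

	static kmem_cache_t *my_cache;
	static mempool_t *my_pool;

	static int __init my_pool_init(void)
	{
		my_cache = kmem_cache_create("my-io", sizeof(struct my_io),
					     0, 0, NULL, NULL);
		if (!my_cache)
			return -ENOMEM;

		/* 16 preallocated elements guarantee forward progress
		 * under memory pressure */
		my_pool = mempool_create(16, mempool_alloc_slab,
					 mempool_free_slab, my_cache);
		if (!my_pool) {
			kmem_cache_destroy(my_cache);
			return -ENOMEM;
		}
		return 0;
	}

Because the pool keeps min_nr preallocated elements, a subsequent mempool_alloc(my_pool, GFP_NOIO) on the io path never fails in process context, which is exactly the property kcopyd relies on for its job pool.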
7941 | diff -urN linux-2.4.24.org/MAINTAINERS linux-2.4.24/MAINTAINERS | |
7942 | --- linux-2.4.24.org/MAINTAINERS 2004-01-18 14:59:47.570857618 +0100 | |
7943 | +++ linux-2.4.24/MAINTAINERS 2004-01-18 15:01:13.766714518 +0100 | |
7944 | @@ -581,6 +581,13 @@ | |
7945 | W: http://www.debian.org/~dz/i8k/ | |
7946 | S: Maintained | |
7947 | ||
7948 | +DEVICE MAPPER | |
7949 | +P: Joe Thornber | |
7950 | +M: dm@uk.sistina.com | |
7951 | +L: linux-LVM@sistina.com | |
7952 | +W: http://www.sistina.com/lvm | |
7953 | +S: Maintained | |
7954 | + | |
7955 | DEVICE NUMBER REGISTRY | |
7956 | P: H. Peter Anvin | |
7957 | M: hpa@zytor.com | |
7958 | diff -urN linux-2.4.24.org/mm/Makefile linux-2.4.24/mm/Makefile | |
7959 | --- linux-2.4.24.org/mm/Makefile 2004-01-18 14:55:23.909936044 +0100 | |
7960 | +++ linux-2.4.24/mm/Makefile 2004-01-18 15:01:09.497610911 +0100 | |
7961 | @@ -9,12 +9,12 @@ | |
7962 | ||
7963 | O_TARGET := mm.o | |
7964 | ||
7965 | -export-objs := shmem.o filemap.o memory.o page_alloc.o | |
7966 | +export-objs := shmem.o filemap.o memory.o page_alloc.o mempool.o | |
7967 | ||
7968 | obj-y := memory.o mmap.o filemap.o mprotect.o mlock.o mremap.o \ | |
7969 | vmalloc.o slab.o bootmem.o swap.o vmscan.o page_io.o \ | |
7970 | page_alloc.o swap_state.o swapfile.o numa.o oom_kill.o \ | |
7971 | - shmem.o | |
7972 | + shmem.o mempool.o | |
7973 | ||
7974 | obj-$(CONFIG_HIGHMEM) += highmem.o | |
7975 | obj-$(CONFIG_PROC_MM) += proc_mm.o | |
7976 | diff -urN linux-2.4.24.org/mm/mempool.c linux-2.4.24/mm/mempool.c | |
7977 | --- linux-2.4.24.org/mm/mempool.c 1970-01-01 01:00:00.000000000 +0100 | |
7978 | +++ linux-2.4.24/mm/mempool.c 2004-01-18 15:01:09.525605032 +0100 | |
7979 | @@ -0,0 +1,299 @@ | |
7980 | +/* | |
7981 | + * linux/mm/mempool.c | |
7982 | + * | |
7983 | + * memory buffer pool support. Such pools are mostly used | |
7984 | + * for guaranteed, deadlock-free memory allocations during | |
7985 | + * extreme VM load. | |
7986 | + * | |
7987 | + * started by Ingo Molnar, Copyright (C) 2001 | |
7988 | + */ | |
7989 | + | |
7990 | +#include <linux/mm.h> | |
7991 | +#include <linux/slab.h> | |
7992 | +#include <linux/module.h> | |
7993 | +#include <linux/mempool.h> | |
7994 | + | |
7995 | +struct mempool_s { | |
7996 | + spinlock_t lock; | |
7997 | + int min_nr; /* nr of elements at *elements */ | |
7998 | + int curr_nr; /* Current nr of elements at *elements */ | |
7999 | + void **elements; | |
8000 | + | |
8001 | + void *pool_data; | |
8002 | + mempool_alloc_t *alloc; | |
8003 | + mempool_free_t *free; | |
8004 | + wait_queue_head_t wait; | |
8005 | +}; | |
8006 | + | |
8007 | +static void add_element(mempool_t *pool, void *element) | |
8008 | +{ | |
8009 | + BUG_ON(pool->curr_nr >= pool->min_nr); | |
8010 | + pool->elements[pool->curr_nr++] = element; | |
8011 | +} | |
8012 | + | |
8013 | +static void *remove_element(mempool_t *pool) | |
8014 | +{ | |
8015 | + BUG_ON(pool->curr_nr <= 0); | |
8016 | + return pool->elements[--pool->curr_nr]; | |
8017 | +} | |
8018 | + | |
8019 | +static void free_pool(mempool_t *pool) | |
8020 | +{ | |
8021 | + while (pool->curr_nr) { | |
8022 | + void *element = remove_element(pool); | |
8023 | + pool->free(element, pool->pool_data); | |
8024 | + } | |
8025 | + kfree(pool->elements); | |
8026 | + kfree(pool); | |
8027 | +} | |
8028 | + | |
8029 | +/** | |
8030 | + * mempool_create - create a memory pool | |
8031 | + * @min_nr: the minimum number of elements guaranteed to be | |
8032 | + * allocated for this pool. | |
8033 | + * @alloc_fn: user-defined element-allocation function. | |
8034 | + * @free_fn: user-defined element-freeing function. | |
8035 | + * @pool_data: optional private data available to the user-defined functions. | |
8036 | + * | |
8037 | + * this function creates and allocates a guaranteed size, preallocated | |
8038 | + * memory pool. The pool can be used from the mempool_alloc and mempool_free | |
8039 | + * functions. This function might sleep. Both the alloc_fn() and the free_fn() | |
8040 | + * functions might sleep - as long as the mempool_alloc function is not called | |
8041 | + * from IRQ contexts. | |
8042 | + */ | |
8043 | +mempool_t * mempool_create(int min_nr, mempool_alloc_t *alloc_fn, | |
8044 | + mempool_free_t *free_fn, void *pool_data) | |
8045 | +{ | |
8046 | + mempool_t *pool; | |
8047 | + | |
8048 | + pool = kmalloc(sizeof(*pool), GFP_KERNEL); | |
8049 | + if (!pool) | |
8050 | + return NULL; | |
8051 | + memset(pool, 0, sizeof(*pool)); | |
8052 | + pool->elements = kmalloc(min_nr * sizeof(void *), GFP_KERNEL); | |
8053 | + if (!pool->elements) { | |
8054 | + kfree(pool); | |
8055 | + return NULL; | |
8056 | + } | |
8057 | + spin_lock_init(&pool->lock); | |
8058 | + pool->min_nr = min_nr; | |
8059 | + pool->pool_data = pool_data; | |
8060 | + init_waitqueue_head(&pool->wait); | |
8061 | + pool->alloc = alloc_fn; | |
8062 | + pool->free = free_fn; | |
8063 | + | |
8064 | + /* | |
8065 | + * First pre-allocate the guaranteed number of buffers. | |
8066 | + */ | |
8067 | + while (pool->curr_nr < pool->min_nr) { | |
8068 | + void *element; | |
8069 | + | |
8070 | + element = pool->alloc(GFP_KERNEL, pool->pool_data); | |
8071 | + if (unlikely(!element)) { | |
8072 | + free_pool(pool); | |
8073 | + return NULL; | |
8074 | + } | |
8075 | + add_element(pool, element); | |
8076 | + } | |
8077 | + return pool; | |
8078 | +} | |
8079 | + | |
8080 | +/** | |
8081 | + * mempool_resize - resize an existing memory pool | |
8082 | + * @pool: pointer to the memory pool which was allocated via | |
8083 | + * mempool_create(). | |
8084 | + * @new_min_nr: the new minimum number of elements guaranteed to be | |
8085 | + * allocated for this pool. | |
8086 | + * @gfp_mask: the usual allocation bitmask. | |
8087 | + * | |
8088 | + * This function shrinks/grows the pool. In the case of growing, | |
8089 | + * it cannot be guaranteed that the pool will be grown to the new | |
8090 | + * size immediately, but new mempool_free() calls will refill it. | |
8091 | + * | |
8092 | + * Note, the caller must guarantee that no mempool_destroy is called | |
8093 | + * while this function is running. mempool_alloc() & mempool_free() | |
8094 | + * might be called (eg. from IRQ contexts) while this function executes. | |
8095 | + */ | |
8096 | +int mempool_resize(mempool_t *pool, int new_min_nr, int gfp_mask) | |
8097 | +{ | |
8098 | + void *element; | |
8099 | + void **new_elements; | |
8100 | + unsigned long flags; | |
8101 | + | |
8102 | + BUG_ON(new_min_nr <= 0); | |
8103 | + | |
8104 | + spin_lock_irqsave(&pool->lock, flags); | |
8105 | + if (new_min_nr < pool->min_nr) { | |
8106 | + while (pool->curr_nr > new_min_nr) { | |
8107 | + element = remove_element(pool); | |
8108 | + spin_unlock_irqrestore(&pool->lock, flags); | |
8109 | + pool->free(element, pool->pool_data); | |
8110 | + spin_lock_irqsave(&pool->lock, flags); | |
8111 | + } | |
8112 | + pool->min_nr = new_min_nr; | |
8113 | + goto out_unlock; | |
8114 | + } | |
8115 | + spin_unlock_irqrestore(&pool->lock, flags); | |
8116 | + | |
8117 | + /* Grow the pool */ | |
8118 | + new_elements = kmalloc(new_min_nr * sizeof(*new_elements), gfp_mask); | |
8119 | + if (!new_elements) | |
8120 | + return -ENOMEM; | |
8121 | + | |
8122 | + spin_lock_irqsave(&pool->lock, flags); | |
8123 | + memcpy(new_elements, pool->elements, | |
8124 | + pool->curr_nr * sizeof(*new_elements)); | |
8125 | + kfree(pool->elements); | |
8126 | + pool->elements = new_elements; | |
8127 | + pool->min_nr = new_min_nr; | |
8128 | + | |
8129 | + while (pool->curr_nr < pool->min_nr) { | |
8130 | + spin_unlock_irqrestore(&pool->lock, flags); | |
8131 | + element = pool->alloc(gfp_mask, pool->pool_data); | |
8132 | + if (!element) | |
8133 | + goto out; | |
8134 | + spin_lock_irqsave(&pool->lock, flags); | |
8135 | + if (pool->curr_nr < pool->min_nr) | |
8136 | + add_element(pool, element); | |
8137 | + else | |
8138 | +			pool->free(element, pool->pool_data);	/* Raced */ | |
8139 | + } | |
8140 | +out_unlock: | |
8141 | + spin_unlock_irqrestore(&pool->lock, flags); | |
8142 | +out: | |
8143 | + return 0; | |
8144 | +} | |
8145 | + | |
8146 | +/** | |
8147 | + * mempool_destroy - deallocate a memory pool | |
8148 | + * @pool: pointer to the memory pool which was allocated via | |
8149 | + * mempool_create(). | |
8150 | + * | |
8151 | + * this function only sleeps if the free_fn() function sleeps. The caller | |
8152 | + * has to guarantee that all elements have been returned to the pool (ie: | |
8153 | + * freed) prior to calling mempool_destroy(). | |
8154 | + */ | |
8155 | +void mempool_destroy(mempool_t *pool) | |
8156 | +{ | |
8157 | + if (pool->curr_nr != pool->min_nr) | |
8158 | + BUG(); /* There were outstanding elements */ | |
8159 | + free_pool(pool); | |
8160 | +} | |
8161 | + | |
8162 | +/** | |
8163 | + * mempool_alloc - allocate an element from a specific memory pool | |
8164 | + * @pool: pointer to the memory pool which was allocated via | |
8165 | + * mempool_create(). | |
8166 | + * @gfp_mask: the usual allocation bitmask. | |
8167 | + * | |
8168 | + * this function only sleeps if the alloc_fn function sleeps or | |
8169 | + * returns NULL. Note that due to preallocation, this function | |
8170 | + * *never* fails when called from process contexts. (it might | |
8171 | + * fail if called from an IRQ context.) | |
8172 | + */ | |
8173 | +void * mempool_alloc(mempool_t *pool, int gfp_mask) | |
8174 | +{ | |
8175 | + void *element; | |
8176 | + unsigned long flags; | |
8177 | + int curr_nr; | |
8178 | + DECLARE_WAITQUEUE(wait, current); | |
8179 | + int gfp_nowait = gfp_mask & ~(__GFP_WAIT | __GFP_IO); | |
8180 | + | |
8181 | +repeat_alloc: | |
8182 | + element = pool->alloc(gfp_nowait, pool->pool_data); | |
8183 | + if (likely(element != NULL)) | |
8184 | + return element; | |
8185 | + | |
8186 | + /* | |
8187 | + * If the pool is less than 50% full then try harder | |
8188 | + * to allocate an element: | |
8189 | + */ | |
8190 | + if ((gfp_mask != gfp_nowait) && (pool->curr_nr <= pool->min_nr/2)) { | |
8191 | + element = pool->alloc(gfp_mask, pool->pool_data); | |
8192 | + if (likely(element != NULL)) | |
8193 | + return element; | |
8194 | + } | |
8195 | + | |
8196 | + /* | |
8197 | + * Kick the VM at this point. | |
8198 | + */ | |
8199 | + wakeup_bdflush(); | |
8200 | + | |
8201 | + spin_lock_irqsave(&pool->lock, flags); | |
8202 | + if (likely(pool->curr_nr)) { | |
8203 | + element = remove_element(pool); | |
8204 | + spin_unlock_irqrestore(&pool->lock, flags); | |
8205 | + return element; | |
8206 | + } | |
8207 | + spin_unlock_irqrestore(&pool->lock, flags); | |
8208 | + | |
8209 | + /* We must not sleep in the GFP_ATOMIC case */ | |
8210 | + if (gfp_mask == gfp_nowait) | |
8211 | + return NULL; | |
8212 | + | |
8213 | + run_task_queue(&tq_disk); | |
8214 | + | |
8215 | + add_wait_queue_exclusive(&pool->wait, &wait); | |
8216 | + set_task_state(current, TASK_UNINTERRUPTIBLE); | |
8217 | + | |
8218 | + spin_lock_irqsave(&pool->lock, flags); | |
8219 | + curr_nr = pool->curr_nr; | |
8220 | + spin_unlock_irqrestore(&pool->lock, flags); | |
8221 | + | |
8222 | + if (!curr_nr) | |
8223 | + schedule(); | |
8224 | + | |
8225 | + current->state = TASK_RUNNING; | |
8226 | + remove_wait_queue(&pool->wait, &wait); | |
8227 | + | |
8228 | + goto repeat_alloc; | |
8229 | +} | |
8230 | + | |
8231 | +/** | |
8232 | + * mempool_free - return an element to the pool. | |
8233 | + * @element: pool element pointer. | |
8234 | + * @pool: pointer to the memory pool which was allocated via | |
8235 | + * mempool_create(). | |
8236 | + * | |
8237 | + * this function only sleeps if the free_fn() function sleeps. | |
8238 | + */ | |
8239 | +void mempool_free(void *element, mempool_t *pool) | |
8240 | +{ | |
8241 | + unsigned long flags; | |
8242 | + | |
8243 | + if (pool->curr_nr < pool->min_nr) { | |
8244 | + spin_lock_irqsave(&pool->lock, flags); | |
8245 | + if (pool->curr_nr < pool->min_nr) { | |
8246 | + add_element(pool, element); | |
8247 | + spin_unlock_irqrestore(&pool->lock, flags); | |
8248 | + wake_up(&pool->wait); | |
8249 | + return; | |
8250 | + } | |
8251 | + spin_unlock_irqrestore(&pool->lock, flags); | |
8252 | + } | |
8253 | + pool->free(element, pool->pool_data); | |
8254 | +} | |
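The unlocked pool->curr_nr test here is only an optimisation: in the common case a full pool hands the element straight back to free_fn without taking the lock, and the race where another CPU drains the pool between the two tests is resolved by the locked re-check.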
8255 | + | |
8256 | +/* | |
8257 | + * A commonly used alloc and free fn. | |
8258 | + */ | |
8259 | +void *mempool_alloc_slab(int gfp_mask, void *pool_data) | |
8260 | +{ | |
8261 | + kmem_cache_t *mem = (kmem_cache_t *) pool_data; | |
8262 | + return kmem_cache_alloc(mem, gfp_mask); | |
8263 | +} | |
8264 | + | |
8265 | +void mempool_free_slab(void *element, void *pool_data) | |
8266 | +{ | |
8267 | + kmem_cache_t *mem = (kmem_cache_t *) pool_data; | |
8268 | + kmem_cache_free(mem, element); | |
8269 | +} | |
8270 | + | |
8271 | + | |
8272 | +EXPORT_SYMBOL(mempool_create); | |
8273 | +EXPORT_SYMBOL(mempool_resize); | |
8274 | +EXPORT_SYMBOL(mempool_destroy); | |
8275 | +EXPORT_SYMBOL(mempool_alloc); | |
8276 | +EXPORT_SYMBOL(mempool_free); | |
8277 | +EXPORT_SYMBOL(mempool_alloc_slab); | |
8278 | +EXPORT_SYMBOL(mempool_free_slab); |