diff -urN linux-2.4.24.org/arch/mips64/kernel/ioctl32.c linux-2.4.24/arch/mips64/kernel/ioctl32.c
--- linux-2.4.24.org/arch/mips64/kernel/ioctl32.c 2004-01-18 14:59:17.636181134 +0100
+++ linux-2.4.24/arch/mips64/kernel/ioctl32.c 2004-01-18 15:01:17.736881093 +0100
 #include <linux/mtd/mtd.h>
 #include <linux/serial.h>
+#include <linux/dm-ioctl.h>
 #ifdef CONFIG_SIBYTE_TBPROF
 #include <asm/sibyte/trace_prof.h>
@@ -2324,6 +2325,22 @@
 IOCTL32_DEFAULT(RESTART_ARRAY_RW),
 #endif /* CONFIG_MD */
+#if defined(CONFIG_BLK_DEV_DM) || defined(CONFIG_BLK_DEV_DM_MODULE)
+ IOCTL32_DEFAULT(DM_VERSION),
+ IOCTL32_DEFAULT(DM_REMOVE_ALL),
+ IOCTL32_DEFAULT(DM_DEV_CREATE),
+ IOCTL32_DEFAULT(DM_DEV_REMOVE),
+ IOCTL32_DEFAULT(DM_TABLE_LOAD),
+ IOCTL32_DEFAULT(DM_DEV_SUSPEND),
+ IOCTL32_DEFAULT(DM_DEV_RENAME),
+ IOCTL32_DEFAULT(DM_TABLE_DEPS),
+ IOCTL32_DEFAULT(DM_DEV_STATUS),
+ IOCTL32_DEFAULT(DM_TABLE_STATUS),
+ IOCTL32_DEFAULT(DM_DEV_WAIT),
+ IOCTL32_DEFAULT(DM_LIST_DEVICES),
+ IOCTL32_DEFAULT(DM_TABLE_CLEAR),
+#endif /* CONFIG_BLK_DEV_DM */
 #ifdef CONFIG_SIBYTE_TBPROF
 IOCTL32_DEFAULT(SBPROF_ZBSTART),
 IOCTL32_DEFAULT(SBPROF_ZBSTOP),
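The same thirteen DM entries are added to each 64-bit architecture's 32-bit ioctl translation table below (parisc, ppc64, s390x, sparc64, x86_64). They can be registered as plain pass-through entries because struct dm_ioctl is a flat structure built from fixed-width types, so a 32-bit caller and a 64-bit kernel agree on its layout and no argument conversion is needed. A rough sketch of what such a table entry amounts to (field names are illustrative, not any one architecture's exact definition):

struct ioctl32_entry {
	unsigned int cmd;	/* the 32-bit ioctl number */
	int (*handler)(unsigned int fd, unsigned int cmd, unsigned long arg);
};

/* A pass-through entry (IOCTL32_DEFAULT / COMPATIBLE_IOCTL) routes the
 * command straight to the native sys_ioctl; only ioctls whose argument
 * layout differs between 32- and 64-bit userland get a real translation
 * handler. */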
diff -urN linux-2.4.24.org/arch/parisc/kernel/ioctl32.c linux-2.4.24/arch/parisc/kernel/ioctl32.c
--- linux-2.4.24.org/arch/parisc/kernel/ioctl32.c 2004-01-18 14:59:20.929484849 +0100
+++ linux-2.4.24/arch/parisc/kernel/ioctl32.c 2004-01-18 15:01:17.742879834 +0100
 #include <linux/lvm.h>
+#include <linux/dm-ioctl.h>
 #include <scsi/scsi.h>
@@ -3423,6 +3424,22 @@
 COMPATIBLE_IOCTL(LV_BMAP)
 COMPATIBLE_IOCTL(LV_SNAPSHOT_USE_RATE)
+#if defined(CONFIG_BLK_DEV_DM) || defined(CONFIG_BLK_DEV_DM_MODULE)
+COMPATIBLE_IOCTL(DM_VERSION)
+COMPATIBLE_IOCTL(DM_REMOVE_ALL)
+COMPATIBLE_IOCTL(DM_DEV_CREATE)
+COMPATIBLE_IOCTL(DM_DEV_REMOVE)
+COMPATIBLE_IOCTL(DM_TABLE_LOAD)
+COMPATIBLE_IOCTL(DM_DEV_SUSPEND)
+COMPATIBLE_IOCTL(DM_DEV_RENAME)
+COMPATIBLE_IOCTL(DM_TABLE_DEPS)
+COMPATIBLE_IOCTL(DM_DEV_STATUS)
+COMPATIBLE_IOCTL(DM_TABLE_STATUS)
+COMPATIBLE_IOCTL(DM_DEV_WAIT)
+COMPATIBLE_IOCTL(DM_LIST_DEVICES)
+COMPATIBLE_IOCTL(DM_TABLE_CLEAR)
+#endif /* CONFIG_BLK_DEV_DM */
 #if defined(CONFIG_DRM) || defined(CONFIG_DRM_MODULE)
 COMPATIBLE_IOCTL(DRM_IOCTL_GET_MAGIC)
 COMPATIBLE_IOCTL(DRM_IOCTL_IRQ_BUSID)
diff -urN linux-2.4.24.org/arch/ppc64/kernel/ioctl32.c linux-2.4.24/arch/ppc64/kernel/ioctl32.c
--- linux-2.4.24.org/arch/ppc64/kernel/ioctl32.c 2004-01-18 14:58:17.568907286 +0100
+++ linux-2.4.24/arch/ppc64/kernel/ioctl32.c 2004-01-18 15:01:17.754877316 +0100
 #if defined(CONFIG_BLK_DEV_LVM) || defined(CONFIG_BLK_DEV_LVM_MODULE)
 #include <linux/lvm.h>
+#include <linux/dm-ioctl.h>
 #include <scsi/scsi.h>
@@ -4408,6 +4409,22 @@
 COMPATIBLE_IOCTL(NBD_PRINT_DEBUG),
 COMPATIBLE_IOCTL(NBD_SET_SIZE_BLOCKS),
 COMPATIBLE_IOCTL(NBD_DISCONNECT),
+#if defined(CONFIG_BLK_DEV_DM) || defined(CONFIG_BLK_DEV_DM_MODULE)
+COMPATIBLE_IOCTL(DM_VERSION),
+COMPATIBLE_IOCTL(DM_REMOVE_ALL),
+COMPATIBLE_IOCTL(DM_DEV_CREATE),
+COMPATIBLE_IOCTL(DM_DEV_REMOVE),
+COMPATIBLE_IOCTL(DM_TABLE_LOAD),
+COMPATIBLE_IOCTL(DM_DEV_SUSPEND),
+COMPATIBLE_IOCTL(DM_DEV_RENAME),
+COMPATIBLE_IOCTL(DM_TABLE_DEPS),
+COMPATIBLE_IOCTL(DM_DEV_STATUS),
+COMPATIBLE_IOCTL(DM_TABLE_STATUS),
+COMPATIBLE_IOCTL(DM_DEV_WAIT),
+COMPATIBLE_IOCTL(DM_LIST_DEVICES),
+COMPATIBLE_IOCTL(DM_TABLE_CLEAR),
+#endif /* CONFIG_BLK_DEV_DM */
 /* Remove *PRIVATE in 2.5 */
 COMPATIBLE_IOCTL(SIOCDEVPRIVATE),
 COMPATIBLE_IOCTL(SIOCDEVPRIVATE+1),
diff -urN linux-2.4.24.org/arch/s390x/kernel/ioctl32.c linux-2.4.24/arch/s390x/kernel/ioctl32.c
--- linux-2.4.24.org/arch/s390x/kernel/ioctl32.c 2004-01-18 14:59:24.825661296 +0100
+++ linux-2.4.24/arch/s390x/kernel/ioctl32.c 2004-01-18 15:01:17.759876266 +0100
 #include <linux/blk.h>
 #include <linux/elevator.h>
 #include <linux/raw.h>
+#include <linux/dm-ioctl.h>
 #include <asm/types.h>
 #include <asm/uaccess.h>
 #include <asm/dasd.h>
 IOCTL32_DEFAULT(SIOCGSTAMP),
+ IOCTL32_DEFAULT(DM_VERSION),
+ IOCTL32_DEFAULT(DM_REMOVE_ALL),
+ IOCTL32_DEFAULT(DM_DEV_CREATE),
+ IOCTL32_DEFAULT(DM_DEV_REMOVE),
+ IOCTL32_DEFAULT(DM_TABLE_LOAD),
+ IOCTL32_DEFAULT(DM_DEV_SUSPEND),
+ IOCTL32_DEFAULT(DM_DEV_RENAME),
+ IOCTL32_DEFAULT(DM_TABLE_DEPS),
+ IOCTL32_DEFAULT(DM_DEV_STATUS),
+ IOCTL32_DEFAULT(DM_TABLE_STATUS),
+ IOCTL32_DEFAULT(DM_DEV_WAIT),
+ IOCTL32_DEFAULT(DM_LIST_DEVICES),
+ IOCTL32_DEFAULT(DM_TABLE_CLEAR),
 IOCTL32_DEFAULT(LOOP_SET_FD),
 IOCTL32_DEFAULT(LOOP_CLR_FD),
diff -urN linux-2.4.24.org/arch/sparc64/kernel/ioctl32.c linux-2.4.24/arch/sparc64/kernel/ioctl32.c
--- linux-2.4.24.org/arch/sparc64/kernel/ioctl32.c 2004-01-18 14:58:59.210079599 +0100
+++ linux-2.4.24/arch/sparc64/kernel/ioctl32.c 2004-01-18 15:01:17.768874378 +0100
 #if defined(CONFIG_BLK_DEV_LVM) || defined(CONFIG_BLK_DEV_LVM_MODULE)
 #include <linux/lvm.h>
+#include <linux/dm-ioctl.h>
 #include <scsi/scsi.h>
@@ -5086,6 +5087,22 @@
 COMPATIBLE_IOCTL(NBD_PRINT_DEBUG)
 COMPATIBLE_IOCTL(NBD_SET_SIZE_BLOCKS)
 COMPATIBLE_IOCTL(NBD_DISCONNECT)
+#if defined(CONFIG_BLK_DEV_DM) || defined(CONFIG_BLK_DEV_DM_MODULE)
+COMPATIBLE_IOCTL(DM_VERSION)
+COMPATIBLE_IOCTL(DM_REMOVE_ALL)
+COMPATIBLE_IOCTL(DM_DEV_CREATE)
+COMPATIBLE_IOCTL(DM_DEV_REMOVE)
+COMPATIBLE_IOCTL(DM_TABLE_LOAD)
+COMPATIBLE_IOCTL(DM_DEV_SUSPEND)
+COMPATIBLE_IOCTL(DM_DEV_RENAME)
+COMPATIBLE_IOCTL(DM_TABLE_DEPS)
+COMPATIBLE_IOCTL(DM_DEV_STATUS)
+COMPATIBLE_IOCTL(DM_TABLE_STATUS)
+COMPATIBLE_IOCTL(DM_DEV_WAIT)
+COMPATIBLE_IOCTL(DM_LIST_DEVICES)
+COMPATIBLE_IOCTL(DM_TABLE_CLEAR)
+#endif /* CONFIG_BLK_DEV_DM */
 #if defined(CONFIG_IEEE1394) || defined(CONFIG_IEEE1394_MODULE)
 COMPATIBLE_IOCTL(AMDTP_IOC_CHANNEL)
diff -urN linux-2.4.24.org/arch/x86_64/ia32/ia32_ioctl.c linux-2.4.24/arch/x86_64/ia32/ia32_ioctl.c
--- linux-2.4.24.org/arch/x86_64/ia32/ia32_ioctl.c 2004-01-18 14:58:15.119427333 +0100
+++ linux-2.4.24/arch/x86_64/ia32/ia32_ioctl.c 2004-01-18 15:01:17.778872279 +0100
 #include <linux/lvm.h>
+#include <linux/dm-ioctl.h>
 #include <scsi/scsi.h>
@@ -4051,6 +4052,22 @@
 COMPATIBLE_IOCTL(LV_BMAP)
 COMPATIBLE_IOCTL(LV_SNAPSHOT_USE_RATE)
+#if defined(CONFIG_BLK_DEV_DM) || defined(CONFIG_BLK_DEV_DM_MODULE)
+COMPATIBLE_IOCTL(DM_VERSION)
+COMPATIBLE_IOCTL(DM_REMOVE_ALL)
+COMPATIBLE_IOCTL(DM_DEV_CREATE)
+COMPATIBLE_IOCTL(DM_DEV_REMOVE)
+COMPATIBLE_IOCTL(DM_TABLE_LOAD)
+COMPATIBLE_IOCTL(DM_DEV_SUSPEND)
+COMPATIBLE_IOCTL(DM_DEV_RENAME)
+COMPATIBLE_IOCTL(DM_TABLE_DEPS)
+COMPATIBLE_IOCTL(DM_DEV_STATUS)
+COMPATIBLE_IOCTL(DM_TABLE_STATUS)
+COMPATIBLE_IOCTL(DM_DEV_WAIT)
+COMPATIBLE_IOCTL(DM_LIST_DEVICES)
+COMPATIBLE_IOCTL(DM_TABLE_CLEAR)
+#endif /* CONFIG_BLK_DEV_DM */
 #ifdef CONFIG_AUTOFS_FS
 COMPATIBLE_IOCTL(AUTOFS_IOC_READY)
 COMPATIBLE_IOCTL(AUTOFS_IOC_FAIL)
diff -urN linux-2.4.24.org/Documentation/Configure.help linux-2.4.24/Documentation/Configure.help
--- linux-2.4.24.org/Documentation/Configure.help 2004-01-18 14:59:47.177940541 +0100
+++ linux-2.4.24/Documentation/Configure.help 2004-01-18 15:01:13.758716197 +0100
@@ -1952,6 +1952,20 @@
 want), say M here and read <file:Documentation/modules.txt>. The
 module will be called lvm-mod.o.
+Device-mapper support
+ Device-mapper is a low-level volume manager. It works by allowing
+ people to specify mappings for ranges of logical sectors. Various
+ mapping types are available; in addition, people may write their own
+ modules containing custom mappings if they wish.
+ Higher-level volume managers such as LVM2 use this driver.
+ If you want to compile this as a module, say M here and read
+ <file:Documentation/modules.txt>. The module will be called dm-mod.o.
 Multiple devices driver support (RAID and LVM)
 Support multiple physical spindles through a single logical device.
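To make "mappings for ranges of logical sectors" concrete: a device-mapper table is a list of lines of the form "start length target args", with start and length in 512-byte sectors. A hedged example (device paths are illustrative) that concatenates two 1024-sector partitions into one logical device using the linear target:

0    1024 linear /dev/sda1 0
1024 1024 linear /dev/sdb1 0

Sectors 0-1023 of the mapped device come from the start of /dev/sda1 and sectors 1024-2047 from the start of /dev/sdb1; user-space tools such as LVM2 generate tables like this and load them through the ioctl interface added by this patch.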
diff -urN linux-2.4.24.org/drivers/md/Config.in linux-2.4.24/drivers/md/Config.in
--- linux-2.4.24.org/drivers/md/Config.in 2004-01-18 14:58:09.306661789 +0100
+++ linux-2.4.24/drivers/md/Config.in 2004-01-18 15:01:13.770713678 +0100
 dep_tristate ' Multipath I/O support' CONFIG_MD_MULTIPATH $CONFIG_BLK_DEV_MD
 dep_tristate ' Logical volume manager (LVM) support' CONFIG_BLK_DEV_LVM $CONFIG_MD
+dep_tristate ' Device-mapper support' CONFIG_BLK_DEV_DM $CONFIG_MD
diff -urN linux-2.4.24.org/drivers/md/dm.c linux-2.4.24/drivers/md/dm.c
--- linux-2.4.24.org/drivers/md/dm.c 1970-01-01 01:00:00.000000000 +0100
+++ linux-2.4.24/drivers/md/dm.c 2004-01-18 15:01:29.214472770 +0100
+ * Copyright (C) 2001, 2002 Sistina Software (UK) Limited.
+ * This file is released under the GPL.
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/blk.h>
+#include <linux/blkpg.h>
+#include <linux/mempool.h>
+#include <linux/slab.h>
+#include <linux/major.h>
+#include <linux/kdev_t.h>
+#include <linux/lvm.h>
+#include <asm/uaccess.h>
+static const char *_name = DM_NAME;
+#define DEFAULT_READ_AHEAD 64
+ struct mapped_device *md;
+ struct dm_target *ti;
+ union map_info map_context;
+ void (*end_io) (struct buffer_head * bh, int uptodate);
+struct deferred_io {
+ struct buffer_head *bh;
+ struct deferred_io *next;
+ * Bits for the md->flags field.
+#define DMF_BLOCK_IO 0
+#define DMF_SUSPENDED 1
+struct mapped_device {
+ struct rw_semaphore lock;
+ unsigned long flags;
+ * A list of ios that arrived while we were suspended.
+ wait_queue_head_t wait;
+ struct deferred_io *deferred;
+ * The current mapping.
+ struct dm_table *map;
+ * io objects are allocated from here.
+ mempool_t *io_pool;
+ wait_queue_head_t eventq;
+static kmem_cache_t *_io_cache;
+static struct mapped_device *get_kdev(kdev_t dev);
+static int dm_request(request_queue_t *q, int rw, struct buffer_head *bh);
+static int dm_user_bmap(struct inode *inode, struct lv_bmap *lvb);
+/*-----------------------------------------------------------------
+ * In order to avoid the 256 minor number limit we are going to
+ * register more major numbers as necessary.
+ *---------------------------------------------------------------*/
+#define MAX_MINORS (1 << MINORBITS)
+struct major_details {
+ unsigned int major;
+ struct list_head transient_list;
+ unsigned int first_free_minor;
+ int nr_free_minors;
+ struct mapped_device *mds[MAX_MINORS];
+ int blk_size[MAX_MINORS];
+ int blksize_size[MAX_MINORS];
+ int hardsect_size[MAX_MINORS];
+static struct rw_semaphore _dev_lock;
+static struct major_details *_majors[MAX_BLKDEV];
+ * This holds a list of majors that non-specified device numbers
+ * may be allocated from. Only majors with free minors appear on
+static LIST_HEAD(_transients_free);
+static int __alloc_major(unsigned int major, struct major_details **result)
+ unsigned int transient = !major;
+ struct major_details *maj;
+ /* Major already allocated? */
+ if (major && _majors[major])
+ maj = kmalloc(sizeof(*maj), GFP_KERNEL);
+ memset(maj, 0, sizeof(*maj));
+ INIT_LIST_HEAD(&maj->transient_list);
+ maj->nr_free_minors = MAX_MINORS;
+ r = register_blkdev(major, _name, &dm_blk_dops);
+ DMERR("register_blkdev failed for %d", major);
+ maj->major = major;
+ maj->transient = transient;
+ list_add_tail(&maj->transient_list, &_transients_free);
+ _majors[major] = maj;
+ blk_size[major] = maj->blk_size;
+ blksize_size[major] = maj->blksize_size;
+ hardsect_size[major] = maj->hardsect_size;
+ read_ahead[major] = DEFAULT_READ_AHEAD;
+ blk_queue_make_request(BLK_DEFAULT_QUEUE(major), dm_request);
+static void __free_major(struct major_details *maj)
+ unsigned int major = maj->major;
+ list_del(&maj->transient_list);
+ read_ahead[major] = 0;
+ blk_size[major] = NULL;
+ blksize_size[major] = NULL;
+ hardsect_size[major] = NULL;
+ _majors[major] = NULL;
+ if (unregister_blkdev(major, _name) < 0)
+ DMERR("unregister_blkdev failed");
+static void free_all_majors(void)
+ unsigned int major = ARRAY_SIZE(_majors);
+ down_write(&_dev_lock);
+ if (_majors[major])
+ __free_major(_majors[major]);
+ up_write(&_dev_lock);
+static void free_dev(kdev_t dev)
+ unsigned int major = major(dev);
+ unsigned int minor = minor(dev);
+ struct major_details *maj;
+ down_write(&_dev_lock);
+ maj = _majors[major];
+ maj->mds[minor] = NULL;
+ maj->nr_free_minors++;
+ if (maj->nr_free_minors == MAX_MINORS) {
+ if (!maj->transient)
+ if (maj->nr_free_minors == 1)
+ list_add_tail(&maj->transient_list, &_transients_free);
+ if (minor < maj->first_free_minor)
+ maj->first_free_minor = minor;
+ up_write(&_dev_lock);
+static void __alloc_minor(struct major_details *maj, unsigned int minor,
+ struct mapped_device *md)
+ maj->mds[minor] = md;
+ md->dev = mk_kdev(maj->major, minor);
+ maj->nr_free_minors--;
+ if (maj->transient && !maj->nr_free_minors)
+ list_del_init(&maj->transient_list);
+ * See if requested kdev_t is available.
+static int specific_dev(kdev_t dev, struct mapped_device *md)
+ unsigned int major = major(dev);
+ unsigned int minor = minor(dev);
+ struct major_details *maj;
+ if (!major || (major > MAX_BLKDEV) || (minor >= MAX_MINORS)) {
+ DMWARN("device number requested out of range (%d, %d)",
+ down_write(&_dev_lock);
+ maj = _majors[major];
+ /* Register requested major? */
+ r = __alloc_major(major, &maj);
+ major = maj->major;
+ if (maj->mds[minor]) {
+ __alloc_minor(maj, minor, md);
+ up_write(&_dev_lock);
+ * Find first unused device number, requesting a new major number if required.
+static int first_free_dev(struct mapped_device *md)
+ struct major_details *maj;
+ down_write(&_dev_lock);
+ if (list_empty(&_transients_free)) {
+ r = __alloc_major(0, &maj);
+ maj = list_entry(_transients_free.next, struct major_details,
+ while (maj->mds[maj->first_free_minor++])
+ __alloc_minor(maj, maj->first_free_minor - 1, md);
+ up_write(&_dev_lock);
+static struct mapped_device *get_kdev(kdev_t dev)
+ struct mapped_device *md;
+ struct major_details *maj;
+ down_read(&_dev_lock);
+ maj = _majors[major(dev)];
+ md = maj->mds[minor(dev)];
+ up_read(&_dev_lock);
+/*-----------------------------------------------------------------
+ *---------------------------------------------------------------*/
+static __init int local_init(void)
+ init_rwsem(&_dev_lock);
+ /* allocate a slab for the dm_ios */
+ _io_cache = kmem_cache_create("dm io",
+ sizeof(struct dm_io), 0, 0, NULL, NULL);
+static void local_exit(void)
+ kmem_cache_destroy(_io_cache);
+ DMINFO("cleaned up");
+ * We have a lot of init/exit functions, so it seems easier to
+ * store them in an array. The disposable macro 'xx'
+ * expands a prefix into a pair of function names.
+ int (*init) (void);
+ void (*exit) (void);
+#define xx(n) {n ## _init, n ## _exit},
+static int __init dm_init(void)
+ const int count = ARRAY_SIZE(_inits);
+ for (i = 0; i < count; i++) {
+ r = _inits[i].init();
+static void __exit dm_exit(void)
+ int i = ARRAY_SIZE(_inits);
+ * Block device functions
+static int dm_blk_open(struct inode *inode, struct file *file)
+ struct mapped_device *md;
+ md = get_kdev(inode->i_rdev);
+static int dm_blk_close(struct inode *inode, struct file *file)
+ struct mapped_device *md;
+ md = get_kdev(inode->i_rdev);
+ dm_put(md); /* put the reference gained by dm_blk_open */
+static inline struct dm_io *alloc_io(struct mapped_device *md)
+ return mempool_alloc(md->io_pool, GFP_NOIO);
+static inline void free_io(struct mapped_device *md, struct dm_io *io)
+ mempool_free(io, md->io_pool);
+static inline struct deferred_io *alloc_deferred(void)
+ return kmalloc(sizeof(struct deferred_io), GFP_NOIO);
+static inline void free_deferred(struct deferred_io *di)
+static inline sector_t volume_size(kdev_t dev)
+ return blk_size[major(dev)][minor(dev)] << 1;
+/* FIXME: check this */
+static int dm_blk_ioctl(struct inode *inode, struct file *file,
+ unsigned int command, unsigned long a)
+ kdev_t dev = inode->i_rdev;
+ //case BLKRRPART: /* Re-read partition tables */
+ return blk_ioctl(dev, command, a);
+ size = volume_size(dev);
+ if (copy_to_user((void *) a, &size, sizeof(long)))
+ size = volume_size(dev);
+ if (put_user((u64) ((u64) size) << 9, (u64 *) a))
+ return dm_user_bmap(inode, (struct lv_bmap *) a);
+ DMWARN("unknown block ioctl 0x%x", command);
+ * Add the buffer to the list of deferred io.
+static int queue_io(struct mapped_device *md, struct buffer_head *bh, int rw)
+ struct deferred_io *di;
+ di = alloc_deferred();
+ down_write(&md->lock);
+ if (!test_bit(DMF_BLOCK_IO, &md->flags)) {
+ up_write(&md->lock);
+ di->next = md->deferred;
+ up_write(&md->lock);
+ return 0; /* deferred successfully */
+ * bh->b_end_io routine that decrements the pending count
+ * and then calls the original bh->b_end_io fn.
+static void dec_pending(struct buffer_head *bh, int uptodate)
+ struct dm_io *io = bh->b_private;
+ dm_endio_fn endio = io->ti->type->end_io;
+ r = endio(io->ti, bh, io->rw, uptodate ? 0 : -EIO,
+ /* the target wants another shot at the io */
+ if (atomic_dec_and_test(&io->md->pending))
+ /* nudge anyone waiting on suspend queue */
+ wake_up(&io->md->wait);
+ bh->b_end_io = io->end_io;
+ bh->b_private = io->context;
+ free_io(io->md, io);
+ bh->b_end_io(bh, uptodate);
+ * Do the bh mapping for a given leaf
+static inline int __map_buffer(struct mapped_device *md, int rw,
+ struct buffer_head *bh, struct dm_io *io)
+ struct dm_target *ti;
+ ti = dm_table_find_target(md->map, bh->b_rsector);
+ /* hook the end io request fn */
+ atomic_inc(&md->pending);
+ io->end_io = bh->b_end_io;
+ io->context = bh->b_private;
+ bh->b_end_io = dec_pending;
+ bh->b_private = io;
+ return ti->type->map(ti, bh, rw, &io->map_context);
+ * Checks to see if we should be deferring io, if so it queues it
+static inline int __deferring(struct mapped_device *md, int rw,
+ struct buffer_head *bh)
+ * If we're suspended we have to queue this io for later.
+ while (test_bit(DMF_BLOCK_IO, &md->flags)) {
+ up_read(&md->lock);
+ * There's no point deferring a read ahead
+ * request, just drop it.
+ down_read(&md->lock);
+ r = queue_io(md, bh, rw);
+ down_read(&md->lock);
+ return 1; /* deferred successfully */
+static int dm_request(request_queue_t *q, int rw, struct buffer_head *bh)
+ struct mapped_device *md;
+ md = get_kdev(bh->b_rdev);
+ buffer_IO_error(bh);
+ down_read(&md->lock);
+ r = __deferring(md, rw, bh);
+ /* not deferring */
+ r = __map_buffer(md, rw, bh, io);
+ up_read(&md->lock);
+ buffer_IO_error(bh);
+ up_read(&md->lock);
+static int check_dev_size(kdev_t dev, unsigned long block)
+ unsigned int major = major(dev);
+ unsigned int minor = minor(dev);
+ /* FIXME: check this */
+ unsigned long max_sector = (blk_size[major][minor] << 1) + 1;
+ unsigned long sector = (block + 1) * (blksize_size[major][minor] >> 9);
+ return (sector > max_sector) ? 0 : 1;
+ * Creates a dummy buffer head and maps it (for lilo).
+static int __bmap(struct mapped_device *md, kdev_t dev, unsigned long block,
+ kdev_t *r_dev, unsigned long *r_block)
+ struct buffer_head bh;
+ struct dm_target *ti;
+ union map_info map_context;
+ if (test_bit(DMF_BLOCK_IO, &md->flags)) {
+ if (!check_dev_size(dev, block)) {
+ /* setup dummy bh */
+ memset(&bh, 0, sizeof(bh));
+ bh.b_blocknr = block;
+ bh.b_dev = bh.b_rdev = dev;
+ bh.b_size = blksize_size[major(dev)][minor(dev)];
+ bh.b_rsector = block * (bh.b_size >> 9);
+ ti = dm_table_find_target(md->map, bh.b_rsector);
+ /* do the mapping */
+ r = ti->type->map(ti, &bh, READ, &map_context);
+ ti->type->end_io(ti, &bh, READ, 0, &map_context);
+ *r_dev = bh.b_rdev;
+ *r_block = bh.b_rsector / (bh.b_size >> 9);
+ * Marshals arguments and results between user and kernel space.
+static int dm_user_bmap(struct inode *inode, struct lv_bmap *lvb)
+ struct mapped_device *md;
+ unsigned long block, r_block;
+ if (get_user(block, &lvb->lv_block))
+ md = get_kdev(inode->i_rdev);
+ down_read(&md->lock);
+ r = __bmap(md, inode->i_rdev, block, &r_dev, &r_block);
+ up_read(&md->lock);
+ if (!r && (put_user(kdev_t_to_nr(r_dev), &lvb->lv_dev) ||
+ put_user(r_block, &lvb->lv_block)))
+static void free_md(struct mapped_device *md)
+ mempool_destroy(md->io_pool);
+ * Allocate and initialise a blank device with a given minor.
+static struct mapped_device *alloc_md(kdev_t dev)
+ struct mapped_device *md = kmalloc(sizeof(*md), GFP_KERNEL);
+ DMWARN("unable to allocate device, out of memory.");
+ memset(md, 0, sizeof(*md));
+ /* Allocate suitable device number */
+ r = first_free_dev(md);
+ r = specific_dev(dev, md);
+ md->io_pool = mempool_create(MIN_IOS, mempool_alloc_slab,
+ mempool_free_slab, _io_cache);
+ if (!md->io_pool) {
+ init_rwsem(&md->lock);
+ atomic_set(&md->holders, 1);
+ atomic_set(&md->pending, 0);
+ init_waitqueue_head(&md->wait);
+ init_waitqueue_head(&md->eventq);
+ * The hardsect size for a mapped device is the largest hardsect size
+ * from the devices it maps onto.
+static int __find_hardsect_size(struct list_head *devices)
+ int result = 512, size;
+ struct list_head *tmp;
+ list_for_each (tmp, devices) {
+ struct dm_dev *dd = list_entry(tmp, struct dm_dev, list);
+ size = get_hardsect_size(dd->dev);
+ if (size > result)
+ * Bind a table to the device.
+static void event_callback(void *context)
+ struct mapped_device *md = (struct mapped_device *) context;
+ down_write(&md->lock);
+ wake_up_interruptible(&md->eventq);
+ up_write(&md->lock);
+static int __bind(struct mapped_device *md, struct dm_table *t)
+ unsigned int minor = minor(md->dev);
+ unsigned int major = major(md->dev);
+ blk_size[major][minor] = dm_table_get_size(t) >> 1;
+ blksize_size[major][minor] = BLOCK_SIZE;
+ hardsect_size[major][minor] =
+ __find_hardsect_size(dm_table_get_devices(t));
+ register_disk(NULL, md->dev, 1, &dm_blk_dops, blk_size[major][minor]);
+ dm_table_event_callback(md->map, event_callback, md);
+static void __unbind(struct mapped_device *md)
+ unsigned int minor = minor(md->dev);
+ unsigned int major = major(md->dev);
+ dm_table_event_callback(md->map, NULL, NULL);
+ dm_table_put(md->map);
+ blk_size[major][minor] = 0;
+ blksize_size[major][minor] = 0;
+ hardsect_size[major][minor] = 0;
+ * Constructor for a new device.
+int dm_create(kdev_t dev, struct mapped_device **result)
+ struct mapped_device *md;
+ md = alloc_md(dev);
+ __unbind(md); /* Ensure zero device size */
+void dm_get(struct mapped_device *md)
+ atomic_inc(&md->holders);
+void dm_put(struct mapped_device *md)
+ if (atomic_dec_and_test(&md->holders)) {
+ dm_table_suspend_targets(md->map);
+ * Requeue the deferred io by calling generic_make_request.
+static void flush_deferred_io(struct deferred_io *c)
+ struct deferred_io *n;
+ generic_make_request(c->rw, c->bh);
+ * Swap in a new table (destroying old one).
+int dm_swap_table(struct mapped_device *md, struct dm_table *table)
+ down_write(&md->lock);
+ * The device must be suspended, or have no table bound yet.
+ if (md->map && !test_bit(DMF_SUSPENDED, &md->flags)) {
+ up_write(&md->lock);
+ r = __bind(md, table);
+ up_write(&md->lock);
+ * We need to be able to change a mapping table under a mounted
+ * filesystem. For example we might want to move some data in
+ * the background. Before the table can be swapped with
+ * dm_bind_table, dm_suspend must be called to flush any in
+ * flight io and ensure that any further io gets deferred.
+int dm_suspend(struct mapped_device *md)
+ DECLARE_WAITQUEUE(wait, current);
+ down_write(&md->lock);
+ * First we set the BLOCK_IO flag so no more ios will be
+ if (test_bit(DMF_BLOCK_IO, &md->flags)) {
+ up_write(&md->lock);
+ set_bit(DMF_BLOCK_IO, &md->flags);
+ add_wait_queue(&md->wait, &wait);
+ up_write(&md->lock);
+ * Then we wait for the already mapped ios to
+ run_task_queue(&tq_disk);
+ set_current_state(TASK_INTERRUPTIBLE);
+ if (!atomic_read(&md->pending) || signal_pending(current))
+ set_current_state(TASK_RUNNING);
+ down_write(&md->lock);
+ remove_wait_queue(&md->wait, &wait);
+ /* did we flush everything ? */
+ if (atomic_read(&md->pending)) {
+ clear_bit(DMF_BLOCK_IO, &md->flags);
+ set_bit(DMF_SUSPENDED, &md->flags);
+ dm_table_suspend_targets(md->map);
+ up_write(&md->lock);
+int dm_resume(struct mapped_device *md)
+ struct deferred_io *def;
+ down_write(&md->lock);
+ if (!test_bit(DMF_SUSPENDED, &md->flags)) {
+ up_write(&md->lock);
+ dm_table_resume_targets(md->map);
+ clear_bit(DMF_SUSPENDED, &md->flags);
+ clear_bit(DMF_BLOCK_IO, &md->flags);
+ def = md->deferred;
+ md->deferred = NULL;
+ up_write(&md->lock);
+ flush_deferred_io(def);
+ run_task_queue(&tq_disk);
+struct dm_table *dm_get_table(struct mapped_device *md)
+ struct dm_table *t;
+ down_read(&md->lock);
+ up_read(&md->lock);
+/*-----------------------------------------------------------------
+ * Event notification.
+ *---------------------------------------------------------------*/
+uint32_t dm_get_event_nr(struct mapped_device *md)
+ down_read(&md->lock);
+ up_read(&md->lock);
+int dm_add_wait_queue(struct mapped_device *md, wait_queue_t *wq,
+ uint32_t event_nr)
+ down_write(&md->lock);
+ if (event_nr != md->event_nr) {
+ up_write(&md->lock);
+ add_wait_queue(&md->eventq, wq);
+ up_write(&md->lock);
+const char *dm_kdevname(kdev_t dev)
+ static char buffer[32];
+ sprintf(buffer, "%03d:%03d", MAJOR(dev), MINOR(dev));
+void dm_remove_wait_queue(struct mapped_device *md, wait_queue_t *wq)
+ down_write(&md->lock);
+ remove_wait_queue(&md->eventq, wq);
+ up_write(&md->lock);
+kdev_t dm_kdev(struct mapped_device *md)
+ down_read(&md->lock);
+ up_read(&md->lock);
+int dm_suspended(struct mapped_device *md)
+ return test_bit(DMF_SUSPENDED, &md->flags);
+struct block_device_operations dm_blk_dops = {
+ .open = dm_blk_open,
+ .release = dm_blk_close,
+ .ioctl = dm_blk_ioctl,
+ .owner = THIS_MODULE
+module_init(dm_init);
+module_exit(dm_exit);
+MODULE_DESCRIPTION(DM_NAME " driver");
+MODULE_AUTHOR("Joe Thornber <thornber@sistina.com>");
+MODULE_LICENSE("GPL");
+EXPORT_SYMBOL(dm_kdevname);
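One detail of dm.c above is worth spelling out. While DMF_BLOCK_IO is set, queue_io() pushes each incoming buffer_head onto the singly linked md->deferred list, and dm_resume() later detaches the whole chain and hands it to flush_deferred_io(), which resubmits every entry via generic_make_request(). Because queue_io() pushes at the head, the chain replays in last-in-first-out order, which is harmless since the deferred requests carry no ordering guarantee. A minimal user-space model of that push/replay pattern (the bh_id field stands in for the real buffer_head and rw pair):

#include <stdio.h>
#include <stdlib.h>

struct deferred_io { int bh_id; struct deferred_io *next; };

/* Push at the head, exactly as queue_io() does with md->deferred. */
static void defer(struct deferred_io **list, int bh_id)
{
	struct deferred_io *di = malloc(sizeof(*di));
	di->bh_id = bh_id;
	di->next = *list;
	*list = di;
}

/* Walk and free the chain, as flush_deferred_io() does; printf stands
 * in for generic_make_request(). */
static void flush(struct deferred_io *c)
{
	while (c) {
		struct deferred_io *n = c->next;
		printf("replaying bh %d\n", c->bh_id);
		free(c);
		c = n;
	}
}

int main(void)
{
	struct deferred_io *deferred = NULL;
	for (int i = 0; i < 3; i++)
		defer(&deferred, i);
	flush(deferred);	/* prints 2, 1, 0: LIFO replay */
	return 0;
}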
diff -urN linux-2.4.24.org/drivers/md/dm-daemon.c linux-2.4.24/drivers/md/dm-daemon.c
--- linux-2.4.24.org/drivers/md/dm-daemon.c 1970-01-01 01:00:00.000000000 +0100
+++ linux-2.4.24/drivers/md/dm-daemon.c 2004-01-18 15:01:21.977991002 +0100
+ * Copyright (C) 2003 Sistina Software
+ * This file is released under the LGPL.
+#include "dm-daemon.h"
+#include <linux/module.h>
+#include <linux/sched.h>
+static int daemon(void *arg)
+ struct dm_daemon *dd = (struct dm_daemon *) arg;
+ DECLARE_WAITQUEUE(wq, current);
+ reparent_to_init();
+ /* block all signals */
+ spin_lock_irq(&current->sigmask_lock);
+ sigfillset(&current->blocked);
+ flush_signals(current);
+ spin_unlock_irq(&current->sigmask_lock);
+ strcpy(current->comm, dd->name);
+ atomic_set(&dd->please_die, 0);
+ add_wait_queue(&dd->job_queue, &wq);
+ down(&dd->run_lock);
+ up(&dd->start_lock);
+ * dd->fn() could do anything, very likely it will
+ * suspend. So we can't set the state to
+ * TASK_INTERRUPTIBLE before calling it. In order to
+ * prevent a race with a waking thread we do this little
+ * dance with the dd->woken variable.
+ set_current_state(TASK_RUNNING);
+ if (atomic_read(&dd->please_die))
+ atomic_set(&dd->woken, 0);
+ set_current_state(TASK_INTERRUPTIBLE);
+ } while (atomic_read(&dd->woken));
+ remove_wait_queue(&dd->job_queue, &wq);
+ up(&dd->run_lock);
+int dm_daemon_start(struct dm_daemon *dd, const char *name, void (*fn)(void))
+ * Initialise the dm_daemon.
+ strncpy(dd->name, name, sizeof(dd->name) - 1);
+ sema_init(&dd->start_lock, 1);
+ sema_init(&dd->run_lock, 1);
+ init_waitqueue_head(&dd->job_queue);
+ * Start the new thread.
+ down(&dd->start_lock);
+ pid = kernel_thread(daemon, dd, 0);
+ DMERR("Failed to start %s thread", name);
+ * wait for the daemon to up this mutex.
+ down(&dd->start_lock);
+ up(&dd->start_lock);
+void dm_daemon_stop(struct dm_daemon *dd)
+ atomic_set(&dd->please_die, 1);
+ dm_daemon_wake(dd);
+ down(&dd->run_lock);
+ up(&dd->run_lock);
+void dm_daemon_wake(struct dm_daemon *dd)
+ atomic_set(&dd->woken, 1);
+ wake_up_interruptible(&dd->job_queue);
+EXPORT_SYMBOL(dm_daemon_start);
+EXPORT_SYMBOL(dm_daemon_stop);
+EXPORT_SYMBOL(dm_daemon_wake);
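The start/stop handshake above relies on the two semaphores: dm_daemon_start() takes start_lock before spawning the thread and then takes it a second time, so it only returns once the thread has upped it; run_lock is held for the thread's entire life, letting dm_daemon_stop() block on it until the thread has really exited. A hedged usage sketch built only from the signatures declared in dm-daemon.h (the client names are invented for illustration):

static struct dm_daemon _worker;	/* illustrative client */

static void process_jobs(void)		/* run once per wakeup */
{
	/* drain the client's job queue here */
}

static int client_init(void)
{
	/* spawns the kernel thread and waits until it is running */
	return dm_daemon_start(&_worker, "exampled", process_jobs);
}

static void client_exit(void)
{
	/* wakes the thread with please_die set, waits for it to exit */
	dm_daemon_stop(&_worker);
}

/* Producers call dm_daemon_wake(&_worker) after queuing new work; the
 * atomic woken flag closes the race with a thread that is just about
 * to go to sleep. */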
diff -urN linux-2.4.24.org/drivers/md/dm-daemon.h linux-2.4.24/drivers/md/dm-daemon.h
--- linux-2.4.24.org/drivers/md/dm-daemon.h 1970-01-01 01:00:00.000000000 +0100
+++ linux-2.4.24/drivers/md/dm-daemon.h 2004-01-18 15:01:21.980990372 +0100
+ * Copyright (C) 2003 Sistina Software
+ * This file is released under the LGPL.
+#ifndef DM_DAEMON_H
+#define DM_DAEMON_H
+#include <asm/atomic.h>
+#include <asm/semaphore.h>
+ atomic_t please_die;
+ struct semaphore start_lock;
+ struct semaphore run_lock;
+ wait_queue_head_t job_queue;
+int dm_daemon_start(struct dm_daemon *dd, const char *name, void (*fn)(void));
+void dm_daemon_stop(struct dm_daemon *dd);
+void dm_daemon_wake(struct dm_daemon *dd);
+int dm_daemon_running(struct dm_daemon *dd);
diff -urN linux-2.4.24.org/drivers/md/dm-exception-store.c linux-2.4.24/drivers/md/dm-exception-store.c
--- linux-2.4.24.org/drivers/md/dm-exception-store.c 1970-01-01 01:00:00.000000000 +0100
+++ linux-2.4.24/drivers/md/dm-exception-store.c 2004-01-18 15:01:29.225470463 +0100
+ * Copyright (C) 2001-2002 Sistina Software (UK) Limited.
+ * This file is released under the GPL.
+#include "dm-snapshot.h"
+#include "kcopyd.h"
+#include <linux/mm.h>
+#include <linux/pagemap.h>
+#include <linux/vmalloc.h>
+#include <linux/slab.h>
+/*-----------------------------------------------------------------
+ * Persistent snapshots, by persistent we mean that the snapshot
+ * will survive a reboot.
+ *---------------------------------------------------------------*/
+ * We need to store a record of which parts of the origin have
+ * been copied to the snapshot device. The snapshot code
+ * requires that we copy exception chunks to chunk aligned areas
+ * of the COW store. It makes sense therefore, to store the
+ * metadata in chunk size blocks.
+ * There is no backward or forward compatibility implemented,
+ * snapshots with different disk versions than the kernel will
+ * not be usable. It is expected that "lvcreate" will blank out
+ * the start of a fresh COW device before calling the snapshot
+ * The first chunk of the COW device just contains the header.
+ * After this there is a chunk filled with exception metadata,
+ * followed by as many exception chunks as can fit in the
+ * All on disk structures are in little-endian format. The end
+ * of the exceptions info is indicated by an exception with a
+ * new_chunk of 0, which is invalid since it would point to the
+ * Magic for persistent snapshots: "SnAp" - Feeble isn't it.
+#define SNAP_MAGIC 0x70416e53
+ * The on-disk version of the metadata.
+#define SNAPSHOT_DISK_VERSION 1
+struct disk_header {
+ * Is this snapshot valid. There is no way of recovering
+ * an invalid snapshot.
+ * Simple, incrementing version. no backward
+ uint32_t chunk_size;
+struct disk_exception {
+ uint64_t old_chunk;
+ uint64_t new_chunk;
+struct commit_callback {
+ void (*callback)(void *, int success);
+ * The top level structure for a persistent exception store.
+ struct dm_snapshot *snap; /* up pointer to my snapshot */
+ uint32_t chunk_size;
+ uint32_t exceptions_per_area;
+ * Now that we have an asynchronous kcopyd there is no
+ * need for large chunk sizes, so it won't hurt to have a
+ * whole chunk's worth of metadata in memory at once.
+ * Used to keep track of which metadata area the data in
+ * 'chunk' refers to.
+ uint32_t current_area;
+ * The next free chunk for an exception.
+ uint32_t next_free;
+ * The index of next free exception in the current
+ uint32_t current_committed;
+ atomic_t pending_count;
+ uint32_t callback_count;
+ struct commit_callback *callbacks;
+static inline unsigned int sectors_to_pages(unsigned int sectors)
+ return sectors / (PAGE_SIZE / SECTOR_SIZE);
+static int alloc_area(struct pstore *ps)
+ size_t i, len, nr_pages;
+ struct page *page, *last = NULL;
+ len = ps->chunk_size << SECTOR_SHIFT;
+ * Allocate the chunk_size block of memory that will hold
+ * a single metadata area.
+ ps->area = vmalloc(len);
+ nr_pages = sectors_to_pages(ps->chunk_size);
+ * We lock the pages for ps->area into memory since
+ * they'll be doing a lot of io. We also chain them
+ * together ready for dm-io.
+ for (i = 0; i < nr_pages; i++) {
+ page = vmalloc_to_page(ps->area + (i * PAGE_SIZE));
+ last->list.next = &page->list;
+static void free_area(struct pstore *ps)
+ size_t i, nr_pages;
+ struct page *page;
+ nr_pages = sectors_to_pages(ps->chunk_size);
+ for (i = 0; i < nr_pages; i++) {
+ page = vmalloc_to_page(ps->area + (i * PAGE_SIZE));
+ page->list.next = NULL;
+ * Read or write a chunk aligned and sized block of data from a device.
+static int chunk_io(struct pstore *ps, uint32_t chunk, int rw)
+ struct io_region where;
+ unsigned int bits;
+ where.dev = ps->snap->cow->dev;
+ where.sector = ps->chunk_size * chunk;
+ where.count = ps->chunk_size;
+ return dm_io_sync(1, &where, rw, vmalloc_to_page(ps->area), 0, &bits);
+ * Read or write a metadata area. Remembering to skip the first
+ * chunk which holds the header.
+static int area_io(struct pstore *ps, uint32_t area, int rw)
+ /* convert a metadata area index to a chunk index */
+ chunk = 1 + ((ps->exceptions_per_area + 1) * area);
+ r = chunk_io(ps, chunk, rw);
+ ps->current_area = area;
+static int zero_area(struct pstore *ps, uint32_t area)
+ memset(ps->area, 0, ps->chunk_size << SECTOR_SHIFT);
+ return area_io(ps, area, WRITE);
+static int read_header(struct pstore *ps, int *new_snapshot)
+ struct disk_header *dh;
+ r = chunk_io(ps, 0, READ);
+ dh = (struct disk_header *) ps->area;
+ if (le32_to_cpu(dh->magic) == 0) {
+ *new_snapshot = 1;
+ } else if (le32_to_cpu(dh->magic) == SNAP_MAGIC) {
+ *new_snapshot = 0;
+ ps->valid = le32_to_cpu(dh->valid);
+ ps->version = le32_to_cpu(dh->version);
+ ps->chunk_size = le32_to_cpu(dh->chunk_size);
+ DMWARN("Invalid/corrupt snapshot");
+static int write_header(struct pstore *ps)
+ struct disk_header *dh;
+ memset(ps->area, 0, ps->chunk_size << SECTOR_SHIFT);
+ dh = (struct disk_header *) ps->area;
+ dh->magic = cpu_to_le32(SNAP_MAGIC);
+ dh->valid = cpu_to_le32(ps->valid);
+ dh->version = cpu_to_le32(ps->version);
+ dh->chunk_size = cpu_to_le32(ps->chunk_size);
+ return chunk_io(ps, 0, WRITE);
+ * Access functions for the disk exceptions, these do the endian conversions.
+static struct disk_exception *get_exception(struct pstore *ps, uint32_t index)
+ if (index >= ps->exceptions_per_area)
+ return ((struct disk_exception *) ps->area) + index;
+static int read_exception(struct pstore *ps,
+ uint32_t index, struct disk_exception *result)
+ struct disk_exception *e;
+ e = get_exception(ps, index);
+ result->old_chunk = le64_to_cpu(e->old_chunk);
+ result->new_chunk = le64_to_cpu(e->new_chunk);
+static int write_exception(struct pstore *ps,
+ uint32_t index, struct disk_exception *de)
+ struct disk_exception *e;
+ e = get_exception(ps, index);
+ e->old_chunk = cpu_to_le64(de->old_chunk);
+ e->new_chunk = cpu_to_le64(de->new_chunk);
+ * Registers the exceptions that are present in the current area.
+ * 'full' is filled in to indicate if the area has been
+static int insert_exceptions(struct pstore *ps, int *full)
+ struct disk_exception de;
+ /* presume the area is full */
+ for (i = 0; i < ps->exceptions_per_area; i++) {
+ r = read_exception(ps, i, &de);
+ * If the new_chunk is pointing at the start of
+ * the COW device, where the first metadata area
+ * is we know that we've hit the end of the
+ * exceptions. Therefore the area is not full.
+ if (de.new_chunk == 0LL) {
+ ps->current_committed = i;
+ * Keep track of the start of the free chunks.
+ if (ps->next_free <= de.new_chunk)
+ ps->next_free = de.new_chunk + 1;
+ * Otherwise we add the exception to the snapshot.
+ r = dm_add_exception(ps->snap, de.old_chunk, de.new_chunk);
+static int read_exceptions(struct pstore *ps)
+ * Keep reading chunks and inserting exceptions until
+ * we find a partially full area.
+ for (area = 0; full; area++) {
+ r = area_io(ps, area, READ);
+ r = insert_exceptions(ps, &full);
+static inline struct pstore *get_info(struct exception_store *store)
+ return (struct pstore *) store->context;
+static void persistent_fraction_full(struct exception_store *store,
+ sector_t *numerator, sector_t *denominator)
+ *numerator = get_info(store)->next_free * store->snap->chunk_size;
+ *denominator = get_dev_size(store->snap->cow->dev);
+static void persistent_destroy(struct exception_store *store)
+ struct pstore *ps = get_info(store);
+ dm_io_put(sectors_to_pages(ps->chunk_size));
+ vfree(ps->callbacks);
+static int persistent_read_metadata(struct exception_store *store)
+ int r, new_snapshot;
+ struct pstore *ps = get_info(store);
+ * Read the snapshot header.
+ r = read_header(ps, &new_snapshot);
+ * Do we need to set up a new snapshot ?
+ if (new_snapshot) {
+ r = write_header(ps);
+ DMWARN("write_header failed");
+ r = zero_area(ps, 0);
+ DMWARN("zero_area(0) failed");
+ DMWARN("snapshot is marked invalid");
+ if (ps->version != SNAPSHOT_DISK_VERSION) {
+ DMWARN("unable to handle snapshot disk version %d",
+ * Read the metadata.
+ r = read_exceptions(ps);
+static int persistent_prepare(struct exception_store *store,
+ struct exception *e)
+ struct pstore *ps = get_info(store);
+ sector_t size = get_dev_size(store->snap->cow->dev);
+ /* Is there enough room ? */
+ if (size < ((ps->next_free + 1) * store->snap->chunk_size))
+ e->new_chunk = ps->next_free;
+ * Move on to the next free pending, making sure to take
+ * into account the location of the metadata chunks.
+ stride = (ps->exceptions_per_area + 1);
+ if ((++ps->next_free % stride) == 1)
+ atomic_inc(&ps->pending_count);
+static void persistent_commit(struct exception_store *store,
+ struct exception *e,
+ void (*callback) (void *, int success),
+ void *callback_context)
+ struct pstore *ps = get_info(store);
+ struct disk_exception de;
+ struct commit_callback *cb;
+ de.old_chunk = e->old_chunk;
+ de.new_chunk = e->new_chunk;
+ write_exception(ps, ps->current_committed++, &de);
+ * Add the callback to the back of the array. This code
+ * is the only place where the callback array is
+ * manipulated, and we know that it will never be called
+ * multiple times concurrently.
+ cb = ps->callbacks + ps->callback_count++;
+ cb->callback = callback;
+ cb->context = callback_context;
+ * If there are no more exceptions in flight, or we have
+ * filled this metadata area we commit the exceptions to
+ if (atomic_dec_and_test(&ps->pending_count) ||
+ (ps->current_committed == ps->exceptions_per_area)) {
+ r = area_io(ps, ps->current_area, WRITE);
+ for (i = 0; i < ps->callback_count; i++) {
+ cb = ps->callbacks + i;
+ cb->callback(cb->context, r == 0 ? 1 : 0);
+ ps->callback_count = 0;
+ * Have we completely filled the current area ?
+ if (ps->current_committed == ps->exceptions_per_area) {
+ ps->current_committed = 0;
+ r = zero_area(ps, ps->current_area + 1);
+static void persistent_drop(struct exception_store *store)
+ struct pstore *ps = get_info(store);
+ if (write_header(ps))
+ DMWARN("write header failed");
+int dm_create_persistent(struct exception_store *store, uint32_t chunk_size)
+ struct pstore *ps;
+ r = dm_io_get(sectors_to_pages(chunk_size));
+ /* allocate the pstore */
+ ps = kmalloc(sizeof(*ps), GFP_KERNEL);
+ ps->snap = store->snap;
+ ps->version = SNAPSHOT_DISK_VERSION;
+ ps->chunk_size = chunk_size;
+ ps->exceptions_per_area = (chunk_size << SECTOR_SHIFT) /
+ sizeof(struct disk_exception);
+ ps->next_free = 2; /* skipping the header and first area */
+ ps->current_committed = 0;
+ r = alloc_area(ps);
+ * Allocate space for all the callbacks.
+ ps->callback_count = 0;
+ atomic_set(&ps->pending_count, 0);
+ ps->callbacks = dm_vcalloc(ps->exceptions_per_area,
+ sizeof(*ps->callbacks));
+ if (!ps->callbacks) {
+ store->destroy = persistent_destroy;
+ store->read_metadata = persistent_read_metadata;
+ store->prepare_exception = persistent_prepare;
+ store->commit_exception = persistent_commit;
+ store->drop_snapshot = persistent_drop;
+ store->fraction_full = persistent_fraction_full;
+ store->context = ps;
+ dm_io_put(sectors_to_pages(chunk_size));
+ if (ps->callbacks)
+ vfree(ps->callbacks);
+/*-----------------------------------------------------------------
+ * Implementation of the store for non-persistent snapshots.
+ *---------------------------------------------------------------*/
+struct transient_c {
+ sector_t next_free;
+void transient_destroy(struct exception_store *store)
+ kfree(store->context);
+int transient_read_metadata(struct exception_store *store)
+int transient_prepare(struct exception_store *store, struct exception *e)
+ struct transient_c *tc = (struct transient_c *) store->context;
+ sector_t size = get_dev_size(store->snap->cow->dev);
+ if (size < (tc->next_free + store->snap->chunk_size))
+ e->new_chunk = sector_to_chunk(store->snap, tc->next_free);
+ tc->next_free += store->snap->chunk_size;
+void transient_commit(struct exception_store *store,
+ struct exception *e,
+ void (*callback) (void *, int success),
+ void *callback_context)
+ /* Just succeed */
+ callback(callback_context, 1);
+static void transient_fraction_full(struct exception_store *store,
+ sector_t *numerator, sector_t *denominator)
+ *numerator = ((struct transient_c *) store->context)->next_free;
+ *denominator = get_dev_size(store->snap->cow->dev);
+int dm_create_transient(struct exception_store *store,
+ struct dm_snapshot *s, int blocksize)
+ struct transient_c *tc;
+ memset(store, 0, sizeof(*store));
+ store->destroy = transient_destroy;
+ store->read_metadata = transient_read_metadata;
+ store->prepare_exception = transient_prepare;
+ store->commit_exception = transient_commit;
+ store->fraction_full = transient_fraction_full;
+ tc = kmalloc(sizeof(struct transient_c), GFP_KERNEL);
+ tc->next_free = 0;
+ store->context = tc;
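The on-disk layout described at the top of this file interleaves one metadata chunk ahead of every group of exceptions_per_area exception chunks, with chunk 0 reserved for the header; area_io() converts an area index to its metadata chunk with chunk = 1 + (exceptions_per_area + 1) * area, and dm_create_persistent() derives exceptions_per_area from the chunk size. A small stand-alone check of that arithmetic (the 16-sector chunk size is just an example; struct disk_exception is two uint64_t fields, 16 bytes):

#include <stdio.h>
#include <stdint.h>

#define SECTOR_SHIFT 9

int main(void)
{
	uint32_t chunk_size = 16;			/* sectors, illustrative */
	uint32_t exc_size = 2 * sizeof(uint64_t);	/* old_chunk + new_chunk */
	uint32_t per_area = (chunk_size << SECTOR_SHIFT) / exc_size;

	printf("%u exceptions per area\n", per_area);	/* 512 here */

	/* metadata chunk index for each area, as in area_io() */
	for (uint32_t area = 0; area < 3; area++)
		printf("area %u -> metadata chunk %u\n",
		       area, 1 + (per_area + 1) * area);
	return 0;
}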
diff -urN linux-2.4.24.org/drivers/md/dm.h linux-2.4.24/drivers/md/dm.h
--- linux-2.4.24.org/drivers/md/dm.h 1970-01-01 01:00:00.000000000 +0100
+++ linux-2.4.24/drivers/md/dm.h 2004-01-18 15:01:29.219471722 +0100
+ * Internal header file for device mapper
+ * Copyright (C) 2001, 2002 Sistina Software
+ * This file is released under the LGPL.
+#ifndef DM_INTERNAL_H
+#define DM_INTERNAL_H
+#include <linux/fs.h>
+#include <linux/device-mapper.h>
+#include <linux/list.h>
+#include <linux/blkdev.h>
+#define DM_NAME "device-mapper"
+#define DMWARN(f, x...) printk(KERN_WARNING DM_NAME ": " f "\n" , ## x)
+#define DMERR(f, x...) printk(KERN_ERR DM_NAME ": " f "\n" , ## x)
+#define DMINFO(f, x...) printk(KERN_INFO DM_NAME ": " f "\n" , ## x)
+ * FIXME: I think this should be with the definition of sector_t
+#define SECTOR_FORMAT "%Lu"
+#define SECTOR_FORMAT "%lu"
+#define SECTOR_SHIFT 9
+#define SECTOR_SIZE (1 << SECTOR_SHIFT)
+extern struct block_device_operations dm_blk_dops;
+ * List of devices that a metadevice uses and should open/close.
+ struct list_head list;
+ struct block_device *bdev;
+struct mapped_device;
+/*-----------------------------------------------------------------
+ * Functions for manipulating a struct mapped_device.
+ * Drop the reference with dm_put when you finish with the object.
+ *---------------------------------------------------------------*/
+int dm_create(kdev_t dev, struct mapped_device **md);
+ * Reference counting for md.
+void dm_get(struct mapped_device *md);
+void dm_put(struct mapped_device *md);
+ * A device can still be used while suspended, but I/O is deferred.
+int dm_suspend(struct mapped_device *md);
+int dm_resume(struct mapped_device *md);
+ * The device must be suspended before calling this method.
+int dm_swap_table(struct mapped_device *md, struct dm_table *t);
+ * Drop a reference on the table when you've finished with the
+struct dm_table *dm_get_table(struct mapped_device *md);
+ * Event functions.
+uint32_t dm_get_event_nr(struct mapped_device *md);
+int dm_add_wait_queue(struct mapped_device *md, wait_queue_t *wq,
+ uint32_t event_nr);
+void dm_remove_wait_queue(struct mapped_device *md, wait_queue_t *wq);
+kdev_t dm_kdev(struct mapped_device *md);
+int dm_suspended(struct mapped_device *md);
+/*-----------------------------------------------------------------
+ * Functions for manipulating a table. Tables are also reference
+ *---------------------------------------------------------------*/
+int dm_table_create(struct dm_table **result, int mode, unsigned num_targets);
+void dm_table_get(struct dm_table *t);
+void dm_table_put(struct dm_table *t);
+int dm_table_add_target(struct dm_table *t, const char *type,
+ sector_t start, sector_t len, char *params);
+int dm_table_complete(struct dm_table *t);
+void dm_table_event_callback(struct dm_table *t,
+ void (*fn)(void *), void *context);
+void dm_table_event(struct dm_table *t);
+sector_t dm_table_get_size(struct dm_table *t);
+struct dm_target *dm_table_get_target(struct dm_table *t, unsigned int index);
+struct dm_target *dm_table_find_target(struct dm_table *t, sector_t sector);
+unsigned int dm_table_get_num_targets(struct dm_table *t);
+struct list_head *dm_table_get_devices(struct dm_table *t);
+int dm_table_get_mode(struct dm_table *t);
+void dm_table_suspend_targets(struct dm_table *t);
+void dm_table_resume_targets(struct dm_table *t);
+/*-----------------------------------------------------------------
+ * A registry of target types.
+ *---------------------------------------------------------------*/
+int dm_target_init(void);
+void dm_target_exit(void);
+struct target_type *dm_get_target_type(const char *name);
+void dm_put_target_type(struct target_type *t);
+/*-----------------------------------------------------------------
+ *---------------------------------------------------------------*/
+static inline int array_too_big(unsigned long fixed, unsigned long obj,
+ unsigned long num)
+ return (num > (ULONG_MAX - fixed) / obj);
+ * ceiling(n / size) * size
+static inline unsigned long dm_round_up(unsigned long n, unsigned long size)
+ unsigned long r = n % size;
+ return n + (r ? (size - r) : 0);
+ * ceiling(n / size)
+static inline unsigned long dm_div_up(unsigned long n, unsigned long size)
+ return dm_round_up(n, size) / size;
+const char *dm_kdevname(kdev_t dev);
+void *dm_vcalloc(unsigned long nmemb, unsigned long elem_size);
+ * The device-mapper can be driven through one of two interfaces,
+ * ioctl or filesystem, depending on which patch you have applied.
+int dm_interface_init(void);
+void dm_interface_exit(void);
+ * Targets for linear and striped mappings
+int dm_linear_init(void);
+void dm_linear_exit(void);
+int dm_stripe_init(void);
+void dm_stripe_exit(void);
+int dm_snapshot_init(void);
+void dm_snapshot_exit(void);
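dm_round_up() and dm_div_up() above are the usual ceiling helpers: dm_round_up(n, size) rounds n up to the next multiple of size, and dm_div_up() divides after rounding. A quick stand-alone check (the helpers are copied verbatim from dm.h):

#include <assert.h>

static inline unsigned long dm_round_up(unsigned long n, unsigned long size)
{
	unsigned long r = n % size;
	return n + (r ? (size - r) : 0);
}

static inline unsigned long dm_div_up(unsigned long n, unsigned long size)
{
	return dm_round_up(n, size) / size;
}

int main(void)
{
	assert(dm_round_up(1000, 512) == 1024);	/* 1000 % 512 = 488, add 24 */
	assert(dm_round_up(1024, 512) == 1024);	/* exact multiples unchanged */
	assert(dm_div_up(1000, 512) == 2);	/* ceiling(1000 / 512) */
	assert(dm_div_up(0, 512) == 0);
	return 0;
}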
2363 diff -urN linux-2.4.24.org/drivers/md/dm-io.c linux-2.4.24/drivers/md/dm-io.c
2364 --- linux-2.4.24.org/drivers/md/dm-io.c 1970-01-01 01:00:00.000000000 +0100
2365 +++ linux-2.4.24/drivers/md/dm-io.c 2004-01-18 15:01:25.790191115 +0100
2368 + * Copyright (C) 2003 Sistina Software
2370 + * This file is released under the GPL.
2375 +#include <linux/mempool.h>
2376 +#include <linux/module.h>
2377 +#include <linux/slab.h>
2378 +#include <linux/sched.h>
2379 +#include <linux/bitops.h>
2381 +/* FIXME: can we shrink this ? */
2382 +struct io_context {
2384 + unsigned int error;
2386 + struct task_struct *sleeper;
2387 + io_notify_fn callback;
2392 + * We maintain a pool of buffer heads for dispatching the io.
2394 +static unsigned int _num_bhs;
2395 +static mempool_t *_buffer_pool;
2398 + * io contexts are only dynamically allocated for asynchronous
2399 + * io. Since async io is likely to be the majority of io we'll
2400 + * have the same number of io contexts as buffer heads ! (FIXME:
2401 + * must reduce this).
2403 +mempool_t *_io_pool;
2405 +static void *alloc_bh(int gfp_mask, void *pool_data)
2407 + struct buffer_head *bh;
2409 + bh = kmem_cache_alloc(bh_cachep, gfp_mask);
2411 + bh->b_reqnext = NULL;
2412 + init_waitqueue_head(&bh->b_wait);
2413 + INIT_LIST_HEAD(&bh->b_inode_buffers);
2419 +static void *alloc_io(int gfp_mask, void *pool_data)
2421 + return kmalloc(sizeof(struct io_context), gfp_mask);
2424 +static void free_io(void *element, void *pool_data)
2429 +static unsigned int pages_to_buffers(unsigned int pages)
2431 + return 4 * pages; /* too many ? */
2434 +static int resize_pool(unsigned int new_bhs)
2438 + if (_buffer_pool) {
2439 + if (new_bhs == 0) {
2440 + /* free off the pools */
2441 + mempool_destroy(_buffer_pool);
2442 + mempool_destroy(_io_pool);
2443 + _buffer_pool = _io_pool = NULL;
2445 + /* resize the pools */
2446 + r = mempool_resize(_buffer_pool, new_bhs, GFP_KERNEL);
2448 + r = mempool_resize(_io_pool,
2449 + new_bhs, GFP_KERNEL);
2452 + /* create new pools */
2453 + _buffer_pool = mempool_create(new_bhs, alloc_bh,
2454 + mempool_free_slab, bh_cachep);
2455 + if (!_buffer_pool)
2458 + _io_pool = mempool_create(new_bhs, alloc_io, free_io, NULL);
2460 + mempool_destroy(_buffer_pool);
2461 + _buffer_pool = NULL;
2467 + _num_bhs = new_bhs;
2472 +int dm_io_get(unsigned int num_pages)
2474 + return resize_pool(_num_bhs + pages_to_buffers(num_pages));
2477 +void dm_io_put(unsigned int num_pages)
2479 + resize_pool(_num_bhs - pages_to_buffers(num_pages));
2482 +/*-----------------------------------------------------------------
2483 + * We need to keep track of which region a buffer is doing io
2484 + * for. In order to save a memory allocation we store this in an
2485 + * unused field of the buffer head, and provide these access
2488 + * FIXME: add compile time check that an unsigned int can fit
2491 + *---------------------------------------------------------------*/
2492 +static inline void bh_set_region(struct buffer_head *bh, unsigned int region)
2494 + bh->b_journal_head = (void *) region;
2497 +static inline int bh_get_region(struct buffer_head *bh)
2499 + return (unsigned int) bh->b_journal_head;
2502 +/*-----------------------------------------------------------------
2503 + * We need an io object to keep track of the number of bhs that
2504 + * have been dispatched for a particular io.
2505 + *---------------------------------------------------------------*/
2506 +static void dec_count(struct io_context *io, unsigned int region, int error)
2509 + set_bit(region, &io->error);
2511 + if (atomic_dec_and_test(&io->count)) {
2513 + wake_up_process(io->sleeper);
2516 + int r = io->error;
2517 + io_notify_fn fn = io->callback;
2518 + void *context = io->context;
2520 + mempool_free(io, _io_pool);
2526 +static void endio(struct buffer_head *bh, int uptodate)
2528 + struct io_context *io = (struct io_context *) bh->b_private;
2530 + if (!uptodate && io->rw != WRITE) {
2532 + * We need to zero this region, otherwise people
2533 + * like kcopyd may write the arbitrary contents
2536 + memset(bh->b_data, 0, bh->b_size);
2539 + dec_count((struct io_context *) bh->b_private,
2540 + bh_get_region(bh), !uptodate);
2541 + mempool_free(bh, _buffer_pool);
2545 + * Primitives for alignment calculations.
2547 +int fls(unsigned n)
2549 + return generic_fls32(n);
2552 +static inline int log2_floor(unsigned n)
2554 + return ffs(n) - 1;
2557 +static inline int log2_align(unsigned n)
2559 + return fls(n) - 1;
2563 + * Returns the next block for io.
2565 +static int do_page(kdev_t dev, sector_t *block, sector_t end_block,
2566 + unsigned int block_size,
2567 + struct page *p, unsigned int offset,
2568 + unsigned int region, struct io_context *io)
2570 + struct buffer_head *bh;
2571 + sector_t b = *block;
2572 + sector_t blocks_per_page = PAGE_SIZE / block_size;
2573 + unsigned int this_size; /* holds the size of the current io */
2576 + if (!blocks_per_page) {
2577 + DMERR("dm-io: PAGE_SIZE (%lu) < block_size (%u) unsupported",
2578 + PAGE_SIZE, block_size);
2582 + while ((offset < PAGE_SIZE) && (b != end_block)) {
2583 + bh = mempool_alloc(_buffer_pool, GFP_NOIO);
2584 + init_buffer(bh, endio, io);
2585 + bh_set_region(bh, region);
2588 + * Block size must be a power of 2 and aligned
2592 + len = min(end_block - b, blocks_per_page);
2593 + len = min(len, blocks_per_page - offset / block_size);
2596 + DMERR("dm-io: Invalid offset/block_size (%u/%u).",
2597 + offset, block_size);
2601 + this_size = 1 << log2_align(len);
2603 + this_size = min(this_size,
2604 + (unsigned) 1 << log2_floor(b));
2607 + * Add in the job offset.
2609 + bh->b_blocknr = (b / this_size);
2610 + bh->b_size = block_size * this_size;
2611 + set_bh_page(bh, p, offset);
2612 + bh->b_this_page = bh;
2615 + atomic_set(&bh->b_count, 1);
2617 + bh->b_state = ((1 << BH_Uptodate) | (1 << BH_Mapped) |
2618 + (1 << BH_Lock));
2620 + if (io->rw == WRITE)
2621 + clear_bit(BH_Dirty, &bh->b_state);
2623 + atomic_inc(&io->count);
2624 + submit_bh(io->rw, bh);
2627 + offset += block_size * this_size;
2628 + b += this_size;
2630 + *block = b;
2631 + return (b == end_block);
2634 +static void do_region(unsigned int region, struct io_region *where,
2635 + struct page *page, unsigned int offset,
2636 + struct io_context *io)
2638 + unsigned int block_size = get_hardsect_size(where->dev);
2639 + unsigned int sblock_size = block_size >> 9;
2640 + sector_t block = where->sector / sblock_size;
2641 + sector_t end_block = (where->sector + where->count) / sblock_size;
2643 + while (block < end_block) {
2644 + if (do_page(where->dev, &block, end_block, block_size,
2645 + page, offset, region, io))
2646 + break;
2648 + offset = 0; /* only offset the first page */
2650 + page = list_entry(page->list.next, struct page, list);
2654 +static void dispatch_io(unsigned int num_regions, struct io_region *where,
2655 + struct page *pages, unsigned int offset,
2656 + struct io_context *io)
2660 + for (i = 0; i < num_regions; i++)
2661 + if (where[i].count)
2662 + do_region(i, where + i, pages, offset, io);
2665 + * Drop the extra reference that we were holding to avoid
2666 + * the io being completed too early.
2668 + dec_count(io, 0, 0);
2674 +int dm_io_sync(unsigned int num_regions, struct io_region *where,
2675 + int rw, struct page *pages, unsigned int offset,
2676 + unsigned int *error_bits)
2678 + struct io_context io;
2680 + BUG_ON(num_regions > 1 && rw != WRITE);
2684 + atomic_set(&io.count, 1); /* see dispatch_io() */
2685 + io.sleeper = current;
2687 + dispatch_io(num_regions, where, pages, offset, &io);
2688 + run_task_queue(&tq_disk);
2691 + set_current_state(TASK_UNINTERRUPTIBLE);
2693 + if (!atomic_read(&io.count))
2694 + break;
2696 + schedule();
2698 + set_current_state(TASK_RUNNING);
2700 + *error_bits = io.error;
2701 + return io.error ? -EIO : 0;
2707 +int dm_io_async(unsigned int num_regions, struct io_region *where, int rw,
2708 + struct page *pages, unsigned int offset,
2709 + io_notify_fn fn, void *context)
2711 + struct io_context *io = mempool_alloc(_io_pool, GFP_NOIO);
2715 + atomic_set(&io->count, 1); /* see dispatch_io() */
2716 + io->sleeper = NULL;
2717 + io->callback = fn;
2718 + io->context = context;
2720 + dispatch_io(num_regions, where, pages, offset, io);
2722 + return 0;
2724 +EXPORT_SYMBOL(dm_io_get);
2725 +EXPORT_SYMBOL(dm_io_put);
2726 +EXPORT_SYMBOL(dm_io_sync);
2727 +EXPORT_SYMBOL(dm_io_async);
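
To make the calling convention of the four exported entry points concrete, here is a minimal sketch of a hypothetical in-kernel client; the devices, sector geometry and the helper name are invented for illustration and are not part of the patch:

/*
 * Illustrative sketch only: write one page to sector 0 of two devices
 * with the synchronous dm-io interface. Everything except the dm-io
 * calls themselves (dm_io_get/dm_io_sync/dm_io_put) is an assumption.
 */
static int write_page_to_two_devices(kdev_t dev_a, kdev_t dev_b,
                                     struct page *p)
{
	struct io_region where[2];
	unsigned int error_bits;
	int r;

	/* reserve enough buffer heads for one page of concurrent io */
	r = dm_io_get(1);
	if (r)
		return r;

	where[0].dev = dev_a;
	where[0].sector = 0;
	where[0].count = PAGE_SIZE >> 9;
	where[1].dev = dev_b;
	where[1].sector = 0;
	where[1].count = PAGE_SIZE >> 9;

	/* blocks until both regions complete; multi-region io must be WRITE */
	r = dm_io_sync(2, where, WRITE, p, 0, &error_bits);

	dm_io_put(1);
	return r;
}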
2728 diff -urN linux-2.4.24.org/drivers/md/dm-ioctl.c linux-2.4.24/drivers/md/dm-ioctl.c
2729 --- linux-2.4.24.org/drivers/md/dm-ioctl.c 1970-01-01 01:00:00.000000000 +0100
2730 +++ linux-2.4.24/drivers/md/dm-ioctl.c 2004-01-18 15:01:17.790869761 +0100
2733 + * Copyright (C) 2001, 2002 Sistina Software (UK) Limited.
2735 + * This file is released under the GPL.
2738 +#include "dm.h"
2740 +#include <linux/module.h>
2741 +#include <linux/vmalloc.h>
2742 +#include <linux/miscdevice.h>
2743 +#include <linux/dm-ioctl.h>
2744 +#include <linux/init.h>
2745 +#include <linux/wait.h>
2746 +#include <linux/blk.h>
2747 +#include <linux/slab.h>
2749 +#include <asm/uaccess.h>
2751 +#define DM_DRIVER_EMAIL "dm@uk.sistina.com"
2753 +/*-----------------------------------------------------------------
2754 + * The ioctl interface needs to be able to look up devices by
2755 + * name or uuid.
2756 + *---------------------------------------------------------------*/
2757 +struct hash_cell {
2758 + struct list_head name_list;
2759 + struct list_head uuid_list;
2761 + char *name;
2762 + char *uuid;
2763 + struct mapped_device *md;
2764 + struct dm_table *new_map;
2766 + /* I hate devfs */
2767 + devfs_handle_t devfs_entry;
2770 +#define NUM_BUCKETS 64
2771 +#define MASK_BUCKETS (NUM_BUCKETS - 1)
2772 +static struct list_head _name_buckets[NUM_BUCKETS];
2773 +static struct list_head _uuid_buckets[NUM_BUCKETS];
2775 +static devfs_handle_t _dev_dir;
2776 +void dm_hash_remove_all(void);
2779 + * Guards access to both hash tables.
2781 +static DECLARE_RWSEM(_hash_lock);
2783 +static void init_buckets(struct list_head *buckets)
2787 + for (i = 0; i < NUM_BUCKETS; i++)
2788 + INIT_LIST_HEAD(buckets + i);
2791 +int dm_hash_init(void)
2793 + init_buckets(_name_buckets);
2794 + init_buckets(_uuid_buckets);
2795 + _dev_dir = devfs_mk_dir(0, DM_DIR, NULL);
2799 +void dm_hash_exit(void)
2801 + dm_hash_remove_all();
2802 + devfs_unregister(_dev_dir);
2805 +/*-----------------------------------------------------------------
2807 + * We're not really concerned with the str hash function being
2808 + * fast since it's only used by the ioctl interface.
2809 + *---------------------------------------------------------------*/
2810 +static unsigned int hash_str(const char *str)
2812 + const unsigned int hash_mult = 2654435387U;
2813 + unsigned int h = 0;
2815 + while (*str)
2816 + h = (h + (unsigned int) *str++) * hash_mult;
2818 + return h & MASK_BUCKETS;
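
The multiplier 2654435387 is a Knuth-style multiplicative hash, and its bucket spread is easy to eyeball by lifting the function into user space. A throwaway harness (the sample device names are invented):

#include <stdio.h>

#define NUM_BUCKETS 64
#define MASK_BUCKETS (NUM_BUCKETS - 1)

/* user-space copy of hash_str() above, for experimentation only */
static unsigned int hash_str(const char *str)
{
	const unsigned int hash_mult = 2654435387U;
	unsigned int h = 0;

	while (*str)
		h = (h + (unsigned int) *str++) * hash_mult;

	return h & MASK_BUCKETS;
}

int main(void)
{
	const char *names[] = { "vg0-root", "vg0-home", "vg0-swap" };
	int i;

	for (i = 0; i < 3; i++)
		printf("%s -> bucket %u\n", names[i], hash_str(names[i]));
	return 0;
}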
2821 +/*-----------------------------------------------------------------
2822 + * Code for looking up a device by name
2823 + *---------------------------------------------------------------*/
2824 +static struct hash_cell *__get_name_cell(const char *str)
2826 + struct list_head *tmp;
2827 + struct hash_cell *hc;
2828 + unsigned int h = hash_str(str);
2830 + list_for_each (tmp, _name_buckets + h) {
2831 + hc = list_entry(tmp, struct hash_cell, name_list);
2832 + if (!strcmp(hc->name, str))
2833 + return hc;
2836 + return NULL;
2839 +static struct hash_cell *__get_uuid_cell(const char *str)
2841 + struct list_head *tmp;
2842 + struct hash_cell *hc;
2843 + unsigned int h = hash_str(str);
2845 + list_for_each (tmp, _uuid_buckets + h) {
2846 + hc = list_entry(tmp, struct hash_cell, uuid_list);
2847 + if (!strcmp(hc->uuid, str))
2848 + return hc;
2851 + return NULL;
2854 +/*-----------------------------------------------------------------
2855 + * Inserting, removing and renaming a device.
2856 + *---------------------------------------------------------------*/
2857 +static inline char *kstrdup(const char *str)
2859 + char *r = kmalloc(strlen(str) + 1, GFP_KERNEL);
2860 + if (r)
2861 + strcpy(r, str);
2862 + return r;
2865 +static struct hash_cell *alloc_cell(const char *name, const char *uuid,
2866 + struct mapped_device *md)
2868 + struct hash_cell *hc;
2870 + hc = kmalloc(sizeof(*hc), GFP_KERNEL);
2874 + hc->name = kstrdup(name);
2884 + hc->uuid = kstrdup(uuid);
2892 + INIT_LIST_HEAD(&hc->name_list);
2893 + INIT_LIST_HEAD(&hc->uuid_list);
2895 + hc->new_map = NULL;
2899 +static void free_cell(struct hash_cell *hc)
2901 + if (hc) {
2902 + kfree(hc->name);
2903 + kfree(hc->uuid);
2904 + kfree(hc);
2911 +static int register_with_devfs(struct hash_cell *hc)
2913 + kdev_t dev = dm_kdev(hc->md);
2915 + hc->devfs_entry =
2916 + devfs_register(_dev_dir, hc->name, DEVFS_FL_CURRENT_OWNER,
2917 + major(dev), minor(dev),
2918 + S_IFBLK | S_IRUSR | S_IWUSR | S_IRGRP,
2919 + &dm_blk_dops, NULL);
2924 +static int unregister_with_devfs(struct hash_cell *hc)
2926 + devfs_unregister(hc->devfs_entry);
2931 + * The kdev_t and uuid of a device can never change once it is
2932 + * initially inserted.
2934 +int dm_hash_insert(const char *name, const char *uuid, struct mapped_device *md)
2936 + struct hash_cell *cell;
2939 + * Allocate the new cells.
2941 + cell = alloc_cell(name, uuid, md);
2946 + * Insert the cell into both hash tables.
2948 + down_write(&_hash_lock);
2949 + if (__get_name_cell(name))
2950 + goto bad;
2952 + list_add(&cell->name_list, _name_buckets + hash_str(name));
2955 + if (__get_uuid_cell(uuid)) {
2956 + list_del(&cell->name_list);
2957 + goto bad;
2959 + list_add(&cell->uuid_list, _uuid_buckets + hash_str(uuid));
2961 + register_with_devfs(cell);
2962 + dm_get(md);
2963 + up_write(&_hash_lock);
2965 + return 0;
2967 + bad:
2968 + up_write(&_hash_lock);
2969 + free_cell(cell);
2970 + return -EBUSY;
2973 +void __hash_remove(struct hash_cell *hc)
2975 + /* remove from the dev hash */
2976 + list_del(&hc->uuid_list);
2977 + list_del(&hc->name_list);
2978 + unregister_with_devfs(hc);
2980 + if (hc->new_map)
2981 + dm_table_put(hc->new_map);
2982 + dm_put(hc->md);
2983 + free_cell(hc);
2985 +void dm_hash_remove_all(void)
2988 + struct hash_cell *hc;
2989 + struct list_head *tmp, *n;
2991 + down_write(&_hash_lock);
2992 + for (i = 0; i < NUM_BUCKETS; i++) {
2993 + list_for_each_safe (tmp, n, _name_buckets + i) {
2994 + hc = list_entry(tmp, struct hash_cell, name_list);
2995 + __hash_remove(hc);
2998 + up_write(&_hash_lock);
3001 +int dm_hash_rename(const char *old, const char *new)
3003 + char *new_name, *old_name;
3004 + struct hash_cell *hc;
3009 + new_name = kstrdup(new);
3010 + if (!new_name)
3011 + return -ENOMEM;
3013 + down_write(&_hash_lock);
3018 + hc = __get_name_cell(new);
3019 + if (hc) {
3020 + DMWARN("asked to rename to an already existing name %s -> %s",
3021 + old, new);
3022 + up_write(&_hash_lock);
3023 + kfree(new_name);
3024 + return -EBUSY;
3025 + }
3028 + * Is there such a device as 'old' ?
3030 + hc = __get_name_cell(old);
3031 + if (!hc) {
3032 + DMWARN("asked to rename a non-existent device %s -> %s",
3033 + old, new);
3034 + up_write(&_hash_lock);
3035 + kfree(new_name);
3036 + return -ENXIO;
3037 + }
3040 + * rename and move the name cell.
3042 + list_del(&hc->name_list);
3043 + old_name = hc->name;
3044 + hc->name = new_name;
3045 + list_add(&hc->name_list, _name_buckets + hash_str(new_name));
3047 + /* rename the device node in devfs */
3048 + unregister_with_devfs(hc);
3049 + register_with_devfs(hc);
3051 + up_write(&_hash_lock);
3053 + kfree(old_name);
3054 + return 0;
3056 +/*-----------------------------------------------------------------
3057 + * Implementation of the ioctl commands
3058 + *---------------------------------------------------------------*/
3060 + * All the ioctl commands get dispatched to functions with this
3061 + * prototype.
3063 +typedef int (*ioctl_fn)(struct dm_ioctl *param, size_t param_size);
3065 +static int remove_all(struct dm_ioctl *param, size_t param_size)
3067 + dm_hash_remove_all();
3068 + param->data_size = 0;
3069 + return 0;
3073 + * Round up the ptr to an 8-byte boundary.
3075 +#define ALIGN_MASK 7
3076 +static inline void *align_ptr(void *ptr)
3078 + return (void *) (((size_t) (ptr + ALIGN_MASK)) & ~ALIGN_MASK);
3082 + * Retrieves the data payload buffer from an already allocated
3083 + * struct dm_ioctl.
3085 +static void *get_result_buffer(struct dm_ioctl *param, size_t param_size,
3088 + param->data_start = align_ptr(param + 1) - (void *) param;
3090 + if (param->data_start < param_size)
3091 + *len = param_size - param->data_start;
3092 + else
3093 + *len = 0;
3095 + return ((void *) param) + param->data_start;
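
With concrete (invented) numbers the layout is: if sizeof(struct dm_ioctl) were 300 and the user passed param_size = 1024, align_ptr(param + 1) would round 300 up to 304, so data_start = 304 and 720 bytes of payload space follow the fixed-size header. A user-space rendering of the same arithmetic, on offsets instead of pointers:

#include <stdio.h>

#define ALIGN_MASK 7

/* user-space equivalent of align_ptr(), acting on byte offsets */
static unsigned long align8(unsigned long off)
{
	return (off + ALIGN_MASK) & ~(unsigned long) ALIGN_MASK;
}

int main(void)
{
	/* illustrative sizes only; the real header size is ABI-dependent */
	unsigned long header_size = 300, param_size = 1024;
	unsigned long data_start = align8(header_size);

	printf("data_start=%lu payload=%lu\n",
	       data_start, param_size - data_start); /* 304 720 */
	return 0;
}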
3098 +static int list_devices(struct dm_ioctl *param, size_t param_size)
3101 + struct hash_cell *hc;
3102 + size_t len, needed = 0;
3103 + struct dm_name_list *nl, *old_nl = NULL;
3105 + down_write(&_hash_lock);
3108 + * Loop through all the devices working out how much
3111 + for (i = 0; i < NUM_BUCKETS; i++) {
3112 + list_for_each_entry (hc, _name_buckets + i, name_list) {
3113 + needed += sizeof(struct dm_name_list);
3114 + needed += strlen(hc->name);
3115 + needed += ALIGN_MASK;
3120 + * Grab our output buffer.
3122 + nl = get_result_buffer(param, param_size, &len);
3123 + if (len < needed) {
3124 + param->flags |= DM_BUFFER_FULL_FLAG;
3125 + goto out;
3127 + param->data_size = param->data_start + needed;
3129 + nl->dev = 0; /* Flags no data */
3132 + * Now loop through filling out the names.
3134 + for (i = 0; i < NUM_BUCKETS; i++) {
3135 + list_for_each_entry (hc, _name_buckets + i, name_list) {
3136 + if (old_nl)
3137 + old_nl->next = (uint32_t) ((void *) nl -
3138 + (void *) old_nl);
3140 + nl->dev = dm_kdev(hc->md);
3142 + strcpy(nl->name, hc->name);
3143 + old_nl = nl;
3145 + nl = align_ptr(((void *) ++nl) + strlen(hc->name) + 1);
3149 + out:
3150 + up_write(&_hash_lock);
3151 + return 0;
3154 +static int check_name(const char *name)
3156 + if (strchr(name, '/')) {
3157 + DMWARN("invalid device name");
3165 + * Fills in a dm_ioctl structure, ready for sending back to
3168 +static int __dev_status(struct mapped_device *md, struct dm_ioctl *param)
3170 + kdev_t dev = dm_kdev(md);
3171 + struct dm_table *table;
3172 + struct block_device *bdev;
3174 + param->flags &= ~(DM_SUSPEND_FLAG | DM_READONLY_FLAG |
3175 + DM_ACTIVE_PRESENT_FLAG);
3177 + if (dm_suspended(md))
3178 + param->flags |= DM_SUSPEND_FLAG;
3180 + param->dev = kdev_t_to_nr(dev);
3182 + if (is_read_only(dev))
3183 + param->flags |= DM_READONLY_FLAG;
3185 + param->event_nr = dm_get_event_nr(md);
3187 + table = dm_get_table(md);
3188 + if (table) {
3189 + param->flags |= DM_ACTIVE_PRESENT_FLAG;
3190 + param->target_count = dm_table_get_num_targets(table);
3191 + dm_table_put(table);
3192 + } else
3193 + param->target_count = 0;
3195 + bdev = bdget(param->dev);
3196 + if (!bdev)
3197 + return -ENXIO;
3198 + param->open_count = bdev->bd_openers;
3199 + bdput(bdev);
3201 + return 0;
3204 +static int dev_create(struct dm_ioctl *param, size_t param_size)
3206 + int r;
3207 + kdev_t dev = 0;
3208 + struct mapped_device *md;
3210 + r = check_name(param->name);
3211 + if (r)
3212 + return r;
3214 + if (param->flags & DM_PERSISTENT_DEV_FLAG)
3215 + dev = to_kdev_t(param->dev);
3217 + r = dm_create(dev, &md);
3218 + if (r)
3219 + return r;
3221 + r = dm_hash_insert(param->name, *param->uuid ? param->uuid : NULL, md);
3222 + if (r) {
3223 + dm_put(md);
3224 + return r;
3225 + }
3227 + param->flags &= ~DM_INACTIVE_PRESENT_FLAG;
3229 + r = __dev_status(md, param);
3230 + dm_put(md);
3231 + return r;
3236 + * Always use UUID for lookups if it's present, otherwise use name.
3238 +static inline struct hash_cell *__find_device_hash_cell(struct dm_ioctl *param)
3240 + return *param->uuid ?
3241 + __get_uuid_cell(param->uuid) : __get_name_cell(param->name);
3244 +static inline struct mapped_device *find_device(struct dm_ioctl *param)
3246 + struct hash_cell *hc;
3247 + struct mapped_device *md = NULL;
3249 + down_read(&_hash_lock);
3250 + hc = __find_device_hash_cell(param);
3251 + if (hc) {
3252 + md = hc->md;
3253 + dm_get(md);
3255 + * Sneakily write in both the name and the uuid
3256 + * while we have the cell.
3258 + strncpy(param->name, hc->name, sizeof(param->name));
3259 + if (hc->uuid)
3260 + strncpy(param->uuid, hc->uuid, sizeof(param->uuid) - 1);
3261 + else
3262 + param->uuid[0] = '\0';
3264 + if (hc->new_map)
3265 + param->flags |= DM_INACTIVE_PRESENT_FLAG;
3266 + else
3267 + param->flags &= ~DM_INACTIVE_PRESENT_FLAG;
3269 + }
3271 + up_read(&_hash_lock);
3272 + return md;
3276 +static int dev_remove(struct dm_ioctl *param, size_t param_size)
3278 + struct hash_cell *hc;
3280 + down_write(&_hash_lock);
3281 + hc = __find_device_hash_cell(param);
3284 + DMWARN("device doesn't appear to be in the dev hash table.");
3285 + up_write(&_hash_lock);
3289 + __hash_remove(hc);
3290 + up_write(&_hash_lock);
3291 + param->data_size = 0;
3292 + return 0;
3296 + * Check a string doesn't overrun the chunk of
3297 + * memory we copied from userland.
3299 +static int invalid_str(char *str, void *end)
3301 + while ((void *) str < end)
3302 + if (!*str++)
3303 + return 0;
3305 + return -EINVAL;
3308 +static int dev_rename(struct dm_ioctl *param, size_t param_size)
3311 + char *new_name = (char *) param + param->data_start;
3313 + if (new_name < (char *) (param + 1) ||
3314 + invalid_str(new_name, (void *) param + param_size)) {
3315 + DMWARN("Invalid new logical volume name supplied.");
3319 + r = check_name(new_name);
3320 + if (r)
3321 + return r;
3323 + param->data_size = 0;
3324 + return dm_hash_rename(param->name, new_name);
3327 +static int do_suspend(struct dm_ioctl *param)
3330 + struct mapped_device *md;
3332 + md = find_device(param);
3333 + if (!md)
3334 + return -ENXIO;
3336 + if (!dm_suspended(md))
3337 + r = dm_suspend(md);
3339 + if (!r)
3340 + r = __dev_status(md, param);
3342 + dm_put(md);
3343 + return r;
3346 +static int do_resume(struct dm_ioctl *param)
3349 + struct hash_cell *hc;
3350 + struct mapped_device *md;
3351 + struct dm_table *new_map;
3353 + down_write(&_hash_lock);
3355 + hc = __find_device_hash_cell(param);
3357 + DMWARN("device doesn't appear to be in the dev hash table.");
3358 + up_write(&_hash_lock);
3365 + new_map = hc->new_map;
3366 + hc->new_map = NULL;
3367 + param->flags &= ~DM_INACTIVE_PRESENT_FLAG;
3369 + up_write(&_hash_lock);
3371 + /* Do we need to load a new map ? */
3372 + if (new_map) {
3373 + /* Suspend if it isn't already suspended */
3374 + if (!dm_suspended(md))
3375 + dm_suspend(md);
3377 + r = dm_swap_table(md, new_map);
3378 + if (r) {
3379 + dm_put(md);
3380 + dm_table_put(new_map);
3381 + return r;
3382 + }
3384 + if (dm_table_get_mode(new_map) & FMODE_WRITE)
3385 + set_device_ro(dm_kdev(md), 0);
3386 + else
3387 + set_device_ro(dm_kdev(md), 1);
3389 + dm_table_put(new_map);
3392 + if (dm_suspended(md))
3393 + r = dm_resume(md);
3395 + if (!r)
3396 + r = __dev_status(md, param);
3398 + dm_put(md);
3399 + return r;
3403 + * Set or unset the suspension state of a device.
3404 + * If the device already is in the requested state we just return its status.
3406 +static int dev_suspend(struct dm_ioctl *param, size_t param_size)
3408 + if (param->flags & DM_SUSPEND_FLAG)
3409 + return do_suspend(param);
3411 + return do_resume(param);
3415 + * Copies device info back to user space, used by
3416 + * the create and info ioctls.
3418 +static int dev_status(struct dm_ioctl *param, size_t param_size)
3421 + struct mapped_device *md;
3423 + md = find_device(param);
3424 + if (!md)
3425 + return -ENXIO;
3427 + r = __dev_status(md, param);
3428 + dm_put(md);
3429 + return r;
3433 + * Build up the status struct for each target
3435 +static void retrieve_status(struct dm_table *table, struct dm_ioctl *param,
3436 + size_t param_size)
3438 + unsigned int i, num_targets;
3439 + struct dm_target_spec *spec;
3440 + char *outbuf, *outptr;
3441 + status_type_t type;
3442 + size_t remaining, len, used = 0;
3444 + outptr = outbuf = get_result_buffer(param, param_size, &len);
3446 + if (param->flags & DM_STATUS_TABLE_FLAG)
3447 + type = STATUSTYPE_TABLE;
3449 + type = STATUSTYPE_INFO;
3451 + /* Get all the target info */
3452 + num_targets = dm_table_get_num_targets(table);
3453 + for (i = 0; i < num_targets; i++) {
3454 + struct dm_target *ti = dm_table_get_target(table, i);
3456 + remaining = len - (outptr - outbuf);
3457 + if (remaining < sizeof(struct dm_target_spec)) {
3458 + param->flags |= DM_BUFFER_FULL_FLAG;
3462 + spec = (struct dm_target_spec *) outptr;
3465 + spec->sector_start = ti->begin;
3466 + spec->length = ti->len;
3467 + strncpy(spec->target_type, ti->type->name,
3468 + sizeof(spec->target_type));
3470 + outptr += sizeof(struct dm_target_spec);
3471 + remaining = len - (outptr - outbuf);
3473 + /* Get the status/table string from the target driver */
3474 + if (ti->type->status) {
3475 + if (ti->type->status(ti, type, outptr, remaining)) {
3476 + param->flags |= DM_BUFFER_FULL_FLAG;
3477 + break;
3479 + } else
3480 + outptr[0] = '\0';
3482 + outptr += strlen(outptr) + 1;
3483 + used = param->data_start + (outptr - outbuf);
3485 + outptr = align_ptr(outptr);
3486 + spec->next = outptr - outbuf;
3490 + param->data_size = used;
3492 + param->target_count = num_targets;
3496 + * Wait for a device to report an event
3498 +static int dev_wait(struct dm_ioctl *param, size_t param_size)
3501 + struct mapped_device *md;
3502 + struct dm_table *table;
3503 + DECLARE_WAITQUEUE(wq, current);
3505 + md = find_device(param);
3506 + if (!md)
3507 + return -ENXIO;
3510 + * Wait for a notification event
3512 + set_current_state(TASK_INTERRUPTIBLE);
3513 + if (!dm_add_wait_queue(md, &wq, param->event_nr)) {
3514 + schedule();
3515 + dm_remove_wait_queue(md, &wq);
3517 + set_current_state(TASK_RUNNING);
3520 + * The userland program is going to want to know what
3521 + * changed to trigger the event, so we may as well tell
3522 + * him and save an ioctl.
3524 + r = __dev_status(md, param);
3525 + if (r)
3526 + goto out;
3528 + table = dm_get_table(md);
3529 + if (table) {
3530 + retrieve_status(table, param, param_size);
3531 + dm_table_put(table);
3532 + }
3534 + out:
3535 + dm_put(md);
3536 + return r;
3539 +static inline int get_mode(struct dm_ioctl *param)
3541 + int mode = FMODE_READ | FMODE_WRITE;
3543 + if (param->flags & DM_READONLY_FLAG)
3544 + mode = FMODE_READ;
3546 + return mode;
3549 +static int next_target(struct dm_target_spec *last, uint32_t next, void *end,
3550 + struct dm_target_spec **spec, char **target_params)
3552 + *spec = (struct dm_target_spec *) ((unsigned char *) last + next);
3553 + *target_params = (char *) (*spec + 1);
3555 + if (*spec < (last + 1))
3556 + return -EINVAL;
3558 + return invalid_str(*target_params, end);
3561 +static int populate_table(struct dm_table *table, struct dm_ioctl *param,
3562 + size_t param_size)
3565 + unsigned int i = 0;
3566 + struct dm_target_spec *spec = (struct dm_target_spec *) param;
3567 + uint32_t next = param->data_start;
3568 + void *end = (void *) param + param_size;
3569 + char *target_params;
3571 + if (!param->target_count) {
3572 + DMWARN("populate_table: no targets specified");
3576 + for (i = 0; i < param->target_count; i++) {
3578 + r = next_target(spec, next, end, &spec, &target_params);
3580 + DMWARN("unable to find target");
3584 + r = dm_table_add_target(table, spec->target_type,
3585 + (sector_t) spec->sector_start,
3586 + (sector_t) spec->length,
3587 + target_params);
3588 + if (r) {
3589 + DMWARN("error adding target to table");
3590 + return -EINVAL;
3591 + }
3593 + next = spec->next;
3596 + return dm_table_complete(table);
3599 +static int table_load(struct dm_ioctl *param, size_t param_size)
3602 + struct hash_cell *hc;
3603 + struct dm_table *t;
3605 + r = dm_table_create(&t, get_mode(param), param->target_count);
3606 + if (r)
3607 + return r;
3609 + r = populate_table(t, param, param_size);
3610 + if (r) {
3611 + dm_table_put(t);
3612 + return r;
3613 + }
3615 + down_write(&_hash_lock);
3616 + hc = __find_device_hash_cell(param);
3618 + DMWARN("device doesn't appear to be in the dev hash table.");
3619 + up_write(&_hash_lock);
3624 + dm_table_put(hc->new_map);
3626 + param->flags |= DM_INACTIVE_PRESENT_FLAG;
3628 + r = __dev_status(hc->md, param);
3629 + up_write(&_hash_lock);
3630 + return r;
3633 +static int table_clear(struct dm_ioctl *param, size_t param_size)
3636 + struct hash_cell *hc;
3638 + down_write(&_hash_lock);
3640 + hc = __find_device_hash_cell(param);
3642 + DMWARN("device doesn't appear to be in the dev hash table.");
3643 + up_write(&_hash_lock);
3647 + if (hc->new_map) {
3648 + dm_table_put(hc->new_map);
3649 + hc->new_map = NULL;
3652 + param->flags &= ~DM_INACTIVE_PRESENT_FLAG;
3654 + r = __dev_status(hc->md, param);
3655 + up_write(&_hash_lock);
3656 + return r;
3660 + * Retrieves a list of devices used by a particular dm device.
3662 +static void retrieve_deps(struct dm_table *table, struct dm_ioctl *param,
3663 + size_t param_size)
3665 + unsigned int count = 0;
3666 + struct list_head *tmp;
3667 + size_t len, needed;
3668 + struct dm_target_deps *deps;
3670 + deps = get_result_buffer(param, param_size, &len);
3673 + * Count the devices.
3675 + list_for_each(tmp, dm_table_get_devices(table))
3676 + count++;
3679 + * Check we have enough space.
3681 + needed = sizeof(*deps) + (sizeof(*deps->dev) * count);
3682 + if (len < needed) {
3683 + param->flags |= DM_BUFFER_FULL_FLAG;
3684 + return;
3688 + * Fill in the devices.
3690 + deps->count = count;
3691 + count = 0;
3692 + list_for_each(tmp, dm_table_get_devices(table)) {
3693 + struct dm_dev *dd = list_entry(tmp, struct dm_dev, list);
3694 + deps->dev[count++] = dd->bdev->bd_dev;
3697 + param->data_size = param->data_start + needed;
3700 +static int table_deps(struct dm_ioctl *param, size_t param_size)
3703 + struct mapped_device *md;
3704 + struct dm_table *table;
3706 + md = find_device(param);
3707 + if (!md)
3708 + return -ENXIO;
3710 + r = __dev_status(md, param);
3711 + if (r)
3712 + goto out;
3714 + table = dm_get_table(md);
3715 + if (table) {
3716 + retrieve_deps(table, param, param_size);
3717 + dm_table_put(table);
3718 + }
3720 + out:
3721 + dm_put(md);
3722 + return r;
3726 + * Return the status of a device as a text string for each
3727 + * target.
3729 +static int table_status(struct dm_ioctl *param, size_t param_size)
3732 + struct mapped_device *md;
3733 + struct dm_table *table;
3735 + md = find_device(param);
3736 + if (!md)
3737 + return -ENXIO;
3739 + r = __dev_status(md, param);
3740 + if (r)
3741 + goto out;
3743 + table = dm_get_table(md);
3744 + if (table) {
3745 + retrieve_status(table, param, param_size);
3746 + dm_table_put(table);
3747 + }
3749 + out:
3750 + dm_put(md);
3751 + return r;
3754 +/*-----------------------------------------------------------------
3755 + * Implementation of open/close/ioctl on the special char
3756 + * device.
3757 + *---------------------------------------------------------------*/
3758 +static ioctl_fn lookup_ioctl(unsigned int cmd)
3760 + static struct {
3761 + int cmd;
3762 + ioctl_fn fn;
3763 + } _ioctls[] = {
3764 + {DM_VERSION_CMD, NULL}, /* version is dealt with elsewhere */
3765 + {DM_REMOVE_ALL_CMD, remove_all},
3766 + {DM_LIST_DEVICES_CMD, list_devices},
3768 + {DM_DEV_CREATE_CMD, dev_create},
3769 + {DM_DEV_REMOVE_CMD, dev_remove},
3770 + {DM_DEV_RENAME_CMD, dev_rename},
3771 + {DM_DEV_SUSPEND_CMD, dev_suspend},
3772 + {DM_DEV_STATUS_CMD, dev_status},
3773 + {DM_DEV_WAIT_CMD, dev_wait},
3775 + {DM_TABLE_LOAD_CMD, table_load},
3776 + {DM_TABLE_CLEAR_CMD, table_clear},
3777 + {DM_TABLE_DEPS_CMD, table_deps},
3778 + {DM_TABLE_STATUS_CMD, table_status}
3779 + };
3781 + return (cmd >= ARRAY_SIZE(_ioctls)) ? NULL : _ioctls[cmd].fn;
3785 + * As well as checking the version compatibility this always
3786 + * copies the kernel interface version out.
3788 +static int check_version(unsigned int cmd, struct dm_ioctl *user)
3790 + uint32_t version[3];
3793 + if (copy_from_user(version, user->version, sizeof(version)))
3794 + return -EFAULT;
3796 + if ((DM_VERSION_MAJOR != version[0]) ||
3797 + (DM_VERSION_MINOR < version[1])) {
3798 + DMWARN("ioctl interface mismatch: "
3799 + "kernel(%u.%u.%u), user(%u.%u.%u), cmd(%d)",
3800 + DM_VERSION_MAJOR, DM_VERSION_MINOR,
3801 + DM_VERSION_PATCHLEVEL,
3802 + version[0], version[1], version[2], cmd);
3803 + return -EINVAL;
3807 + * Fill in the kernel version.
3809 + version[0] = DM_VERSION_MAJOR;
3810 + version[1] = DM_VERSION_MINOR;
3811 + version[2] = DM_VERSION_PATCHLEVEL;
3812 + if (copy_to_user(user->version, version, sizeof(version)))
3813 + return -EFAULT;
3815 + return 0;
3818 +static void free_params(struct dm_ioctl *param)
3820 + vfree(param);
3823 +static int copy_params(struct dm_ioctl *user, struct dm_ioctl **param)
3825 + struct dm_ioctl tmp, *dmi;
3827 + if (copy_from_user(&tmp, user, sizeof(tmp)))
3828 + return -EFAULT;
3830 + if (tmp.data_size < sizeof(tmp))
3831 + return -EINVAL;
3833 + dmi = (struct dm_ioctl *) vmalloc(tmp.data_size);
3834 + if (!dmi)
3835 + return -ENOMEM;
3837 + if (copy_from_user(dmi, user, tmp.data_size)) {
3838 + vfree(dmi);
3839 + return -EFAULT;
3840 + }
3842 + *param = dmi;
3843 + return 0;
3846 +static int validate_params(uint cmd, struct dm_ioctl *param)
3848 + /* Always clear this flag */
3849 + param->flags &= ~DM_BUFFER_FULL_FLAG;
3851 + /* Ignores parameters */
3852 + if (cmd == DM_REMOVE_ALL_CMD || cmd == DM_LIST_DEVICES_CMD)
3855 + /* Unless creating, either name or uuid but not both */
3856 + if (cmd != DM_DEV_CREATE_CMD) {
3857 + if ((!*param->uuid && !*param->name) ||
3858 + (*param->uuid && *param->name)) {
3859 + DMWARN("one of name or uuid must be supplied, cmd(%u)",
3865 + /* Ensure strings are terminated */
3866 + param->name[DM_NAME_LEN - 1] = '\0';
3867 + param->uuid[DM_UUID_LEN - 1] = '\0';
3869 + return 0;
3872 +static int ctl_ioctl(struct inode *inode, struct file *file,
3873 + uint command, ulong u)
3877 + struct dm_ioctl *param;
3878 + struct dm_ioctl *user = (struct dm_ioctl *) u;
3879 + ioctl_fn fn = NULL;
3880 + size_t param_size;
3882 + /* only root can play with this */
3883 + if (!capable(CAP_SYS_ADMIN))
3884 + return -EACCES;
3886 + if (_IOC_TYPE(command) != DM_IOCTL)
3887 + return -ENOTTY;
3889 + cmd = _IOC_NR(command);
3892 + * Check the interface version passed in. This also
3893 + * writes out the kernel's interface version.
3895 + r = check_version(cmd, user);
3896 + if (r)
3897 + return r;
3900 + * Nothing more to do for the version command.
3902 + if (cmd == DM_VERSION_CMD)
3903 + return 0;
3905 + fn = lookup_ioctl(cmd);
3906 + if (!fn) {
3907 + DMWARN("dm_ctl_ioctl: unknown command 0x%x", command);
3908 + return -ENOTTY;
3909 + }
3912 + * FIXME: I don't like this, we're trying to avoid low
3913 + * memory issues when a device is suspended.
3915 + current->flags |= PF_MEMALLOC;
3918 + * Copy the parameters into kernel space.
3920 + r = copy_params(user, &param);
3921 + if (r) {
3922 + current->flags &= ~PF_MEMALLOC;
3923 + return r;
3924 + }
3926 + r = validate_params(cmd, param);
3927 + if (r)
3928 + goto out;
3930 + param_size = param->data_size;
3931 + param->data_size = sizeof(*param);
3932 + r = fn(param, param_size);
3935 + * Copy the results back to userland.
3937 + if (!r && copy_to_user(user, param, param->data_size))
3938 + r = -EFAULT;
3940 + out:
3941 + free_params(param);
3942 + current->flags &= ~PF_MEMALLOC;
3943 + return r;
3946 +static struct file_operations _ctl_fops = {
3947 + .ioctl = ctl_ioctl,
3948 + .owner = THIS_MODULE,
3951 +static devfs_handle_t _ctl_handle;
3953 +static struct miscdevice _dm_misc = {
3954 + .minor = MISC_DYNAMIC_MINOR,
3955 + .name = "device-mapper",
3956 + .fops = &_ctl_fops
3960 + * Create misc character device and link to DM_DIR/control.
3962 +int __init dm_interface_init(void)
3967 + r = dm_hash_init();
3968 + if (r)
3969 + return r;
3971 + r = misc_register(&_dm_misc);
3972 + if (r) {
3973 + DMERR("misc_register failed for control device");
3974 + dm_hash_exit();
3975 + return r;
3976 + }
3978 + r = devfs_generate_path(_dm_misc.devfs_handle, rname + 3,
3979 + sizeof rname - 3);
3980 + if (r == -ENOSYS)
3981 + goto done; /* devfs not present */
3984 + DMERR("devfs_generate_path failed for control device");
3988 + strncpy(rname + r, "../", 3);
3989 + r = devfs_mk_symlink(NULL, DM_DIR "/control",
3990 + DEVFS_FL_DEFAULT, rname + r, &_ctl_handle, NULL);
3992 + DMERR("devfs_mk_symlink failed for control device");
3995 + devfs_auto_unregister(_dm_misc.devfs_handle, _ctl_handle);
3998 + DMINFO("%d.%d.%d%s initialised: %s", DM_VERSION_MAJOR,
3999 + DM_VERSION_MINOR, DM_VERSION_PATCHLEVEL, DM_VERSION_EXTRA,
4000 + DM_DRIVER_EMAIL);
4001 + return 0;
4003 + failed:
4004 + misc_deregister(&_dm_misc);
4005 + dm_hash_exit();
4006 + return r;
4009 +void dm_interface_exit(void)
4011 + if (misc_deregister(&_dm_misc) < 0)
4012 + DMERR("misc_deregister failed for control device");
4016 diff -urN linux-2.4.24.org/drivers/md/dm-io.h linux-2.4.24/drivers/md/dm-io.h
4017 --- linux-2.4.24.org/drivers/md/dm-io.h 1970-01-01 01:00:00.000000000 +0100
4018 +++ linux-2.4.24/drivers/md/dm-io.h 2004-01-18 15:01:25.794190275 +0100
4021 + * Copyright (C) 2003 Sistina Software
4023 + * This file is released under the GPL.
4031 +#include <linux/list.h>
4033 +/* Move these to bitops.h eventually */
4034 +/* Improved generic_fls algorithm (in 2.4 there is no generic_fls so far) */
4035 +/* (c) 2002, D.Phillips and Sistina Software */
4036 +/* Licensed under Version 2 of the GPL */
4038 +static unsigned generic_fls8(unsigned n)
4040 + return n & 0xf0 ?
4041 + n & 0xc0 ? (n >> 7) + 7 : (n >> 5) + 5 :
4042 + n & 0x0c ? (n >> 3) + 3 : n - ((n + 1) >> 2);
4045 +static inline unsigned generic_fls16(unsigned n)
4047 + return n & 0xff00 ? generic_fls8(n >> 8) + 8 : generic_fls8(n);
4050 +static inline unsigned generic_fls32(unsigned n)
4052 + return n & 0xffff0000 ? generic_fls16(n >> 16) + 16 : generic_fls16(n);
4055 +/* FIXME make this configurable */
4056 +#define DM_MAX_IO_REGIONS 8
4066 + * 'error' is a bitset, with each bit indicating whether an error
4067 + * occurred doing io to the corresponding region.
4069 +typedef void (*io_notify_fn)(unsigned int error, void *context);
4073 + * Before anyone uses the IO interface they should call
4074 + * dm_io_get(), specifying roughly how many pages they are
4075 + * expecting to perform io on concurrently.
4077 + * This function may block.
4079 +int dm_io_get(unsigned int num_pages);
4080 +void dm_io_put(unsigned int num_pages);
4086 + * Please ensure that the rw flag in the next two functions is
4087 + * either READ or WRITE, i.e. we don't take READA. Any
4088 + * regions with a zero count field will be ignored.
4090 +int dm_io_sync(unsigned int num_regions, struct io_region *where, int rw,
4091 + struct page *pages, unsigned int offset,
4092 + unsigned int *error_bits);
4098 + * The 'where' array may be safely allocated on the stack since
4099 + * the function takes a copy.
4101 +int dm_io_async(unsigned int num_regions, struct io_region *where, int rw,
4102 + struct page *pages, unsigned int offset,
4103 + io_notify_fn fn, void *context);
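
The generic_fls fallback above is self-contained enough to verify in user space against a naive bit scan. A throwaway harness (the loop bound is arbitrary):

#include <stdio.h>
#include <assert.h>

/* user-space copies of the generic_fls helpers above */
static unsigned generic_fls8(unsigned n)
{
	return n & 0xf0 ?
	    n & 0xc0 ? (n >> 7) + 7 : (n >> 5) + 5 :
	    n & 0x0c ? (n >> 3) + 3 : n - ((n + 1) >> 2);
}

static unsigned generic_fls16(unsigned n)
{
	return n & 0xff00 ? generic_fls8(n >> 8) + 8 : generic_fls8(n);
}

static unsigned generic_fls32(unsigned n)
{
	return n & 0xffff0000 ? generic_fls16(n >> 16) + 16 : generic_fls16(n);
}

int main(void)
{
	unsigned n;

	/* fls(n) is the 1-based index of the highest set bit; fls(0) == 0 */
	for (n = 1; n < (1u << 20); n++) {
		unsigned naive = 0, m = n;
		while (m) {
			naive++;
			m >>= 1;
		}
		assert(generic_fls32(n) == naive);
	}
	printf("generic_fls32 ok\n");
	return 0;
}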
4106 diff -urN linux-2.4.24.org/drivers/md/dm-linear.c linux-2.4.24/drivers/md/dm-linear.c
4107 --- linux-2.4.24.org/drivers/md/dm-linear.c 1970-01-01 01:00:00.000000000 +0100
4108 +++ linux-2.4.24/drivers/md/dm-linear.c 2004-01-18 15:01:13.777712209 +0100
4111 + * Copyright (C) 2001 Sistina Software (UK) Limited.
4113 + * This file is released under the GPL.
4118 +#include <linux/module.h>
4119 +#include <linux/init.h>
4120 +#include <linux/blkdev.h>
4121 +#include <linux/slab.h>
4124 + * Linear: maps a linear range of a device.
4127 + struct dm_dev *dev;
4132 + * Construct a linear mapping: <dev_path> <offset>
4134 +static int linear_ctr(struct dm_target *ti, unsigned int argc, char **argv)
4136 + struct linear_c *lc;
4139 + ti->error = "dm-linear: Invalid argument count";
4143 + lc = kmalloc(sizeof(*lc), GFP_KERNEL);
4145 + ti->error = "dm-linear: Cannot allocate linear context";
4149 + if (sscanf(argv[1], SECTOR_FORMAT, &lc->start) != 1) {
4150 + ti->error = "dm-linear: Invalid device sector";
4154 + if (dm_get_device(ti, argv[0], lc->start, ti->len,
4155 + dm_table_get_mode(ti->table), &lc->dev)) {
4156 + ti->error = "dm-linear: Device lookup failed";
4168 +static void linear_dtr(struct dm_target *ti)
4170 + struct linear_c *lc = (struct linear_c *) ti->private;
4172 + dm_put_device(ti, lc->dev);
4176 +static int linear_map(struct dm_target *ti, struct buffer_head *bh, int rw,
4177 + union map_info *map_context)
4179 + struct linear_c *lc = (struct linear_c *) ti->private;
4181 + bh->b_rdev = lc->dev->dev;
4182 + bh->b_rsector = lc->start + (bh->b_rsector - ti->begin);
4184 + return 1;
4187 +static int linear_status(struct dm_target *ti, status_type_t type,
4188 + char *result, unsigned int maxlen)
4190 + struct linear_c *lc = (struct linear_c *) ti->private;
4194 + case STATUSTYPE_INFO:
4195 + result[0] = '\0';
4196 + break;
4198 + case STATUSTYPE_TABLE:
4199 + kdev = to_kdev_t(lc->dev->bdev->bd_dev);
4200 + snprintf(result, maxlen, "%s " SECTOR_FORMAT,
4201 + dm_kdevname(kdev), lc->start);
4202 + break;
4205 + return 0;
4207 +static struct target_type linear_target = {
4208 + .name = "linear",
4209 + .module = THIS_MODULE,
4210 + .ctr = linear_ctr,
4211 + .dtr = linear_dtr,
4212 + .map = linear_map,
4213 + .status = linear_status,
4216 +int __init dm_linear_init(void)
4218 + int r = dm_register_target(&linear_target);
4221 + DMERR("linear: register failed %d", r);
4226 +void dm_linear_exit(void)
4228 + int r = dm_unregister_target(&linear_target);
4231 + DMERR("linear: unregister failed %d", r);
4233 diff -urN linux-2.4.24.org/drivers/md/dm-snapshot.c linux-2.4.24/drivers/md/dm-snapshot.c
4234 --- linux-2.4.24.org/drivers/md/dm-snapshot.c 1970-01-01 01:00:00.000000000 +0100
4235 +++ linux-2.4.24/drivers/md/dm-snapshot.c 2004-01-18 15:01:29.247465850 +0100
4240 + * Copyright (C) 2001-2002 Sistina Software (UK) Limited.
4242 + * This file is released under the GPL.
4245 +#include <linux/config.h>
4246 +#include <linux/ctype.h>
4247 +#include <linux/module.h>
4248 +#include <linux/init.h>
4249 +#include <linux/slab.h>
4250 +#include <linux/list.h>
4251 +#include <linux/fs.h>
4252 +#include <linux/blkdev.h>
4253 +#include <linux/mempool.h>
4254 +#include <linux/device-mapper.h>
4255 +#include <linux/vmalloc.h>
4257 +#include "dm-snapshot.h"
4258 +#include "kcopyd.h"
4261 + * FIXME: Remove this before release.
4264 +#define DMDEBUG(x...) DMWARN( ## x)
4266 +#define DMDEBUG(x...)
4270 + * The percentage increment we will wake up users at
4272 +#define WAKE_UP_PERCENT 5
4275 + * kcopyd priority of snapshot operations
4277 +#define SNAPSHOT_COPY_PRIORITY 2
4280 + * Each snapshot reserves this many pages for io
4281 + * FIXME: calculate this
4283 +#define SNAPSHOT_PAGES 256
4285 +struct pending_exception {
4286 + struct exception e;
4289 + * Origin buffers waiting for this to complete are held
4290 + * in a list (using b_reqnext).
4292 + struct buffer_head *origin_bhs;
4293 + struct buffer_head *snapshot_bhs;
4296 + * Other pending_exceptions that are processing this
4297 + * chunk. When this list is empty, we know we can
4298 + * complete the origins.
4300 + struct list_head siblings;
4302 + /* Pointer back to snapshot context */
4303 + struct dm_snapshot *snap;
4306 + * 1 indicates the exception has already been sent to
4307 + * kcopyd.
4309 + int started;
4313 + * Hash table mapping origin volumes to lists of snapshots and
4314 + * a lock to protect it
4316 +static kmem_cache_t *exception_cache;
4317 +static kmem_cache_t *pending_cache;
4318 +static mempool_t *pending_pool;
4321 + * One of these per registered origin, held in the snapshot_origins hash
4324 + /* The origin device */
4327 + struct list_head hash_list;
4329 + /* List of snapshots for this origin */
4330 + struct list_head snapshots;
4334 + * Size of the hash table for origin volumes. If we make this
4335 + * the size of the minors list then it should be nearly perfect
4337 +#define ORIGIN_HASH_SIZE 256
4338 +#define ORIGIN_MASK 0xFF
4339 +static struct list_head *_origins;
4340 +static struct rw_semaphore _origins_lock;
4342 +static int init_origin_hash(void)
4346 + _origins = kmalloc(ORIGIN_HASH_SIZE * sizeof(struct list_head),
4349 + DMERR("Device mapper: Snapshot: unable to allocate memory");
4353 + for (i = 0; i < ORIGIN_HASH_SIZE; i++)
4354 + INIT_LIST_HEAD(_origins + i);
4355 + init_rwsem(&_origins_lock);
4357 + return 0;
4360 +static void exit_origin_hash(void)
4362 + kfree(_origins);
4365 +static inline unsigned int origin_hash(kdev_t dev)
4367 + return MINOR(dev) & ORIGIN_MASK;
4370 +static struct origin *__lookup_origin(kdev_t origin)
4372 + struct list_head *slist;
4373 + struct list_head *ol;
4376 + ol = &_origins[origin_hash(origin)];
4377 + list_for_each(slist, ol) {
4378 + o = list_entry(slist, struct origin, hash_list);
4380 + if (o->dev == origin)
4381 + return o;
4384 + return NULL;
4387 +static void __insert_origin(struct origin *o)
4389 + struct list_head *sl = &_origins[origin_hash(o->dev)];
4390 + list_add_tail(&o->hash_list, sl);
4394 + * Make a note of the snapshot and its origin so we can look it
4395 + * up when the origin has a write on it.
4397 +static int register_snapshot(struct dm_snapshot *snap)
4400 + kdev_t dev = snap->origin->dev;
4402 + down_write(&_origins_lock);
4403 + o = __lookup_origin(dev);
4407 + o = kmalloc(sizeof(*o), GFP_KERNEL);
4408 + if (!o) {
4409 + up_write(&_origins_lock);
4410 + return -ENOMEM;
4411 + }
4413 + /* Initialise the struct */
4414 + INIT_LIST_HEAD(&o->snapshots);
4415 + o->dev = dev;
4417 + __insert_origin(o);
4420 + list_add_tail(&snap->list, &o->snapshots);
4422 + up_write(&_origins_lock);
4424 + return 0;
4426 +static void unregister_snapshot(struct dm_snapshot *s)
4430 + down_write(&_origins_lock);
4431 + o = __lookup_origin(s->origin->dev);
4433 + list_del(&s->list);
4434 + if (list_empty(&o->snapshots)) {
4435 + list_del(&o->hash_list);
4436 + kfree(o);
4439 + up_write(&_origins_lock);
4443 + * Implementation of the exception hash tables.
4445 +static int init_exception_table(struct exception_table *et, uint32_t size)
4449 + et->hash_mask = size - 1;
4450 + et->table = dm_vcalloc(size, sizeof(struct list_head));
4451 + if (!et->table)
4452 + return -ENOMEM;
4454 + for (i = 0; i < size; i++)
4455 + INIT_LIST_HEAD(et->table + i);
4457 + return 0;
4460 +static void exit_exception_table(struct exception_table *et, kmem_cache_t *mem)
4462 + struct list_head *slot, *entry, *temp;
4463 + struct exception *ex;
4466 + size = et->hash_mask + 1;
4467 + for (i = 0; i < size; i++) {
4468 + slot = et->table + i;
4470 + list_for_each_safe(entry, temp, slot) {
4471 + ex = list_entry(entry, struct exception, hash_list);
4472 + kmem_cache_free(mem, ex);
4476 + vfree(et->table);
4480 + * FIXME: check how this hash fn is performing.
4482 +static inline uint32_t exception_hash(struct exception_table *et, chunk_t chunk)
4484 + return chunk & et->hash_mask;
4487 +static void insert_exception(struct exception_table *eh, struct exception *e)
4489 + struct list_head *l = &eh->table[exception_hash(eh, e->old_chunk)];
4490 + list_add(&e->hash_list, l);
4493 +static inline void remove_exception(struct exception *e)
4495 + list_del(&e->hash_list);
4499 + * Return the exception data for a sector, or NULL if not
4502 +static struct exception *lookup_exception(struct exception_table *et,
4505 + struct list_head *slot, *el;
4506 + struct exception *e;
4508 + slot = &et->table[exception_hash(et, chunk)];
4509 + list_for_each(el, slot) {
4510 + e = list_entry(el, struct exception, hash_list);
4511 + if (e->old_chunk == chunk)
4518 +static inline struct exception *alloc_exception(void)
4520 + struct exception *e;
4522 + e = kmem_cache_alloc(exception_cache, GFP_NOIO);
4523 + if (!e)
4524 + e = kmem_cache_alloc(exception_cache, GFP_ATOMIC);
4526 + return e;
4529 +static inline void free_exception(struct exception *e)
4531 + kmem_cache_free(exception_cache, e);
4534 +static inline struct pending_exception *alloc_pending_exception(void)
4536 + return mempool_alloc(pending_pool, GFP_NOIO);
4539 +static inline void free_pending_exception(struct pending_exception *pe)
4541 + mempool_free(pe, pending_pool);
4544 +int dm_add_exception(struct dm_snapshot *s, chunk_t old, chunk_t new)
4546 + struct exception *e;
4548 + e = alloc_exception();
4549 + if (!e)
4550 + return -ENOMEM;
4552 + e->old_chunk = old;
4553 + e->new_chunk = new;
4554 + insert_exception(&s->complete, e);
4556 + return 0;
4559 + * Hard coded magic.
4561 +static int calc_max_buckets(void)
4563 + unsigned long mem;
4565 + mem = num_physpages << PAGE_SHIFT;
4567 + mem /= sizeof(struct list_head);
4569 + return mem;
4573 + * Rounds a number down to a power of 2.
4575 +static inline uint32_t round_down(uint32_t n)
4577 + while (n & (n - 1))
4578 + n &= (n - 1);
4580 + return n;
4583 + * Allocate room for a suitable hash table.
4585 +static int init_hash_tables(struct dm_snapshot *s)
4587 + sector_t hash_size, cow_dev_size, origin_dev_size, max_buckets;
4590 + * Calculate based on the size of the original volume or
4591 + * the COW volume...
4593 + cow_dev_size = get_dev_size(s->cow->dev);
4594 + origin_dev_size = get_dev_size(s->origin->dev);
4595 + max_buckets = calc_max_buckets();
4597 + hash_size = min(origin_dev_size, cow_dev_size) / s->chunk_size;
4598 + hash_size = min(hash_size, max_buckets);
4600 + /* Round it down to a power of 2 */
4601 + hash_size = round_down(hash_size);
4602 + if (init_exception_table(&s->complete, hash_size))
4603 + return -ENOMEM;
4606 + * Allocate hash table for in-flight exceptions
4607 + * Make this smaller than the real hash table
4609 + hash_size >>= 3;
4610 + if (hash_size < 64)
4611 + hash_size = 64;
4613 + if (init_exception_table(&s->pending, hash_size)) {
4614 + exit_exception_table(&s->complete, exception_cache);
4615 + return -ENOMEM;
4618 + return 0;
4622 + * Round a number up to the nearest 'size' boundary. size must
4623 + * be a power of 2.
4625 +static inline ulong round_up(ulong n, ulong size)
4627 + size--;
4628 + return (n + size) & ~size;
4632 + * Construct a snapshot mapping: <origin_dev> <COW-dev> <p/n> <chunk-size>
4634 +static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv)
4636 + struct dm_snapshot *s;
4637 + unsigned long chunk_size;
4640 + char *origin_path;
4646 + ti->error = "dm-snapshot: requires exactly 4 arguments";
4651 + origin_path = argv[0];
4652 + cow_path = argv[1];
4653 + persistent = toupper(*argv[2]);
4655 + if (persistent != 'P' && persistent != 'N') {
4656 + ti->error = "Persistent flag is not P or N";
4661 + chunk_size = simple_strtoul(argv[3], &value, 10);
4662 + if (chunk_size == 0 || value == NULL) {
4663 + ti->error = "Invalid chunk size";
4668 + s = kmalloc(sizeof(*s), GFP_KERNEL);
4670 + ti->error = "Cannot allocate snapshot context private "
4676 + r = dm_get_device(ti, origin_path, 0, ti->len, FMODE_READ, &s->origin);
4678 + ti->error = "Cannot get origin device";
4682 + /* FIXME: get cow length */
4683 + r = dm_get_device(ti, cow_path, 0, 0,
4684 + FMODE_READ | FMODE_WRITE, &s->cow);
4686 + dm_put_device(ti, s->origin);
4687 + ti->error = "Cannot get COW device";
4692 + * Chunk size must be a multiple of page size. Silently
4693 + * round up if it's not.
4695 + chunk_size = round_up(chunk_size, PAGE_SIZE / SECTOR_SIZE);
4697 + /* Validate the chunk size against the device block size */
4698 + blocksize = get_hardsect_size(s->cow->dev);
4699 + if (chunk_size % (blocksize / SECTOR_SIZE)) {
4700 + ti->error = "Chunk size is not a multiple of device blocksize";
4705 + /* Check the sizes are small enough to fit in one kiovec */
4706 + if (chunk_size > KIO_MAX_SECTORS) {
4707 + ti->error = "Chunk size is too big";
4712 + /* Check chunk_size is a power of 2 */
4713 + if (chunk_size & (chunk_size - 1)) {
4714 + ti->error = "Chunk size is not a power of 2";
4719 + s->chunk_size = chunk_size;
4720 + s->chunk_mask = chunk_size - 1;
4721 + s->type = persistent;
4722 + for (s->chunk_shift = 0; chunk_size;
4723 + s->chunk_shift++, chunk_size >>= 1)
4724 + ;
4725 + s->chunk_shift--;
4727 + s->valid = 1;
4728 + s->have_metadata = 0;
4729 + s->last_percent = 0;
4730 + init_rwsem(&s->lock);
4731 + s->table = ti->table;
4733 + /* Allocate hash table for COW data */
4734 + if (init_hash_tables(s)) {
4735 + ti->error = "Unable to allocate hash table space";
4741 + * Check the persistent flag - done here because we need the iobuf
4742 + * to check the LV header
4744 + s->store.snap = s;
4746 + if (persistent == 'P')
4747 + r = dm_create_persistent(&s->store, s->chunk_size);
4748 + else
4749 + r = dm_create_transient(&s->store, s, blocksize);
4752 + ti->error = "Couldn't create exception store";
4757 + r = kcopyd_client_create(SNAPSHOT_PAGES, &s->kcopyd_client);
4759 + ti->error = "Could not create kcopyd client";
4763 + /* Flush IO to the origin device */
4764 + fsync_dev(s->origin->dev);
4766 + /* Add snapshot to the list of snapshots for this origin */
4767 + if (register_snapshot(s)) {
4769 + ti->error = "Cannot register snapshot origin";
4777 + kcopyd_client_destroy(s->kcopyd_client);
4780 + s->store.destroy(&s->store);
4783 + exit_exception_table(&s->pending, pending_cache);
4784 + exit_exception_table(&s->complete, exception_cache);
4787 + dm_put_device(ti, s->cow);
4788 + dm_put_device(ti, s->origin);
4797 +static void snapshot_dtr(struct dm_target *ti)
4799 + struct dm_snapshot *s = (struct dm_snapshot *) ti->private;
4801 + dm_table_event(ti->table);
4803 + unregister_snapshot(s);
4805 + exit_exception_table(&s->pending, pending_cache);
4806 + exit_exception_table(&s->complete, exception_cache);
4808 + /* Deallocate memory used */
4809 + s->store.destroy(&s->store);
4811 + dm_put_device(ti, s->origin);
4812 + dm_put_device(ti, s->cow);
4813 + kcopyd_client_destroy(s->kcopyd_client);
4818 + * We hold lists of buffer_heads, using the b_reqnext field.
4820 +static void queue_buffer(struct buffer_head **queue, struct buffer_head *bh)
4822 + bh->b_reqnext = *queue;
4823 + *queue = bh;
4827 + * FIXME: inefficient.
4829 +static void queue_buffers(struct buffer_head **queue, struct buffer_head *bhs)
4831 + while (*queue)
4832 + queue = &((*queue)->b_reqnext);
4834 + *queue = bhs;
4838 + * Flush a list of buffers.
4840 +static void flush_buffers(struct buffer_head *bh)
4842 + struct buffer_head *n;
4844 + DMDEBUG("begin flush");
4845 + while (bh) {
4846 + n = bh->b_reqnext;
4847 + bh->b_reqnext = NULL;
4848 + DMDEBUG("flushing %p", bh);
4849 + generic_make_request(WRITE, bh);
4850 + bh = n;
4853 + run_task_queue(&tq_disk);
4857 + * Error a list of buffers.
4859 +static void error_buffers(struct buffer_head *bh)
4861 + struct buffer_head *n;
4863 + while (bh) {
4864 + n = bh->b_reqnext;
4865 + bh->b_reqnext = NULL;
4866 + buffer_IO_error(bh);
4867 + bh = n;
4871 +static struct buffer_head *__flush_bhs(struct pending_exception *pe)
4873 + struct pending_exception *sibling;
4875 + if (list_empty(&pe->siblings))
4876 + return pe->origin_bhs;
4878 + sibling = list_entry(pe->siblings.next,
4879 + struct pending_exception, siblings);
4881 + list_del(&pe->siblings);
4883 + /* FIXME: I think there's a race on SMP machines here, add spin lock */
4884 + queue_buffers(&sibling->origin_bhs, pe->origin_bhs);
4885 + return NULL;
4889 +static void pending_complete(struct pending_exception *pe, int success)
4891 + struct exception *e;
4892 + struct dm_snapshot *s = pe->snap;
4893 + struct buffer_head *flush = NULL;
4896 + e = alloc_exception();
4898 + DMWARN("Unable to allocate exception.");
4899 + down_write(&s->lock);
4900 + s->store.drop_snapshot(&s->store);
4902 + flush = __flush_bhs(pe);
4903 + up_write(&s->lock);
4905 + error_buffers(pe->snapshot_bhs);
4906 + goto out;
4910 + * Add a proper exception, and remove the
4911 + * in-flight exception from the list.
4913 + down_write(&s->lock);
4915 + memcpy(e, &pe->e, sizeof(*e));
4916 + insert_exception(&s->complete, e);
4917 + remove_exception(&pe->e);
4918 + flush = __flush_bhs(pe);
4920 + /* Submit any pending write BHs */
4921 + up_write(&s->lock);
4923 + flush_buffers(pe->snapshot_bhs);
4924 + DMDEBUG("Exception completed successfully.");
4926 + /* Notify any interested parties */
4927 + if (s->store.fraction_full) {
4928 + sector_t numerator, denominator;
4931 + s->store.fraction_full(&s->store, &numerator,
4933 + pc = numerator * 100 / denominator;
4935 + if (pc >= s->last_percent + WAKE_UP_PERCENT) {
4936 + dm_table_event(s->table);
4937 + s->last_percent = pc - pc % WAKE_UP_PERCENT;
4942 + /* Read/write error - snapshot is unusable */
4943 + down_write(&s->lock);
4945 + DMERR("Error reading/writing snapshot");
4946 + s->store.drop_snapshot(&s->store);
4947 + s->valid = 0;
4948 + remove_exception(&pe->e);
4949 + flush = __flush_bhs(pe);
4950 + up_write(&s->lock);
4952 + error_buffers(pe->snapshot_bhs);
4954 + dm_table_event(s->table);
4955 + DMDEBUG("Exception failed.");
4958 + out:
4959 + if (flush)
4960 + flush_buffers(flush);
4962 + free_pending_exception(pe);
4965 +static void commit_callback(void *context, int success)
4967 + struct pending_exception *pe = (struct pending_exception *) context;
4968 + pending_complete(pe, success);
4972 + * Called when the copy I/O has finished. kcopyd actually runs
4973 + * this code so don't block.
4975 +static void copy_callback(int read_err, unsigned int write_err, void *context)
4977 + struct pending_exception *pe = (struct pending_exception *) context;
4978 + struct dm_snapshot *s = pe->snap;
4980 + if (read_err || write_err)
4981 + pending_complete(pe, 0);
4983 + else
4984 + /* Update the metadata if we are persistent */
4985 + s->store.commit_exception(&s->store, &pe->e, commit_callback,
4986 + pe);
4990 + * Dispatches the copy operation to kcopyd.
4992 +static inline void start_copy(struct pending_exception *pe)
4994 + struct dm_snapshot *s = pe->snap;
4995 + struct io_region src, dest;
4996 + kdev_t dev = s->origin->dev;
4997 + int *sizes = blk_size[major(dev)];
4998 + sector_t dev_size = (sector_t) -1;
5003 + /* this is protected by snap->lock */
5006 + if (sizes && sizes[minor(dev)])
5007 + dev_size = sizes[minor(dev)] << 1;
5009 + src.dev = dev;
5010 + src.sector = chunk_to_sector(s, pe->e.old_chunk);
5011 + src.count = min(s->chunk_size, dev_size - src.sector);
5013 + dest.dev = s->cow->dev;
5014 + dest.sector = chunk_to_sector(s, pe->e.new_chunk);
5015 + dest.count = src.count;
5017 + /* Hand over to kcopyd */
5018 + kcopyd_copy(s->kcopyd_client,
5019 + &src, 1, &dest, 0, copy_callback, pe);
5023 + * Looks to see if this snapshot already has a pending exception
5024 + * for this chunk, otherwise it allocates a new one and inserts
5025 + * it into the pending table.
5027 +static struct pending_exception *find_pending_exception(struct dm_snapshot *s,
5028 + struct buffer_head *bh)
5030 + struct exception *e;
5031 + struct pending_exception *pe;
5032 + chunk_t chunk = sector_to_chunk(s, bh->b_rsector);
5035 + * Is there a pending exception for this already ?
5037 + e = lookup_exception(&s->pending, chunk);
5038 + if (e) {
5039 + /* cast the exception to a pending exception */
5040 + pe = list_entry(e, struct pending_exception, e);
5042 + } else {
5043 + /* Create a new pending exception */
5044 + pe = alloc_pending_exception();
5045 + pe->e.old_chunk = chunk;
5046 + pe->origin_bhs = pe->snapshot_bhs = NULL;
5047 + INIT_LIST_HEAD(&pe->siblings);
5048 + pe->snap = s;
5049 + pe->started = 0;
5051 + if (s->store.prepare_exception(&s->store, &pe->e)) {
5052 + free_pending_exception(pe);
5053 + return NULL;
5057 + insert_exception(&s->pending, &pe->e);
5063 +static inline void remap_exception(struct dm_snapshot *s, struct exception *e,
5064 + struct buffer_head *bh)
5066 + bh->b_rdev = s->cow->dev;
5067 + bh->b_rsector = chunk_to_sector(s, e->new_chunk) +
5068 + (bh->b_rsector & s->chunk_mask);
5071 +static int snapshot_map(struct dm_target *ti, struct buffer_head *bh, int rw,
5072 + union map_info *map_context)
5074 + struct exception *e;
5075 + struct dm_snapshot *s = (struct dm_snapshot *) ti->private;
5076 + chunk_t chunk;
5077 + int r = 1;
5078 + struct pending_exception *pe;
5080 + chunk = sector_to_chunk(s, bh->b_rsector);
5082 + /* Full snapshots are not usable */
5083 + if (!s->valid)
5084 + return -1;
5087 + * Write to snapshot - higher level takes care of RW/RO
5088 + * flags so we should only get this if we are
5089 + * snapshotting.
5091 + if (rw == WRITE) {
5093 + down_write(&s->lock);
5095 + /* If the block is already remapped - use that, else remap it */
5096 + e = lookup_exception(&s->complete, chunk);
5097 + if (e)
5098 + remap_exception(s, e, bh);
5100 + else {
5101 + pe = find_pending_exception(s, bh);
5103 + if (!pe) {
5104 + s->store.drop_snapshot(&s->store);
5105 + s->valid = 0;
5106 + r = -EIO;
5107 + } else {
5108 + remap_exception(s, &pe->e, bh);
5109 + queue_buffer(&pe->snapshot_bhs, bh);
5110 + if (!pe->started) {
5111 + pe->started = 1;
5112 + start_copy(pe);
5113 + }
5114 + r = 0;
5115 + up_write(&s->lock);
5119 + * FIXME: this read path scares me because we
5120 + * always use the origin when we have a pending
5121 + * exception. However I can't think of a
5122 + * situation where this is wrong - ejt.
5126 + down_read(&s->lock);
5128 + /* See if it has been remapped */
5129 + e = lookup_exception(&s->complete, chunk);
5130 + if (e)
5131 + remap_exception(s, e, bh);
5132 + else
5133 + bh->b_rdev = s->origin->dev;
5135 + up_read(&s->lock);
5137 + return r;
5141 +void snapshot_resume(struct dm_target *ti)
5143 + struct dm_snapshot *s = (struct dm_snapshot *) ti->private;
5145 + if (s->have_metadata)
5146 + return;
5148 + if (s->store.read_metadata(&s->store)) {
5149 + down_write(&s->lock);
5150 + s->valid = 0;
5151 + up_write(&s->lock);
5154 + s->have_metadata = 1;
5157 +static int snapshot_status(struct dm_target *ti, status_type_t type,
5158 + char *result, unsigned int maxlen)
5160 + struct dm_snapshot *snap = (struct dm_snapshot *) ti->private;
5161 + char cow[16], org[16];
5163 + switch (type) {
5165 + case STATUSTYPE_INFO:
5167 + snprintf(result, maxlen, "Invalid");
5169 + if (snap->store.fraction_full) {
5170 + sector_t numerator, denominator;
5171 + snap->store.fraction_full(&snap->store,
5174 + snprintf(result, maxlen,
5175 + SECTOR_FORMAT "/" SECTOR_FORMAT,
5176 + numerator, denominator);
5179 + snprintf(result, maxlen, "Unknown");
5183 + case STATUSTYPE_TABLE:
5185 + * kdevname returns a static pointer so we need
5186 + * to make private copies if the output is to
5189 + strncpy(cow, dm_kdevname(snap->cow->dev), sizeof(cow));
5190 + strncpy(org, dm_kdevname(snap->origin->dev), sizeof(org));
5191 + snprintf(result, maxlen, "%s %s %c %ld", org, cow,
5192 + snap->type, snap->chunk_size);
5193 + break;
5194 + }
5196 + return 0;
5199 +/*-----------------------------------------------------------------
5200 + * Origin methods
5201 + *---------------------------------------------------------------*/
5202 +static void list_merge(struct list_head *l1, struct list_head *l2)
5204 + struct list_head *l1_n, *l2_p;
5206 + l1_n = l1->next;
5207 + l2_p = l2->prev;
5209 + l1->next = l2;
5210 + l2->prev = l1;
5212 + l2_p->next = l1_n;
5213 + l1_n->prev = l2_p;
5216 +static int __origin_write(struct list_head *snapshots, struct buffer_head *bh)
5218 + int r = 1, first = 1;
5219 + struct list_head *sl;
5220 + struct dm_snapshot *snap;
5221 + struct exception *e;
5222 + struct pending_exception *pe, *last = NULL;
5225 + /* Do all the snapshots on this origin */
5226 + list_for_each(sl, snapshots) {
5227 + snap = list_entry(sl, struct dm_snapshot, list);
5229 + /* Only deal with valid snapshots */
5230 + if (!snap->valid)
5231 + continue;
5233 + down_write(&snap->lock);
5236 + * Remember, different snapshots can have
5237 + * different chunk sizes.
5239 + chunk = sector_to_chunk(snap, bh->b_rsector);
5242 + * Check exception table to see if block
5243 + * is already remapped in this snapshot
5244 + * and trigger an exception if not.
5246 + e = lookup_exception(&snap->complete, chunk);
5247 + if (!e) {
5248 + pe = find_pending_exception(snap, bh);
5249 + if (!pe) {
5250 + snap->store.drop_snapshot(&snap->store);
5251 + snap->valid = 0;
5254 + if (last)
5255 + list_merge(&pe->siblings,
5256 + &last->siblings);
5258 + last = pe;
5263 + up_write(&snap->lock);
5267 + * Now that we have a complete pe list we can start the copying.
5270 + pe = last;
5271 + do {
5272 + down_write(&pe->snap->lock);
5274 + queue_buffer(&pe->origin_bhs, bh);
5276 + up_write(&pe->snap->lock);
5278 + pe = list_entry(pe->siblings.next,
5279 + struct pending_exception, siblings);
5281 + } while (pe != last);
5288 + * Called on a write from the origin driver.
5290 +int do_origin(struct dm_dev *origin, struct buffer_head *bh)
5292 + struct origin *o;
5293 + int r = 1;
5295 + down_read(&_origins_lock);
5296 + o = __lookup_origin(origin->dev);
5299 + if (o)
5300 + r = __origin_write(&o->snapshots, bh);
5301 + up_read(&_origins_lock);
5303 + return r;
5307 + * Origin: maps a linear range of a device, with hooks for snapshotting.
5311 + * Construct an origin mapping: <dev_path>
5312 + * The context for an origin is merely a 'struct dm_dev *'
5313 + * pointing to the real device.
5315 +static int origin_ctr(struct dm_target *ti, unsigned int argc, char **argv)
5318 + struct dm_dev *dev;
5321 + ti->error = "dm-origin: incorrect number of arguments";
5325 + r = dm_get_device(ti, argv[0], 0, ti->len,
5326 + dm_table_get_mode(ti->table), &dev);
5328 + ti->error = "Cannot get target device";
5332 + ti->private = dev;
5333 + return 0;
5336 +static void origin_dtr(struct dm_target *ti)
5338 + struct dm_dev *dev = (struct dm_dev *) ti->private;
5339 + dm_put_device(ti, dev);
5342 +static int origin_map(struct dm_target *ti, struct buffer_head *bh, int rw,
5343 + union map_info *map_context)
5345 + struct dm_dev *dev = (struct dm_dev *) ti->private;
5346 + bh->b_rdev = dev->dev;
5348 + /* Only tell snapshots if this is a write */
5349 + return (rw == WRITE) ? do_origin(dev, bh) : 1;
5352 +static int origin_status(struct dm_target *ti, status_type_t type, char *result,
5353 + unsigned int maxlen)
5355 + struct dm_dev *dev = (struct dm_dev *) ti->private;
5358 + case STATUSTYPE_INFO:
5359 + result[0] = '\0';
5360 + break;
5362 + case STATUSTYPE_TABLE:
5363 + snprintf(result, maxlen, "%s", dm_kdevname(dev->dev));
5370 +static struct target_type origin_target = {
5371 + name: "snapshot-origin",
5372 + module: THIS_MODULE,
5373 + ctr: origin_ctr,
5374 + dtr: origin_dtr,
5375 + map: origin_map,
5376 + status: origin_status,
5379 +static struct target_type snapshot_target = {
5380 + name: "snapshot",
5381 + module: THIS_MODULE,
5382 + ctr: snapshot_ctr,
5383 + dtr: snapshot_dtr,
5384 + map: snapshot_map,
5385 + resume: snapshot_resume,
5386 + status: snapshot_status,
5389 +int __init dm_snapshot_init(void)
5393 + r = dm_register_target(&snapshot_target);
5395 + DMERR("snapshot target register failed %d", r);
5399 + r = dm_register_target(&origin_target);
5401 + DMERR("Device mapper: Origin: register failed %d\n", r);
5405 + r = init_origin_hash();
5407 + DMERR("init_origin_hash failed.");
5411 + exception_cache = kmem_cache_create("dm-snapshot-ex",
5412 + sizeof(struct exception),
5413 + __alignof__(struct exception),
5415 + if (!exception_cache) {
5416 + DMERR("Couldn't create exception cache.");
5422 + kmem_cache_create("dm-snapshot-in",
5423 + sizeof(struct pending_exception),
5424 + __alignof__(struct pending_exception),
5426 + if (!pending_cache) {
5427 + DMERR("Couldn't create pending cache.");
5432 + pending_pool = mempool_create(128, mempool_alloc_slab,
5433 + mempool_free_slab, pending_cache);
5434 + if (!pending_pool) {
5435 + DMERR("Couldn't create pending pool.");
5442 + bad5:
5443 + kmem_cache_destroy(pending_cache);
5444 + bad4:
5445 + kmem_cache_destroy(exception_cache);
5446 + bad3:
5447 + exit_origin_hash();
5448 + bad2:
5449 + dm_unregister_target(&origin_target);
5450 + bad1:
5451 + dm_unregister_target(&snapshot_target);
5452 + return r;
5455 +void dm_snapshot_exit(void)
5459 + r = dm_unregister_target(&snapshot_target);
5461 + DMERR("snapshot unregister failed %d", r);
5463 + r = dm_unregister_target(&origin_target);
5465 + DMERR("origin unregister failed %d", r);
5467 + exit_origin_hash();
5468 + mempool_destroy(pending_pool);
5469 + kmem_cache_destroy(pending_cache);
5470 + kmem_cache_destroy(exception_cache);
5472 diff -urN linux-2.4.24.org/drivers/md/dm-snapshot.h linux-2.4.24/drivers/md/dm-snapshot.h
5473 --- linux-2.4.24.org/drivers/md/dm-snapshot.h 1970-01-01 01:00:00.000000000 +0100
5474 +++ linux-2.4.24/drivers/md/dm-snapshot.h 2004-01-18 15:01:29.250465221 +0100
5479 + * Copyright (C) 2001-2002 Sistina Software (UK) Limited.
5481 + * This file is released under the GPL.
5484 +#ifndef DM_SNAPSHOT_H
5485 +#define DM_SNAPSHOT_H
5488 +#include <linux/blkdev.h>
5490 +struct exception_table {
5491 + uint32_t hash_mask;
5492 + struct list_head *table;
5496 + * The snapshot code deals with largish chunks of the disk at a
5497 + * time. Typically 64k - 256k.
5499 +/* FIXME: can we get away with limiting these to a uint32_t ? */
5500 +typedef sector_t chunk_t;
5503 + * An exception is used where an old chunk of data has been
5504 + * replaced by a new one.
5507 + struct list_head hash_list;
5509 + chunk_t old_chunk;
5510 + chunk_t new_chunk;
5514 + * Abstraction to handle the meta/layout of exception stores (the COW device).
5517 +struct exception_store {
5520 + * Destroys this object when you've finished with it.
5522 + void (*destroy) (struct exception_store *store);
5525 + * The target shouldn't read the COW device until this is called.
5528 + int (*read_metadata) (struct exception_store *store);
5531 + * Find somewhere to store the next exception.
5533 + int (*prepare_exception) (struct exception_store *store,
5534 + struct exception *e);
5537 + * Update the metadata with this exception.
5539 + void (*commit_exception) (struct exception_store *store,
5540 + struct exception *e,
5541 + void (*callback) (void *, int success),
5542 + void *callback_context);
5545 + * The snapshot is invalid, note this in the metadata.
5547 + void (*drop_snapshot) (struct exception_store *store);
5550 + * Return how full the snapshot is.
5552 + void (*fraction_full) (struct exception_store *store,
5553 + sector_t *numerator,
5554 + sector_t *denominator);
5556 + struct dm_snapshot *snap;
5560 +struct dm_snapshot {
5561 + struct rw_semaphore lock;
5562 + struct dm_table *table;
5564 + struct dm_dev *origin;
5565 + struct dm_dev *cow;
5567 + /* List of snapshots per Origin */
5568 + struct list_head list;
5570 + /* Size of data blocks saved - must be a power of 2 */
5571 + chunk_t chunk_size;
5572 + chunk_t chunk_mask;
5573 + chunk_t chunk_shift;
5575 + /* You can't use a snapshot if this is 0 (e.g. if full) */
5577 + int have_metadata;
5579 + /* Used for display of table */
5582 + /* The last percentage we notified */
5585 + struct exception_table pending;
5586 + struct exception_table complete;
5588 + /* The on disk metadata handler */
5589 + struct exception_store store;
5591 + struct kcopyd_client *kcopyd_client;
5595 + * Used by the exception stores to load exceptions when
5598 +int dm_add_exception(struct dm_snapshot *s, chunk_t old, chunk_t new);
5601 + * Constructor and destructor for the default persistent store.
5604 +int dm_create_persistent(struct exception_store *store, uint32_t chunk_size);
5606 +int dm_create_transient(struct exception_store *store,
5607 + struct dm_snapshot *s, int blocksize);
5610 + * Return the number of sectors in the device.
5612 +static inline sector_t get_dev_size(kdev_t dev)
5616 + sizes = blk_size[MAJOR(dev)];
5618 + return sizes[MINOR(dev)] << 1;
5623 +static inline chunk_t sector_to_chunk(struct dm_snapshot *s, sector_t sector)
5625 + return (sector & ~s->chunk_mask) >> s->chunk_shift;
5628 +static inline sector_t chunk_to_sector(struct dm_snapshot *s, chunk_t chunk)
5630 + return chunk << s->chunk_shift;
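To make the chunk arithmetic above concrete: for a 64k chunk (128 sectors of 512 bytes), the constructor (not part of this extract) sets chunk_size = 128, chunk_mask = 127 and chunk_shift = 7. A minimal sketch, assuming such a snapshot s:

	static inline void chunk_example(struct dm_snapshot *s)
	{
		sector_t sector = 1000;
		chunk_t c = sector_to_chunk(s, sector);	/* (1000 & ~127) >> 7 = 7 */
		sector_t start = chunk_to_sector(s, c);	/* 7 << 7 = 896 */

		/* the io therefore lands (1000 - 896) = 104 sectors
		 * into chunk 7 */
	}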
5634 diff -urN linux-2.4.24.org/drivers/md/dm-stripe.c linux-2.4.24/drivers/md/dm-stripe.c
5635 --- linux-2.4.24.org/drivers/md/dm-stripe.c 1970-01-01 01:00:00.000000000 +0100
5636 +++ linux-2.4.24/drivers/md/dm-stripe.c 2004-01-18 15:01:13.781711369 +0100
5639 + * Copyright (C) 2001 Sistina Software (UK) Limited.
5641 + * This file is released under the GPL.
5646 +#include <linux/module.h>
5647 +#include <linux/init.h>
5648 +#include <linux/blkdev.h>
5649 +#include <linux/slab.h>
5652 + struct dm_dev *dev;
5653 + sector_t physical_start;
5659 + /* The size of this target / num. stripes */
5660 + uint32_t stripe_width;
5662 + /* stripe chunk size */
5663 + uint32_t chunk_shift;
5664 + sector_t chunk_mask;
5666 + struct stripe stripe[0];
5669 +static inline struct stripe_c *alloc_context(unsigned int stripes)
5673 + if (array_too_big(sizeof(struct stripe_c), sizeof(struct stripe),
5677 + len = sizeof(struct stripe_c) + (sizeof(struct stripe) * stripes);
5679 + return kmalloc(len, GFP_KERNEL);
5683 + * Parse a single <dev> <sector> pair
5685 +static int get_stripe(struct dm_target *ti, struct stripe_c *sc,
5686 + unsigned int stripe, char **argv)
5690 + if (sscanf(argv[1], SECTOR_FORMAT, &start) != 1)
5693 + if (dm_get_device(ti, argv[0], start, sc->stripe_width,
5694 + dm_table_get_mode(ti->table),
5695 + &sc->stripe[stripe].dev))
5698 + sc->stripe[stripe].physical_start = start;
5703 + * FIXME: Nasty function, only present because we can't link
5704 + * against __moddi3 and __divdi3.
5706 + * returns a == b * n
5708 +static int multiple(sector_t a, sector_t b, sector_t *n)
5710 + sector_t acc, prev, i;
5714 + for (acc = b, prev = 0, i = 1;
5716 + prev = acc, acc <<= 1, i <<= 1)
5727 + * Construct a striped mapping.
5728 + * <number of stripes> <chunk size (2^^n)> [<dev_path> <offset>]+
5730 +static int stripe_ctr(struct dm_target *ti, unsigned int argc, char **argv)
5732 + struct stripe_c *sc;
5735 + uint32_t chunk_size;
5741 + ti->error = "dm-stripe: Not enough arguments";
5745 + stripes = simple_strtoul(argv[0], &end, 10);
5747 + ti->error = "dm-stripe: Invalid stripe count";
5751 + chunk_size = simple_strtoul(argv[1], &end, 10);
5753 + ti->error = "dm-stripe: Invalid chunk_size";
5758 + * chunk_size is a power of two
5760 + if (!chunk_size || (chunk_size & (chunk_size - 1))) {
5761 + ti->error = "dm-stripe: Invalid chunk size";
5765 + if (!multiple(ti->len, stripes, &width)) {
5766 + ti->error = "dm-stripe: Target length not divisible by "
5767 + "number of stripes";
5772 + * Do we have enough arguments for that many stripes ?
5774 + if (argc != (2 + 2 * stripes)) {
5775 + ti->error = "dm-stripe: Not enough destinations specified";
5779 + sc = alloc_context(stripes);
5781 + ti->error = "dm-stripe: Memory allocation for striped context "
5786 + sc->stripes = stripes;
5787 + sc->stripe_width = width;
5789 + sc->chunk_mask = ((sector_t) chunk_size) - 1;
5790 + for (sc->chunk_shift = 0; chunk_size; sc->chunk_shift++)
5792 + sc->chunk_shift--;
5795 + * Get the stripe destinations.
5797 + for (i = 0; i < stripes; i++) {
5800 + r = get_stripe(ti, sc, i, argv);
5802 + ti->error = "dm-stripe: Couldn't parse stripe "
5805 + dm_put_device(ti, sc->stripe[i].dev);
5815 +static void stripe_dtr(struct dm_target *ti)
5818 + struct stripe_c *sc = (struct stripe_c *) ti->private;
5820 + for (i = 0; i < sc->stripes; i++)
5821 + dm_put_device(ti, sc->stripe[i].dev);
5826 +static int stripe_map(struct dm_target *ti, struct buffer_head *bh, int rw,
5827 + union map_info *context)
5829 + struct stripe_c *sc = (struct stripe_c *) ti->private;
5831 + sector_t offset = bh->b_rsector - ti->begin;
5832 + uint32_t chunk = (uint32_t) (offset >> sc->chunk_shift);
5833 + uint32_t stripe = chunk % sc->stripes; /* 32bit modulus */
5834 + chunk = chunk / sc->stripes;
5836 + bh->b_rdev = sc->stripe[stripe].dev->dev;
5837 + bh->b_rsector = sc->stripe[stripe].physical_start +
5838 + (chunk << sc->chunk_shift) + (offset & sc->chunk_mask);
5842 +static int stripe_status(struct dm_target *ti, status_type_t type,
5843 + char *result, unsigned int maxlen)
5845 + struct stripe_c *sc = (struct stripe_c *) ti->private;
5850 + case STATUSTYPE_INFO:
5854 + case STATUSTYPE_TABLE:
5855 + offset = snprintf(result, maxlen, "%d " SECTOR_FORMAT,
5856 + sc->stripes, sc->chunk_mask + 1);
5857 + for (i = 0; i < sc->stripes; i++) {
5859 + snprintf(result + offset, maxlen - offset,
5860 + " %s " SECTOR_FORMAT,
5861 + dm_kdevname(to_kdev_t(sc->stripe[i].dev->bdev->bd_dev)),
5862 + sc->stripe[i].physical_start);
5869 +static struct target_type stripe_target = {
5870 + .name = "striped",
5871 + .module = THIS_MODULE,
5872 + .ctr = stripe_ctr,
5873 + .dtr = stripe_dtr,
5874 + .map = stripe_map,
5875 + .status = stripe_status,
5878 +int __init dm_stripe_init(void)
5882 + r = dm_register_target(&stripe_target);
5884 + DMWARN("striped target registration failed");
5889 +void dm_stripe_exit(void)
5891 + if (dm_unregister_target(&stripe_target))
5892 + DMWARN("striped target unregistration failed");
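The remapping arithmetic in stripe_map() is worth restating on its own. A sketch of the same computation as a hypothetical helper, worked through for 2 stripes and a 64-sector chunk (chunk_shift = 6, chunk_mask = 63):

	static sector_t stripe_where(struct stripe_c *sc, sector_t offset)
	{
		uint32_t chunk = (uint32_t) (offset >> sc->chunk_shift);
		uint32_t stripe = chunk % sc->stripes;	/* 32bit modulus */

		chunk /= sc->stripes;
		return sc->stripe[stripe].physical_start +
			((sector_t) chunk << sc->chunk_shift) +
			(offset & sc->chunk_mask);
	}

For relative offset 200 this gives chunk 3, stripe 1, chunk-on-device 1, so the io maps to physical_start + 64 + 8 = physical_start + 72 on the second device.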
5896 diff -urN linux-2.4.24.org/drivers/md/dm-table.c linux-2.4.24/drivers/md/dm-table.c
5897 --- linux-2.4.24.org/drivers/md/dm-table.c 1970-01-01 01:00:00.000000000 +0100
5898 +++ linux-2.4.24/drivers/md/dm-table.c 2004-01-18 15:01:13.786710320 +0100
5901 + * Copyright (C) 2001 Sistina Software (UK) Limited.
5903 + * This file is released under the GPL.
5908 +#include <linux/module.h>
5909 +#include <linux/vmalloc.h>
5910 +#include <linux/blkdev.h>
5911 +#include <linux/ctype.h>
5912 +#include <linux/slab.h>
5913 +#include <asm/atomic.h>
5915 +#define MAX_DEPTH 16
5916 +#define NODE_SIZE L1_CACHE_BYTES
5917 +#define KEYS_PER_NODE (NODE_SIZE / sizeof(sector_t))
5918 +#define CHILDREN_PER_NODE (KEYS_PER_NODE + 1)
5924 + unsigned int depth;
5925 + unsigned int counts[MAX_DEPTH]; /* in nodes */
5926 + sector_t *index[MAX_DEPTH];
5928 + unsigned int num_targets;
5929 + unsigned int num_allocated;
5931 + struct dm_target *targets;
5934 + * Indicates the rw permissions for the new logical
5935 + * device. This should be a combination of FMODE_READ
5936 + * and FMODE_WRITE.
5940 + /* a list of devices used by this table */
5941 + struct list_head devices;
5943 + /* events get handed up using this callback */
5944 + void (*event_fn)(void *);
5945 + void *event_context;
5949 + * Similar to ceiling(log_size(n))
5951 +static unsigned int int_log(unsigned long n, unsigned long base)
5956 + n = dm_div_up(n, base);
5964 + * Calculate the index of the child node of the n'th node's k'th key.
5966 +static inline unsigned int get_child(unsigned int n, unsigned int k)
5968 + return (n * CHILDREN_PER_NODE) + k;
5972 + * Return the n'th node of level l from table t.
5974 +static inline sector_t *get_node(struct dm_table *t, unsigned int l,
5977 + return t->index[l] + (n * KEYS_PER_NODE);
5981 + * Return the highest key that you could lookup from the n'th
5982 + * node on level l of the btree.
5984 +static sector_t high(struct dm_table *t, unsigned int l, unsigned int n)
5986 + for (; l < t->depth - 1; l++)
5987 + n = get_child(n, CHILDREN_PER_NODE - 1);
5989 + if (n >= t->counts[l])
5990 + return (sector_t) - 1;
5992 + return get_node(t, l, n)[KEYS_PER_NODE - 1];
5996 + * Fills in a level of the btree based on the highs of the level below it.
5999 +static int setup_btree_index(unsigned int l, struct dm_table *t)
6001 + unsigned int n, k;
6004 + for (n = 0U; n < t->counts[l]; n++) {
6005 + node = get_node(t, l, n);
6007 + for (k = 0U; k < KEYS_PER_NODE; k++)
6008 + node[k] = high(t, l + 1, get_child(n, k));
6014 +void *dm_vcalloc(unsigned long nmemb, unsigned long elem_size)
6016 + unsigned long size;
6020 + * Check that we're not going to overflow.
6022 + if (nmemb > (ULONG_MAX / elem_size))
6025 + size = nmemb * elem_size;
6026 + addr = vmalloc(size);
6028 + memset(addr, 0, size);
6033 +int dm_table_create(struct dm_table **result, int mode, unsigned num_targets)
6035 + struct dm_table *t = kmalloc(sizeof(*t), GFP_KERNEL);
6040 + memset(t, 0, sizeof(*t));
6041 + INIT_LIST_HEAD(&t->devices);
6042 + atomic_set(&t->holders, 1);
6044 + num_targets = dm_round_up(num_targets, KEYS_PER_NODE);
6046 + /* Allocate both the target array and offset array at once. */
6047 + t->highs = (sector_t *) dm_vcalloc(sizeof(struct dm_target) +
6048 + sizeof(sector_t), num_targets);
6054 + memset(t->highs, -1, sizeof(*t->highs) * num_targets);
6056 + t->targets = (struct dm_target *) (t->highs + num_targets);
6057 + t->num_allocated = num_targets;
6063 +static void free_devices(struct list_head *devices)
6065 + struct list_head *tmp, *next;
6067 + for (tmp = devices->next; tmp != devices; tmp = next) {
6068 + struct dm_dev *dd = list_entry(tmp, struct dm_dev, list);
6074 +void table_destroy(struct dm_table *t)
6078 + /* free the indexes (see dm_table_complete) */
6079 + if (t->depth >= 2)
6080 + vfree(t->index[t->depth - 2]);
6082 + /* free the targets */
6083 + for (i = 0; i < t->num_targets; i++) {
6084 + struct dm_target *tgt = t->targets + i;
6086 + if (tgt->type->dtr)
6087 + tgt->type->dtr(tgt);
6089 + dm_put_target_type(tgt->type);
6094 + /* free the device list */
6095 + if (t->devices.next != &t->devices) {
6096 + DMWARN("devices still present during destroy: "
6097 + "dm_table_remove_device calls missing");
6099 + free_devices(&t->devices);
6105 +void dm_table_get(struct dm_table *t)
6107 + atomic_inc(&t->holders);
6110 +void dm_table_put(struct dm_table *t)
6112 + if (atomic_dec_and_test(&t->holders))
6117 + * Convert a device path to a kdev_t.
6119 +static int lookup_device(const char *path, kdev_t *dev)
6122 + struct nameidata nd;
6123 + struct inode *inode;
6125 + if (!path_init(path, LOOKUP_FOLLOW, &nd))
6128 + if ((r = path_walk(path, &nd)))
6131 + inode = nd.dentry->d_inode;
6137 + if (!S_ISBLK(inode->i_mode)) {
6142 + *dev = inode->i_rdev;
6145 + path_release(&nd);
6150 + * See if we've already got a device in the list.
6152 +static struct dm_dev *find_device(struct list_head *l, kdev_t dev)
6154 + struct list_head *tmp;
6156 + list_for_each(tmp, l) {
6157 + struct dm_dev *dd = list_entry(tmp, struct dm_dev, list);
6158 + if (kdev_same(dd->dev, dev))
6166 + * Open a device so we can use it as a map destination.
6168 +static int open_dev(struct dm_dev *dd)
6173 + dd->bdev = bdget(kdev_t_to_nr(dd->dev));
6177 + return blkdev_get(dd->bdev, dd->mode, 0, BDEV_RAW);
6181 + * Close a device that we've been using.
6183 +static void close_dev(struct dm_dev *dd)
6188 + blkdev_put(dd->bdev, BDEV_RAW);
6193 + * If possible (ie. blk_size[major] is set), this checks that an
6194 + * area of a destination device is valid.
6196 +static int check_device_area(kdev_t dev, sector_t start, sector_t len)
6199 + sector_t dev_size;
6201 + if (!(sizes = blk_size[major(dev)]) || !(dev_size = sizes[minor(dev)]))
6202 + /* we don't know the device details,
6203 + * so give the benefit of the doubt */
6206 + /* convert to 512-byte sectors */
6209 + return ((start < dev_size) && (len <= (dev_size - start)));
6213 + * This upgrades the mode on an already open dm_dev, being
6214 + * careful to leave things as they were if we fail to reopen the device.
6217 +static int upgrade_mode(struct dm_dev *dd, int new_mode)
6220 + struct dm_dev dd_copy;
6222 + memcpy(&dd_copy, dd, sizeof(dd_copy));
6224 + dd->mode |= new_mode;
6228 + close_dev(&dd_copy);
6230 + memcpy(dd, &dd_copy, sizeof(dd_copy));
6236 + * Add a device to the list, or just increment the usage count if
6237 + * it's already present.
6239 +int dm_get_device(struct dm_target *ti, const char *path, sector_t start,
6240 + sector_t len, int mode, struct dm_dev **result)
6244 + struct dm_dev *dd;
6245 + unsigned major, minor;
6246 + struct dm_table *t = ti->table;
6251 + if (sscanf(path, "%u:%u", &major, &minor) == 2) {
6252 + /* Extract the major/minor numbers */
6253 + dev = mk_kdev(major, minor);
6255 + /* convert the path to a device */
6256 + if ((r = lookup_device(path, &dev)))
6260 + dd = find_device(&t->devices, dev);
6262 + dd = kmalloc(sizeof(*dd), GFP_KERNEL);
6270 + if ((r = open_dev(dd))) {
6275 + atomic_set(&dd->count, 0);
6276 + list_add(&dd->list, &t->devices);
6278 + } else if (dd->mode != (mode | dd->mode)) {
6279 + r = upgrade_mode(dd, mode);
6283 + atomic_inc(&dd->count);
6285 + if (!check_device_area(dd->dev, start, len)) {
6286 + DMWARN("device %s too small for target", path);
6287 + dm_put_device(ti, dd);
6297 + * Decrement a device's use count and remove it if necessary.
6299 +void dm_put_device(struct dm_target *ti, struct dm_dev *dd)
6301 + if (atomic_dec_and_test(&dd->count)) {
6303 + list_del(&dd->list);
6309 + * Checks to see if the target joins onto the end of the table.
6311 +static int adjoin(struct dm_table *table, struct dm_target *ti)
6313 + struct dm_target *prev;
6315 + if (!table->num_targets)
6316 + return !ti->begin;
6318 + prev = &table->targets[table->num_targets - 1];
6319 + return (ti->begin == (prev->begin + prev->len));
6323 + * Used to dynamically allocate the arg array.
6325 +static char **realloc_argv(unsigned *array_size, char **old_argv)
6328 + unsigned new_size;
6330 + new_size = *array_size ? *array_size * 2 : 64;
6331 + argv = kmalloc(new_size * sizeof(*argv), GFP_KERNEL);
6333 + memcpy(argv, old_argv, *array_size * sizeof(*argv));
6334 + *array_size = new_size;
6342 + * Destructively splits up the argument list to pass to ctr.
6344 +static int split_args(int *argc, char ***argvp, char *input)
6346 + char *start, *end = input, *out, **argv = NULL;
6347 + unsigned array_size = 0;
6350 + argv = realloc_argv(&array_size, argv);
6357 + /* Skip whitespace */
6358 + while (*start && isspace(*start))
6362 + break; /* success, we hit the end */
6364 + /* 'out' is used to remove any backslash quoting */
6365 + end = out = start;
6367 + /* Everything apart from '\0' can be quoted */
6368 + if (*end == '\\' && *(end + 1)) {
6369 + *out++ = *(end + 1);
6374 + if (isspace(*end))
6375 + break; /* end of token */
6380 + /* have we already filled the array ? */
6381 + if ((*argc + 1) > array_size) {
6382 + argv = realloc_argv(&array_size, argv);
6387 + /* we know this is whitespace */
6391 + /* terminate the string and put it in the array */
6393 + argv[*argc] = start;
6401 +int dm_table_add_target(struct dm_table *t, const char *type,
6402 + sector_t start, sector_t len, char *params)
6404 + int r = -EINVAL, argc;
6406 + struct dm_target *tgt;
6408 + if (t->num_targets >= t->num_allocated)
6411 + tgt = t->targets + t->num_targets;
6412 + memset(tgt, 0, sizeof(*tgt));
6414 + tgt->type = dm_get_target_type(type);
6416 + tgt->error = "unknown target type";
6421 + tgt->begin = start;
6423 + tgt->error = "Unknown error";
6426 + * Does this target adjoin the previous one ?
6428 + if (!adjoin(t, tgt)) {
6429 + tgt->error = "Gap in table";
6434 + r = split_args(&argc, &argv, params);
6436 + tgt->error = "couldn't split parameters (insufficient memory)";
6440 + r = tgt->type->ctr(tgt, argc, argv);
6445 + t->highs[t->num_targets++] = tgt->begin + tgt->len - 1;
6449 + printk(KERN_ERR DM_NAME ": %s\n", tgt->error);
6450 + dm_put_target_type(tgt->type);
6454 +static int setup_indexes(struct dm_table *t)
6457 + unsigned int total = 0;
6458 + sector_t *indexes;
6460 + /* allocate the space for *all* the indexes */
6461 + for (i = t->depth - 2; i >= 0; i--) {
6462 + t->counts[i] = dm_div_up(t->counts[i + 1], CHILDREN_PER_NODE);
6463 + total += t->counts[i];
6466 + indexes = (sector_t *) dm_vcalloc(total, (unsigned long) NODE_SIZE);
6470 + /* set up internal nodes, bottom-up */
6471 + for (i = t->depth - 2, total = 0; i >= 0; i--) {
6472 + t->index[i] = indexes;
6473 + indexes += (KEYS_PER_NODE * t->counts[i]);
6474 + setup_btree_index(i, t);
6481 + * Builds the btree to index the map.
6483 +int dm_table_complete(struct dm_table *t)
6486 + unsigned int leaf_nodes;
6488 + /* how many indexes will the btree have ? */
6489 + leaf_nodes = dm_div_up(t->num_targets, KEYS_PER_NODE);
6490 + t->depth = 1 + int_log(leaf_nodes, CHILDREN_PER_NODE);
6492 + /* leaf layer has already been set up */
6493 + t->counts[t->depth - 1] = leaf_nodes;
6494 + t->index[t->depth - 1] = t->highs;
6496 + if (t->depth >= 2)
6497 + r = setup_indexes(t);
6502 +static spinlock_t _event_lock = SPIN_LOCK_UNLOCKED;
6503 +void dm_table_event_callback(struct dm_table *t,
6504 + void (*fn)(void *), void *context)
6506 + spin_lock_irq(&_event_lock);
6508 + t->event_context = context;
6509 + spin_unlock_irq(&_event_lock);
6512 +void dm_table_event(struct dm_table *t)
6514 + spin_lock(&_event_lock);
6516 + t->event_fn(t->event_context);
6517 + spin_unlock(&_event_lock);
6520 +sector_t dm_table_get_size(struct dm_table *t)
6522 + return t->num_targets ? (t->highs[t->num_targets - 1] + 1) : 0;
6525 +struct dm_target *dm_table_get_target(struct dm_table *t, unsigned int index)
6527 + if (index >= t->num_targets)
6530 + return t->targets + index;
6534 + * Search the btree for the correct target.
6536 +struct dm_target *dm_table_find_target(struct dm_table *t, sector_t sector)
6538 + unsigned int l, n = 0, k = 0;
6541 + for (l = 0; l < t->depth; l++) {
6542 + n = get_child(n, k);
6543 + node = get_node(t, l, n);
6545 + for (k = 0; k < KEYS_PER_NODE; k++)
6546 + if (node[k] >= sector)
6550 + return &t->targets[(KEYS_PER_NODE * n) + k];
6553 +unsigned int dm_table_get_num_targets(struct dm_table *t)
6555 + return t->num_targets;
6558 +struct list_head *dm_table_get_devices(struct dm_table *t)
6560 + return &t->devices;
6563 +int dm_table_get_mode(struct dm_table *t)
6568 +void dm_table_suspend_targets(struct dm_table *t)
6572 + for (i = 0; i < t->num_targets; i++) {
6573 + struct dm_target *ti = t->targets + i;
6575 + if (ti->type->suspend)
6576 + ti->type->suspend(ti);
6580 +void dm_table_resume_targets(struct dm_table *t)
6584 + for (i = 0; i < t->num_targets; i++) {
6585 + struct dm_target *ti = t->targets + i;
6587 + if (ti->type->resume)
6588 + ti->type->resume(ti);
6592 +EXPORT_SYMBOL(dm_get_device);
6593 +EXPORT_SYMBOL(dm_put_device);
6594 +EXPORT_SYMBOL(dm_table_event);
6595 +EXPORT_SYMBOL(dm_table_get_mode);
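The btree sizing performed by dm_table_complete() can be followed with small numbers. A standalone restatement of the maths, assuming 4-byte sector_t and 32-byte cache lines so that KEYS_PER_NODE = 8 and CHILDREN_PER_NODE = 9:

	/* for a table of 100 targets */
	unsigned int num_targets = 100;
	unsigned int leaf_nodes = (num_targets + 7) / 8;	/* dm_div_up -> 13 */
	unsigned int depth = 1, n = leaf_nodes;

	while (n > 1) {			/* this is what int_log() computes */
		n = (n + 8) / 9;	/* dm_div_up(n, CHILDREN_PER_NODE) */
		depth++;
	}
	/* depth == 3: 13 leaf nodes, 2 internal nodes above them, 1 root */

setup_indexes() then allocates all 2 + 1 = 3 internal nodes in a single dm_vcalloc() and fills them bottom-up from the highs of the level below.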
6596 diff -urN linux-2.4.24.org/drivers/md/dm-target.c linux-2.4.24/drivers/md/dm-target.c
6597 --- linux-2.4.24.org/drivers/md/dm-target.c 1970-01-01 01:00:00.000000000 +0100
6598 +++ linux-2.4.24/drivers/md/dm-target.c 2004-01-18 15:01:13.789709690 +0100
6601 + * Copyright (C) 2001 Sistina Software (UK) Limited
6603 + * This file is released under the GPL.
6608 +#include <linux/module.h>
6609 +#include <linux/kmod.h>
6610 +#include <linux/slab.h>
6612 +struct tt_internal {
6613 + struct target_type tt;
6615 + struct list_head list;
6619 +static LIST_HEAD(_targets);
6620 +static DECLARE_RWSEM(_lock);
6622 +#define DM_MOD_NAME_SIZE 32
6624 +static inline struct tt_internal *__find_target_type(const char *name)
6626 + struct list_head *tih;
6627 + struct tt_internal *ti;
6629 + list_for_each(tih, &_targets) {
6630 + ti = list_entry(tih, struct tt_internal, list);
6632 + if (!strcmp(name, ti->tt.name))
6639 +static struct tt_internal *get_target_type(const char *name)
6641 + struct tt_internal *ti;
6643 + down_read(&_lock);
6644 + ti = __find_target_type(name);
6647 + if (ti->use == 0 && ti->tt.module)
6648 + __MOD_INC_USE_COUNT(ti->tt.module);
6656 +static void load_module(const char *name)
6658 + char module_name[DM_MOD_NAME_SIZE] = "dm-";
6660 + /* Length check for strcat() below */
6661 + if (strlen(name) > (DM_MOD_NAME_SIZE - 4))
6664 + strcat(module_name, name);
6665 + request_module(module_name);
6668 +struct target_type *dm_get_target_type(const char *name)
6670 + struct tt_internal *ti = get_target_type(name);
6673 + load_module(name);
6674 + ti = get_target_type(name);
6677 + return ti ? &ti->tt : NULL;
6680 +void dm_put_target_type(struct target_type *t)
6682 + struct tt_internal *ti = (struct tt_internal *) t;
6684 + down_read(&_lock);
6685 + if (--ti->use == 0 && ti->tt.module)
6686 + __MOD_DEC_USE_COUNT(ti->tt.module);
6695 +static struct tt_internal *alloc_target(struct target_type *t)
6697 + struct tt_internal *ti = kmalloc(sizeof(*ti), GFP_KERNEL);
6700 + memset(ti, 0, sizeof(*ti));
6707 +int dm_register_target(struct target_type *t)
6710 + struct tt_internal *ti = alloc_target(t);
6715 + down_write(&_lock);
6716 + if (__find_target_type(t->name)) {
6720 + list_add(&ti->list, &_targets);
6726 +int dm_unregister_target(struct target_type *t)
6728 + struct tt_internal *ti;
6730 + down_write(&_lock);
6731 + if (!(ti = __find_target_type(t->name))) {
6741 + list_del(&ti->list);
6749 + * io-err: always fails an io, useful for bringing
6750 + * up LVs that have holes in them.
6752 +static int io_err_ctr(struct dm_target *ti, unsigned int argc, char **args)
6757 +static void io_err_dtr(struct dm_target *ti)
6762 +static int io_err_map(struct dm_target *ti, struct buffer_head *bh, int rw,
6763 + union map_info *map_context)
6768 +static struct target_type error_target = {
6770 + .ctr = io_err_ctr,
6771 + .dtr = io_err_dtr,
6772 + .map = io_err_map,
6775 +int dm_target_init(void)
6777 + return dm_register_target(&error_target);
6780 +void dm_target_exit(void)
6782 + if (dm_unregister_target(&error_target))
6783 + DMWARN("error target unregistration failed");
6786 +EXPORT_SYMBOL(dm_register_target);
6787 +EXPORT_SYMBOL(dm_unregister_target);
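For reference, a sketch of how a caller resolves a target type by name; the caller shown is hypothetical, and dm_get_target_type()/dm_put_target_type() are internal to dm-mod rather than part of the exported interface. A lookup miss triggers request_module("dm-<name>"), so the "striped" target autoloads from dm-striped; names longer than DM_MOD_NAME_SIZE - 4 = 28 characters are never autoloaded:

	static int resolve_target(const char *name)
	{
		struct target_type *tt = dm_get_target_type(name);

		if (!tt)
			return -EINVAL;	/* neither built in nor loadable */

		/* ... hand tt to the table-building code ... */
		dm_put_target_type(tt);
		return 0;
	}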
6788 diff -urN linux-2.4.24.org/drivers/md/kcopyd.c linux-2.4.24/drivers/md/kcopyd.c
6789 --- linux-2.4.24.org/drivers/md/kcopyd.c 1970-01-01 01:00:00.000000000 +0100
6790 +++ linux-2.4.24/drivers/md/kcopyd.c 2004-01-18 15:01:25.797189646 +0100
6793 + * Copyright (C) 2002 Sistina Software (UK) Limited.
6795 + * This file is released under the GPL.
6798 +#include <asm/atomic.h>
6800 +#include <linux/blkdev.h>
6801 +#include <linux/config.h>
6802 +#include <linux/device-mapper.h>
6803 +#include <linux/fs.h>
6804 +#include <linux/init.h>
6805 +#include <linux/list.h>
6806 +#include <linux/locks.h>
6807 +#include <linux/mempool.h>
6808 +#include <linux/module.h>
6809 +#include <linux/pagemap.h>
6810 +#include <linux/slab.h>
6811 +#include <linux/vmalloc.h>
6813 +#include "kcopyd.h"
6814 +#include "dm-daemon.h"
6816 +/* FIXME: this is only needed for the DMERR macros */
6819 +static struct dm_daemon _kcopyd;
6821 +#define SECTORS_PER_PAGE (PAGE_SIZE / SECTOR_SIZE)
6822 +#define SUB_JOB_SIZE 128
6823 +#define PAGES_PER_SUB_JOB (SUB_JOB_SIZE / SECTORS_PER_PAGE)
6824 +#define SUB_JOB_COUNT 8
6826 +/*-----------------------------------------------------------------
6827 + * Each kcopyd client has its own little pool of preallocated
6828 + * pages for kcopyd io.
6829 + *---------------------------------------------------------------*/
6830 +struct kcopyd_client {
6831 + struct list_head list;
6834 + struct list_head pages;
6835 + unsigned int nr_pages;
6836 + unsigned int nr_free_pages;
6837 + unsigned int max_split;
6840 +static inline void __push_page(struct kcopyd_client *kc, struct page *p)
6842 + list_add(&p->list, &kc->pages);
6843 + kc->nr_free_pages++;
6846 +static inline struct page *__pop_page(struct kcopyd_client *kc)
6850 + p = list_entry(kc->pages.next, struct page, list);
6851 + list_del(&p->list);
6852 + kc->nr_free_pages--;
6857 +static int kcopyd_get_pages(struct kcopyd_client *kc,
6858 + unsigned int nr, struct list_head *pages)
6861 + INIT_LIST_HEAD(pages);
6863 + spin_lock(&kc->lock);
6864 + if (kc->nr_free_pages < nr) {
6865 + spin_unlock(&kc->lock);
6870 + p = __pop_page(kc);
6871 + list_add(&p->list, pages);
6873 + spin_unlock(&kc->lock);
6878 +static void kcopyd_put_pages(struct kcopyd_client *kc, struct list_head *pages)
6880 + struct list_head *tmp, *tmp2;
6882 + spin_lock(&kc->lock);
6883 + list_for_each_safe (tmp, tmp2, pages)
6884 + __push_page(kc, list_entry(tmp, struct page, list));
6885 + spin_unlock(&kc->lock);
6889 + * These three functions resize the page pool.
6891 +static void release_pages(struct list_head *pages)
6894 + struct list_head *tmp, *tmp2;
6896 + list_for_each_safe (tmp, tmp2, pages) {
6897 + p = list_entry(tmp, struct page, list);
6903 +static int client_alloc_pages(struct kcopyd_client *kc, unsigned int nr)
6909 + for (i = 0; i < nr; i++) {
6910 + p = alloc_page(GFP_KERNEL);
6912 + release_pages(&new);
6917 + list_add(&p->list, &new);
6920 + kcopyd_put_pages(kc, &new);
6921 + kc->nr_pages += nr;
6922 + kc->max_split = kc->nr_pages / PAGES_PER_SUB_JOB;
6923 + if (kc->max_split > SUB_JOB_COUNT)
6924 + kc->max_split = SUB_JOB_COUNT;
6929 +static void client_free_pages(struct kcopyd_client *kc)
6931 + BUG_ON(kc->nr_free_pages != kc->nr_pages);
6932 + release_pages(&kc->pages);
6933 + kc->nr_free_pages = kc->nr_pages = 0;
6936 +/*-----------------------------------------------------------------
6937 + * kcopyd_jobs need to be allocated by the *clients* of kcopyd;
6938 + * for this reason we use a mempool to prevent the client from
6939 + * ever having to do io (which could cause a deadlock).
6940 + *---------------------------------------------------------------*/
6941 +struct kcopyd_job {
6942 + struct kcopyd_client *kc;
6943 + struct list_head list;
6944 + unsigned int flags;
6947 + * Error state of the job.
6950 + unsigned int write_err;
6953 + * Either READ or WRITE
6956 + struct io_region source;
6959 + * The destinations for the transfer.
6961 + unsigned int num_dests;
6962 + struct io_region dests[KCOPYD_MAX_REGIONS];
6965 + unsigned int nr_pages;
6966 + struct list_head pages;
6969 + * Set this to ensure you are notified when the job has
6970 + * completed. 'context' is for callback to use.
6972 + kcopyd_notify_fn fn;
6976 + * These fields are only used if the job has been split
6977 + * into more manageable parts.
6979 + struct semaphore lock;
6980 + atomic_t sub_jobs;
6981 + sector_t progress;
6984 +/* FIXME: this should scale with the number of pages */
6985 +#define MIN_JOBS 512
6987 +static kmem_cache_t *_job_cache;
6988 +static mempool_t *_job_pool;
6991 + * We maintain three lists of jobs:
6993 + * i) jobs waiting for pages
6994 + * ii) jobs that have pages, and are waiting for the io to be issued.
6995 + * iii) jobs that have completed.
6997 + * All three of these are protected by job_lock.
6999 +static spinlock_t _job_lock = SPIN_LOCK_UNLOCKED;
7001 +static LIST_HEAD(_complete_jobs);
7002 +static LIST_HEAD(_io_jobs);
7003 +static LIST_HEAD(_pages_jobs);
7005 +static int jobs_init(void)
7007 + INIT_LIST_HEAD(&_complete_jobs);
7008 + INIT_LIST_HEAD(&_io_jobs);
7009 + INIT_LIST_HEAD(&_pages_jobs);
7011 + _job_cache = kmem_cache_create("kcopyd-jobs",
7012 + sizeof(struct kcopyd_job),
7013 + __alignof__(struct kcopyd_job),
7018 + _job_pool = mempool_create(MIN_JOBS, mempool_alloc_slab,
7019 + mempool_free_slab, _job_cache);
7021 + kmem_cache_destroy(_job_cache);
7028 +static void jobs_exit(void)
7030 + BUG_ON(!list_empty(&_complete_jobs));
7031 + BUG_ON(!list_empty(&_io_jobs));
7032 + BUG_ON(!list_empty(&_pages_jobs));
7034 + mempool_destroy(_job_pool);
7035 + kmem_cache_destroy(_job_cache);
7039 + * Functions to push (at the tail) and pop (from the head) a job on a given job list.
7042 +static inline struct kcopyd_job *pop(struct list_head *jobs)
7044 + struct kcopyd_job *job = NULL;
7045 + unsigned long flags;
7047 + spin_lock_irqsave(&_job_lock, flags);
7049 + if (!list_empty(jobs)) {
7050 + job = list_entry(jobs->next, struct kcopyd_job, list);
7051 + list_del(&job->list);
7053 + spin_unlock_irqrestore(&_job_lock, flags);
7058 +static inline void push(struct list_head *jobs, struct kcopyd_job *job)
7060 + unsigned long flags;
7062 + spin_lock_irqsave(&_job_lock, flags);
7063 + list_add_tail(&job->list, jobs);
7064 + spin_unlock_irqrestore(&_job_lock, flags);
7068 + * These three functions process 1 item from the corresponding job list.
7074 + * > 0: can't process yet.
7076 +static int run_complete_job(struct kcopyd_job *job)
7078 + void *context = job->context;
7079 + int read_err = job->read_err;
7080 + unsigned int write_err = job->write_err;
7081 + kcopyd_notify_fn fn = job->fn;
7083 + kcopyd_put_pages(job->kc, &job->pages);
7084 + mempool_free(job, _job_pool);
7085 + fn(read_err, write_err, context);
7089 +static void complete_io(unsigned int error, void *context)
7091 + struct kcopyd_job *job = (struct kcopyd_job *) context;
7094 + if (job->rw == WRITE)
7095 + job->write_err &= error;
7097 + job->read_err = 1;
7099 + if (!test_bit(KCOPYD_IGNORE_ERROR, &job->flags)) {
7100 + push(&_complete_jobs, job);
7101 + dm_daemon_wake(&_kcopyd);
7106 + if (job->rw == WRITE)
7107 + push(&_complete_jobs, job);
7111 + push(&_io_jobs, job);
7114 + dm_daemon_wake(&_kcopyd);
7118 + * Request io on as many buffer heads as we can currently get for
7119 + * a particular job.
7121 +static int run_io_job(struct kcopyd_job *job)
7125 + if (job->rw == READ)
7126 + r = dm_io_async(1, &job->source, job->rw,
7127 + list_entry(job->pages.next, struct page, list),
7128 + job->offset, complete_io, job);
7131 + r = dm_io_async(job->num_dests, job->dests, job->rw,
7132 + list_entry(job->pages.next, struct page, list),
7133 + job->offset, complete_io, job);
7138 +static int run_pages_job(struct kcopyd_job *job)
7142 + job->nr_pages = dm_div_up(job->dests[0].count + job->offset,
7143 + SECTORS_PER_PAGE);
7144 + r = kcopyd_get_pages(job->kc, job->nr_pages, &job->pages);
7146 + /* this job is ready for io */
7147 + push(&_io_jobs, job);
7152 + /* can't complete now */
7159 + * Run through a list for as long as possible. Returns the count
7160 + * of successful jobs.
7162 +static int process_jobs(struct list_head *jobs, int (*fn) (struct kcopyd_job *))
7164 + struct kcopyd_job *job;
7167 + while ((job = pop(jobs))) {
7172 + /* error this rogue job */
7173 + if (job->rw == WRITE)
7174 + job->write_err = (unsigned int) -1;
7176 + job->read_err = 1;
7177 + push(&_complete_jobs, job);
7183 + * We couldn't service this job ATM, so
7184 + * push this job back onto the list.
7197 + * kcopyd does this every time it's woken up.
7199 +static void do_work(void)
7202 + * The order that these are called is *very* important.
7203 + * complete jobs can free some pages for pages jobs.
7204 + * Pages jobs when successful will jump onto the io jobs
7205 + * list. io jobs call wake when they complete and it all starts again.
7208 + process_jobs(&_complete_jobs, run_complete_job);
7209 + process_jobs(&_pages_jobs, run_pages_job);
7210 + process_jobs(&_io_jobs, run_io_job);
7211 + run_task_queue(&tq_disk);
7215 + * If we are copying a small region we just dispatch a single job
7216 + * to do the copy; otherwise the io has to be split up into many jobs.
7219 +static void dispatch_job(struct kcopyd_job *job)
7221 + push(&_pages_jobs, job);
7222 + dm_daemon_wake(&_kcopyd);
7225 +static void segment_complete(int read_err,
7226 + unsigned int write_err, void *context)
7228 + /* FIXME: tidy this function */
7229 + sector_t progress = 0;
7230 + sector_t count = 0;
7231 + struct kcopyd_job *job = (struct kcopyd_job *) context;
7235 + /* update the error */
7237 + job->read_err = 1;
7240 + job->write_err &= write_err;
7243 + * Only dispatch more work if there hasn't been an error.
7245 + if ((!job->read_err && !job->write_err) ||
7246 + test_bit(KCOPYD_IGNORE_ERROR, &job->flags)) {
7247 + /* get the next chunk of work */
7248 + progress = job->progress;
7249 + count = job->source.count - progress;
7251 + if (count > SUB_JOB_SIZE)
7252 + count = SUB_JOB_SIZE;
7254 + job->progress += count;
7261 + struct kcopyd_job *sub_job = mempool_alloc(_job_pool, GFP_NOIO);
7263 + memcpy(sub_job, job, sizeof(*job));
7264 + sub_job->source.sector += progress;
7265 + sub_job->source.count = count;
7267 + for (i = 0; i < job->num_dests; i++) {
7268 + sub_job->dests[i].sector += progress;
7269 + sub_job->dests[i].count = count;
7272 + sub_job->fn = segment_complete;
7273 + sub_job->context = job;
7274 + dispatch_job(sub_job);
7276 + } else if (atomic_dec_and_test(&job->sub_jobs)) {
7279 + * To avoid a race we must keep the job around
7280 + * until after the notify function has completed.
7281 + * Otherwise the client may try and stop the job
7282 + * after we've completed.
7284 + job->fn(read_err, write_err, job->context);
7285 + mempool_free(job, _job_pool);
7290 + * Create some little jobs that will do the move between these two devices.
7293 +static void split_job(struct kcopyd_job *job)
7297 + nr = dm_div_up(job->source.count, SUB_JOB_SIZE);
7298 + if (nr > job->kc->max_split)
7299 + nr = job->kc->max_split;
7301 + atomic_set(&job->sub_jobs, nr);
7303 + segment_complete(0, 0u, job);
7306 +int kcopyd_copy(struct kcopyd_client *kc, struct io_region *from,
7307 + unsigned int num_dests, struct io_region *dests,
7308 + unsigned int flags, kcopyd_notify_fn fn, void *context)
7310 + struct kcopyd_job *job;
7313 + * Allocate a new job.
7315 + job = mempool_alloc(_job_pool, GFP_NOIO);
7318 + * set up for the read.
7321 + job->flags = flags;
7322 + job->read_err = 0;
7323 + job->write_err = 0;
7326 + memcpy(&job->source, from, sizeof(*from));
7328 + job->num_dests = num_dests;
7329 + memcpy(&job->dests, dests, sizeof(*dests) * num_dests);
7332 + job->nr_pages = 0;
7333 + INIT_LIST_HEAD(&job->pages);
7336 + job->context = context;
7338 + if (job->source.count < SUB_JOB_SIZE)
7339 + dispatch_job(job);
7342 + init_MUTEX(&job->lock);
7343 + job->progress = 0;
7351 + * Cancels a kcopyd job, eg. someone might be deactivating a device.
7354 +int kcopyd_cancel(struct kcopyd_job *job, int block)
7356 + /* FIXME: finish */
7360 +/*-----------------------------------------------------------------
7362 + *---------------------------------------------------------------*/
7363 +static DECLARE_MUTEX(_client_lock);
7364 +static LIST_HEAD(_clients);
7366 +static int client_add(struct kcopyd_client *kc)
7368 + down(&_client_lock);
7369 + list_add(&kc->list, &_clients);
7370 + up(&_client_lock);
7374 +static void client_del(struct kcopyd_client *kc)
7376 + down(&_client_lock);
7377 + list_del(&kc->list);
7378 + up(&_client_lock);
7381 +int kcopyd_client_create(unsigned int nr_pages, struct kcopyd_client **result)
7384 + struct kcopyd_client *kc;
7386 + if (nr_pages * SECTORS_PER_PAGE < SUB_JOB_SIZE) {
7387 + DMERR("kcopyd client requested %u pages: minimum is %lu",
7388 + nr_pages, SUB_JOB_SIZE / SECTORS_PER_PAGE);
7392 + kc = kmalloc(sizeof(*kc), GFP_KERNEL);
7396 + kc->lock = SPIN_LOCK_UNLOCKED;
7397 + INIT_LIST_HEAD(&kc->pages);
7398 + kc->nr_pages = kc->nr_free_pages = 0;
7399 + r = client_alloc_pages(kc, nr_pages);
7405 + r = dm_io_get(nr_pages);
7407 + client_free_pages(kc);
7412 + r = client_add(kc);
7414 + dm_io_put(nr_pages);
7415 + client_free_pages(kc);
7424 +void kcopyd_client_destroy(struct kcopyd_client *kc)
7426 + dm_io_put(kc->nr_pages);
7427 + client_free_pages(kc);
7433 +int __init kcopyd_init(void)
7441 + r = dm_daemon_start(&_kcopyd, "kcopyd", do_work);
7448 +void kcopyd_exit(void)
7451 + dm_daemon_stop(&_kcopyd);
7454 +EXPORT_SYMBOL(kcopyd_client_create);
7455 +EXPORT_SYMBOL(kcopyd_client_destroy);
7456 +EXPORT_SYMBOL(kcopyd_copy);
7457 +EXPORT_SYMBOL(kcopyd_cancel);
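The splitting constants above determine how much of a large copy is in flight at once. A minimal sketch of the calculation client_alloc_pages() performs, assuming 4k pages (so SECTORS_PER_PAGE = 8 and PAGES_PER_SUB_JOB = 128 / 8 = 16):

	static unsigned int example_max_split(unsigned int nr_pages)
	{
		unsigned int max_split = nr_pages / PAGES_PER_SUB_JOB;

		if (max_split > SUB_JOB_COUNT)
			max_split = SUB_JOB_COUNT;
		return max_split;
	}

A client created with 64 pages therefore runs at most 4 concurrent 64k sub jobs; one with 128 or more pages hits the SUB_JOB_COUNT ceiling of 8.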
7458 diff -urN linux-2.4.24.org/drivers/md/kcopyd.h linux-2.4.24/drivers/md/kcopyd.h
7459 --- linux-2.4.24.org/drivers/md/kcopyd.h 1970-01-01 01:00:00.000000000 +0100
7460 +++ linux-2.4.24/drivers/md/kcopyd.h 2004-01-18 15:01:25.800189017 +0100
7463 + * Copyright (C) 2001 Sistina Software
7465 + * This file is released under the GPL.
7468 +#ifndef DM_KCOPYD_H
7469 +#define DM_KCOPYD_H
7472 + * Needed for the definition of sector_t.
7474 +#include <linux/device-mapper.h>
7475 +#include <linux/iobuf.h>
7479 +int kcopyd_init(void);
7480 +void kcopyd_exit(void);
7482 +/* FIXME: make this configurable */
7483 +#define KCOPYD_MAX_REGIONS 8
7485 +#define KCOPYD_IGNORE_ERROR 1
7488 + * To use kcopyd you must first create a kcopyd client object.
7490 +struct kcopyd_client;
7491 +int kcopyd_client_create(unsigned int num_pages, struct kcopyd_client **result);
7492 +void kcopyd_client_destroy(struct kcopyd_client *kc);
7495 + * Submit a copy job to kcopyd. This is built on top of the
7496 + * client interface above.
7498 + * read_err is a boolean,
7499 + * write_err is a bitset, with 1 bit for each destination region
7501 +typedef void (*kcopyd_notify_fn)(int read_err,
7502 + unsigned int write_err, void *context);
7504 +int kcopyd_copy(struct kcopyd_client *kc, struct io_region *from,
7505 + unsigned int num_dests, struct io_region *dests,
7506 + unsigned int flags, kcopyd_notify_fn fn, void *context);
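A minimal usage sketch, assuming struct io_region (defined in drivers/md/dm-io.h, not part of this extract) carries .dev/.sector/.count fields as used by kcopyd.c above; the helper names are hypothetical and DMERR comes from dm.h:

	static void copy_done(int read_err, unsigned int write_err, void *context)
	{
		/* read_err is a boolean, write_err a per-destination bitset */
		if (read_err || write_err)
			DMERR("example copy failed");
	}

	static int copy_one_region(struct kcopyd_client *kc, kdev_t from_dev,
				   kdev_t to_dev, sector_t start, sector_t count)
	{
		struct io_region from, to;

		from.dev = from_dev;
		from.sector = start;
		from.count = count;

		to.dev = to_dev;
		to.sector = start;
		to.count = count;

		/* returns immediately; copy_done runs on completion */
		return kcopyd_copy(kc, &from, 1, &to, 0, copy_done, NULL);
	}

The client itself comes from kcopyd_client_create(nr_pages, &kc), where nr_pages must cover at least one sub job (16 pages with 4k pages, per the check in kcopyd.c).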
7509 diff -urN linux-2.4.24.org/drivers/md/Makefile linux-2.4.24/drivers/md/Makefile
7510 --- linux-2.4.24.org/drivers/md/Makefile 2004-01-18 14:58:09.300663064 +0100
7511 +++ linux-2.4.24/drivers/md/Makefile 2004-01-18 15:01:29.209473819 +0100
7516 -export-objs := md.o xor.o
7517 -list-multi := lvm-mod.o
7518 +export-objs := md.o xor.o dm-table.o dm-target.o dm.o dm-daemon.o \
7521 +list-multi := lvm-mod.o dm-mod.o dm-mirror-mod.o
7522 lvm-mod-objs := lvm.o lvm-snap.o lvm-fs.o
7523 +dm-mod-objs := dm.o dm-table.o dm-target.o dm-linear.o dm-stripe.o \
7524 + dm-ioctl.o dm-daemon.o kcopyd.o dm-io.o dm-snapshot.o \
7525 + dm-exception-store.o
7527 # Note: link order is important. All raid personalities
7528 # and xor.o must come before md.o, as they each initialise
7529 # themselves, and md.o may use the personalities when it
7532 -obj-$(CONFIG_MD_LINEAR) += linear.o
7533 -obj-$(CONFIG_MD_RAID0) += raid0.o
7534 -obj-$(CONFIG_MD_RAID1) += raid1.o
7535 -obj-$(CONFIG_MD_RAID5) += raid5.o xor.o
7536 -obj-$(CONFIG_MD_MULTIPATH) += multipath.o
7537 -obj-$(CONFIG_BLK_DEV_MD) += md.o
7538 -obj-$(CONFIG_BLK_DEV_LVM) += lvm-mod.o
7539 +obj-$(CONFIG_MD_LINEAR) += linear.o
7540 +obj-$(CONFIG_MD_RAID0) += raid0.o
7541 +obj-$(CONFIG_MD_RAID1) += raid1.o
7542 +obj-$(CONFIG_MD_RAID5) += raid5.o xor.o
7543 +obj-$(CONFIG_MD_MULTIPATH) += multipath.o
7544 +obj-$(CONFIG_BLK_DEV_MD) += md.o
7546 +obj-$(CONFIG_BLK_DEV_LVM) += lvm-mod.o
7548 +obj-$(CONFIG_BLK_DEV_DM) += dm-mod.o
7550 include $(TOPDIR)/Rules.make
7552 lvm-mod.o: $(lvm-mod-objs)
7553 $(LD) -r -o $@ $(lvm-mod-objs)
7555 +dm-mod.o: $(dm-mod-objs)
7556 + $(LD) -r -o $@ $(dm-mod-objs)
7557 diff -urN linux-2.4.24.org/include/linux/device-mapper.h linux-2.4.24/include/linux/device-mapper.h
7558 --- linux-2.4.24.org/include/linux/device-mapper.h 1970-01-01 01:00:00.000000000 +0100
7559 +++ linux-2.4.24/include/linux/device-mapper.h 2004-01-18 15:01:13.800707381 +0100
7562 + * Copyright (C) 2001 Sistina Software (UK) Limited.
7564 + * This file is released under the LGPL.
7567 +#ifndef _LINUX_DEVICE_MAPPER_H
7568 +#define _LINUX_DEVICE_MAPPER_H
7570 +typedef unsigned long sector_t;
7576 +typedef enum { STATUSTYPE_INFO, STATUSTYPE_TABLE } status_type_t;
7580 + unsigned long long ll;
7584 + * In the constructor the target parameter will already have the
7585 + * table, type, begin and len fields filled in.
7587 +typedef int (*dm_ctr_fn) (struct dm_target * target, unsigned int argc,
7591 + * The destructor doesn't need to free the dm_target, just
7592 + * anything hidden in ti->private.
7594 +typedef void (*dm_dtr_fn) (struct dm_target * ti);
7597 + * The map function must return:
7599 + * = 0: The target will handle the io by resubmitting it later
7600 + * > 0: simple remap complete
7602 +typedef int (*dm_map_fn) (struct dm_target * ti, struct buffer_head * bh,
7603 + int rw, union map_info *map_context);
7607 + * < 0 : error (currently ignored)
7608 + * 0 : ended successfully
7609 + * 1 : for some reason the io has still not completed (eg,
7610 + * multipath target might want to requeue a failed io).
7612 +typedef int (*dm_endio_fn) (struct dm_target * ti,
7613 + struct buffer_head * bh, int rw, int error,
7614 + union map_info *map_context);
7615 +typedef void (*dm_suspend_fn) (struct dm_target *ti);
7616 +typedef void (*dm_resume_fn) (struct dm_target *ti);
7617 +typedef int (*dm_status_fn) (struct dm_target * ti, status_type_t status_type,
7618 + char *result, unsigned int maxlen);
7620 +void dm_error(const char *message);
7623 + * Constructors should call these functions to ensure destination devices
7624 + * are opened/closed correctly.
7625 + * FIXME: too many arguments.
7627 +int dm_get_device(struct dm_target *ti, const char *path, sector_t start,
7628 + sector_t len, int mode, struct dm_dev **result);
7629 +void dm_put_device(struct dm_target *ti, struct dm_dev *d);
7632 + * Information about a target type
7634 +struct target_type {
7636 + struct module *module;
7640 + dm_endio_fn end_io;
7641 + dm_suspend_fn suspend;
7642 + dm_resume_fn resume;
7643 + dm_status_fn status;
7647 + struct dm_table *table;
7648 + struct target_type *type;
7650 + /* target limits */
7654 + /* target specific data */
7657 + /* Used to provide an error string from the ctr */
7661 +int dm_register_target(struct target_type *t);
7662 +int dm_unregister_target(struct target_type *t);
7664 +#endif /* _LINUX_DEVICE_MAPPER_H */
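A minimal sketch of a whole-device pass-through target built on this interface, closely modelled on the snapshot-origin target earlier in this patch; the target name "example" is hypothetical, and dm_table_get_mode() is assumed visible via dm.h:

	static int example_ctr(struct dm_target *ti, unsigned int argc, char **argv)
	{
		struct dm_dev *dev;

		if (argc != 1) {
			ti->error = "example: exactly one <dev_path> required";
			return -EINVAL;
		}

		if (dm_get_device(ti, argv[0], 0, ti->len,
				  dm_table_get_mode(ti->table), &dev)) {
			ti->error = "example: cannot get target device";
			return -ENXIO;
		}

		ti->private = dev;
		return 0;
	}

	static void example_dtr(struct dm_target *ti)
	{
		dm_put_device(ti, (struct dm_dev *) ti->private);
	}

	static int example_map(struct dm_target *ti, struct buffer_head *bh,
			       int rw, union map_info *map_context)
	{
		struct dm_dev *dev = (struct dm_dev *) ti->private;

		bh->b_rdev = dev->dev;	/* simple remap */
		return 1;		/* > 0: remap complete */
	}

	static struct target_type example_target = {
		.name   = "example",
		.module = THIS_MODULE,
		.ctr    = example_ctr,
		.dtr    = example_dtr,
		.map    = example_map,
	};

Registration and removal then go through dm_register_target()/dm_unregister_target() as declared above.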
7665 diff -urN linux-2.4.24.org/include/linux/dm-ioctl.h linux-2.4.24/include/linux/dm-ioctl.h
7666 --- linux-2.4.24.org/include/linux/dm-ioctl.h 1970-01-01 01:00:00.000000000 +0100
7667 +++ linux-2.4.24/include/linux/dm-ioctl.h 2004-01-18 15:01:17.793869131 +0100
7670 + * Copyright (C) 2001 - 2003 Sistina Software (UK) Limited.
7672 + * This file is released under the LGPL.
7675 +#ifndef _LINUX_DM_IOCTL_H
7676 +#define _LINUX_DM_IOCTL_H
7678 +#include <linux/types.h>
7680 +#define DM_DIR "mapper" /* Slashes not supported */
7681 +#define DM_MAX_TYPE_NAME 16
7682 +#define DM_NAME_LEN 128
7683 +#define DM_UUID_LEN 129
7686 + * A traditional ioctl interface for the device mapper.
7688 + * Each device can have two tables associated with it, an
7689 + * 'active' table which is the one currently used by io passing
7690 + * through the device, and an 'inactive' one which is a table
7691 + * that is being prepared as a replacement for the 'active' one.
7694 + * Just get the version information for the ioctl interface.
7697 + * Remove all dm devices, destroy all tables. Only really used for debug.
7700 + * DM_LIST_DEVICES:
7701 + * Get a list of all the dm device names.
7704 + * Create a new device; neither the 'active' nor the 'inactive' table
7705 + * slot will be filled. The device will be in suspended state
7706 + * after creation; however, any io to the device will get errored
7707 + * since it will be out-of-bounds.
7710 + * Remove a device, destroy any tables.
7713 + * Rename a device.
7716 + * This performs both suspend and resume, depending on which flag is passed in.
7718 + * Suspend: This command will not return until all pending io to
7719 + * the device has completed. Further io will be deferred until
7720 + * the device is resumed.
7721 + * Resume: It is no longer an error to issue this command on an
7722 + * unsuspended device. If a table is present in the 'inactive'
7723 + * slot, it will be moved to the active slot, then the old table
7724 + * from the active slot will be _destroyed_. Finally the device is resumed.
7728 + * Retrieves the status for the table in the 'active' slot.
7731 + * Wait for a significant event to occur to the device. This
7732 + * could either be caused by an event triggered by one of the
7733 + * targets of the table in the 'active' slot, or a table change.
7736 + * Load a table into the 'inactive' slot for the device. The
7737 + * device does _not_ need to be suspended prior to this command.
7740 + * Destroy any table in the 'inactive' slot (ie. abort).
7743 + * Return a set of device dependencies for the 'active' table.
7745 + * DM_TABLE_STATUS:
7746 + * Return the targets status for the 'active' table.
7750 + * All ioctl arguments consist of a single chunk of memory, with
7751 + * this structure at the start. If a uuid is specified any
7752 + * lookup (eg. for a DM_INFO) will be done on that, *not* the name.
7757 + * The version number is made up of three parts:
7758 + * major - no backward or forward compatibility,
7759 + * minor - only backwards compatible,
7760 + * patch - both backwards and forwards compatible.
7762 + * All clients of the ioctl interface should fill in the
7763 + * version number of the interface that they were compiled with.
7766 + * All recognised ioctl commands (ie. those that don't
7767 + * return -ENOTTY) fill out this field, even if the command failed.
7770 + uint32_t version[3]; /* in/out */
7771 + uint32_t data_size; /* total size of data passed in
7772 + * including this struct */
7774 + uint32_t data_start; /* offset to start of data
7775 + * relative to start of this struct */
7777 + uint32_t target_count; /* in/out */
7778 + int32_t open_count; /* out */
7779 + uint32_t flags; /* in/out */
7780 + uint32_t event_nr; /* in/out */
7783 + uint64_t dev; /* in/out */
7785 + char name[DM_NAME_LEN]; /* device name */
7786 + char uuid[DM_UUID_LEN]; /* unique identifier for
7787 + * the block device */
7791 + * Used to specify tables. These structures appear after the dm_ioctl.
7794 +struct dm_target_spec {
7795 + uint64_t sector_start;
7797 + int32_t status; /* used when reading from kernel only */
7800 + * Offset in bytes (from the start of this struct) to
7801 + * next target_spec.
7805 + char target_type[DM_MAX_TYPE_NAME];
7808 + * Parameter string starts immediately after this object.
7809 + * Be careful to add padding after string to ensure correct
7810 + * alignment of subsequent dm_target_spec.
7815 + * Used to retrieve the target dependencies.
7817 +struct dm_target_deps {
7818 + uint32_t count; /* Array size */
7819 + uint32_t padding; /* unused */
7820 + uint64_t dev[0]; /* out */
7824 + * Used to get a list of all dm devices.
7826 +struct dm_name_list {
7828 + uint32_t next; /* offset to the next record from
7829 + the _start_ of this */
7834 + * If you change this make sure you make the corresponding change
7835 + * to dm-ioctl.c:lookup_ioctl()
7838 + /* Top level cmds */
7839 + DM_VERSION_CMD = 0,
7840 + DM_REMOVE_ALL_CMD,
7841 + DM_LIST_DEVICES_CMD,
7843 + /* device level cmds */
7844 + DM_DEV_CREATE_CMD,
7845 + DM_DEV_REMOVE_CMD,
7846 + DM_DEV_RENAME_CMD,
7847 + DM_DEV_SUSPEND_CMD,
7848 + DM_DEV_STATUS_CMD,
7851 + /* Table level cmds */
7852 + DM_TABLE_LOAD_CMD,
7853 + DM_TABLE_CLEAR_CMD,
7854 + DM_TABLE_DEPS_CMD,
7855 + DM_TABLE_STATUS_CMD,
7858 +#define DM_IOCTL 0xfd
7860 +#define DM_VERSION _IOWR(DM_IOCTL, DM_VERSION_CMD, struct dm_ioctl)
7861 +#define DM_REMOVE_ALL _IOWR(DM_IOCTL, DM_REMOVE_ALL_CMD, struct dm_ioctl)
7862 +#define DM_LIST_DEVICES _IOWR(DM_IOCTL, DM_LIST_DEVICES_CMD, struct dm_ioctl)
7864 +#define DM_DEV_CREATE _IOWR(DM_IOCTL, DM_DEV_CREATE_CMD, struct dm_ioctl)
7865 +#define DM_DEV_REMOVE _IOWR(DM_IOCTL, DM_DEV_REMOVE_CMD, struct dm_ioctl)
7866 +#define DM_DEV_RENAME _IOWR(DM_IOCTL, DM_DEV_RENAME_CMD, struct dm_ioctl)
7867 +#define DM_DEV_SUSPEND _IOWR(DM_IOCTL, DM_DEV_SUSPEND_CMD, struct dm_ioctl)
7868 +#define DM_DEV_STATUS _IOWR(DM_IOCTL, DM_DEV_STATUS_CMD, struct dm_ioctl)
7869 +#define DM_DEV_WAIT _IOWR(DM_IOCTL, DM_DEV_WAIT_CMD, struct dm_ioctl)
7871 +#define DM_TABLE_LOAD _IOWR(DM_IOCTL, DM_TABLE_LOAD_CMD, struct dm_ioctl)
7872 +#define DM_TABLE_CLEAR _IOWR(DM_IOCTL, DM_TABLE_CLEAR_CMD, struct dm_ioctl)
7873 +#define DM_TABLE_DEPS _IOWR(DM_IOCTL, DM_TABLE_DEPS_CMD, struct dm_ioctl)
7874 +#define DM_TABLE_STATUS _IOWR(DM_IOCTL, DM_TABLE_STATUS_CMD, struct dm_ioctl)
7876 +#define DM_VERSION_MAJOR 4
7877 +#define DM_VERSION_MINOR 0
7878 +#define DM_VERSION_PATCHLEVEL 5
7879 +#define DM_VERSION_EXTRA "-ioctl (2003-11-18)"
7882 +#define DM_READONLY_FLAG (1 << 0) /* In/Out */
7883 +#define DM_SUSPEND_FLAG (1 << 1) /* In/Out */
7884 +#define DM_PERSISTENT_DEV_FLAG (1 << 3) /* In */
7887 + * Flag passed into ioctl STATUS command to get table information
7888 + * rather than current status.
7890 +#define DM_STATUS_TABLE_FLAG (1 << 4) /* In */
7893 + * Flags that indicate whether a table is present in either of
7894 + * the two table slots that a device has.
7896 +#define DM_ACTIVE_PRESENT_FLAG (1 << 5) /* Out */
7897 +#define DM_INACTIVE_PRESENT_FLAG (1 << 6) /* Out */
7900 + * Indicates that the buffer passed in wasn't big enough for the results.
7903 +#define DM_BUFFER_FULL_FLAG (1 << 8) /* Out */
7905 +#endif /* _LINUX_DM_IOCTL_H */
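From user space the whole interface reduces to filling in a struct dm_ioctl and issuing one of the ioctls above. A minimal sketch querying the interface version; the control node path /dev/mapper/control is an assumption (this header only fixes DM_DIR to "mapper"):

	#include <fcntl.h>
	#include <string.h>
	#include <unistd.h>
	#include <sys/ioctl.h>
	#include <linux/dm-ioctl.h>

	static int dm_get_version(uint32_t version[3])
	{
		struct dm_ioctl dmi;
		int fd = open("/dev/mapper/control", O_RDWR);	/* assumed node */

		if (fd < 0)
			return -1;

		memset(&dmi, 0, sizeof(dmi));
		dmi.version[0] = DM_VERSION_MAJOR;	/* version we expect */
		dmi.version[1] = DM_VERSION_MINOR;
		dmi.version[2] = DM_VERSION_PATCHLEVEL;
		dmi.data_size = sizeof(dmi);		/* no extra payload */

		if (ioctl(fd, DM_VERSION, &dmi) < 0) {
			close(fd);
			return -1;
		}

		memcpy(version, dmi.version, sizeof(dmi.version));	/* kernel's version */
		close(fd);
		return 0;
	}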
7906 diff -urN linux-2.4.24.org/include/linux/mempool.h linux-2.4.24/include/linux/mempool.h
7907 --- linux-2.4.24.org/include/linux/mempool.h 1970-01-01 01:00:00.000000000 +0100
7908 +++ linux-2.4.24/include/linux/mempool.h 2004-01-18 15:01:09.522605662 +0100
7911 + * memory buffer pool support
7913 +#ifndef _LINUX_MEMPOOL_H
7914 +#define _LINUX_MEMPOOL_H
7916 +#include <linux/list.h>
7917 +#include <linux/wait.h>
7920 +typedef struct mempool_s mempool_t;
7922 +typedef void * (mempool_alloc_t)(int gfp_mask, void *pool_data);
7923 +typedef void (mempool_free_t)(void *element, void *pool_data);
7925 +extern mempool_t * mempool_create(int min_nr, mempool_alloc_t *alloc_fn,
7926 + mempool_free_t *free_fn, void *pool_data);
7927 +extern int mempool_resize(mempool_t *pool, int new_min_nr, int gfp_mask);
7928 +extern void mempool_destroy(mempool_t *pool);
7929 +extern void * mempool_alloc(mempool_t *pool, int gfp_mask);
7930 +extern void mempool_free(void *element, mempool_t *pool);
7933 + * A mempool_alloc_t and mempool_free_t that get the memory from
7934 + * a slab that is passed in through pool_data.
7936 +void *mempool_alloc_slab(int gfp_mask, void *pool_data);
7937 +void mempool_free_slab(void *element, void *pool_data);
7940 +#endif /* _LINUX_MEMPOOL_H */
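The mempool_alloc_slab/mempool_free_slab pair above exists precisely for the slab-backed pattern the device-mapper code uses (see the kcopyd and dm-snapshot pools earlier in this patch). A minimal sketch with an illustrative cache and element type:

	#include <linux/slab.h>
	#include <linux/mempool.h>

	struct thing {
		int payload;
	};

	static kmem_cache_t *thing_cache;
	static mempool_t *thing_pool;

	static int thing_pool_init(void)
	{
		thing_cache = kmem_cache_create("thing-cache", sizeof(struct thing),
						0, 0, NULL, NULL);
		if (!thing_cache)
			return -ENOMEM;

		/* 16 preallocated elements backed by the slab cache */
		thing_pool = mempool_create(16, mempool_alloc_slab,
					    mempool_free_slab, thing_cache);
		if (!thing_pool) {
			kmem_cache_destroy(thing_cache);
			return -ENOMEM;
		}
		return 0;
	}

mempool_alloc(thing_pool, GFP_NOIO) then never fails from process context, and elements are returned with mempool_free(element, thing_pool).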
7941 diff -urN linux-2.4.24.org/MAINTAINERS linux-2.4.24/MAINTAINERS
7942 --- linux-2.4.24.org/MAINTAINERS 2004-01-18 14:59:47.570857618 +0100
7943 +++ linux-2.4.24/MAINTAINERS 2004-01-18 15:01:13.766714518 +0100
7944 @@ -581,6 +581,13 @@
7945 W: http://www.debian.org/~dz/i8k/
7950 +M: dm@uk.sistina.com
7951 +L: linux-LVM@sistina.com
7952 +W: http://www.sistina.com/lvm
7955 DEVICE NUMBER REGISTRY
7958 diff -urN linux-2.4.24.org/mm/Makefile linux-2.4.24/mm/Makefile
7959 --- linux-2.4.24.org/mm/Makefile 2004-01-18 14:55:23.909936044 +0100
7960 +++ linux-2.4.24/mm/Makefile 2004-01-18 15:01:09.497610911 +0100
7965 -export-objs := shmem.o filemap.o memory.o page_alloc.o
7966 +export-objs := shmem.o filemap.o memory.o page_alloc.o mempool.o
7968 obj-y := memory.o mmap.o filemap.o mprotect.o mlock.o mremap.o \
7969 vmalloc.o slab.o bootmem.o swap.o vmscan.o page_io.o \
7970 page_alloc.o swap_state.o swapfile.o numa.o oom_kill.o \
7974 obj-$(CONFIG_HIGHMEM) += highmem.o
7975 obj-$(CONFIG_PROC_MM) += proc_mm.o
7976 diff -urN linux-2.4.24.org/mm/mempool.c linux-2.4.24/mm/mempool.c
7977 --- linux-2.4.24.org/mm/mempool.c 1970-01-01 01:00:00.000000000 +0100
7978 +++ linux-2.4.24/mm/mempool.c 2004-01-18 15:01:09.525605032 +0100
7981 + * linux/mm/mempool.c
7983 + * memory buffer pool support. Such pools are mostly used
7984 + * for guaranteed, deadlock-free memory allocations during
7985 + * extreme VM load.
7987 + * started by Ingo Molnar, Copyright (C) 2001
7990 +#include <linux/mm.h>
7991 +#include <linux/slab.h>
7992 +#include <linux/module.h>
7993 +#include <linux/mempool.h>
7997 + int min_nr; /* nr of elements at *elements */
7998 + int curr_nr; /* Current nr of elements at *elements */
8002 + mempool_alloc_t *alloc;
8003 + mempool_free_t *free;
8004 + wait_queue_head_t wait;
8007 +static void add_element(mempool_t *pool, void *element)
8009 + BUG_ON(pool->curr_nr >= pool->min_nr);
8010 + pool->elements[pool->curr_nr++] = element;
8013 +static void *remove_element(mempool_t *pool)
8015 + BUG_ON(pool->curr_nr <= 0);
8016 + return pool->elements[--pool->curr_nr];
8019 +static void free_pool(mempool_t *pool)
8021 + while (pool->curr_nr) {
8022 + void *element = remove_element(pool);
8023 + pool->free(element, pool->pool_data);
8025 + kfree(pool->elements);
8030 + * mempool_create - create a memory pool
8031 + * @min_nr: the minimum number of elements guaranteed to be
8032 + * allocated for this pool.
8033 + * @alloc_fn: user-defined element-allocation function.
8034 + * @free_fn: user-defined element-freeing function.
8035 + * @pool_data: optional private data available to the user-defined functions.
8037 + * this function creates and allocates a guaranteed size, preallocated
8038 + * memory pool. The pool can be used from the mempool_alloc and mempool_free
8039 + * functions. This function might sleep. Both the alloc_fn() and the free_fn()
8040 + * functions might sleep - as long as the mempool_alloc function is not called
8041 + * from IRQ contexts.
8043 +mempool_t * mempool_create(int min_nr, mempool_alloc_t *alloc_fn,
8044 + mempool_free_t *free_fn, void *pool_data)
8048 + pool = kmalloc(sizeof(*pool), GFP_KERNEL);
8051 + memset(pool, 0, sizeof(*pool));
8052 + pool->elements = kmalloc(min_nr * sizeof(void *), GFP_KERNEL);
8053 + if (!pool->elements) {
8057 + spin_lock_init(&pool->lock);
8058 + pool->min_nr = min_nr;
8059 + pool->pool_data = pool_data;
8060 + init_waitqueue_head(&pool->wait);
8061 + pool->alloc = alloc_fn;
8062 + pool->free = free_fn;
8065 + * First pre-allocate the guaranteed number of buffers.
8067 + while (pool->curr_nr < pool->min_nr) {
8070 + element = pool->alloc(GFP_KERNEL, pool->pool_data);
8071 + if (unlikely(!element)) {
8075 + add_element(pool, element);
+/**
+ * mempool_resize - resize an existing memory pool
+ * @pool:       pointer to the memory pool which was allocated via
+ *              mempool_create().
+ * @new_min_nr: the new minimum number of elements guaranteed to be
+ *              allocated for this pool.
+ * @gfp_mask:   the usual allocation bitmask.
+ *
+ * This function shrinks or grows the pool. When growing, the pool is not
+ * guaranteed to reach the new size immediately; subsequent mempool_free()
+ * calls will refill it.
+ *
+ * Note: the caller must guarantee that no mempool_destroy() runs while
+ * this function executes. mempool_alloc() and mempool_free() may still
+ * be called concurrently (e.g. from IRQ context) while it runs.
+ */
+int mempool_resize(mempool_t *pool, int new_min_nr, int gfp_mask)
+{
+	void *element;
+	void **new_elements;
+	unsigned long flags;
+
+	BUG_ON(new_min_nr <= 0);
+
+	spin_lock_irqsave(&pool->lock, flags);
+	if (new_min_nr < pool->min_nr) {
+		/* Shrinking: hand surplus elements back to the allocator */
+		while (pool->curr_nr > new_min_nr) {
+			element = remove_element(pool);
+			spin_unlock_irqrestore(&pool->lock, flags);
+			pool->free(element, pool->pool_data);
+			spin_lock_irqsave(&pool->lock, flags);
+		}
+		pool->min_nr = new_min_nr;
+		goto out_unlock;
+	}
+	spin_unlock_irqrestore(&pool->lock, flags);
+
+	/* Grow the pool */
+	new_elements = kmalloc(new_min_nr * sizeof(*new_elements), gfp_mask);
+	if (!new_elements)
+		return -ENOMEM;
+
+	spin_lock_irqsave(&pool->lock, flags);
+	memcpy(new_elements, pool->elements,
+			pool->curr_nr * sizeof(*new_elements));
+	kfree(pool->elements);
+	pool->elements = new_elements;
+	pool->min_nr = new_min_nr;
+
+	while (pool->curr_nr < pool->min_nr) {
+		spin_unlock_irqrestore(&pool->lock, flags);
+		element = pool->alloc(gfp_mask, pool->pool_data);
+		if (!element)
+			goto out;
+		spin_lock_irqsave(&pool->lock, flags);
+		if (pool->curr_nr < pool->min_nr) {
+			add_element(pool, element);
+		} else {
+			/* Raced: free through the pool's own free_fn,
+			 * since the element need not be kmalloc()-backed */
+			spin_unlock_irqrestore(&pool->lock, flags);
+			pool->free(element, pool->pool_data);
+			goto out;
+		}
+	}
+out_unlock:
+	spin_unlock_irqrestore(&pool->lock, flags);
+out:
+	return 0;
+}
+
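A driver that discovers at runtime that it needs a deeper reserve can grow its pool; shrinking releases surplus elements immediately. A plausible usage sketch, reusing the hypothetical io_pool from above (the depth of 64 is arbitrary):

	/* Grow the reserve to 64 elements; may sleep with GFP_KERNEL.
	 * -ENOMEM means only that the new element array could not be
	 * allocated; a pool that grows short is refilled later by
	 * mempool_free(). */
	if (mempool_resize(io_pool, 64, GFP_KERNEL) < 0)
		printk(KERN_WARNING "io_pool: resize failed\n");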
+/**
+ * mempool_destroy - deallocate a memory pool
+ * @pool: pointer to the memory pool which was allocated via
+ *        mempool_create().
+ *
+ * This function only sleeps if the free_fn() function sleeps. The caller
+ * has to guarantee that all elements have been returned to the pool
+ * (i.e. freed) prior to calling mempool_destroy().
+ */
+void mempool_destroy(mempool_t *pool)
+{
+	if (pool->curr_nr != pool->min_nr)
+		BUG();		/* There were outstanding elements */
+	free_pool(pool);
+}
+
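Teardown is only legal once every element is back in the pool, as the BUG() check above enforces. A typical module-exit sketch, again with the hypothetical names used earlier:

	static void __exit io_exit(void)
	{
		/* every element must have been mempool_free()d by now,
		 * otherwise mempool_destroy() triggers BUG() */
		mempool_destroy(io_pool);
		kmem_cache_destroy(io_cache);
	}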
+/**
+ * mempool_alloc - allocate an element from a specific memory pool
+ * @pool:     pointer to the memory pool which was allocated via
+ *            mempool_create().
+ * @gfp_mask: the usual allocation bitmask.
+ *
+ * This function only sleeps if the alloc_fn function sleeps, or if it
+ * returns NULL. Note that due to preallocation, this function *never*
+ * fails when called from process context (it might fail if called from
+ * IRQ context).
+ */
+void * mempool_alloc(mempool_t *pool, int gfp_mask)
+{
+	void *element;
+	unsigned long flags;
+	int curr_nr;
+	DECLARE_WAITQUEUE(wait, current);
+	int gfp_nowait = gfp_mask & ~(__GFP_WAIT | __GFP_IO);
+
+repeat_alloc:
+	/* First try a non-blocking allocation from the backing allocator */
+	element = pool->alloc(gfp_nowait, pool->pool_data);
+	if (likely(element != NULL))
+		return element;
+
+	/*
+	 * If the pool is less than 50% full then try harder
+	 * to allocate an element:
+	 */
+	if ((gfp_mask != gfp_nowait) && (pool->curr_nr <= pool->min_nr/2)) {
+		element = pool->alloc(gfp_mask, pool->pool_data);
+		if (likely(element != NULL))
+			return element;
+	}
+
+	/*
+	 * Kick the VM at this point.
+	 */
+	wakeup_bdflush();
+
+	spin_lock_irqsave(&pool->lock, flags);
+	if (likely(pool->curr_nr)) {
+		element = remove_element(pool);
+		spin_unlock_irqrestore(&pool->lock, flags);
+		return element;
+	}
+	spin_unlock_irqrestore(&pool->lock, flags);
+
+	/* We must not sleep in the GFP_ATOMIC case */
+	if (gfp_mask == gfp_nowait)
+		return NULL;
+
+	run_task_queue(&tq_disk);
+
+	add_wait_queue_exclusive(&pool->wait, &wait);
+	set_task_state(current, TASK_UNINTERRUPTIBLE);
+
+	spin_lock_irqsave(&pool->lock, flags);
+	curr_nr = pool->curr_nr;
+	spin_unlock_irqrestore(&pool->lock, flags);
+
+	if (!curr_nr)
+		schedule();
+
+	current->state = TASK_RUNNING;
+	remove_wait_queue(&pool->wait, &wait);
+
+	goto repeat_alloc;
+}
+
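The preallocated reserve is what backs the documented guarantee: with a sleeping gfp_mask the loop above retries until an element appears, so process-context callers on the writeout path never see NULL. An illustrative sketch (io_pool is the hypothetical pool from earlier, and struct io stands in for the client's element type):

	/* Writeout path: GFP_NOIO allows sleeping but forbids recursing
	 * into the I/O layer; this cannot return NULL. */
	struct io *io = mempool_alloc(io_pool, GFP_NOIO);

	/* IRQ context: the mask must not allow sleeping, so failure
	 * becomes possible and must be handled. */
	struct io *aio = mempool_alloc(io_pool, GFP_ATOMIC);
	if (!aio) {
		/* reserve empty and no atomic memory: fall back or defer */
	}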
+/**
+ * mempool_free - return an element to the pool.
+ * @element: pool element pointer.
+ * @pool:    pointer to the memory pool which was allocated via
+ *           mempool_create().
+ *
+ * This function only sleeps if the free_fn() function sleeps.
+ */
+void mempool_free(void *element, mempool_t *pool)
+{
+	unsigned long flags;
+
+	if (pool->curr_nr < pool->min_nr) {
+		spin_lock_irqsave(&pool->lock, flags);
+		if (pool->curr_nr < pool->min_nr) {
+			add_element(pool, element);
+			spin_unlock_irqrestore(&pool->lock, flags);
+			wake_up(&pool->wait);
+			return;
+		}
+		spin_unlock_irqrestore(&pool->lock, flags);
+	}
+	pool->free(element, pool->pool_data);
+}
+
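Each successful mempool_alloc() is paired with exactly one mempool_free(): the element goes back into the reserve if it is below min_nr, otherwise it returns to the backing allocator. A one-line sketch continuing the hypothetical example:

	/* Completion path: refills the reserve first and wakes one
	 * waiter sleeping in mempool_alloc(), if any. */
	mempool_free(io, io_pool);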
+/*
+ * A commonly used pair of alloc and free functions.
+ */
+void *mempool_alloc_slab(int gfp_mask, void *pool_data)
+{
+	kmem_cache_t *mem = (kmem_cache_t *) pool_data;
+	return kmem_cache_alloc(mem, gfp_mask);
+}
+
+void mempool_free_slab(void *element, void *pool_data)
+{
+	kmem_cache_t *mem = (kmem_cache_t *) pool_data;
+	kmem_cache_free(mem, element);
+}
+
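The pool_data cookie is what keeps these two helpers generic: the same pair serves any slab cache. Pools that are not slab-backed can supply their own callbacks instead. A minimal kmalloc-backed sketch; IO_BUF_SIZE and the function names are illustrative, not part of the patch:

	#define IO_BUF_SIZE 512		/* hypothetical element size */

	static void *io_buf_alloc(int gfp_mask, void *pool_data)
	{
		return kmalloc(IO_BUF_SIZE, gfp_mask);
	}

	static void io_buf_free(void *element, void *pool_data)
	{
		kfree(element);
	}

	/* ... = mempool_create(8, io_buf_alloc, io_buf_free, NULL); */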
+EXPORT_SYMBOL(mempool_create);
+EXPORT_SYMBOL(mempool_resize);
+EXPORT_SYMBOL(mempool_destroy);
+EXPORT_SYMBOL(mempool_alloc);
+EXPORT_SYMBOL(mempool_free);
+EXPORT_SYMBOL(mempool_alloc_slab);
+EXPORT_SYMBOL(mempool_free_slab);