- latest one

author Arkadiusz Miśkiewicz <arekm@maven.pl>

Fri, 18 Jul 2003 23:06:16 +0000 (23:06 +0000)

committer cvs2git <feedback@pld-linux.org>

Sun, 24 Jun 2012 12:13:13 +0000 (12:13 +0000)
author Arkadiusz Miśkiewicz <arekm@maven.pl>
Fri, 18 Jul 2003 23:06:16 +0000 (23:06 +0000)
committer cvs2git <feedback@pld-linux.org>
Sun, 24 Jun 2012 12:13:13 +0000 (12:13 +0000)
diff --git a/combined-linux-2.4.21-devmapper-ioctl.patch b/combined-linux-2.4.21-devmapper-ioctl.patch

new file mode 100644 (file)

index 0000000..7cd9343
--- /dev/null
+++ b/combined-linux-2.4.21-devmapper-ioctl.patch
@@ -0,0 +1,10097 @@
+diff -ruN linux-2.4.21-dm-real/Documentation/Configure.help linux-2.4.21/Documentation/Configure.help
+--- linux-2.4.21-dm-real/Documentation/Configure.help  Fri Jun 13 16:32:30 2003
++++ linux-2.4.21/Documentation/Configure.help  Sat Jul 12 18:14:01 2003
+@@ -1839,6 +1839,20 @@
+   want), say M here and read <file:Documentation/modules.txt>.  The
+   module will be called lvm-mod.o.
+ 
++Device-mapper support
++CONFIG_BLK_DEV_DM
++  Device-mapper is a low level volume manager.  It works by allowing
++  people to specify mappings for ranges of logical sectors.  Various
++  mapping types are available, in addition people may write their own
++  modules containing custom mappings if they wish.
++
++  Higher level volume managers such as LVM2 use this driver.
++
++  If you want to compile this as a module, say M here and read 
++  <file:Documentation/modules.txt>.  The module will be called dm-mod.o.
++
++  If unsure, say N.
++
+ Multiple devices driver support (RAID and LVM)
+ CONFIG_MD
+   Support multiple physical spindles through a single logical device.
+diff -ruN linux-2.4.21-dm-real/MAINTAINERS linux-2.4.21/MAINTAINERS
+--- linux-2.4.21-dm-real/MAINTAINERS   Fri Jun 13 16:32:30 2003
++++ linux-2.4.21/MAINTAINERS   Sat Jul 12 18:14:01 2003
+@@ -476,6 +476,13 @@
+ W:    http://www.debian.org/~dz/i8k/
+ S:    Maintained
+ 
++DEVICE MAPPER
++P:    Joe Thornber
++M:    dm@uk.sistina.com
++L:    linux-LVM@sistina.com
++W:    http://www.sistina.com/lvm
++S:    Maintained
++
+ DEVICE NUMBER REGISTRY
+ P:    H. Peter Anvin
+ M:    hpa@zytor.com
+diff -ruN linux-2.4.21-dm-real/arch/mips64/kernel/ioctl32.c linux-2.4.21/arch/mips64/kernel/ioctl32.c
+--- linux-2.4.21-dm-real/arch/mips64/kernel/ioctl32.c  Fri Jan 10 16:34:18 2003
++++ linux-2.4.21/arch/mips64/kernel/ioctl32.c  Sat Jul 12 18:13:51 2003
+@@ -33,6 +33,7 @@
+ #include <linux/auto_fs.h>
+ #include <linux/ext2_fs.h>
+ #include <linux/raid/md_u.h>
++#include <linux/dm-ioctl.h>
+ 
+ #include <scsi/scsi.h>
+ #undef __KERNEL__             /* This file was born to be ugly ...  */
+@@ -914,6 +915,22 @@
+       IOCTL32_DEFAULT(STOP_ARRAY_RO),
+       IOCTL32_DEFAULT(RESTART_ARRAY_RW),
+ #endif /* CONFIG_MD */
++
++#if defined(CONFIG_BLK_DEV_DM) || defined(CONFIG_BLK_DEV_DM_MODULE)
++      IOCTL32_DEFAULT(DM_VERSION),
++      IOCTL32_DEFAULT(DM_REMOVE_ALL),
++      IOCTL32_DEFAULT(DM_DEV_CREATE),
++      IOCTL32_DEFAULT(DM_DEV_REMOVE),
++      IOCTL32_DEFAULT(DM_TABLE_LOAD),
++      IOCTL32_DEFAULT(DM_DEV_SUSPEND),
++      IOCTL32_DEFAULT(DM_DEV_RENAME),
++      IOCTL32_DEFAULT(DM_TABLE_DEPS),
++      IOCTL32_DEFAULT(DM_DEV_STATUS),
++      IOCTL32_DEFAULT(DM_TABLE_STATUS),
++      IOCTL32_DEFAULT(DM_DEV_WAIT),
++      IOCTL32_DEFAULT(DM_LIST_DEVICES),
++      IOCTL32_DEFAULT(DM_TABLE_CLEAR),
++#endif /* CONFIG_BLK_DEV_DM */
+ 
+       IOCTL32_DEFAULT(MTIOCTOP),                      /* mtio.h ioctls  */
+       IOCTL32_HANDLER(MTIOCGET32, mt_ioctl_trans),
+diff -ruN linux-2.4.21-dm-real/arch/parisc/kernel/ioctl32.c linux-2.4.21/arch/parisc/kernel/ioctl32.c
+--- linux-2.4.21-dm-real/arch/parisc/kernel/ioctl32.c  Fri Jun 13 16:32:32 2003
++++ linux-2.4.21/arch/parisc/kernel/ioctl32.c  Sat Jul 12 18:13:51 2003
+@@ -55,6 +55,7 @@
+ #define max max */
+ #include <linux/lvm.h>
+ #endif /* LVM */
++#include <linux/dm-ioctl.h>
+ 
+ #include <scsi/scsi.h>
+ /* Ugly hack. */
+@@ -3418,6 +3419,22 @@
+ COMPATIBLE_IOCTL(LV_BMAP)
+ COMPATIBLE_IOCTL(LV_SNAPSHOT_USE_RATE)
+ #endif /* LVM */
++/* Device-Mapper */
++#if defined(CONFIG_BLK_DEV_DM) || defined(CONFIG_BLK_DEV_DM_MODULE)
++COMPATIBLE_IOCTL(DM_VERSION)
++COMPATIBLE_IOCTL(DM_REMOVE_ALL)
++COMPATIBLE_IOCTL(DM_DEV_CREATE)
++COMPATIBLE_IOCTL(DM_DEV_REMOVE)
++COMPATIBLE_IOCTL(DM_TABLE_LOAD)
++COMPATIBLE_IOCTL(DM_DEV_SUSPEND)
++COMPATIBLE_IOCTL(DM_DEV_RENAME)
++COMPATIBLE_IOCTL(DM_TABLE_DEPS)
++COMPATIBLE_IOCTL(DM_DEV_STATUS)
++COMPATIBLE_IOCTL(DM_TABLE_STATUS)
++COMPATIBLE_IOCTL(DM_DEV_WAIT)
++COMPATIBLE_IOCTL(DM_LIST_DEVICES)
++COMPATIBLE_IOCTL(DM_TABLE_CLEAR)
++#endif /* CONFIG_BLK_DEV_DM */
+ #if defined(CONFIG_DRM) || defined(CONFIG_DRM_MODULE)
+ COMPATIBLE_IOCTL(DRM_IOCTL_GET_MAGIC)
+ COMPATIBLE_IOCTL(DRM_IOCTL_IRQ_BUSID)
+diff -ruN linux-2.4.21-dm-real/arch/ppc64/kernel/ioctl32.c linux-2.4.21/arch/ppc64/kernel/ioctl32.c
+--- linux-2.4.21-dm-real/arch/ppc64/kernel/ioctl32.c   Fri Jun 13 16:32:33 2003
++++ linux-2.4.21/arch/ppc64/kernel/ioctl32.c   Sat Jul 12 18:13:51 2003
+@@ -66,6 +66,7 @@
+ #if defined(CONFIG_BLK_DEV_LVM) || defined(CONFIG_BLK_DEV_LVM_MODULE)
+ #include <linux/lvm.h>
+ #endif /* LVM */
++#include <linux/dm-ioctl.h>
+ 
+ #include <scsi/scsi.h>
+ /* Ugly hack. */
+@@ -4423,6 +4424,22 @@
+ COMPATIBLE_IOCTL(NBD_PRINT_DEBUG),
+ COMPATIBLE_IOCTL(NBD_SET_SIZE_BLOCKS),
+ COMPATIBLE_IOCTL(NBD_DISCONNECT),
++/* device-mapper */
++#if defined(CONFIG_BLK_DEV_DM) || defined(CONFIG_BLK_DEV_DM_MODULE)
++COMPATIBLE_IOCTL(DM_VERSION),
++COMPATIBLE_IOCTL(DM_REMOVE_ALL),
++COMPATIBLE_IOCTL(DM_DEV_CREATE),
++COMPATIBLE_IOCTL(DM_DEV_REMOVE),
++COMPATIBLE_IOCTL(DM_TABLE_LOAD),
++COMPATIBLE_IOCTL(DM_DEV_SUSPEND),
++COMPATIBLE_IOCTL(DM_DEV_RENAME),
++COMPATIBLE_IOCTL(DM_TABLE_DEPS),
++COMPATIBLE_IOCTL(DM_DEV_STATUS),
++COMPATIBLE_IOCTL(DM_TABLE_STATUS),
++COMPATIBLE_IOCTL(DM_DEV_WAIT),
++COMPATIBLE_IOCTL(DM_LIST_DEVICES),
++COMPATIBLE_IOCTL(DM_TABLE_CLEAR),
++#endif /* CONFIG_BLK_DEV_DM */
+ /* Remove *PRIVATE in 2.5 */
+ COMPATIBLE_IOCTL(SIOCDEVPRIVATE),
+ COMPATIBLE_IOCTL(SIOCDEVPRIVATE+1),
+diff -ruN linux-2.4.21-dm-real/arch/s390x/kernel/ioctl32.c linux-2.4.21/arch/s390x/kernel/ioctl32.c
+--- linux-2.4.21-dm-real/arch/s390x/kernel/ioctl32.c   Fri Jan 10 16:34:26 2003
++++ linux-2.4.21/arch/s390x/kernel/ioctl32.c   Sat Jul 12 18:13:51 2003
+@@ -25,6 +25,7 @@
+ #include <linux/ext2_fs.h>
+ #include <linux/hdreg.h>
+ #include <linux/if_bonding.h>
++#include <linux/dm-ioctl.h>
+ #include <asm/types.h>
+ #include <asm/uaccess.h>
+ #include <asm/dasd.h>
+@@ -507,6 +508,20 @@
+       IOCTL32_DEFAULT(VT_UNLOCKSWITCH),
+ 
+       IOCTL32_DEFAULT(SIOCGSTAMP),
++
++      IOCTL32_DEFAULT(DM_VERSION),
++      IOCTL32_DEFAULT(DM_REMOVE_ALL),
++      IOCTL32_DEFAULT(DM_DEV_CREATE),
++      IOCTL32_DEFAULT(DM_DEV_REMOVE),
++      IOCTL32_DEFAULT(DM_TABLE_LOAD),
++      IOCTL32_DEFAULT(DM_DEV_SUSPEND),
++      IOCTL32_DEFAULT(DM_DEV_RENAME),
++      IOCTL32_DEFAULT(DM_TABLE_DEPS),
++      IOCTL32_DEFAULT(DM_DEV_STATUS),
++      IOCTL32_DEFAULT(DM_TABLE_STATUS),
++      IOCTL32_DEFAULT(DM_DEV_WAIT),
++      IOCTL32_DEFAULT(DM_LIST_DEVICES),
++      IOCTL32_DEFAULT(DM_TABLE_CLEAR),
+ 
+       IOCTL32_HANDLER(SIOCGIFNAME, dev_ifname32),
+       IOCTL32_HANDLER(SIOCGIFCONF, dev_ifconf),
+diff -ruN linux-2.4.21-dm-real/arch/sparc64/kernel/ioctl32.c linux-2.4.21/arch/sparc64/kernel/ioctl32.c
+--- linux-2.4.21-dm-real/arch/sparc64/kernel/ioctl32.c Fri Jun 13 16:32:34 2003
++++ linux-2.4.21/arch/sparc64/kernel/ioctl32.c Sat Jul 12 18:13:51 2003
+@@ -56,6 +56,7 @@
+ #if defined(CONFIG_BLK_DEV_LVM) || defined(CONFIG_BLK_DEV_LVM_MODULE)
+ #include <linux/lvm.h>
+ #endif /* LVM */
++#include <linux/dm-ioctl.h>
+ 
+ #include <scsi/scsi.h>
+ /* Ugly hack. */
+@@ -5076,6 +5077,22 @@
+ COMPATIBLE_IOCTL(NBD_PRINT_DEBUG)
+ COMPATIBLE_IOCTL(NBD_SET_SIZE_BLOCKS)
+ COMPATIBLE_IOCTL(NBD_DISCONNECT)
++/* device-mapper */
++#if defined(CONFIG_BLK_DEV_DM) || defined(CONFIG_BLK_DEV_DM_MODULE)
++COMPATIBLE_IOCTL(DM_VERSION)
++COMPATIBLE_IOCTL(DM_REMOVE_ALL)
++COMPATIBLE_IOCTL(DM_DEV_CREATE)
++COMPATIBLE_IOCTL(DM_DEV_REMOVE)
++COMPATIBLE_IOCTL(DM_TABLE_LOAD)
++COMPATIBLE_IOCTL(DM_DEV_SUSPEND)
++COMPATIBLE_IOCTL(DM_DEV_RENAME)
++COMPATIBLE_IOCTL(DM_TABLE_DEPS)
++COMPATIBLE_IOCTL(DM_DEV_STATUS)
++COMPATIBLE_IOCTL(DM_TABLE_STATUS)
++COMPATIBLE_IOCTL(DM_DEV_WAIT)
++COMPATIBLE_IOCTL(DM_LIST_DEVICES)
++COMPATIBLE_IOCTL(DM_TABLE_CLEAR)
++#endif /* CONFIG_BLK_DEV_DM */
+ /* Linux-1394 */
+ #if defined(CONFIG_IEEE1394) || defined(CONFIG_IEEE1394_MODULE)
+ COMPATIBLE_IOCTL(AMDTP_IOC_CHANNEL)
+diff -ruN linux-2.4.21-dm-real/arch/x86_64/ia32/ia32_ioctl.c linux-2.4.21/arch/x86_64/ia32/ia32_ioctl.c
+--- linux-2.4.21-dm-real/arch/x86_64/ia32/ia32_ioctl.c Fri Jun 13 16:32:35 2003
++++ linux-2.4.21/arch/x86_64/ia32/ia32_ioctl.c Sat Jul 12 18:13:51 2003
+@@ -67,6 +67,7 @@
+ #define max max
+ #include <linux/lvm.h>
+ #endif /* LVM */
++#include <linux/dm-ioctl.h>
+ 
+ #include <scsi/scsi.h>
+ /* Ugly hack. */
+@@ -4047,6 +4048,22 @@
+ COMPATIBLE_IOCTL(LV_BMAP)
+ COMPATIBLE_IOCTL(LV_SNAPSHOT_USE_RATE)
+ #endif /* LVM */
++/* Device-Mapper */
++#if defined(CONFIG_BLK_DEV_DM) || defined(CONFIG_BLK_DEV_DM_MODULE)
++COMPATIBLE_IOCTL(DM_VERSION)
++COMPATIBLE_IOCTL(DM_REMOVE_ALL)
++COMPATIBLE_IOCTL(DM_DEV_CREATE)
++COMPATIBLE_IOCTL(DM_DEV_REMOVE)
++COMPATIBLE_IOCTL(DM_TABLE_LOAD)
++COMPATIBLE_IOCTL(DM_DEV_SUSPEND)
++COMPATIBLE_IOCTL(DM_DEV_RENAME)
++COMPATIBLE_IOCTL(DM_TABLE_DEPS)
++COMPATIBLE_IOCTL(DM_DEV_STATUS)
++COMPATIBLE_IOCTL(DM_TABLE_STATUS)
++COMPATIBLE_IOCTL(DM_DEV_WAIT)
++COMPATIBLE_IOCTL(DM_LIST_DEVICES)
++COMPATIBLE_IOCTL(DM_TABLE_CLEAR)
++#endif /* CONFIG_BLK_DEV_DM */
+ #ifdef CONFIG_AUTOFS_FS
+ COMPATIBLE_IOCTL(AUTOFS_IOC_READY)
+ COMPATIBLE_IOCTL(AUTOFS_IOC_FAIL)
+diff -ruN linux-2.4.21-dm-real/drivers/md/Config.in linux-2.4.21/drivers/md/Config.in
+--- linux-2.4.21-dm-real/drivers/md/Config.in  Fri Jan 10 16:34:50 2003
++++ linux-2.4.21/drivers/md/Config.in  Sat Jul 12 18:14:01 2003
+@@ -14,5 +14,9 @@
+ dep_tristate '  Multipath I/O support' CONFIG_MD_MULTIPATH $CONFIG_BLK_DEV_MD
+ 
+ dep_tristate ' Logical volume manager (LVM) support' CONFIG_BLK_DEV_LVM $CONFIG_MD
++if [ "$CONFIG_EXPERIMENTAL" = "y" ]; then
++   dep_tristate ' Device-mapper support (EXPERIMENTAL)' CONFIG_BLK_DEV_DM $CONFIG_MD
++   dep_tristate '  Mirror (RAID-1) support (EXPERIMENTAL)' CONFIG_BLK_DEV_DM_MIRROR $CONFIG_BLK_DEV_DM
++fi
+ 
+ endmenu
+diff -ruN linux-2.4.21-dm-real/drivers/md/Makefile linux-2.4.21/drivers/md/Makefile
+--- linux-2.4.21-dm-real/drivers/md/Makefile   Fri Jan 10 16:34:50 2003
++++ linux-2.4.21/drivers/md/Makefile   Sat Jul 12 18:14:11 2003
+@@ -4,24 +4,41 @@
+ 
+ O_TARGET      := mddev.o
+ 
+-export-objs   := md.o xor.o
+-list-multi    := lvm-mod.o
++export-objs   := md.o xor.o dm-table.o dm-target.o kcopyd.o dm-daemon.o \
++                 dm-log.o dm-io.o dm.o
++
++list-multi    := lvm-mod.o dm-mod.o dm-mirror-mod.o
+ lvm-mod-objs  := lvm.o lvm-snap.o lvm-fs.o
++dm-mod-objs   := dm.o dm-table.o dm-target.o dm-ioctl.o \
++                 dm-linear.o dm-stripe.o dm-snapshot.o dm-exception-store.o \
++                 kcopyd.o dm-daemon.o dm-io.o
++dm-mirror-mod-objs := dm-raid1.o dm-log.o
+ 
+ # Note: link order is important.  All raid personalities
+ # and xor.o must come before md.o, as they each initialise 
+ # themselves, and md.o may use the personalities when it 
+ # auto-initialised.
+ 
+-obj-$(CONFIG_MD_LINEAR)               += linear.o
+-obj-$(CONFIG_MD_RAID0)                += raid0.o
+-obj-$(CONFIG_MD_RAID1)                += raid1.o
+-obj-$(CONFIG_MD_RAID5)                += raid5.o xor.o
+-obj-$(CONFIG_MD_MULTIPATH)    += multipath.o
+-obj-$(CONFIG_BLK_DEV_MD)      += md.o
+-obj-$(CONFIG_BLK_DEV_LVM)     += lvm-mod.o
++obj-$(CONFIG_MD_LINEAR)                       += linear.o
++obj-$(CONFIG_MD_RAID0)                        += raid0.o
++obj-$(CONFIG_MD_RAID1)                        += raid1.o
++obj-$(CONFIG_MD_RAID5)                        += raid5.o xor.o
++obj-$(CONFIG_MD_MULTIPATH)            += multipath.o
++obj-$(CONFIG_BLK_DEV_MD)              += md.o
++
++obj-$(CONFIG_BLK_DEV_LVM)             += lvm-mod.o
++
++obj-$(CONFIG_BLK_DEV_DM)              += dm-mod.o
++obj-$(CONFIG_BLK_DEV_DM_MIRROR)               += dm-mirror.o
+ 
+ include $(TOPDIR)/Rules.make
+ 
+ lvm-mod.o: $(lvm-mod-objs)
+       $(LD) -r -o $@ $(lvm-mod-objs)
++
++dm-mod.o: $(dm-mod-objs)
++      $(LD) -r -o $@ $(dm-mod-objs)
++
++dm-mirror.o: $(dm-mirror-mod-objs)
++      $(LD) -r -o $@ $(dm-mirror-mod-objs)
++
+diff -ruN linux-2.4.21-dm-real/drivers/md/dm-daemon.c linux-2.4.21/drivers/md/dm-daemon.c
+--- linux-2.4.21-dm-real/drivers/md/dm-daemon.c        Thu Jan  1 01:00:00 1970
++++ linux-2.4.21/drivers/md/dm-daemon.c        Sat Jul 12 18:14:05 2003
+@@ -0,0 +1,113 @@
++/*
++ * Copyright (C) 2003 Sistina Software
++ *
++ * This file is released under the LGPL.
++ */
++
++#include "dm.h"
++#include "dm-daemon.h"
++
++#include <linux/module.h>
++#include <linux/sched.h>
++
++/* Thread body for a dm_daemon ('arg'): calls dd->fn() each time it is woken, until told to die. */
++static int daemon(void *arg)
++{
++      struct dm_daemon *dd = (struct dm_daemon *) arg;
++      DECLARE_WAITQUEUE(wq, current);
++
++      daemonize();
++      reparent_to_init();
++
++      /* block all signals */
++      spin_lock_irq(&current->sigmask_lock);
++      sigfillset(&current->blocked);
++      flush_signals(current);
++      spin_unlock_irq(&current->sigmask_lock);
++
++      strcpy(current->comm, dd->name);
++      atomic_set(&dd->please_die, 0);
++      add_wait_queue(&dd->job_queue, &wq);
++
++      down(&dd->run_lock);    /* held until exit; dm_daemon_stop() blocks on it */
++      up(&dd->start_lock);    /* lets dm_daemon_start() get past its second down() */
++
++      /*
++       * dd->fn() could do anything, very likely it will
++       * suspend.  So we can't set the state to
++       * TASK_INTERRUPTIBLE before calling it.  In order to
++       * prevent a race with a waking thread we do this little
++       * dance with the dd->woken variable.
++       */
++      while (1) {
++              do {
++                      set_current_state(TASK_RUNNING);
++
++                      if (atomic_read(&dd->please_die))
++                              goto out;
++
++                      atomic_set(&dd->woken, 0);
++                      dd->fn();
++                      yield();
++
++                      set_current_state(TASK_INTERRUPTIBLE);
++              } while (atomic_read(&dd->woken));
++
++              schedule();
++      }
++
++ out:
++      remove_wait_queue(&dd->job_queue, &wq);
++      up(&dd->run_lock);
++      return 0;
++}
++
++/* Set up 'dd', spawn its thread running 'fn' and wait until it is ready.  Returns 0 or -EAGAIN. */
++int dm_daemon_start(struct dm_daemon *dd, const char *name, void (*fn)(void))
++{
++      pid_t pid = 0;
++
++      /* Initialise the dm_daemon. */
++      dd->fn = fn;
++      strncpy(dd->name, name, sizeof(dd->name) - 1);
++      /* strncpy() never writes the last byte; daemon() strcpy()s this name */
++      dd->name[sizeof(dd->name) - 1] = '\0';
++      sema_init(&dd->start_lock, 1);
++      sema_init(&dd->run_lock, 1);
++      init_waitqueue_head(&dd->job_queue);
++
++      /*
++       * Start the new thread.  start_lock is taken first so that the
++       * second down() below blocks until daemon() releases it.
++       */
++      down(&dd->start_lock);
++      pid = kernel_thread(daemon, dd, 0);
++      if (pid <= 0) {
++              DMERR("Failed to start %s thread", dd->name);
++              return -EAGAIN;
++      }
++
++      /* wait for the daemon to up this mutex. */
++      down(&dd->start_lock);
++      up(&dd->start_lock);
++
++      return 0;
++}
++
++void dm_daemon_stop(struct dm_daemon *dd)
++{
++      atomic_set(&dd->please_die, 1); /* daemon() checks this at the top of each pass */
++      dm_daemon_wake(dd);
++      down(&dd->run_lock);            /* daemon() holds run_lock until it exits */
++      up(&dd->run_lock);
++}
++
++void dm_daemon_wake(struct dm_daemon *dd)
++{
++      atomic_set(&dd->woken, 1);      /* set first: daemon() rechecks it before sleeping */
++      wake_up_interruptible(&dd->job_queue);
++}
++
++EXPORT_SYMBOL(dm_daemon_start);
++EXPORT_SYMBOL(dm_daemon_stop);
++EXPORT_SYMBOL(dm_daemon_wake);
+diff -ruN linux-2.4.21-dm-real/drivers/md/dm-daemon.h linux-2.4.21/drivers/md/dm-daemon.h
+--- linux-2.4.21-dm-real/drivers/md/dm-daemon.h        Thu Jan  1 01:00:00 1970
++++ linux-2.4.21/drivers/md/dm-daemon.h        Sat Jul 12 18:14:05 2003
+@@ -0,0 +1,29 @@
++/*
++ * Copyright (C) 2003 Sistina Software
++ *
++ * This file is released under the LGPL.
++ */
++
++#ifndef DM_DAEMON_H
++#define DM_DAEMON_H
++
++#include <asm/atomic.h>
++#include <asm/semaphore.h>
++
++struct dm_daemon {
++      void (*fn)(void);               /* work function called by the thread */
++      char name[16];                  /* copied into current->comm by the thread */
++      atomic_t please_die;            /* set by dm_daemon_stop() */
++      struct semaphore start_lock;    /* released by the thread once it is set up */
++      struct semaphore run_lock;      /* held by the thread for as long as it runs */
++
++      atomic_t woken;                 /* set by dm_daemon_wake(), cleared before fn() */
++      wait_queue_head_t job_queue;    /* the thread is queued here; dm_daemon_wake() wakes it */
++};
++
++int dm_daemon_start(struct dm_daemon *dd, const char *name, void (*fn)(void));
++void dm_daemon_stop(struct dm_daemon *dd);
++void dm_daemon_wake(struct dm_daemon *dd);
++int dm_daemon_running(struct dm_daemon *dd);
++
++#endif
+diff -ruN linux-2.4.21-dm-real/drivers/md/dm-exception-store.c linux-2.4.21/drivers/md/dm-exception-store.c
+--- linux-2.4.21-dm-real/drivers/md/dm-exception-store.c       Thu Jan  1 01:00:00 1970
++++ linux-2.4.21/drivers/md/dm-exception-store.c       Sat Jul 12 18:14:05 2003
+@@ -0,0 +1,675 @@
++/*
++ * dm-exception-store.c
++ *
++ * Copyright (C) 2001-2002 Sistina Software (UK) Limited.
++ *
++ * This file is released under the GPL.
++ */
++
++#include "dm-snapshot.h"
++#include "dm-io.h"
++#include "kcopyd.h"
++
++#include <linux/mm.h>
++#include <linux/pagemap.h>
++#include <linux/vmalloc.h>
++#include <linux/slab.h>
++
++/*-----------------------------------------------------------------
++ * Persistent snapshots, by persistent we mean that the snapshot
++ * will survive a reboot.
++ *---------------------------------------------------------------*/
++
++/*
++ * We need to store a record of which parts of the origin have
++ * been copied to the snapshot device.  The snapshot code
++ * requires that we copy exception chunks to chunk aligned areas
++ * of the COW store.  It makes sense therefore, to store the
++ * metadata in chunk size blocks.
++ *
++ * There is no backward or forward compatibility implemented,
++ * snapshots with different disk versions than the kernel will
++ * not be usable.  It is expected that "lvcreate" will blank out
++ * the start of a fresh COW device before calling the snapshot
++ * constructor.
++ *
++ * The first chunk of the COW device just contains the header.
++ * After this there is a chunk filled with exception metadata,
++ * followed by as many exception chunks as can fit in the
++ * metadata areas.
++ *
++ * All on disk structures are in little-endian format.  The end
++ * of the exceptions info is indicated by an exception with a
++ * new_chunk of 0, which is invalid since it would point to the
++ * header chunk.
++ */
++
++/*
++ * Magic for persistent snapshots: "SnAp" - Feeble isn't it.
++ */
++#define SNAP_MAGIC 0x70416e53
++
++/*
++ * The on-disk version of the metadata.
++ */
++#define SNAPSHOT_DISK_VERSION 1
++
++struct disk_header {
++      uint32_t magic;         /* SNAP_MAGIC; read_header() treats 0 as a new snapshot */
++
++      /*
++       * Is this snapshot valid?  There is no way of recovering
++       * an invalid snapshot.
++       */
++      uint32_t valid;
++
++      /*
++       * Simple, incrementing version; no backward
++       * compatibility.
++       */
++      uint32_t version;
++
++      /* In sectors */
++      uint32_t chunk_size;
++};
++
++struct disk_exception {
++      uint64_t old_chunk;     /* passed to dm_add_exception() as the old chunk */
++      uint64_t new_chunk;     /* chunk on the COW device; 0 ends the list */
++};
++
++struct commit_callback {
++      void (*callback)(void *, int success);  /* called with 'context'; success is 1 or 0 */
++      void *context;
++};
++
++/*
++ * The top level structure for a persistent exception store.
++ */
++struct pstore {
++      struct dm_snapshot *snap;       /* up pointer to my snapshot */
++      int version;                    /* on-disk metadata version */
++      int valid;                      /* 0 once the snapshot has been invalidated */
++      uint32_t chunk_size;            /* in sectors */
++      uint32_t exceptions_per_area;   /* disk_exceptions that fit in one chunk */
++
++      /*
++       * Now that we have an asynchronous kcopyd there is no
++       * need for large chunk sizes, so it won't hurt to have a
++       * whole chunk's worth of metadata in memory at once.
++       */
++      void *area;
++
++      /*
++       * Used to keep track of which metadata area the data in
++       * 'area' refers to.
++       */
++      uint32_t current_area;
++
++      /*
++       * The next free chunk for an exception.
++       */
++      uint32_t next_free;
++
++      /*
++       * The index of next free exception in the current
++       * metadata area.
++       */
++      uint32_t current_committed;
++
++      atomic_t pending_count;         /* prepared but not yet committed exceptions */
++      uint32_t callback_count;        /* entries in use in 'callbacks' */
++      struct commit_callback *callbacks;      /* exceptions_per_area entries */
++};
++
++static inline unsigned int sectors_to_pages(unsigned int sectors)
++{
++      return sectors / (PAGE_SIZE / SECTOR_SIZE);     /* whole pages only; any remainder is dropped */
++}
++
++/*
++ * Allocate ps->area, the chunk sized buffer that holds a single
++ * metadata area, and get its pages ready for dm-io.
++ *
++ * Returns 0, or -ENOMEM if the vmalloc() fails.
++ */
++static int alloc_area(struct pstore *ps)
++{
++      size_t n, count;
++      struct page *pg, *prev = NULL;
++
++      /* room for one metadata area: chunk_size sectors */
++      ps->area = vmalloc(ps->chunk_size << SECTOR_SHIFT);
++      if (!ps->area)
++              return -ENOMEM;
++
++      count = sectors_to_pages(ps->chunk_size);
++
++      /*
++       * We lock the pages for ps->area into memory since
++       * they'll be doing a lot of io.  Each page is also
++       * linked to the one before it, ready for dm-io.
++       */
++      for (n = 0; n < count; n++) {
++              pg = vmalloc_to_page(ps->area + (n * PAGE_SIZE));
++              LockPage(pg);
++              if (prev)
++                      prev->list.next = &pg->list;
++              prev = pg;
++      }
++
++      return 0;
++}
++
++/* Undo alloc_area(): unlink and unlock every page, then free ps->area. */
++static void free_area(struct pstore *ps)
++{
++      size_t n;
++      const size_t count = sectors_to_pages(ps->chunk_size);
++
++      for (n = 0; n < count; n++) {
++              struct page *pg = vmalloc_to_page(ps->area + (n * PAGE_SIZE));
++
++              pg->list.next = NULL;
++              UnlockPage(pg);
++      }
++      vfree(ps->area);
++}
++
++/*
++ * Read or write a chunk aligned and sized block of data from a device.
++ */
++static int chunk_io(struct pstore *ps, uint32_t chunk, int rw)
++{
++      struct io_region where;
++      unsigned int bits;      /* handed to dm_io_sync(); not examined here */
++
++      where.dev = ps->snap->cow->dev;
++      /* widen before multiplying so the sector offset cannot wrap at 32 bits */
++      where.sector = (sector_t) ps->chunk_size * chunk;
++      where.count = ps->chunk_size;
++      return dm_io_sync(1, &where, rw, vmalloc_to_page(ps->area), 0, &bits);
++}
++
++/*
++ * Read or write a metadata area.  Remembering to skip the first
++ * chunk which holds the header.
++ */
++static int area_io(struct pstore *ps, uint32_t area, int rw)
++{
++      /*
++       * Chunk 0 is the header; every area then occupies one metadata
++       * chunk followed by exceptions_per_area data chunks.
++       */
++      const uint32_t stride = ps->exceptions_per_area + 1;
++      int err = chunk_io(ps, 1 + (stride * area), rw);
++
++      /* only remember the area once the io has succeeded */
++      if (!err)
++              ps->current_area = area;
++
++      return err;
++}
++
++static int zero_area(struct pstore *ps, uint32_t area)
++{
++      memset(ps->area, 0, ps->chunk_size << SECTOR_SHIFT);    /* clear the in-memory buffer */
++      return area_io(ps, area, WRITE);        /* on success this also sets ps->current_area */
++}
++
++/* Read chunk 0: magic 0 means a new snapshot, SNAP_MAGIC an existing one, else -ENXIO. */
++static int read_header(struct pstore *ps, int *new_snapshot)
++{
++      struct disk_header *hdr;
++      uint32_t magic;
++      int err = chunk_io(ps, 0, READ);
++
++      if (err)
++              return err;
++
++      hdr = (struct disk_header *) ps->area;
++      magic = le32_to_cpu(hdr->magic);
++      if (magic == 0) {
++              *new_snapshot = 1;
++              return 0;
++      }
++      if (magic != SNAP_MAGIC) {
++              DMWARN("Invalid/corrupt snapshot");
++              return -ENXIO;
++      }
++
++      *new_snapshot = 0;
++      ps->valid = le32_to_cpu(hdr->valid);
++      ps->version = le32_to_cpu(hdr->version);
++      ps->chunk_size = le32_to_cpu(hdr->chunk_size);
++      return 0;
++}
++
++/* Rebuild the on-disk header in ps->area from 'ps' and write it to chunk 0. */
++static int write_header(struct pstore *ps)
++{
++      struct disk_header *hdr = (struct disk_header *) ps->area;
++
++      memset(hdr, 0, ps->chunk_size << SECTOR_SHIFT);
++
++      hdr->magic = cpu_to_le32(SNAP_MAGIC);
++      hdr->valid = cpu_to_le32(ps->valid);
++      hdr->version = cpu_to_le32(ps->version);
++      hdr->chunk_size = cpu_to_le32(ps->chunk_size);
++
++      return chunk_io(ps, 0, WRITE);
++}
++
++/*
++ * Access functions for the disk exceptions, these do the endian conversions.
++ */
++static struct disk_exception *get_exception(struct pstore *ps, uint32_t index)
++{
++      struct disk_exception *slots = (struct disk_exception *) ps->area;
++
++      /* NULL for an index beyond the last slot of the area */
++      return (index < ps->exceptions_per_area) ? slots + index : NULL;
++}
++
++/* Copy slot 'index' of ps->area into *result, converted to cpu byte order. */
++static int read_exception(struct pstore *ps,
++                        uint32_t index, struct disk_exception *result)
++{
++      struct disk_exception *slot = get_exception(ps, index);
++
++      /* get_exception() rejects an out of range index */
++      if (slot == NULL)
++              return -EINVAL;
++
++      result->old_chunk = le64_to_cpu(slot->old_chunk);
++      result->new_chunk = le64_to_cpu(slot->new_chunk);
++
++      return 0;
++}
++
++/* Store *de into slot 'index' of ps->area, converted to little-endian. */
++static int write_exception(struct pstore *ps,
++                         uint32_t index, struct disk_exception *de)
++{
++      struct disk_exception *slot = get_exception(ps, index);
++
++      /* get_exception() rejects an out of range index */
++      if (slot == NULL)
++              return -EINVAL;
++
++      slot->old_chunk = cpu_to_le64(de->old_chunk);
++      slot->new_chunk = cpu_to_le64(de->new_chunk);
++
++      return 0;
++}
++
++/*
++ * Registers the exceptions that are present in the current area.
++ * 'full' is filled in to indicate if the area has been
++ * filled.
++ */
++static int insert_exceptions(struct pstore *ps, int *full)
++{
++      int r;
++      unsigned int i;
++      struct disk_exception de;
++
++      /* presume the area is full */
++      *full = 1;
++
++      for (i = 0; i < ps->exceptions_per_area; i++) {
++              r = read_exception(ps, i, &de);
++
++              if (r)
++                      return r;
++
++              /*
++               * A new_chunk of 0 would point at the header
++               * chunk at the start of the COW device, so it
++               * marks the end of the exceptions.  Therefore
++               * the area is not full.
++               */
++              if (de.new_chunk == 0LL) {
++                      ps->current_committed = i;
++                      *full = 0;
++                      break;
++              }
++
++              /*
++               * Keep track of the start of the free chunks.
++               */
++              if (ps->next_free <= de.new_chunk)
++                      ps->next_free = de.new_chunk + 1;
++
++              /*
++               * Otherwise we add the exception to the snapshot.
++               */
++              r = dm_add_exception(ps->snap, de.old_chunk, de.new_chunk);
++              if (r)
++                      return r;
++      }
++
++      return 0;
++}
++
++/* Load the exceptions from each metadata area in turn.  Returns 0 or the first error. */
++static int read_exceptions(struct pstore *ps)
++{
++      uint32_t area;
++      int r, full = 1;
++
++      /*
++       * Keep reading areas and inserting exceptions until we
++       * find a partially full area.  The for statement is the
++       * only place 'area' is advanced, so no area is skipped.
++       */
++      for (area = 0; full; area++) {
++              r = area_io(ps, area, READ);
++              if (r)
++                      return r;
++
++              r = insert_exceptions(ps, &full);
++              if (r)
++                      return r;
++      }
++
++      return 0;
++}
++
++static inline struct pstore *get_info(struct exception_store *store)
++{
++      return (struct pstore *) store->context;        /* installed by dm_create_persistent() */
++}
++
++static void persistent_fraction_full(struct exception_store *store,
++                                   sector_t *numerator, sector_t *denominator)
++{
++      *numerator = get_info(store)->next_free * store->snap->chunk_size;      /* chunks below next_free, times the chunk size */
++      *denominator = get_dev_size(store->snap->cow->dev);     /* get_dev_size() of the COW device */
++}
++
++static void persistent_destroy(struct exception_store *store)
++{
++      struct pstore *ps = get_info(store);
++      /* NOTE(review): read_header() can overwrite ps->chunk_size; confirm this count matches dm_io_get() */
++      dm_io_put(sectors_to_pages(ps->chunk_size));
++      vfree(ps->callbacks);
++      free_area(ps);          /* unlocks the pages and frees ps->area */
++      kfree(ps);
++}
++
++/*
++ * Bring the store up to date with the COW device: a blank device gets
++ * a fresh header and an empty first area, an existing snapshot has its
++ * header checked and its exceptions loaded.  Returns 0 or a negative
++ * errno.
++ */
++static int persistent_read_metadata(struct exception_store *store)
++{
++      struct pstore *ps = get_info(store);
++      int is_new;
++      int err;
++
++      /*
++       * Read the snapshot header.
++       */
++      err = read_header(ps, &is_new);
++      if (err)
++              return err;
++
++      /*
++       * An existing snapshot: sanity check it, then read the
++       * metadata.
++       */
++      if (!is_new) {
++              if (!ps->valid) {
++                      DMWARN("snapshot is marked invalid");
++                      return -EINVAL;
++              }
++
++              if (ps->version != SNAPSHOT_DISK_VERSION) {
++                      DMWARN("unable to handle snapshot disk version %d",
++                             ps->version);
++                      return -EINVAL;
++              }
++
++              return read_exceptions(ps);
++      }
++
++      /*
++       * Otherwise we need to setup a new snapshot.
++       */
++      err = write_header(ps);
++      if (err) {
++              DMWARN("write_header failed");
++              return err;
++      }
++
++      err = zero_area(ps, 0);
++      if (err)
++              DMWARN("zero_area(0) failed");
++
++      return err;
++}
++
++/* Reserve the next free COW chunk for 'e'; -ENOSPC when it would not fit on the device. */
++static int persistent_prepare(struct exception_store *store,
++                            struct exception *e)
++{
++      struct pstore *ps = get_info(store);
++      const uint32_t stride = ps->exceptions_per_area + 1;
++      sector_t cow_size = get_dev_size(store->snap->cow->dev);
++
++      /* Is there enough room ? */
++      if (((ps->next_free + 1) * store->snap->chunk_size) > cow_size)
++              return -ENOSPC;
++
++      e->new_chunk = ps->next_free;
++      /*
++       * Step to the following chunk.  Chunks whose index is 1
++       * modulo the stride hold metadata, so hop over those.
++       */
++      ps->next_free++;
++      if ((ps->next_free % stride) == 1)
++              ps->next_free++;
++
++      atomic_inc(&ps->pending_count);
++      return 0;
++}
++
++/* Record 'e' in the current area and queue 'callback'; the area is written out when due. */
++static void persistent_commit(struct exception_store *store,
++                            struct exception *e,
++                            void (*callback) (void *, int success),
++                            void *callback_context)
++{
++      int r;
++      unsigned int i;
++      struct pstore *ps = get_info(store);
++      struct disk_exception de;
++      struct commit_callback *cb;
++
++      de.old_chunk = e->old_chunk;
++      de.new_chunk = e->new_chunk;
++      write_exception(ps, ps->current_committed++, &de);
++
++      /*
++       * Add the callback to the back of the array.  This code
++       * is the only place where the callback array is
++       * manipulated, and we know that it will never be called
++       * multiple times concurrently.
++       */
++      cb = ps->callbacks + ps->callback_count++;
++      cb->callback = callback;
++      cb->context = callback_context;
++
++      /*
++       * If there are no more exceptions in flight, or we have
++       * filled this metadata area we commit the exceptions to
++       * disk.
++       */
++      if (atomic_dec_and_test(&ps->pending_count) ||
++          (ps->current_committed == ps->exceptions_per_area)) {
++              r = area_io(ps, ps->current_area, WRITE);
++              if (r)
++                      ps->valid = 0;  /* a failed write invalidates the snapshot */
++
++              for (i = 0; i < ps->callback_count; i++) {
++                      cb = ps->callbacks + i;
++                      cb->callback(cb->context, r == 0 ? 1 : 0);
++              }
++              ps->callback_count = 0;
++      }
++
++      /*
++       * Have we completely filled the current area ?
++       */
++      if (ps->current_committed == ps->exceptions_per_area) {
++              ps->current_committed = 0;
++              r = zero_area(ps, ps->current_area + 1);        /* on success current_area moves on */
++              if (r)
++                      ps->valid = 0;
++      }
++}
++
++/* Mark the store invalid and rewrite its header; a failure is only logged. */
++static void persistent_drop(struct exception_store *store)
++{
++      struct pstore *pstore = get_info(store);
++      pstore->valid = 0;
++      if (write_header(pstore) != 0)
++              DMWARN("write header failed");
++}
++
++/*
++ * Turn 'store' into a persistent exception store using chunks of
++ * chunk_size sectors.  Returns 0 on success or a negative error code.
++ */
++int dm_create_persistent(struct exception_store *store, uint32_t chunk_size)
++{
++      int r;
++      struct pstore *ps;
++
++      r = dm_io_get(sectors_to_pages(chunk_size));
++      if (r)
++              return r;
++
++      /* allocate the pstore */
++      ps = kmalloc(sizeof(*ps), GFP_KERNEL);
++      if (!ps) {
++              r = -ENOMEM;
++              goto bad;
++      }
++
++      /* kmalloc() memory is not zeroed, and 'bad' looks at ->callbacks. */
++      ps->callbacks = NULL;
++      ps->snap = store->snap;
++      ps->valid = 1;
++      ps->version = SNAPSHOT_DISK_VERSION;
++      ps->chunk_size = chunk_size;
++      ps->exceptions_per_area = (chunk_size << SECTOR_SHIFT) /
++          sizeof(struct disk_exception);
++      ps->next_free = 2;      /* skipping the header and first area */
++      ps->current_committed = 0;
++
++      r = alloc_area(ps);
++      if (r)
++              goto bad;
++
++      /* Allocate space for all the callbacks. */
++      ps->callback_count = 0;
++      atomic_set(&ps->pending_count, 0);
++      ps->callbacks = vcalloc(ps->exceptions_per_area,
++                              sizeof(*ps->callbacks));
++      if (!ps->callbacks) {
++              r = -ENOMEM;
++              goto bad;
++      }
++
++      store->destroy = persistent_destroy;
++      store->read_metadata = persistent_read_metadata;
++      store->prepare_exception = persistent_prepare;
++      store->commit_exception = persistent_commit;
++      store->drop_snapshot = persistent_drop;
++      store->fraction_full = persistent_fraction_full;
++      store->context = ps;
++      return 0;
++
++      bad:
++      dm_io_put(sectors_to_pages(chunk_size));
++      if (ps && ps->callbacks)
++              vfree(ps->callbacks);
++      if (ps)
++              kfree(ps);
++      return r;
++}
++
++/*-----------------------------------------------------------------
++ * Implementation of the store for non-persistent snapshots.
++ *---------------------------------------------------------------*/
++struct transient_c {
++      sector_t next_free;     /* first sector not yet handed out by transient_prepare() */
++};
++
++void transient_destroy(struct exception_store *store)
++{
++      kfree(store->context);  /* the transient_c set up by dm_create_transient() */
++}
++
++int transient_read_metadata(struct exception_store *store)
++{
++      return 0;       /* nothing is read; always reports success */
++}
++
++/* Hand out the next free chunk; -1 if it would not fit on the cow device. */
++int transient_prepare(struct exception_store *store, struct exception *e)
++{
++      struct transient_c *tc = (struct transient_c *) store->context;
++      struct dm_snapshot *snap = store->snap;
++      sector_t dev_size = get_dev_size(snap->cow->dev);
++
++      if (tc->next_free + snap->chunk_size > dev_size)
++              return -1;
++      e->new_chunk = sector_to_chunk(snap, tc->next_free);
++      tc->next_free += snap->chunk_size;
++      return 0;
++}
++
++void transient_commit(struct exception_store *store,
++                    struct exception *e,
++                    void (*callback) (void *, int success),
++                    void *callback_context)
++{
++      /* Nothing is written here: report success (1) to the caller at once. */
++      callback(callback_context, 1);
++}
++
++static void transient_fraction_full(struct exception_store *store,
++                                  sector_t *numerator, sector_t *denominator)
++{
++      *numerator = ((struct transient_c *) store->context)->next_free;
++      *denominator = get_dev_size(store->snap->cow->dev); /* whole cow device */
++}
++
++/* Make 'store' a transient store for 's'; 0 or -ENOMEM.  'blocksize' is unused. */
++int dm_create_transient(struct exception_store *store,
++                      struct dm_snapshot *s, int blocksize)
++{
++      struct transient_c *ctx;
++
++      memset(store, 0, sizeof(*store));
++      store->snap = s;
++      store->destroy = transient_destroy;
++      store->read_metadata = transient_read_metadata;
++      store->prepare_exception = transient_prepare;
++      store->commit_exception = transient_commit;
++      store->fraction_full = transient_fraction_full;
++
++      ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
++      if (ctx == NULL)
++              return -ENOMEM;
++      ctx->next_free = 0;
++      store->context = ctx;
++      return 0;
++}
+diff -ruN linux-2.4.21-dm-real/drivers/md/dm-io.c linux-2.4.21/drivers/md/dm-io.c
+--- linux-2.4.21-dm-real/drivers/md/dm-io.c    Thu Jan  1 01:00:00 1970
++++ linux-2.4.21/drivers/md/dm-io.c    Sat Jul 12 18:14:05 2003
+@@ -0,0 +1,344 @@
++/*
++ * Copyright (C) 2003 Sistina Software
++ *
++ * This file is released under the GPL.
++ */
++
++#include "dm-io.h"
++
++#include <linux/mempool.h>
++#include <linux/module.h>
++#include <linux/slab.h>
++#include <linux/sched.h>
++
++/* FIXME: can we shrink this ? */
++struct io_context {
++      int rw;                         /* direction, handed to submit_bh() */
++      unsigned int error;             /* bitset: one bit per failed region */
++      atomic_t count;                 /* bhs in flight, plus 1 while dispatching */
++      struct task_struct *sleeper;    /* task to wake for sync io; NULL for async */
++      io_notify_fn callback;          /* async completion function */
++      void *context;                  /* second argument passed to callback */
++};
++
++/*
++ * We maintain a pool of buffer heads for dispatching the io.
++ */
++static unsigned int _num_bhs;
++static mempool_t *_buffer_pool;
++
++/*
++ * io contexts are only dynamically allocated for asynchronous
++ * io.  Since async io is likely to be the majority of io we'll
++ * have the same number of io contexts as buffer heads ! (FIXME:
++ * must reduce this).
++ */
++mempool_t *_io_pool;
++
++/* Allocator for _buffer_pool: take a bh from bh_cachep and initialise it. */
++static void *alloc_bh(int gfp_mask, void *pool_data)
++{
++      struct buffer_head *bh = kmem_cache_alloc(bh_cachep, gfp_mask);
++
++      if (!bh)
++              return NULL;
++
++      bh->b_reqnext = NULL;
++      init_waitqueue_head(&bh->b_wait);
++      INIT_LIST_HEAD(&bh->b_inode_buffers);
++      return bh;
++}
++
++static void *alloc_io(int gfp_mask, void *pool_data)
++{
++      return kmalloc(sizeof(struct io_context), gfp_mask); /* pool_data unused */
++}
++
++static void free_io(void *element, void *pool_data)
++{
++      kfree(element);         /* counterpart of alloc_io(); pool_data unused */
++}
++
++static unsigned int pages_to_buffers(unsigned int pages)
++{
++      return 4 * pages;       /* reserve 4 bhs per page (too many ?) */
++}
++
++/* Create, resize or (new_bhs == 0) destroy both pools; 0 or a negative errno. */
++static int resize_pool(unsigned int new_bhs)
++{
++      int r = 0;
++
++      if (_buffer_pool && new_bhs == 0) {
++              /* free off the pools */
++              mempool_destroy(_buffer_pool);
++              mempool_destroy(_io_pool);
++              _buffer_pool = _io_pool = NULL;
++      } else if (_buffer_pool) {
++              /* resize the pools */
++              r = mempool_resize(_buffer_pool, new_bhs, GFP_KERNEL);
++              if (!r)
++                      r = mempool_resize(_io_pool, new_bhs, GFP_KERNEL);
++      } else {
++              /*
++               * Create new pools.  Stop if the first one fails: going on
++               * would leak _io_pool or pass a NULL pool to mempool_destroy().
++               */
++              _buffer_pool = mempool_create(new_bhs, alloc_bh,
++                                            mempool_free_slab, bh_cachep);
++              if (!_buffer_pool)
++                      return -ENOMEM;
++
++              _io_pool = mempool_create(new_bhs, alloc_io, free_io, NULL);
++              if (!_io_pool) {
++                      mempool_destroy(_buffer_pool);
++                      _buffer_pool = NULL;
++                      r = -ENOMEM;
++              }
++      }
++
++      if (!r)
++              _num_bhs = new_bhs;     /* only recorded on success */
++      return r;
++}
++
++int dm_io_get(unsigned int num_pages)
++{
++      return resize_pool(_num_bhs + pages_to_buffers(num_pages)); /* grow */
++}
++
++void dm_io_put(unsigned int num_pages)
++{
++      resize_pool(_num_bhs - pages_to_buffers(num_pages)); /* result ignored */
++}
++
++/*-----------------------------------------------------------------
++ * We need to keep track of which region a buffer is doing io
++ * for.  In order to save a memory allocation we store this in an
++ * unused field of the buffer head, and provide these access
++ * functions.
++ *
++ * FIXME: add compile time check that an unsigned int can fit
++ * into a pointer.
++ *
++ *---------------------------------------------------------------*/
++static inline void bh_set_region(struct buffer_head *bh, unsigned int region)
++{
++      bh->b_journal_head = (void *) (unsigned long) region; /* widen first */
++}
++
++static inline unsigned int bh_get_region(struct buffer_head *bh)
++{
++      return (unsigned int) (unsigned long) bh->b_journal_head;
++}
++
++/*-----------------------------------------------------------------
++ * We need an io object to keep track of the number of bhs that
++ * have been dispatched for a particular io.
++ *---------------------------------------------------------------*/
++static void dec_count(struct io_context *io, unsigned int region, int error)
++{
++      if (error)
++              set_bit(region, &io->error);
++
++      if (atomic_dec_and_test(&io->count)) {
++              if (io->sleeper)
++                      wake_up_process(io->sleeper);
++
++              else {
++                      int r = io->error;
++                      io_notify_fn fn = io->callback;
++                      void *context = io->context;
++
++                      mempool_free(io, _io_pool);
++                      fn(r, context);
++              }
++      }
++}
++
++/* Completion handler given to init_buffer(): account the bh, then free it. */
++static void endio(struct buffer_head *bh, int uptodate)
++{
++      struct io_context *io = (struct io_context *) bh->b_private;
++      int failed = !uptodate;
++
++      /*
++       * A failed read leaves arbitrary contents in the page; zero the
++       * region so that people like kcopyd cannot write them out.
++       */
++      if (failed && io->rw != WRITE)
++              memset(bh->b_data, 0, bh->b_size);
++
++      /* the region is read from the bh before the bh goes back to the pool */
++      dec_count(io, bh_get_region(bh), failed);
++      mempool_free(bh, _buffer_pool);
++}
++
++/*
++ * Primitives for alignment calculations.
++ */
++int fls(unsigned n)
++{
++      return generic_fls32(n);        /* see generic_fls32() in dm-io.h */
++}
++
++static inline int log2_floor(unsigned n)
++{
++      return ffs(n) - 1;      /* NOTE(review): uses ffs(), i.e. the lowest set bit */
++}
++
++static inline int log2_align(unsigned n)
++{
++      return fls(n) - 1;      /* index of the highest set bit; -1 when n == 0 */
++}
++
++/*
++ * Submit bhs for one page, from *block until the page is used up or
++ * end_block is reached.  *block is advanced; returns non-zero once
++ * end_block has been reached.
++ */
++static int do_page(kdev_t dev, sector_t *block, sector_t end_block,
++                 unsigned int block_size,
++                 struct page *p, unsigned int offset,
++                 unsigned int region, struct io_context *io)
++{
++      struct buffer_head *bh;
++      sector_t b = *block;
++      sector_t blocks_per_page = PAGE_SIZE / block_size;
++      unsigned int this_size; /* holds the size of the current io */
++      unsigned int len;
++
++      while ((offset < PAGE_SIZE) && (b != end_block)) {
++              bh = mempool_alloc(_buffer_pool, GFP_NOIO);
++              init_buffer(bh, endio, io);
++              bh_set_region(bh, region);
++
++              /*
++               * The io size must be a power of 2 that b is aligned
++               * to.  Block 0 is aligned to anything and has no set
++               * bit to shift by, so it simply gets a whole page.
++               */
++              len = end_block - b;
++              this_size = blocks_per_page;
++              if (b)
++                      this_size = min((sector_t) 1 << log2_floor(b),
++                                      blocks_per_page);
++              if (this_size > len)
++                      this_size = 1 << log2_align(len);
++
++              /* Express the position in units of this io's size. */
++              bh->b_blocknr = (b / this_size);
++              bh->b_size = block_size * this_size;
++              set_bh_page(bh, p, offset);
++              bh->b_this_page = bh;
++              bh->b_dev = dev;
++              atomic_set(&bh->b_count, 1);
++              bh->b_state = ((1 << BH_Uptodate) | (1 << BH_Mapped) |
++                             (1 << BH_Lock));
++              if (io->rw == WRITE)
++                      clear_bit(BH_Dirty, &bh->b_state);
++
++              atomic_inc(&io->count);
++              submit_bh(io->rw, bh);
++
++              b += this_size;
++              offset += block_size * this_size;
++      }
++
++      *block = b;
++      return (b == end_block);
++}
++
++/*
++ * Issue the io for one region.  do_page() is called for successive
++ * pages of the list until it reports that end_block has been reached.
++ */
++static void do_region(unsigned int region, struct io_region *where,
++                    struct page *page, unsigned int offset,
++                    struct io_context *io)
++{
++      unsigned int block_size = get_hardsect_size(where->dev);
++      unsigned int sblock_size = block_size >> 9;     /* in 512-byte units */
++      sector_t block = where->sector / sblock_size;
++      sector_t end_block = (where->sector + where->count) / sblock_size;
++
++      while (!do_page(where->dev, &block, end_block, block_size,
++                      page, offset, region, io)) {
++              offset = 0;     /* only offset the first page */
++              page = list_entry(page->list.next, struct page, list);
++      }
++}
++
++/* Start the io for every region with a non-zero count. */
++static void dispatch_io(unsigned int num_regions, struct io_region *where,
++                      struct page *pages, unsigned int offset,
++                      struct io_context *io)
++{
++      int i;
++
++      for (i = 0; i < num_regions; i++) {
++              if (!where[i].count)
++                      continue;
++              do_region(i, &where[i], pages, offset, io);
++      }
++
++      /* Drop the extra reference held to stop the io completing too early. */
++      dec_count(io, 0, 0);
++}
++
++/*
++ * Synchronous io: dispatch the regions and sleep until the count
++ * reaches zero.  *error_bits receives the per-region error bitset;
++ * returns -EIO if any bit is set, otherwise 0.
++ */
++int dm_io_sync(unsigned int num_regions, struct io_region *where,
++             int rw, struct page *pages, unsigned int offset,
++             unsigned int *error_bits)
++{
++      struct io_context io;
++
++      BUG_ON(num_regions > 1 && rw != WRITE);
++      io.rw = rw;
++      io.error = 0;
++      atomic_set(&io.count, 1); /* see dispatch_io() */
++      io.sleeper = current;
++
++      dispatch_io(num_regions, where, pages, offset, &io);
++      run_task_queue(&tq_disk);
++
++      /* dec_count() wakes this task when the last reference is dropped */
++      for (;;) {
++              set_current_state(TASK_UNINTERRUPTIBLE);
++              if (atomic_read(&io.count) == 0)
++                      break;
++              schedule();
++      }
++      set_current_state(TASK_RUNNING);
++
++      *error_bits = io.error;
++      return io.error ? -EIO : 0;
++}
++
++/*
++ * Asynchronous io: always returns 0; fn is called via dec_count().
++ */
++int dm_io_async(unsigned int num_regions, struct io_region *where, int rw,
++              struct page *pages, unsigned int offset,
++              io_notify_fn fn, void *context)
++{
++      struct io_context *io = mempool_alloc(_io_pool, GFP_NOIO);
++
++      io->sleeper = NULL;     /* nobody sleeps on this io */
++      io->callback = fn;
++      io->context = context;
++      io->rw = rw;
++      io->error = 0;
++      atomic_set(&io->count, 1); /* see dispatch_io() */
++
++      dispatch_io(num_regions, where, pages, offset, io);
++      return 0;
++}
++
++EXPORT_SYMBOL(dm_io_get);
++EXPORT_SYMBOL(dm_io_put);
++EXPORT_SYMBOL(dm_io_sync);
++EXPORT_SYMBOL(dm_io_async);
+diff -ruN linux-2.4.21-dm-real/drivers/md/dm-io.h linux-2.4.21/drivers/md/dm-io.h
+--- linux-2.4.21-dm-real/drivers/md/dm-io.h    Thu Jan  1 01:00:00 1970
++++ linux-2.4.21/drivers/md/dm-io.h    Sat Jul 12 18:14:05 2003
+@@ -0,0 +1,86 @@
++/*
++ * Copyright (C) 2003 Sistina Software
++ *
++ * This file is released under the GPL.
++ */
++
++#ifndef _DM_IO_H
++#define _DM_IO_H
++
++#include "dm.h"
++
++#include <linux/list.h>
++
++/* Move these to bitops.h eventually */
++/* Improved generic_fls algorithm (in 2.4 there is no generic_fls so far) */
++/* (c) 2002, D.Phillips and Sistina Software */
++/* Licensed under Version 2 of the GPL */
++
++static inline unsigned generic_fls8(unsigned n)        /* n must be < 256 */
++{
++      return n & 0xf0 ?
++          n & 0xc0 ? (n >> 7) + 7 : (n >> 5) + 5:
++          n & 0x0c ? (n >> 3) + 3 : n - ((n + 1) >> 2);
++}
++
++static inline unsigned generic_fls16(unsigned n)       /* n must be < 65536 */
++{
++      return  n & 0xff00? generic_fls8(n >> 8) + 8 : generic_fls8(n);
++}
++
++static inline unsigned generic_fls32(unsigned n)       /* 0 for n == 0, else 1..32 */
++{
++      return  n & 0xffff0000 ? generic_fls16(n >> 16) + 16 : generic_fls16(n);
++}
++
++/* FIXME make this configurable */
++#define DM_MAX_IO_REGIONS 8
++
++struct io_region {
++      kdev_t dev;             /* device the io is issued to */
++      sector_t sector;        /* first sector of the region */
++      sector_t count;         /* length in sectors; 0 means the region is skipped */
++};
++
++
++/*
++ * 'error' is a bitset, with each bit indicating whether an error
++ * occurred doing io to the corresponding region.
++ */
++typedef void (*io_notify_fn)(unsigned int error, void *context);
++
++
++/*
++ * Before anyone uses the IO interface they should call
++ * dm_io_get(), specifying roughly how many pages they are
++ * expecting to perform io on concurrently.
++ *
++ * This function may block.
++ */
++int dm_io_get(unsigned int num_pages);
++void dm_io_put(unsigned int num_pages);
++
++
++/*
++ * Synchronous IO.
++ *
++ * Please ensure that the rw flag in the next two functions is
++ * either READ or WRITE, ie. we don't take READA.  Any
++ * regions with a zero count field will be ignored.
++ */
++int dm_io_sync(unsigned int num_regions, struct io_region *where, int rw,
++             struct page *pages, unsigned int offset,
++             unsigned int *error_bits);
++
++
++/*
++ * Asynchronous IO.
++ *
++ * The 'where' array may be safely allocated on the stack since
++ * the function takes a copy.
++ */
++int dm_io_async(unsigned int num_regions, struct io_region *where, int rw,
++              struct page *pages, unsigned int offset,
++              io_notify_fn fn, void *context);
++
++#endif
+diff -ruN linux-2.4.21-dm-real/drivers/md/dm-ioctl.c linux-2.4.21/drivers/md/dm-ioctl.c
+--- linux-2.4.21-dm-real/drivers/md/dm-ioctl.c Thu Jan  1 01:00:00 1970
++++ linux-2.4.21/drivers/md/dm-ioctl.c Sat Jul 12 18:14:05 2003
+@@ -0,0 +1,1272 @@
++/*
++ * Copyright (C) 2001, 2002 Sistina Software (UK) Limited.
++ *
++ * This file is released under the GPL.
++ */
++
++#include "dm.h"
++
++#include <linux/module.h>
++#include <linux/vmalloc.h>
++#include <linux/miscdevice.h>
++#include <linux/dm-ioctl.h>
++#include <linux/init.h>
++#include <linux/wait.h>
++#include <linux/blk.h>
++#include <linux/slab.h>
++
++#include <asm/uaccess.h>
++
++#define DM_DRIVER_EMAIL "dm@uk.sistina.com"
++
++/*-----------------------------------------------------------------
++ * The ioctl interface needs to be able to look up devices by
++ * name or uuid.
++ *---------------------------------------------------------------*/
++struct hash_cell {
++      struct list_head name_list;     /* link in a _name_buckets chain */
++      struct list_head uuid_list;     /* link in a _uuid_buckets chain */
++
++      char *name;                     /* private copy made by alloc_cell() */
++      char *uuid;                     /* private copy, or NULL if no uuid */
++      struct mapped_device *md;
++      struct dm_table *new_map;       /* released by __hash_remove() if set */
++
++      /* I hate devfs */
++      devfs_handle_t devfs_entry;
++};
++
++#define NUM_BUCKETS 64
++#define MASK_BUCKETS (NUM_BUCKETS - 1)
++static struct list_head _name_buckets[NUM_BUCKETS];
++static struct list_head _uuid_buckets[NUM_BUCKETS];
++
++static devfs_handle_t _dev_dir;
++void dm_hash_remove_all(void);
++
++/*
++ * Guards access to both hash tables.
++ */
++static DECLARE_RWSEM(_hash_lock);
++
++static void init_buckets(struct list_head *buckets)
++{
++      unsigned int n = NUM_BUCKETS;   /* walk the table back to front */
++
++      while (n--)
++              INIT_LIST_HEAD(&buckets[n]);
++}
++
++int dm_hash_init(void)
++{
++      init_buckets(_name_buckets);
++      init_buckets(_uuid_buckets);
++      _dev_dir = devfs_mk_dir(0, DM_DIR, NULL);       /* result is not checked */
++      return 0;
++}
++
++void dm_hash_exit(void)
++{
++      dm_hash_remove_all();           /* drop every cell before the directory */
++      devfs_unregister(_dev_dir);
++}
++
++/*-----------------------------------------------------------------
++ * Hash function:
++ * We're not really concerned with the str hash function being
++ * fast since it's only used by the ioctl interface.
++ *---------------------------------------------------------------*/
++static unsigned int hash_str(const char *str)
++{
++      const unsigned int hash_mult = 2654435387U;
++      unsigned int hash = 0;
++      const char *p;
++
++      for (p = str; *p != '\0'; p++)
++              hash = (hash + (unsigned int) *p) * hash_mult;
++      return hash & MASK_BUCKETS;     /* fold into a bucket index */
++}
++
++/*-----------------------------------------------------------------
++ * Code for looking up a device by name
++ *---------------------------------------------------------------*/
++static struct hash_cell *__get_name_cell(const char *str)
++{
++      struct list_head *bucket = _name_buckets + hash_str(str);
++      struct list_head *pos;
++      struct hash_cell *hc;
++
++      /* returns the cell whose name equals str, or NULL */
++      list_for_each (pos, bucket) {
++              hc = list_entry(pos, struct hash_cell, name_list);
++              if (strcmp(hc->name, str) == 0)
++                      return hc;
++      }
++      return NULL;
++}
++
++static struct hash_cell *__get_uuid_cell(const char *str)
++{
++      struct list_head *bucket = _uuid_buckets + hash_str(str);
++      struct list_head *pos;
++      struct hash_cell *hc;
++
++      /* returns the cell whose uuid equals str, or NULL */
++      list_for_each (pos, bucket) {
++              hc = list_entry(pos, struct hash_cell, uuid_list);
++              if (strcmp(hc->uuid, str) == 0)
++                      return hc;
++      }
++      return NULL;
++}
++
++/*-----------------------------------------------------------------
++ * Inserting, removing and renaming a device.
++ *---------------------------------------------------------------*/
++static inline char *kstrdup(const char *str)
++{
++      size_t size = strlen(str) + 1;  /* include the terminating NUL */
++      char *copy = kmalloc(size, GFP_KERNEL);
++
++      return copy ? memcpy(copy, str, size) : NULL;
++}
++
++/*
++ * Allocate an unlinked cell with private copies of 'name' and, when
++ * given, 'uuid'.  Returns NULL if any allocation fails.
++ */
++static struct hash_cell *alloc_cell(const char *name, const char *uuid,
++                                  struct mapped_device *md)
++{
++      struct hash_cell *hc = kmalloc(sizeof(*hc), GFP_KERNEL);
++
++      if (!hc)
++              return NULL;
++
++      hc->name = kstrdup(name);
++      if (!hc->name)
++              goto bad_name;
++
++      /* a device without a uuid simply carries a NULL pointer */
++      hc->uuid = uuid ? kstrdup(uuid) : NULL;
++      if (uuid && !hc->uuid)
++              goto bad_uuid;
++
++      INIT_LIST_HEAD(&hc->name_list);
++      INIT_LIST_HEAD(&hc->uuid_list);
++      hc->md = md;
++      hc->new_map = NULL;
++      return hc;
++
++      bad_uuid:
++      kfree(hc->name);
++      bad_name:
++      kfree(hc);
++      return NULL;
++}
++
++static void free_cell(struct hash_cell *hc)
++{
++      if (!hc)
++              return;
++      kfree(hc->name);
++      kfree(hc->uuid);        /* NULL when the cell has no uuid */
++      kfree(hc);
++}
++
++/*
++ * devfs stuff.
++ */
++static int register_with_devfs(struct hash_cell *hc)
++{
++      kdev_t dev = dm_kdev(hc->md);
++
++      hc->devfs_entry =
++          devfs_register(_dev_dir, hc->name, DEVFS_FL_CURRENT_OWNER,
++                         major(dev), minor(dev),
++                         S_IFBLK | S_IRUSR | S_IWUSR | S_IRGRP,
++                         &dm_blk_dops, NULL);
++
++      return 0;       /* the handle is stored unchecked; always reports success */
++}
++
++static int unregister_with_devfs(struct hash_cell *hc)
++{
++      devfs_unregister(hc->devfs_entry);      /* handle from register_with_devfs() */
++      return 0;
++}
++
++/*
++ * Add a device to the name hash and, when 'uuid' is non-NULL, to the
++ * uuid hash, then register its devfs node.  A reference on 'md' is
++ * taken with dm_get(); __hash_remove() drops it again.
++ *
++ * Returns 0 on success, -ENOMEM if the cell cannot be allocated, or
++ * -EBUSY if the name or the uuid is already in use.
++ *
++ * The kdev_t and uuid of a device can never change once it is
++ * initially inserted.
++ */
++int dm_hash_insert(const char *name, const char *uuid, struct mapped_device *md)
++{
++      struct hash_cell *cell;
++
++      cell = alloc_cell(name, uuid, md);
++      if (!cell)
++              return -ENOMEM;
++
++      down_write(&_hash_lock);
++
++      /* Both the name and, if given, the uuid must be unused. */
++      if (__get_name_cell(name))
++              goto busy;
++      if (uuid && __get_uuid_cell(uuid))
++              goto busy;
++
++      /* Insert the cell into both hash tables. */
++      list_add(&cell->name_list, _name_buckets + hash_str(name));
++      if (uuid)
++              list_add(&cell->uuid_list, _uuid_buckets + hash_str(uuid));
++
++      register_with_devfs(cell);
++      dm_get(md);
++      up_write(&_hash_lock);
++      return 0;
++
++      busy:
++      up_write(&_hash_lock);
++      free_cell(cell);
++      return -EBUSY;
++}
++
++void __hash_remove(struct hash_cell *hc)
++{
++      /* remove from the dev hash */
++      list_del(&hc->uuid_list);       /* self-linked if the cell has no uuid */
++      list_del(&hc->name_list);
++      unregister_with_devfs(hc);
++      dm_put(hc->md);                 /* pairs with dm_get() in dm_hash_insert() */
++      if (hc->new_map)
++              dm_table_put(hc->new_map);
++      free_cell(hc);                  /* hc must not be used after this */
++}
++
++void dm_hash_remove_all(void)
++{
++      struct list_head *pos, *next;
++      unsigned int b;
++
++      down_write(&_hash_lock);
++
++      /* every cell is on a name chain, so walking those finds them all */
++      for (b = 0; b < NUM_BUCKETS; b++)
++              list_for_each_safe (pos, next, _name_buckets + b)
++                      __hash_remove(list_entry(pos, struct hash_cell,
++                                               name_list));
++
++      up_write(&_hash_lock);
++}
++
++/*
++ * Rename device 'old' to 'new' and re-register its devfs node.  Returns
++ * -ENOMEM if the name cannot be copied, -EBUSY if 'new' is taken and
++ * -ENXIO if 'old' does not exist.
++ */
++int dm_hash_rename(const char *old, const char *new)
++{
++      char *new_name, *old_name;
++      struct hash_cell *hc;
++      int r;
++
++      /* duplicate new. */
++      new_name = kstrdup(new);
++      if (!new_name)
++              return -ENOMEM;
++
++      down_write(&_hash_lock);
++
++      /* Is new free ? */
++      if (__get_name_cell(new)) {
++              DMWARN("asked to rename to an already existing name %s -> %s",
++                     old, new);
++              r = -EBUSY;
++              goto fail;
++      }
++
++      /* Is there such a device as 'old' ? */
++      hc = __get_name_cell(old);
++      if (!hc) {
++              DMWARN("asked to rename a non existent device %s -> %s",
++                     old, new);
++              r = -ENXIO;
++              goto fail;
++      }
++
++      /* Swap in the new name and move the cell to its new bucket. */
++      list_del(&hc->name_list);
++      old_name = hc->name;
++      hc->name = new_name;
++      list_add(&hc->name_list, _name_buckets + hash_str(new_name));
++
++      /* rename the device node in devfs */
++      unregister_with_devfs(hc);
++      register_with_devfs(hc);
++
++      up_write(&_hash_lock);
++      kfree(old_name);
++      return 0;
++
++      fail:
++      up_write(&_hash_lock);
++      kfree(new_name);
++      return r;
++}
++
++/*-----------------------------------------------------------------
++ * Implementation of the ioctl commands
++ *---------------------------------------------------------------*/
++/*
++ * All the ioctl commands get dispatched to functions with this
++ * prototype.
++ */
++typedef int (*ioctl_fn)(struct dm_ioctl *param, size_t param_size);
++
++static int remove_all(struct dm_ioctl *param, size_t param_size)
++{
++      dm_hash_remove_all();
++      param->data_size = 0;   /* this command returns no payload */
++      return 0;
++}
++
++/*
++ * Round up the ptr to an 8-byte boundary.
++ */
++#define ALIGN_MASK 7
++static inline void *align_ptr(void *ptr)
++{
++      return (void *) (((size_t) ptr + ALIGN_MASK) & ~(size_t) ALIGN_MASK);
++}
++
++/*
++ * Retrieves the data payload buffer from an already allocated
++ * struct dm_ioctl.
++ */
++static void *get_result_buffer(struct dm_ioctl *param, size_t param_size,
++                             size_t *len)
++{
++      char *base = (char *) param;
++      char *payload = align_ptr(param + 1);
++
++      /* the payload starts at the first aligned byte after the header */
++      param->data_start = payload - base;
++      *len = (param->data_start < param_size) ?
++          param_size - param->data_start : 0;
++      return base + param->data_start;
++}
++
++/*
++ * Fill the result buffer with one struct dm_name_list per device, or
++ * set DM_BUFFER_FULL_FLAG if it is too small.  Always returns 0.
++ */
++static int list_devices(struct dm_ioctl *param, size_t param_size)
++{
++      unsigned int i;
++      struct hash_cell *hc;
++      size_t len, needed = 0;
++      struct dm_name_list *nl, *old_nl = NULL;
++
++      down_write(&_hash_lock);
++
++      /*
++       * Work out how much space we need.  Each entry is the struct,
++       * the name with its terminating NUL, and alignment padding.
++       */
++      for (i = 0; i < NUM_BUCKETS; i++) {
++              list_for_each_entry (hc, _name_buckets + i, name_list) {
++                      needed += sizeof(struct dm_name_list);
++                      needed += strlen(hc->name) + 1;
++                      needed += ALIGN_MASK;
++              }
++      }
++
++      /* Grab our output buffer. */
++      nl = get_result_buffer(param, param_size, &len);
++      if (len < needed) {
++              param->flags |= DM_BUFFER_FULL_FLAG;
++              goto out;
++      }
++      param->data_size = param->data_start + needed;
++
++      nl->dev = 0;    /* Flags no data */
++
++      /* Now loop through filling out the names. */
++      for (i = 0; i < NUM_BUCKETS; i++) {
++              list_for_each_entry (hc, _name_buckets + i, name_list) {
++                      if (old_nl)
++                              old_nl->next = (uint32_t) ((void *) nl -
++                                                         (void *) old_nl);
++
++                      nl->dev = dm_kdev(hc->md);
++                      nl->next = 0;
++                      strcpy(nl->name, hc->name);
++
++                      old_nl = nl;
++                      nl = align_ptr(((void *) ++nl) + strlen(hc->name) + 1);
++              }
++      }
++
++ out:
++      up_write(&_hash_lock);
++      return 0;
++}
++
++static int check_name(const char *name)
++{
++      /* names containing '/' are rejected */
++      if (!strchr(name, '/'))
++              return 0;
++
++      DMWARN("invalid device name");
++      return -EINVAL;
++}
++
++/*
++ * Fills in a dm_ioctl structure, ready for sending back to
++ * userland.
++ */
++static int __dev_status(struct mapped_device *md, struct dm_ioctl *param)
++{
++      kdev_t dev = dm_kdev(md);
++      struct dm_table *table;
++      struct block_device *bdev;
++
++      /* clear the flags that are recomputed below */
++      param->flags &= ~(DM_SUSPEND_FLAG | DM_READONLY_FLAG |
++                        DM_ACTIVE_PRESENT_FLAG);
++      if (dm_suspended(md))
++              param->flags |= DM_SUSPEND_FLAG;
++      if (is_read_only(dev))
++              param->flags |= DM_READONLY_FLAG;
++
++      param->dev = kdev_t_to_nr(dev);
++      param->event_nr = dm_get_event_nr(md);
++
++      /* an active table contributes its flag and its target count */
++      param->target_count = 0;
++      table = dm_get_table(md);
++      if (table) {
++              param->flags |= DM_ACTIVE_PRESENT_FLAG;
++              param->target_count = dm_table_get_num_targets(table);
++              dm_table_put(table);
++      }
++
++      /* open_count is read from the block device, found by number */
++      bdev = bdget(param->dev);
++      if (!bdev)
++              return -ENXIO;
++
++      param->open_count = bdev->bd_openers;
++      bdput(bdev);
++      return 0;
++}
++
++static int dev_create(struct dm_ioctl *param, size_t param_size)
++{
++      struct mapped_device *md;
++      const char *uuid;
++      kdev_t dev = 0;
++      int r;
++
++      r = check_name(param->name);
++      if (r)
++              return r;
++
++      /* with DM_PERSISTENT_DEV_FLAG the caller supplies the device number */
++      if (param->flags & DM_PERSISTENT_DEV_FLAG)
++              dev = to_kdev_t(param->dev);
++
++      r = dm_create(dev, &md);
++      if (r)
++              return r;
++
++      /* an empty uuid string means "no uuid" */
++      uuid = param->uuid[0] ? param->uuid : NULL;
++      r = dm_hash_insert(param->name, uuid, md);
++      if (!r) {
++              param->flags &= ~DM_INACTIVE_PRESENT_FLAG;
++              r = __dev_status(md, param);
++      }
++
++      dm_put(md);     /* presumably the reference from dm_create() - confirm */
++      return r;
++}
++
++/*
++ * Always use UUID for lookups if it's present, otherwise use name.
++ */
++static inline struct hash_cell *__find_device_hash_cell(struct dm_ioctl *param)
++{
++      return (param->uuid[0] != '\0') ? __get_uuid_cell(param->uuid) :
++                                        __get_name_cell(param->name);
++}
++
++/* Find the device (NULL if none), refresh 'param', take a reference on md. */
++static inline struct mapped_device *find_device(struct dm_ioctl *param)
++{
++      struct hash_cell *hc;
++      struct mapped_device *md = NULL;
++
++      down_read(&_hash_lock);
++      hc = __find_device_hash_cell(param);
++      if (hc) {
++              md = hc->md;
++
++              /* strncpy() does not terminate a truncated copy: do it here */
++              strncpy(param->name, hc->name, sizeof(param->name) - 1);
++              param->name[sizeof(param->name) - 1] = '\0';
++              if (hc->uuid) {
++                      strncpy(param->uuid, hc->uuid, sizeof(param->uuid) - 1);
++                      param->uuid[sizeof(param->uuid) - 1] = '\0';
++              } else
++                      param->uuid[0] = '\0';
++
++              if (hc->new_map)
++                      param->flags |= DM_INACTIVE_PRESENT_FLAG;
++              else
++                      param->flags &= ~DM_INACTIVE_PRESENT_FLAG;
++
++              dm_get(md);     /* the caller must dm_put() the result */
++      }
++      up_read(&_hash_lock);
++
++      return md;
++}
++
++static int dev_remove(struct dm_ioctl *param, size_t param_size)
++{
++      struct hash_cell *hc;
++
++      down_write(&_hash_lock);
++      hc = __find_device_hash_cell(param);
++
++      if (!hc) {
++              DMWARN("device doesn't appear to be in the dev hash table.");
++              up_write(&_hash_lock);
++              return -ENXIO;
++      }
++
++      __hash_remove(hc);
++      up_write(&_hash_lock);
++      param->data_size = 0;
++      return 0;
++}
++
++/*
++ * Check a string doesn't overrun the chunk of
++ * memory we copied from userland.
++ */
++static int invalid_str(char *str, void *end)
++{
++      while ((void *) str < end)
++              if (!*str++)
++                      return 0;
++
++      return -EINVAL;
++}
++
++static int dev_rename(struct dm_ioctl *param, size_t param_size)
++{
++      int r;
++      char *new_name = (char *) param + param->data_start;
++
++      /* Names that do not fit param->name cannot be reported back later. */
++      if (new_name < (char *) (param + 1) ||
++          invalid_str(new_name, (void *) param + param_size) ||
++          strlen(new_name) > sizeof(param->name) - 1) {
++              DMWARN("Invalid new logical volume name supplied.");
++              return -EINVAL;
++      }
++      r = check_name(new_name);
++      if (r)
++              return r;
++      param->data_size = 0;
++      return dm_hash_rename(param->name, new_name);
++}
++
++static int suspend(struct dm_ioctl *param)
++{
++      int r = 0;
++      struct mapped_device *md;
++
++      md = find_device(param);
++      if (!md)
++              return -ENXIO;
++
++      if (!dm_suspended(md))
++              r = dm_suspend(md);
++
++      if (!r)
++              r = __dev_status(md, param);
++
++      dm_put(md);
++      return r;
++}
++
++static int resume(struct dm_ioctl *param)
++{
++      int r = 0;
++      struct hash_cell *hc;
++      struct mapped_device *md;
++      struct dm_table *new_map;
++
++      down_write(&_hash_lock);
++
++      hc = __find_device_hash_cell(param);
++      if (!hc) {
++              DMWARN("device doesn't appear to be in the dev hash table.");
++              up_write(&_hash_lock);
++              return -ENXIO;
++      }
++
++      md = hc->md;
++      dm_get(md);
++
++      new_map = hc->new_map;
++      hc->new_map = NULL;
++      param->flags &= ~DM_INACTIVE_PRESENT_FLAG;
++
++      up_write(&_hash_lock);
++
++      /* Do we need to load a new map ? */
++      if (new_map) {
++              /* Suspend if it isn't already suspended */
++              if (!dm_suspended(md))
++                      dm_suspend(md);
++
++              r = dm_swap_table(md, new_map);
++              if (r) {
++                      dm_put(md);
++                      dm_table_put(new_map);
++                      return r;
++              }
++
++              if (dm_table_get_mode(new_map) & FMODE_WRITE)
++                      set_device_ro(dm_kdev(md), 0);
++              else
++                      set_device_ro(dm_kdev(md), 1);
++
++              dm_table_put(new_map);
++      }
++
++      if (dm_suspended(md))
++              r = dm_resume(md);
++
++      if (!r)
++              r = __dev_status(md, param);
++
++      dm_put(md);
++      return r;
++}
++
++/*
++ * Set or unset the suspension state of a device.
++ * If the device already is in the requested state we just return its status.
++ */
++static int dev_suspend(struct dm_ioctl *param, size_t param_size)
++{
++      if (param->flags & DM_SUSPEND_FLAG)
++              return suspend(param);
++
++      return resume(param);
++}
++
++/*
++ * Copies device info back to user space, used by
++ * the create and info ioctls.
++ */
++static int dev_status(struct dm_ioctl *param, size_t param_size)
++{
++      int r;
++      struct mapped_device *md;
++
++      md = find_device(param);
++      if (!md)
++              return -ENXIO;
++
++      r = __dev_status(md, param);
++      dm_put(md);
++      return r;
++}
++
++/*
++ * Wait for a device to report an event
++ */
++static int dev_wait(struct dm_ioctl *param, size_t param_size)
++{
++      int r;
++      struct mapped_device *md;
++      DECLARE_WAITQUEUE(wq, current);
++
++      md = find_device(param);
++      if (!md)
++              return -ENXIO;
++
++      /*
++       * Wait for a notification event
++       */
++      set_current_state(TASK_INTERRUPTIBLE);
++      if (!dm_add_wait_queue(md, &wq, param->event_nr)) {
++              schedule();
++              dm_remove_wait_queue(md, &wq);
++      }
++      set_current_state(TASK_RUNNING);
++
++      /*
++       * The userland program is going to want to know what
++       * changed to trigger the event, so we may as well tell
++       * him and save an ioctl.
++       */
++      r = __dev_status(md, param);
++
++      dm_put(md);
++      return r;
++}
++
++static inline int get_mode(struct dm_ioctl *param)
++{
++      int mode = FMODE_READ | FMODE_WRITE;
++
++      if (param->flags & DM_READONLY_FLAG)
++              mode = FMODE_READ;
++
++      return mode;
++}
++
++static int next_target(struct dm_target_spec *last, uint32_t next, void *end,
++                     struct dm_target_spec **spec, char **target_params)
++{
++      *spec = (struct dm_target_spec *) ((unsigned char *) last + next);
++      *target_params = (char *) (*spec + 1);
++
++      if (*spec < (last + 1))
++              return -EINVAL;
++
++      return invalid_str(*target_params, end);
++}
++
++static int populate_table(struct dm_table *table, struct dm_ioctl *param,
++                        size_t param_size)
++{
++      int r;
++      unsigned int i = 0;
++      struct dm_target_spec *spec = (struct dm_target_spec *) param;
++      uint32_t next = param->data_start;
++      void *end = (void *) param + param_size;
++      char *target_params;
++
++      if (!param->target_count) {
++              DMWARN("populate_table: no targets specified");
++              return -EINVAL;
++      }
++
++      for (i = 0; i < param->target_count; i++) {
++
++              r = next_target(spec, next, end, &spec, &target_params);
++              if (r) {
++                      DMWARN("unable to find target");
++                      return r;
++              }
++
++              r = dm_table_add_target(table, spec->target_type,
++                                      (sector_t) spec->sector_start,
++                                      (sector_t) spec->length,
++                                      target_params);
++              if (r) {
++                      DMWARN("error adding target to table");
++                      return r;
++              }
++
++              next = spec->next;
++      }
++
++      return dm_table_complete(table);
++}
++
++static int table_load(struct dm_ioctl *param, size_t param_size)
++{
++      int r;
++      struct hash_cell *hc;
++      struct dm_table *t;
++
++      r = dm_table_create(&t, get_mode(param));
++      if (r)
++              return r;
++      r = populate_table(t, param, param_size);
++      if (r) {
++              dm_table_put(t);
++              return r;
++      }
++
++      down_write(&_hash_lock);
++      hc = __find_device_hash_cell(param);
++      if (!hc) {
++              DMWARN("device doesn't appear to be in the dev hash table.");
++              up_write(&_hash_lock);
++              dm_table_put(t);        /* nobody else owns the new table */
++              return -ENXIO;
++      }
++      if (hc->new_map)                /* drop a table loaded but never resumed */
++              dm_table_put(hc->new_map);
++      hc->new_map = t;
++      param->flags |= DM_INACTIVE_PRESENT_FLAG;
++      r = __dev_status(hc->md, param);
++      up_write(&_hash_lock);
++      return r;
++}
++
++static int table_clear(struct dm_ioctl *param, size_t param_size)
++{
++      int r;
++      struct hash_cell *hc;
++
++      down_write(&_hash_lock);
++
++      hc = __find_device_hash_cell(param);
++      if (!hc) {
++              DMWARN("device doesn't appear to be in the dev hash table.");
++              up_write(&_hash_lock);
++              return -ENXIO;
++      }
++
++      if (hc->new_map) {
++              dm_table_put(hc->new_map);
++              hc->new_map = NULL;
++      }
++
++      param->flags &= ~DM_INACTIVE_PRESENT_FLAG;
++
++      r = __dev_status(hc->md, param);
++      up_write(&_hash_lock);
++      return r;
++}
++
++/*
++ * Retrieves a list of devices used by a particular dm device.
++ */
++static void retrieve_deps(struct dm_table *table, struct dm_ioctl *param,
++                        size_t param_size)
++{
++      unsigned int count = 0;
++      struct list_head *tmp;
++      size_t len, needed;
++      struct dm_target_deps *deps;
++
++      deps = get_result_buffer(param, param_size, &len);
++
++      /*
++       * Count the devices.
++       */
++      list_for_each(tmp, dm_table_get_devices(table))
++              count++;
++
++      /*
++       * Check we have enough space.
++       */
++      needed = sizeof(*deps) + (sizeof(*deps->dev) * count);
++      if (len < needed) {
++              param->flags |= DM_BUFFER_FULL_FLAG;
++              return;
++      }
++
++      /*
++       * Fill in the devices.
++       */
++      deps->count = count;
++      count = 0;
++      list_for_each(tmp, dm_table_get_devices(table)) {
++              struct dm_dev *dd = list_entry(tmp, struct dm_dev, list);
++              deps->dev[count++] = dd->bdev->bd_dev;
++      }
++
++      param->data_size = param->data_start + needed;
++}
++
++static int table_deps(struct dm_ioctl *param, size_t param_size)
++{
++      int r;
++      struct mapped_device *md;
++      struct dm_table *table;
++
++      md = find_device(param);
++      if (!md)
++              return -ENXIO;
++
++      r = __dev_status(md, param);
++      if (r)
++              goto out;
++
++      table = dm_get_table(md);
++      if (table) {
++              retrieve_deps(table, param, param_size);
++              dm_table_put(table);
++      }
++
++ out:
++      dm_put(md);
++      return r;
++}
++
++/*
++ * Build up the status struct for each target
++ */
++static void retrieve_status(struct dm_table *table, struct dm_ioctl *param,
++                          size_t param_size)
++{
++      unsigned int i, num_targets;
++      struct dm_target_spec *spec;
++      char *outbuf, *outptr;
++      status_type_t type;
++      size_t remaining, len, used = 0;
++
++      outptr = outbuf = get_result_buffer(param, param_size, &len);
++
++      if (param->flags & DM_STATUS_TABLE_FLAG)
++              type = STATUSTYPE_TABLE;
++      else
++              type = STATUSTYPE_INFO;
++
++      /* Get all the target info */
++      num_targets = dm_table_get_num_targets(table);
++      for (i = 0; i < num_targets; i++) {
++              struct dm_target *ti = dm_table_get_target(table, i);
++
++              /* spec + NUL must fit; written so the size_t can't wrap */
++              if ((size_t) (outptr - outbuf) >= len ||
++                  len - (outptr - outbuf) <= sizeof(struct dm_target_spec)) {
++                      param->flags |= DM_BUFFER_FULL_FLAG;
++                      break;
++              }
++              spec = (struct dm_target_spec *) outptr;
++
++              spec->status = 0;
++              spec->sector_start = ti->begin;
++              spec->length = ti->len;
++              strncpy(spec->target_type, ti->type->name,
++                      sizeof(spec->target_type));
++
++              outptr += sizeof(struct dm_target_spec);
++              remaining = len - (outptr - outbuf);
++
++              /* Get the status/table string from the target driver */
++              if (ti->type->status) {
++                      if (ti->type->status(ti, type, outptr, remaining)) {
++                              param->flags |= DM_BUFFER_FULL_FLAG;
++                              break;
++                      }
++              } else
++                      outptr[0] = '\0';
++
++              outptr += strlen(outptr) + 1;
++              used = param->data_start + (outptr - outbuf);
++
++              align_ptr(outptr);
++              spec->next = outptr - outbuf;
++      }
++
++      if (used)
++              param->data_size = used;
++
++      param->target_count = num_targets;
++}
++
++/*
++ * Return the status of a device as a text string for each
++ * target.
++ */
++static int table_status(struct dm_ioctl *param, size_t param_size)
++{
++      int r;
++      struct mapped_device *md;
++      struct dm_table *table;
++
++      md = find_device(param);
++      if (!md)
++              return -ENXIO;
++
++      r = __dev_status(md, param);
++      if (r)
++              goto out;
++ 
++      table = dm_get_table(md);
++      if (table) {
++              retrieve_status(table, param, param_size);
++              dm_table_put(table);
++      }
++
++ out:
++      dm_put(md);
++      return r;
++}
++
++/*-----------------------------------------------------------------
++ * Implementation of open/close/ioctl on the special char
++ * device.
++ *---------------------------------------------------------------*/
++static ioctl_fn lookup_ioctl(unsigned int cmd)
++{
++      static struct {
++              int cmd;
++              ioctl_fn fn;
++      } _ioctls[] = {
++              {DM_VERSION_CMD, NULL}, /* version is dealt with elsewhere */
++              {DM_REMOVE_ALL_CMD, remove_all},
++              {DM_LIST_DEVICES_CMD, list_devices},
++
++              {DM_DEV_CREATE_CMD, dev_create},
++              {DM_DEV_REMOVE_CMD, dev_remove},
++              {DM_DEV_RENAME_CMD, dev_rename},
++              {DM_DEV_SUSPEND_CMD, dev_suspend},
++              {DM_DEV_STATUS_CMD, dev_status},
++              {DM_DEV_WAIT_CMD, dev_wait},
++
++              {DM_TABLE_LOAD_CMD, table_load},
++              {DM_TABLE_CLEAR_CMD, table_clear},
++              {DM_TABLE_DEPS_CMD, table_deps},
++              {DM_TABLE_STATUS_CMD, table_status}
++      };
++
++      return (cmd >= ARRAY_SIZE(_ioctls)) ? NULL : _ioctls[cmd].fn;
++}
++
++/*
++ * As well as checking the version compatibility this always
++ * copies the kernel interface version out.
++ */
++static int check_version(unsigned int cmd, struct dm_ioctl *user)
++{
++      uint32_t version[3];
++      int r = 0;
++
++      if (copy_from_user(version, user->version, sizeof(version)))
++              return -EFAULT;
++
++      if ((DM_VERSION_MAJOR != version[0]) ||
++          (DM_VERSION_MINOR < version[1])) {
++              DMWARN("ioctl interface mismatch: "
++                     "kernel(%u.%u.%u), user(%u.%u.%u), cmd(%d)",
++                     DM_VERSION_MAJOR, DM_VERSION_MINOR,
++                     DM_VERSION_PATCHLEVEL,
++                     version[0], version[1], version[2], cmd);
++              r = -EINVAL;
++      }
++
++      /*
++       * Fill in the kernel version.
++       */
++      version[0] = DM_VERSION_MAJOR;
++      version[1] = DM_VERSION_MINOR;
++      version[2] = DM_VERSION_PATCHLEVEL;
++      if (copy_to_user(user->version, version, sizeof(version)))
++              return -EFAULT;
++
++      return r;
++}
++
++static void free_params(struct dm_ioctl *param)
++{
++      vfree(param);
++}
++
++static int copy_params(struct dm_ioctl *user, struct dm_ioctl **param)
++{
++      struct dm_ioctl tmp, *dmi;
++
++      if (copy_from_user(&tmp, user, sizeof(tmp)))
++              return -EFAULT;
++      if (tmp.data_size < sizeof(tmp))
++              return -EINVAL;
++
++      dmi = (struct dm_ioctl *) vmalloc(tmp.data_size);
++      if (!dmi)
++              return -ENOMEM;
++
++      if (copy_from_user(dmi, user, tmp.data_size)) {
++              vfree(dmi);
++              return -EFAULT;
++      }
++      /* The second fetch can race with userland: keep the checked size. */
++      dmi->data_size = tmp.data_size;
++      *param = dmi;
++      return 0;
++}
++
++static int validate_params(uint cmd, struct dm_ioctl *param)
++{
++      /* Always clear this flag */
++      param->flags &= ~DM_BUFFER_FULL_FLAG;
++
++      /* Ignores parameters */
++      if (cmd == DM_REMOVE_ALL_CMD || cmd == DM_LIST_DEVICES_CMD)
++              return 0;
++
++      /* Unless creating, either name or uuid but not both */
++      if (cmd != DM_DEV_CREATE_CMD) {
++              if ((!*param->uuid && !*param->name) ||
++                  (*param->uuid && *param->name)) {
++                      DMWARN("one of name or uuid must be supplied, cmd(%u)",
++                             cmd);
++                      return -EINVAL;
++              }
++      }
++
++      /* Ensure strings are terminated */
++      param->name[DM_NAME_LEN - 1] = '\0';
++      param->uuid[DM_UUID_LEN - 1] = '\0';
++
++      return 0;
++}
++
++static int ctl_ioctl(struct inode *inode, struct file *file,
++                   uint command, ulong u)
++{
++      int r = 0;
++      unsigned int cmd;
++      struct dm_ioctl *param;
++      struct dm_ioctl *user = (struct dm_ioctl *) u;
++      ioctl_fn fn = NULL;
++      size_t param_size;
++
++      /* only root can play with this */
++      if (!capable(CAP_SYS_ADMIN))
++              return -EACCES;
++
++      if (_IOC_TYPE(command) != DM_IOCTL)
++              return -ENOTTY;
++
++      cmd = _IOC_NR(command);
++
++      /*
++       * Check the interface version passed in.  This also
++       * writes out the kernel's interface version.
++       */
++      r = check_version(cmd, user);
++      if (r)
++              return r;
++
++      /*
++       * Nothing more to do for the version command.
++       */
++      if (cmd == DM_VERSION_CMD)
++              return 0;
++
++      fn = lookup_ioctl(cmd);
++      if (!fn) {
++              DMWARN("dm_ctl_ioctl: unknown command 0x%x", command);
++              return -ENOTTY;
++      }
++
++      /*
++       * FIXME: I don't like this, we're trying to avoid low
++       * memory issues when a device is suspended.
++       */
++      current->flags |= PF_MEMALLOC;
++
++      /*
++       * Copy the parameters into kernel space.
++       */
++      r = copy_params(user, &param);
++      if (r) {
++              current->flags &= ~PF_MEMALLOC;
++              return r;
++      }
++
++      r = validate_params(cmd, param);
++      if (r)
++              goto out;
++
++      param_size = param->data_size;
++      param->data_size = sizeof(*param);
++      r = fn(param, param_size);
++
++      /*
++       * Copy the results back to userland.
++       */
++      if (!r && copy_to_user(user, param, param->data_size))
++              r = -EFAULT;
++
++ out:
++      free_params(param);
++      current->flags &= ~PF_MEMALLOC;
++      return r;
++}
++
++static struct file_operations _ctl_fops = {
++      .ioctl   = ctl_ioctl,
++      .owner   = THIS_MODULE,
++};
++
++static devfs_handle_t _ctl_handle;
++
++static struct miscdevice _dm_misc = {
++      .minor = MISC_DYNAMIC_MINOR,
++      .name  = DM_NAME,
++      .fops  = &_ctl_fops
++};
++
++/*
++ * Create misc character device and link to DM_DIR/control.
++ */
++int __init dm_interface_init(void)
++{
++      int r;
++      char rname[64];
++
++      r = dm_hash_init();
++      if (r)
++              return r;
++
++      r = misc_register(&_dm_misc);
++      if (r) {
++              DMERR("misc_register failed for control device");
++              dm_hash_exit();
++              return r;
++      }
++
++      r = devfs_generate_path(_dm_misc.devfs_handle, rname + 3,
++                              sizeof rname - 3);
++      if (r == -ENOSYS)
++              goto done;      /* devfs not present */
++
++      if (r < 0) {
++              DMERR("devfs_generate_path failed for control device");
++              goto failed;
++      }
++
++      strncpy(rname + r, "../", 3);
++      r = devfs_mk_symlink(NULL, DM_DIR "/control",
++                           DEVFS_FL_DEFAULT, rname + r, &_ctl_handle, NULL);
++      if (r) {
++              DMERR("devfs_mk_symlink failed for control device");
++              goto failed;
++      }
++      devfs_auto_unregister(_dm_misc.devfs_handle, _ctl_handle);
++
++      done:
++      DMINFO("%d.%d.%d%s initialised: %s", DM_VERSION_MAJOR,
++             DM_VERSION_MINOR, DM_VERSION_PATCHLEVEL, DM_VERSION_EXTRA,
++             DM_DRIVER_EMAIL);
++      return 0;
++
++      failed:
++      misc_deregister(&_dm_misc);
++      dm_hash_exit();
++      return r;
++}
++
++void dm_interface_exit(void)
++{
++      if (misc_deregister(&_dm_misc) < 0)
++              DMERR("misc_deregister failed for control device");
++
++      dm_hash_exit();
++}
+diff -ruN linux-2.4.21-dm-real/drivers/md/dm-linear.c linux-2.4.21/drivers/md/dm-linear.c
+--- linux-2.4.21-dm-real/drivers/md/dm-linear.c        Thu Jan  1 01:00:00 1970
++++ linux-2.4.21/drivers/md/dm-linear.c        Sat Jul 12 18:14:05 2003
+@@ -0,0 +1,123 @@
++/*
++ * Copyright (C) 2001 Sistina Software (UK) Limited.
++ *
++ * This file is released under the GPL.
++ */
++
++#include "dm.h"
++
++#include <linux/module.h>
++#include <linux/init.h>
++#include <linux/blkdev.h>
++#include <linux/slab.h>
++
++/*
++ * Linear: maps a linear range of a device.
++ */
++struct linear_c {
++      struct dm_dev *dev;
++      sector_t start;
++};
++
++/*
++ * Construct a linear mapping: <dev_path> <offset>
++ */
++static int linear_ctr(struct dm_target *ti, unsigned int argc, char **argv)
++{
++      struct linear_c *lc;
++
++      if (argc != 2) {
++              ti->error = "dm-linear: Invalid argument count";
++              return -EINVAL;
++      }
++
++      lc = kmalloc(sizeof(*lc), GFP_KERNEL);
++      if (lc == NULL) {
++              ti->error = "dm-linear: Cannot allocate linear context";
++              return -ENOMEM;
++      }
++
++      if (sscanf(argv[1], SECTOR_FORMAT, &lc->start) != 1) {
++              ti->error = "dm-linear: Invalid device sector";
++              goto bad;
++      }
++
++      if (dm_get_device(ti, argv[0], lc->start, ti->len,
++                        dm_table_get_mode(ti->table), &lc->dev)) {
++              ti->error = "dm-linear: Device lookup failed";
++              goto bad;
++      }
++
++      ti->private = lc;
++      return 0;
++
++      bad:
++      kfree(lc);
++      return -EINVAL;
++}
++
++static void linear_dtr(struct dm_target *ti)
++{
++      struct linear_c *lc = (struct linear_c *) ti->private;
++
++      dm_put_device(ti, lc->dev);
++      kfree(lc);
++}
++
++static int linear_map(struct dm_target *ti, struct buffer_head *bh, int rw,
++                    union map_info *map_context)
++{
++      struct linear_c *lc = (struct linear_c *) ti->private;
++
++      bh->b_rdev = lc->dev->dev;
++      bh->b_rsector = lc->start + (bh->b_rsector - ti->begin);
++
++      return 1;
++}
++
++static int linear_status(struct dm_target *ti, status_type_t type,
++                       char *result, unsigned int maxlen)
++{
++      struct linear_c *lc = (struct linear_c *) ti->private;
++      kdev_t kdev;
++
++      switch (type) {
++      case STATUSTYPE_INFO:
++              result[0] = '\0';
++              break;
++
++      case STATUSTYPE_TABLE:
++              kdev = to_kdev_t(lc->dev->bdev->bd_dev);
++              snprintf(result, maxlen, "%s " SECTOR_FORMAT,
++                       dm_kdevname(kdev), lc->start);
++              break;
++      }
++      return 0;
++}
++
++static struct target_type linear_target = {
++      .name   = "linear",
++      .module = THIS_MODULE,
++      .ctr    = linear_ctr,
++      .dtr    = linear_dtr,
++      .map    = linear_map,
++      .status = linear_status,
++};
++
++int __init dm_linear_init(void)
++{
++      int r = dm_register_target(&linear_target);
++
++      if (r < 0)
++              DMERR("linear: register failed %d", r);
++
++      return r;
++}
++
++void dm_linear_exit(void)
++{
++      int r = dm_unregister_target(&linear_target);
++
++      if (r < 0)
++              DMERR("linear: unregister failed %d", r);
++}
+diff -ruN linux-2.4.21-dm-real/drivers/md/dm-log.c linux-2.4.21/drivers/md/dm-log.c
+--- linux-2.4.21-dm-real/drivers/md/dm-log.c   Thu Jan  1 01:00:00 1970
++++ linux-2.4.21/drivers/md/dm-log.c   Sat Jul 12 18:14:05 2003
+@@ -0,0 +1,302 @@
++/*
++ * Copyright (C) 2003 Sistina Software
++ *
++ * This file is released under the LGPL.
++ */
++
++#include <linux/init.h>
++#include <linux/slab.h>
++#include <linux/module.h>
++#include <linux/vmalloc.h>
++
++#include "dm-log.h"
++#include "dm-io.h"
++
++static LIST_HEAD(_log_types);
++static spinlock_t _lock = SPIN_LOCK_UNLOCKED;
++
++int dm_register_dirty_log_type(struct dirty_log_type *type)
++{
++      spin_lock(&_lock);
++      type->use_count = 0;
++      if (type->module)
++              __MOD_INC_USE_COUNT(type->module);
++
++      list_add(&type->list, &_log_types);
++      spin_unlock(&_lock);
++
++      return 0;
++}
++
++int dm_unregister_dirty_log_type(struct dirty_log_type *type)
++{
++      spin_lock(&_lock);
++
++      if (type->use_count)
++              DMWARN("Attempt to unregister a log type that is still in use");
++      else {
++              list_del(&type->list);
++              if (type->module)
++                      __MOD_DEC_USE_COUNT(type->module);
++      }
++
++      spin_unlock(&_lock);
++
++      return 0;
++}
++
++static struct dirty_log_type *get_type(const char *type_name)
++{
++      struct dirty_log_type *type;
++      struct list_head *tmp;
++
++      spin_lock(&_lock);
++      list_for_each (tmp, &_log_types) {
++              type = list_entry(tmp, struct dirty_log_type, list);
++              if (!strcmp(type_name, type->name)) {
++                      type->use_count++;
++                      spin_unlock(&_lock);
++                      return type;
++              }
++      }
++
++      spin_unlock(&_lock);
++      return NULL;
++}
++
++static void put_type(struct dirty_log_type *type)
++{
++      spin_lock(&_lock);
++      type->use_count--;
++      spin_unlock(&_lock);
++}
++
++struct dirty_log *dm_create_dirty_log(const char *type_name, sector_t dev_size,
++                                    unsigned int argc, char **argv)
++{
++      struct dirty_log_type *type;
++      struct dirty_log *log;
++
++      log = kmalloc(sizeof(*log), GFP_KERNEL);
++      if (!log)
++              return NULL;
++
++      type = get_type(type_name);
++      if (!type) {
++              kfree(log);
++              return NULL;
++      }
++
++      log->type = type;
++      if (type->ctr(log, dev_size, argc, argv)) {
++              kfree(log);
++              put_type(type);
++              return NULL;
++      }
++
++      return log;
++}
++
++void dm_destroy_dirty_log(struct dirty_log *log)
++{
++      log->type->dtr(log);
++      put_type(log->type);
++      kfree(log);
++}
++
++
++/*-----------------------------------------------------------------
++ * In core log, ie. trivial, non-persistent
++ *
++ * For now we'll keep this simple and just have 2 bitsets, one
++ * for clean/dirty, the other for sync/nosync.  The sync bitset
++ * will be freed when everything is in sync.
++ *
++ * FIXME: problems with a 64bit sector_t
++ *---------------------------------------------------------------*/
++struct core_log {
++      sector_t region_size;
++      unsigned int region_count;
++      unsigned long *clean_bits;
++      unsigned long *sync_bits;
++      unsigned long *recovering_bits; /* FIXME: this seems excessive */
++
++      int sync_search;
++};
++
++static int core_ctr(struct dirty_log *log, sector_t dev_size,
++                  unsigned int argc, char **argv)
++{
++      struct core_log *clog;
++      sector_t region_size;
++      unsigned int region_count;
++      size_t bitset_size;
++
++      if (argc != 1) {
++              DMWARN("wrong number of arguments to core_log");
++              return -EINVAL;
++      }
++
++      if (sscanf(argv[0], SECTOR_FORMAT, &region_size) != 1 || !region_size) {
++              DMWARN("invalid region size string");
++              return -EINVAL;
++      }
++      /* non-zero checked above: dm_div_up() presumably divides by this */
++      region_count = dm_div_up(dev_size, region_size);
++      clog = kmalloc(sizeof(*clog), GFP_KERNEL);
++      if (!clog) {
++              DMWARN("couldn't allocate core log");
++              return -ENOMEM;
++      }
++      clog->region_size = region_size;
++      clog->region_count = region_count;
++
++      /* One bit per region: round up to whole longs, then to bytes. */
++      bitset_size = dm_round_up(region_count,
++                                (sizeof(*clog->clean_bits) << 3)) >> 3;
++      clog->clean_bits = vmalloc(bitset_size);
++      if (!clog->clean_bits) {
++              DMWARN("couldn't allocate clean bitset");
++              kfree(clog);
++              return -ENOMEM;
++      }
++      memset(clog->clean_bits, -1, bitset_size);
++
++      clog->sync_bits = vmalloc(bitset_size);
++      if (!clog->sync_bits) {
++              DMWARN("couldn't allocate sync bitset");
++              vfree(clog->clean_bits);
++              kfree(clog);
++              return -ENOMEM;
++      }
++      memset(clog->sync_bits, 0, bitset_size);
++
++      clog->recovering_bits = vmalloc(bitset_size);
++      if (!clog->recovering_bits) {
++              DMWARN("couldn't allocate recovering bitset");
++              vfree(clog->sync_bits);
++              vfree(clog->clean_bits);
++              kfree(clog);
++              return -ENOMEM;
++      }
++      memset(clog->recovering_bits, 0, bitset_size);
++      clog->sync_search = 0;
++      log->context = clog;
++      return 0;
++}
++
++static void core_dtr(struct dirty_log *log)
++{
++      struct core_log *clog = (struct core_log *) log->context;
++      vfree(clog->clean_bits);
++      vfree(clog->sync_bits);
++      vfree(clog->recovering_bits);
++      kfree(clog);
++}
++
++static sector_t core_get_region_size(struct dirty_log *log)
++{
++      struct core_log *clog = (struct core_log *) log->context;
++      return clog->region_size;
++}
++
++static int core_is_clean(struct dirty_log *log, region_t region)
++{
++      struct core_log *clog = (struct core_log *) log->context;
++      return test_bit(region, clog->clean_bits);
++}
++
++static int core_in_sync(struct dirty_log *log, region_t region, int block)
++{
++      struct core_log *clog = (struct core_log *) log->context;
++
++      return test_bit(region, clog->sync_bits) ? 1 : 0;
++}
++
++static int core_flush(struct dirty_log *log)
++{
++      /* no op */
++      return 0;
++}
++
++static void core_mark_region(struct dirty_log *log, region_t region)
++{
++      struct core_log *clog = (struct core_log *) log->context;
++      clear_bit(region, clog->clean_bits);
++}
++
++static void core_clear_region(struct dirty_log *log, region_t region)
++{
++      struct core_log *clog = (struct core_log *) log->context;
++      set_bit(region, clog->clean_bits);
++}
++
++static int core_get_resync_work(struct dirty_log *log, region_t *region)
++{
++      struct core_log *clog = (struct core_log *) log->context;
++
++      if (clog->sync_search >= clog->region_count)
++              return 0;
++
++      do {
++              *region = find_next_zero_bit(clog->sync_bits,
++                                           clog->region_count,
++                                           clog->sync_search);
++              clog->sync_search = *region + 1;
++
++              if (*region == clog->region_count)
++                      return 0;
++
++      } while (test_bit(*region, clog->recovering_bits));
++
++      set_bit(*region, clog->recovering_bits);
++      return 1;
++}
++
++static void core_complete_resync_work(struct dirty_log *log, region_t region,
++                                    int success)
++{
++      struct core_log *clog = (struct core_log *) log->context;
++
++      clear_bit(region, clog->recovering_bits);
++      if (success)
++              set_bit(region, clog->sync_bits);
++}
++
++static struct dirty_log_type _core_type = {
++      .name = "core",
++
++      .ctr = core_ctr,
++      .dtr = core_dtr,
++      .get_region_size = core_get_region_size,
++      .is_clean = core_is_clean,
++      .in_sync = core_in_sync,
++      .flush = core_flush,
++      .mark_region = core_mark_region,
++      .clear_region = core_clear_region,
++      .get_resync_work = core_get_resync_work,
++      .complete_resync_work = core_complete_resync_work
++};
++
++__init int dm_dirty_log_init(void)
++{
++      int r;
++
++      r = dm_register_dirty_log_type(&_core_type);
++      if (r)
++              DMWARN("couldn't register core log");
++
++      return r;
++}
++
++void dm_dirty_log_exit(void)
++{
++      dm_unregister_dirty_log_type(&_core_type);      /* undoes dm_dirty_log_init() */
++}
++
++EXPORT_SYMBOL(dm_register_dirty_log_type);
++EXPORT_SYMBOL(dm_unregister_dirty_log_type);
++EXPORT_SYMBOL(dm_dirty_log_init);       /* NOTE(review): exported although marked __init - confirm intended */
++EXPORT_SYMBOL(dm_dirty_log_exit);
++EXPORT_SYMBOL(dm_create_dirty_log);
++EXPORT_SYMBOL(dm_destroy_dirty_log);
+diff -ruN linux-2.4.21-dm-real/drivers/md/dm-log.h linux-2.4.21/drivers/md/dm-log.h
+--- linux-2.4.21-dm-real/drivers/md/dm-log.h   Thu Jan  1 01:00:00 1970
++++ linux-2.4.21/drivers/md/dm-log.h   Sat Jul 12 18:14:05 2003
+@@ -0,0 +1,112 @@
++/*
++ * Copyright (C) 2003 Sistina Software
++ *
++ * This file is released under the LGPL.
++ */
++
++#ifndef DM_DIRTY_LOG
++#define DM_DIRTY_LOG
++
++#include "dm.h"
++
++typedef sector_t region_t;
++
++struct dirty_log_type;
++
++struct dirty_log {
++      struct dirty_log_type *type;    /* operations implementing this log */
++      void *context;                  /* type-private state (the core log casts it to struct core_log) */
++};
++
++struct dirty_log_type {
++      struct list_head list;
++      const char *name;
++      struct module *module;
++      unsigned int use_count;
++
++      int (*ctr)(struct dirty_log *log, sector_t dev_size,
++                 unsigned int argc, char **argv);
++      void (*dtr)(struct dirty_log *log);
++
++      /*
++       * Retrieves the smallest size of region that the log can
++       * deal with.
++       */
++      sector_t (*get_region_size)(struct dirty_log *log);
++
++      /*
++       * A predicate to say whether a region is clean or not.
++       * May block.
++       */
++      int (*is_clean)(struct dirty_log *log, region_t region);
++
++      /*
++       *  Returns: 0, 1, -EWOULDBLOCK, < 0
++       *
++       * A predicate function to check whether the given
++       * region is in sync.
++       *
++       * If -EWOULDBLOCK is returned the state of the region is
++       * unknown, typically this will result in a read being
++       * passed to a daemon to deal with, since a daemon is
++       * allowed to block.
++       */
++      int (*in_sync)(struct dirty_log *log, region_t region, int can_block);
++
++      /*
++       * Flush the current log state (eg, to disk).  This
++       * function may block.
++       */
++      int (*flush)(struct dirty_log *log);
++
++      /*
++       * Mark a region as dirty (mark_region) or clean
++       * (clear_region).  These functions may block, though for
++       * performance reasons blocking should be extremely rare
++       * (eg, allocating another chunk of memory for some reason).
++       */
++      void (*mark_region)(struct dirty_log *log, region_t region);
++      void (*clear_region)(struct dirty_log *log, region_t region);
++
++      /*
++       * Returns: <0 (error), 0 (no region), 1 (region)
++       *
++       * The mirrord will need to perform recovery on regions of
++       * the mirror that are in the NOSYNC state.  This
++       * function asks the log to tell the caller about the
++       * next region that this machine should recover.
++       *
++       * Do not confuse this function with 'in_sync()', one
++       * tells you if an area is synchronised, the other
++       * assigns recovery work.
++       */
++      int (*get_resync_work)(struct dirty_log *log, region_t *region);
++
++      /*
++       * This notifies the log that the resync of an area has
++       * been completed.  The log should then mark this region
++       * as CLEAN.
++       */
++      void (*complete_resync_work)(struct dirty_log *log,
++                                   region_t region, int success);
++};
++
++int dm_register_dirty_log_type(struct dirty_log_type *type);
++int dm_unregister_dirty_log_type(struct dirty_log_type *type);
++
++
++/*
++ * Make sure you use these two functions, rather than calling
++ * type->ctr()/type->dtr() directly.
++ */
++struct dirty_log *dm_create_dirty_log(const char *type_name, sector_t dev_size,
++                                    unsigned int argc, char **argv);
++void dm_destroy_dirty_log(struct dirty_log *log);
++
++/*
++ * init/exit functions.
++ */
++int dm_dirty_log_init(void);
++void dm_dirty_log_exit(void);
++
++#endif
+diff -ruN linux-2.4.21-dm-real/drivers/md/dm-raid1.c linux-2.4.21/drivers/md/dm-raid1.c
+--- linux-2.4.21-dm-real/drivers/md/dm-raid1.c Thu Jan  1 01:00:00 1970
++++ linux-2.4.21/drivers/md/dm-raid1.c Sat Jul 12 18:14:05 2003
+@@ -0,0 +1,1297 @@
++/*
++ * Copyright (C) 2003 Sistina Software Limited.
++ *
++ * This file is released under the GPL.
++ */
++
++#include "dm.h"
++#include "dm-daemon.h"
++#include "dm-io.h"
++#include "dm-log.h"
++#include "kcopyd.h"
++
++#include <linux/ctype.h>
++#include <linux/init.h>
++#include <linux/mempool.h>
++#include <linux/module.h>
++#include <linux/pagemap.h>
++#include <linux/slab.h>
++#include <linux/time.h>
++#include <linux/vmalloc.h>
++
++static struct dm_daemon _kmirrord;      /* the daemon poked through dm_daemon_wake() */
++
++/*-----------------------------------------------------------------
++ * buffer lists:
++ *
++ * We play with singly linked lists of buffers, but we want to be
++ * careful to add new buffers to the back of the list, to avoid
++ * buffers being starved of attention.
++ *---------------------------------------------------------------*/
++struct buffer_list {
++      struct buffer_head *head;       /* next buffer to be popped; NULL when empty */
++      struct buffer_head *tail;       /* last buffer; new ones are chained behind it via b_reqnext */
++};
++
++static inline void buffer_list_init(struct buffer_list *bl)
++{
++      bl->head = bl->tail = NULL;     /* empty list */
++}
++
++static inline void buffer_list_add(struct buffer_list *bl,
++                                 struct buffer_head *bh)
++{
++      bh->b_reqnext = NULL;           /* bh becomes the last element */
++
++      if (!bl->tail)
++              bl->head = bh;                  /* list was empty */
++      else
++              bl->tail->b_reqnext = bh;       /* chain behind the old tail */
++      bl->tail = bh;
++}
++
++static struct buffer_head *buffer_list_pop(struct buffer_list *bl)
++{
++      struct buffer_head *first = bl->head;
++
++      if (!first)
++              return NULL;    /* nothing queued */
++
++      /* unlink the front element; an emptied list has no tail either */
++      bl->head = first->b_reqnext;
++      if (!bl->head)
++              bl->tail = NULL;
++      first->b_reqnext = NULL;
++      return first;
++}
++
++/*-----------------------------------------------------------------
++ * Region hash
++ *
++ * The mirror splits itself up into discrete regions.  Each
++ * region can be in one of four states: clean, dirty, nosync,
++ * recovering.  There is no need to put clean regions in the hash.
++ *
++ * In addition to being present in the hash table a region _may_
++ * be present on one of three lists.
++ *
++ *   clean_regions: Regions on this list have no io pending to
++ *   them, they are in sync, we are no longer interested in them,
++ *   they are dull.  rh_update_states() will remove them from the
++ *   hash table.
++ *
++ *   quiesced_regions: These regions have been spun down, ready
++ *   for recovery.  rh_recovery_start() will remove regions from
++ *   this list and hand them to kmirrord, which will schedule the
++ *   recovery io with kcopyd.
++ *
++ *   recovered_regions: Regions that kcopyd has successfully
++ *   recovered.  rh_update_states() will now schedule any delayed
++ *   io, up the recovery_count, and remove the region from the
++ *   hash.
++ *
++ * There are 2 locks:
++ *   A rw spin lock 'hash_lock' protects just the hash table,
++ *   this is never held in write mode from interrupt context,
++ *   which I believe means that we only have to disable irqs when
++ *   doing a write lock.
++ *
++ *   An ordinary spin lock 'region_lock' that protects the three
++ *   lists in the region_hash, with the 'state', 'list' and
++ *   'delayed_bhs' fields of the regions.  This is used from irq
++ *   context, so all other uses will have to suspend local irqs.
++ *---------------------------------------------------------------*/
++struct mirror_set;
++struct region_hash {
++      struct mirror_set *ms;
++      sector_t region_size;           /* sectors per region (divisor in bh_to_region()) */
++
++      /* holds persistent region state */
++      struct dirty_log *log;
++
++      /* hash table */
++      rwlock_t hash_lock;
++      mempool_t *region_pool;         /* backing store for struct region */
++      unsigned int mask;              /* nr_buckets - 1, applied by rh_hash() */
++      unsigned int nr_buckets;
++      struct list_head *buckets;
++
++      spinlock_t region_lock;
++      struct semaphore recovery_count;        /* one token per recovery that may be started */
++      struct list_head clean_regions;
++      struct list_head quiesced_regions;
++      struct list_head recovered_regions;
++};
++
++enum {
++      RH_CLEAN,       /* in sync, no write pending (set by rh_dec() when pending hits 0) */
++      RH_DIRTY,       /* in sync, writes pending (set by rh_inc()) */
++      RH_NOSYNC,      /* the dirty log did not report the region as in sync */
++      RH_RECOVERING   /* picked for resync by __rh_recovery_prepare() */
++};
++
++struct region {
++      struct region_hash *rh; /* FIXME: can we get rid of this ? */
++      region_t key;           /* region number; the hash key */
++      int state;              /* one of the RH_* values */
++
++      struct list_head hash_list;     /* link in rh->buckets[] */
++      struct list_head list;          /* link on the clean/quiesced/recovered list */
++
++      atomic_t pending;               /* raised by rh_inc(), dropped by rh_dec() */
++      struct buffer_head *delayed_bhs;        /* writes parked by rh_delay(), chained via b_reqnext */
++};
++
++/*
++ * Conversion fns
++ */
++static inline region_t bh_to_region(struct region_hash *rh,
++                                  struct buffer_head *bh)
++{
++      return bh->b_rsector / rh->region_size; /* NOTE(review): b_rsector is not rebased by ti->begin here - confirm */
++}
++
++static inline sector_t region_to_sector(struct region_hash *rh, region_t region)
++{
++      return region * rh->region_size;        /* first sector of the region */
++}
++
++/* FIXME move this */
++static void queue_bh(struct mirror_set *ms, struct buffer_head *bh, int rw);
++
++static void *region_alloc(int gfp_mask, void *pool_data)
++{
++      return kmalloc(sizeof(struct region), gfp_mask);        /* mempool alloc hook; pool_data unused */
++}
++
++static void region_free(void *element, void *pool_data)
++{
++      kfree(element); /* mempool free hook; pool_data unused */
++}
++
++#define MIN_REGIONS 64          /* element count handed to mempool_create() */
++#define MAX_RECOVERY 1          /* recovery_count tokens released by rh_start_recovery() */
++static int rh_init(struct region_hash *rh, struct mirror_set *ms,
++                 struct dirty_log *log, sector_t region_size,
++                 region_t nr_regions)
++{
++      unsigned int nr_buckets, max_buckets;
++      size_t i;
++
++      /*
++       * Calculate a suitable number of buckets for our hash
++       * table.
++       */
++      max_buckets = nr_regions >> 6;
++      for (nr_buckets = 128u; nr_buckets < max_buckets; nr_buckets <<= 1)
++              ;       /* first power of two, from 128 up, that is >= max_buckets */
++      nr_buckets >>= 1;       /* ...then halved, so never fewer than 64 buckets */
++
++      rh->ms = ms;
++      rh->log = log;
++      rh->region_size = region_size;
++      rwlock_init(&rh->hash_lock);
++      rh->mask = nr_buckets - 1;      /* works as a mask because nr_buckets is a power of two */
++      rh->nr_buckets = nr_buckets;
++
++      rh->buckets = vmalloc(nr_buckets * sizeof(*rh->buckets));
++      if (!rh->buckets) {
++              DMERR("unable to allocate region hash memory");
++              return -ENOMEM;
++      }
++
++      for (i = 0; i < nr_buckets; i++)
++              INIT_LIST_HEAD(rh->buckets + i);
++
++      spin_lock_init(&rh->region_lock);
++      sema_init(&rh->recovery_count, 0);      /* no tokens until rh_start_recovery() ups it */
++      INIT_LIST_HEAD(&rh->clean_regions);
++      INIT_LIST_HEAD(&rh->quiesced_regions);
++      INIT_LIST_HEAD(&rh->recovered_regions);
++
++      rh->region_pool = mempool_create(MIN_REGIONS, region_alloc,
++                                       region_free, NULL);
++      if (!rh->region_pool) {
++              vfree(rh->buckets);     /* undo the bucket allocation; this path logs nothing */
++              rh->buckets = NULL;
++              return -ENOMEM;
++      }
++
++      return 0;       /* success; both failure paths return -ENOMEM */
++}
++
++static void rh_exit(struct region_hash *rh)
++{
++      unsigned int h;
++      struct region *reg;
++      struct list_head *tmp, *tmp2;
++
++      BUG_ON(!list_empty(&rh->quiesced_regions));     /* nothing may be waiting for recovery */
++      for (h = 0; h < rh->nr_buckets; h++) {
++              list_for_each_safe (tmp, tmp2, rh->buckets + h) {
++                      reg = list_entry(tmp, struct region, hash_list);
++                      BUG_ON(atomic_read(&reg->pending));     /* no io may still be pending */
++                      mempool_free(reg, rh->region_pool);
++              }
++      }
++
++      if (rh->log)
++              dm_destroy_dirty_log(rh->log);  /* the log is torn down together with the hash */
++      if (rh->region_pool)
++              mempool_destroy(rh->region_pool);
++      vfree(rh->buckets);
++}
++
++#define RH_HASH_MULT 2654435387U
++
++static inline unsigned int rh_hash(struct region_hash *rh, region_t region)
++{
++      return (unsigned int) ((region * RH_HASH_MULT) >> 12) & rh->mask;       /* bucket index: multiply, shift, mask */
++}
++
++static struct region *__rh_lookup(struct region_hash *rh, region_t region)
++{
++      struct region *reg;
++
++      list_for_each_entry (reg, rh->buckets + rh_hash(rh, region), hash_list)
++              if (reg->key == region)
++                      return reg;
++
++      return NULL;    /* not in the hash; takes no lock itself, callers hold hash_lock */
++}
++
++static void __rh_insert(struct region_hash *rh, struct region *reg)
++{
++      unsigned int h = rh_hash(rh, reg->key);
++      list_add(&reg->hash_list, rh->buckets + h);     /* at the front of its bucket; no locking done here */
++}
++
++static struct region *__rh_alloc(struct region_hash *rh, region_t region)
++{
++      struct region *reg, *nreg;
++
++      read_unlock(&rh->hash_lock);    /* entered with the read lock held; dropped while we allocate */
++      nreg = mempool_alloc(rh->region_pool, GFP_NOIO);
++      nreg->state = rh->log->type->in_sync(rh->log, region, 1) == 1 ?
++              RH_CLEAN : RH_NOSYNC;   /* as in rh_state(): a log error counts as RH_NOSYNC */
++      nreg->rh = rh;
++      nreg->key = region;
++
++      INIT_LIST_HEAD(&nreg->list);
++
++      atomic_set(&nreg->pending, 0);
++      nreg->delayed_bhs = NULL;
++      write_lock_irq(&rh->hash_lock);
++
++      reg = __rh_lookup(rh, region);
++      if (reg)
++              /* we lost the race */
++              mempool_free(nreg, rh->region_pool);
++      else {
++              __rh_insert(rh, nreg);
++              if (nreg->state == RH_CLEAN) {
++                      /* irqs are already off; an _irq unlock here would re-enable them under the write lock */
++                      spin_lock(&rh->region_lock);
++                      list_add(&nreg->list, &rh->clean_regions);
++                      spin_unlock(&rh->region_lock);
++              }
++              reg = nreg;
++      }
++      write_unlock_irq(&rh->hash_lock);
++      read_lock(&rh->hash_lock);      /* hand the read lock back to the caller */
++
++      return reg;
++}
++
++static inline struct region *__rh_find(struct region_hash *rh, region_t region)
++{
++      struct region *reg;
++
++      reg = __rh_lookup(rh, region);
++      if (!reg)
++              reg = __rh_alloc(rh, region);   /* drops and re-takes the read lock on hash_lock */
++
++      return reg;
++}
++
++static int rh_state(struct region_hash *rh, region_t region, int may_block)
++{
++      int r;
++      struct region *reg;
++
++      read_lock(&rh->hash_lock);
++      reg = __rh_lookup(rh, region);
++      read_unlock(&rh->hash_lock);
++
++      if (reg)
++              return reg->state;      /* NOTE(review): read after hash_lock was dropped - confirm reg cannot be freed here */
++
++      /*
++       * The region wasn't in the hash, so we fall back to the
++       * dirty log.
++       */
++      r = rh->log->type->in_sync(rh->log, region, may_block);
++
++      /*
++       * Any error from the dirty log (eg. -EWOULDBLOCK) gets
++       * taken as a RH_NOSYNC
++       */
++      return r == 1 ? RH_CLEAN : RH_NOSYNC;
++}
++
++static inline int rh_in_sync(struct region_hash *rh,
++                           region_t region, int may_block)
++{
++      int state = rh_state(rh, region, may_block);
++      return state == RH_CLEAN || state == RH_DIRTY;  /* nosync and recovering regions are not in sync */
++}
++
++static void dispatch_buffers(struct mirror_set *ms, struct buffer_head *bh)
++{
++      struct buffer_head *next;
++
++      /* read the link before the buffer is handed on to queue_bh() */
++      for (; bh; bh = next) {
++              next = bh->b_reqnext;
++              queue_bh(ms, bh, WRITE);
++      }
++}
++
++static void rh_update_states(struct region_hash *rh)
++{
++      struct list_head *tmp, *tmp2;
++      struct region *reg;
++
++      LIST_HEAD(clean);
++      LIST_HEAD(recovered);
++
++      /*
++       * Quickly grab the lists.
++       */
++      write_lock_irq(&rh->hash_lock);
++      spin_lock(&rh->region_lock);    /* plain spin_lock: irqs are already off */
++      if (!list_empty(&rh->clean_regions)) {
++              list_splice(&rh->clean_regions, &clean);
++              INIT_LIST_HEAD(&rh->clean_regions);
++
++              list_for_each_entry (reg, &clean, list) {
++                      rh->log->type->clear_region(rh->log, reg->key);
++                      list_del(&reg->hash_list);      /* out of the hash table */
++              }
++      }
++
++      if (!list_empty(&rh->recovered_regions)) {
++              list_splice(&rh->recovered_regions, &recovered);
++              INIT_LIST_HEAD(&rh->recovered_regions);
++
++              list_for_each_entry (reg, &recovered, list)
++                      list_del(&reg->hash_list);
++      }
++      spin_unlock(&rh->region_lock);
++      write_unlock_irq(&rh->hash_lock);
++
++      /*
++       * All the regions on the recovered and clean lists have
++       * now been pulled out of the system, so no need to do
++       * any more locking.
++       */
++      list_for_each_safe (tmp, tmp2, &recovered) {
++              reg = list_entry(tmp, struct region, list);
++
++              rh->log->type->complete_resync_work(rh->log, reg->key, 1);     /* always reported as a success */
++              dispatch_buffers(rh->ms, reg->delayed_bhs);     /* release writes parked by rh_delay() */
++              up(&rh->recovery_count);        /* allow another recovery to start */
++              mempool_free(reg, rh->region_pool);
++      }
++
++      list_for_each_safe (tmp, tmp2, &clean) {
++              reg = list_entry(tmp, struct region, list);
++              mempool_free(reg, rh->region_pool);
++      }
++}
++
++static void rh_inc(struct region_hash *rh, region_t region)
++{
++      struct region *reg;
++
++      read_lock(&rh->hash_lock);
++      reg = __rh_find(rh, region);    /* creates the region if it is not hashed yet */
++      if (reg->state == RH_CLEAN) {
++              rh->log->type->mark_region(rh->log, reg->key);  /* tell the log before the state flips to dirty */
++
++              spin_lock_irq(&rh->region_lock);
++              reg->state = RH_DIRTY;
++              list_del_init(&reg->list);      /* take off the clean list */
++              spin_unlock_irq(&rh->region_lock);
++      }
++
++      atomic_inc(&reg->pending);      /* balanced by rh_dec() */
++      read_unlock(&rh->hash_lock);
++}
++
++static void rh_inc_pending(struct region_hash *rh, struct buffer_list *buffers)
++{
++      struct buffer_head *bh;
++
++      for (bh = buffers->head; bh; bh = bh->b_reqnext)        /* walks the list without popping it */
++              rh_inc(rh, bh_to_region(rh, bh));
++}
++
++static void rh_dec(struct region_hash *rh, region_t region)
++{
++      unsigned long flags;
++      struct region *reg;
++
++      read_lock(&rh->hash_lock);
++      reg = __rh_lookup(rh, region);
++      read_unlock(&rh->hash_lock);
++
++      /* only the completion of the last pending io changes anything */
++      if (!atomic_dec_and_test(&reg->pending))
++              return;
++
++      spin_lock_irqsave(&rh->region_lock, flags);
++      if (reg->state != RH_RECOVERING) {
++              reg->state = RH_CLEAN;
++              list_add(&reg->list, &rh->clean_regions);
++      } else {
++              /* the region has now quiesced: queue it for recovery */
++              list_add_tail(&reg->list, &rh->quiesced_regions);
++      }
++      spin_unlock_irqrestore(&rh->region_lock, flags);
++
++      dm_daemon_wake(&_kmirrord);
++}
++
++/*
++ * Starts quiescing a region in preparation for recovery.
++ */
++static int __rh_recovery_prepare(struct region_hash *rh)
++{
++      region_t next;
++      struct region *reg;
++      int work;
++
++      /* The dirty log decides which region is resynced next;
++       * a result <= 0 is passed straight back to the caller. */
++      work = rh->log->type->get_resync_work(rh->log, &next);
++      if (work <= 0)
++              return work;
++
++      /*
++       * Look the region up (creating it if necessary); flagging it
++       * as recovering is what starts it quiescing.
++       */
++      read_lock(&rh->hash_lock);
++      reg = __rh_find(rh, next);
++      read_unlock(&rh->hash_lock);
++
++      spin_lock_irq(&rh->region_lock);
++      reg->state = RH_RECOVERING;
++
++      /*
++       * Whatever list the region sat on, it leaves it now.  With
++       * no io pending it is already quiesced and can be queued
++       * for recovery right away; otherwise rh_dec() queues it
++       * when the pending count drops to zero.
++       */
++      list_del_init(&reg->list);
++      if (!atomic_read(&reg->pending))
++              list_add(&reg->list, &rh->quiesced_regions);
++      spin_unlock_irq(&rh->region_lock);
++
++      return 1;
++}
++
++static void rh_recovery_prepare(struct region_hash *rh)
++{
++      while (!down_trylock(&rh->recovery_count))      /* take a recovery token without sleeping */
++              if (__rh_recovery_prepare(rh) <= 0) {
++                      up(&rh->recovery_count);        /* nothing was started: give the token back */
++                      break;
++              }
++}
++
++/*
++ * Returns any quiesced regions.
++ */
++static struct region *rh_recovery_start(struct region_hash *rh)
++{
++      struct region *reg = NULL;      /* returned as-is when nothing has quiesced */
++
++      spin_lock_irq(&rh->region_lock);
++      if (!list_empty(&rh->quiesced_regions)) {
++              reg = list_entry(rh->quiesced_regions.next,
++                               struct region, list);
++              list_del_init(&reg->list);      /* remove from the quiesced list */
++      }
++      spin_unlock_irq(&rh->region_lock);
++
++      return reg;
++}
++
++/* FIXME: success ignored for now */
++static void rh_recovery_end(struct region *reg, int success)
++{
++      struct region_hash *rh = reg->rh;
++
++      spin_lock_irq(&rh->region_lock);
++      list_add(&reg->list, &reg->rh->recovered_regions);      /* picked up later by rh_update_states() */
++      spin_unlock_irq(&rh->region_lock);
++
++      dm_daemon_wake(&_kmirrord);
++}
++
++static void rh_flush(struct region_hash *rh)
++{
++      rh->log->type->flush(rh->log);  /* the int result of flush() is discarded */
++}
++
++static void rh_delay(struct region_hash *rh, struct buffer_head *bh)
++{
++      struct region *reg;
++
++      read_lock(&rh->hash_lock);
++      reg = __rh_find(rh, bh_to_region(rh, bh));
++      bh->b_reqnext = reg->delayed_bhs;       /* push onto the front of the region's delayed chain */
++      reg->delayed_bhs = bh;  /* NOTE(review): done under hash_lock only, region_lock is not taken - confirm */
++      read_unlock(&rh->hash_lock);
++}
++
++static void rh_stop_recovery(struct region_hash *rh)
++{
++      int i;
++
++      /* wait for any recovering regions */
++      for (i = 0; i < MAX_RECOVERY; i++)
++              down(&rh->recovery_count);      /* may sleep until a token is released */
++}
++
++static void rh_start_recovery(struct region_hash *rh)
++{
++      int i;
++
++      for (i = 0; i < MAX_RECOVERY; i++)
++              up(&rh->recovery_count);        /* hand out the recovery tokens */
++
++      dm_daemon_wake(&_kmirrord);
++}
++
++/*-----------------------------------------------------------------
++ * Mirror set structures.
++ *---------------------------------------------------------------*/
++struct mirror {
++      atomic_t error_count;
++      struct dm_dev *dev;     /* obtained with dm_get_device() in get_mirror() */
++      sector_t offset;        /* start sector on dev, parsed in get_mirror() */
++};
++
++struct mirror_set {
++      struct dm_target *ti;
++      struct list_head list;          /* link on _mirror_sets */
++      struct region_hash rh;
++      struct kcopyd_client *kcopyd_client;
++
++      spinlock_t lock;        /* protects the next two lists */
++      struct buffer_list reads;
++      struct buffer_list writes;
++
++      /* recovery */
++      region_t nr_regions;    /* dm_div_up(ti->len, region_size), see alloc_context() */
++      region_t sync_count;    /* bumped in recovery_complete() */
++
++      unsigned int nr_mirrors;
++      struct mirror mirror[0];        /* nr_mirrors entries allocated behind the struct */
++};
++
++/*
++ * Every mirror should look like this one.
++ */
++#define DEFAULT_MIRROR 0
++
++/*
++ * This is yucky.  We squirrel the mirror_set struct away inside
++ * b_reqnext for write buffers.  This is safe since the bh
++ * doesn't get submitted to the lower levels of block layer.
++ */
++static struct mirror_set *bh_get_ms(struct buffer_head *bh)
++{
++      return (struct mirror_set *) bh->b_reqnext;     /* counterpart of bh_set_ms() */
++}
++
++static void bh_set_ms(struct buffer_head *bh, struct mirror_set *ms)
++{
++      bh->b_reqnext = (struct buffer_head *) ms;      /* b_reqnext doubles as storage for ms */
++}
++
++/*-----------------------------------------------------------------
++ * Recovery.
++ *
++ * When a mirror is first activated we may find that some regions
++ * are in the no-sync state.  We have to recover these by
++ * recopying from the default mirror to all the others.
++ *---------------------------------------------------------------*/
++static void recovery_complete(int read_err, unsigned int write_err,
++                            void *context)
++{
++      struct region *reg = (struct region *) context; /* the region handed to kcopyd_copy() */
++      struct mirror_set *ms = reg->rh->ms;
++
++      /* FIXME: better error handling */
++      rh_recovery_end(reg, !(read_err || write_err)); /* second argument is 'success', i.e. no error */
++      if (++ms->sync_count == ms->nr_regions)
++              /* the sync is complete */
++              dm_table_event(ms->ti->table);
++}
++
++static int recover(struct mirror_set *ms, struct region *reg)
++{
++      int r;
++      unsigned int i;
++      struct io_region from, to[ms->nr_mirrors - 1], *dest;   /* one destination per non-default mirror */
++      struct mirror *m;
++      unsigned int flags = 0;
++
++      /* fill in the source */
++      m = ms->mirror + DEFAULT_MIRROR;
++      from.dev = m->dev->dev;
++      from.sector = m->offset + region_to_sector(reg->rh, reg->key);
++      if (reg->key == (ms->nr_regions - 1)) {
++              /*
++               * The final region may be smaller than
++               * region_size.
++               */
++              from.count = ms->ti->len & (reg->rh->region_size - 1);  /* remainder, via a power-of-two mask */
++              if (!from.count)
++                      from.count = reg->rh->region_size;      /* target length is an exact multiple */
++      } else
++              from.count = reg->rh->region_size;
++
++      /* fill in the destinations */
++      for (i = 1; i < ms->nr_mirrors; i++) {
++              m = ms->mirror + i;
++              dest = to + (i - 1);
++
++              dest->dev = m->dev->dev;
++              dest->sector = m->offset + region_to_sector(reg->rh, reg->key);
++              dest->count = from.count;
++      }
++
++      /* hand to kcopyd */
++      set_bit(KCOPYD_IGNORE_ERROR, &flags);
++      r = kcopyd_copy(ms->kcopyd_client, &from, ms->nr_mirrors - 1, to, flags,
++                      recovery_complete, reg);        /* recovery_complete() gets reg as its context */
++
++      return r;
++}
++
++static void do_recovery(struct mirror_set *ms)
++{
++      int r;
++      struct region *reg;
++
++      /*
++       * Start quiescing some regions.
++       */
++      rh_recovery_prepare(&ms->rh);
++
++      /*
++       * Copy any already quiesced regions.
++       */
++      while ((reg = rh_recovery_start(&ms->rh))) {
++              r = recover(ms, reg);
++              if (r)
++                      rh_recovery_end(reg, 0);        /* the copy could not be started: finish as a failure */
++      }
++}
++
++/*-----------------------------------------------------------------
++ * Reads
++ *---------------------------------------------------------------*/
++static struct mirror *choose_mirror(struct mirror_set *ms, sector_t sector)
++{
++      /* FIXME: add read balancing */
++      return ms->mirror + DEFAULT_MIRROR;     /* 'sector' is unused for now */
++}
++
++/*
++ * remap a buffer to a particular mirror.
++ */
++static void map_buffer(struct mirror_set *ms,
++                     struct mirror *m, struct buffer_head *bh)
++{
++      sector_t bsize = bh->b_size >> 9;       /* buffer size in 512-byte sectors */
++      sector_t rsector = bh->b_blocknr * bsize;
++
++      bh->b_rdev = m->dev->dev;
++      bh->b_rsector = m->offset + (rsector - ms->ti->begin);  /* rebase from the target onto the mirror */
++}
++
++static void do_reads(struct mirror_set *ms, struct buffer_list *reads)
++{
++      region_t region;
++      struct buffer_head *bh;
++      struct mirror *m;
++
++      while ((bh = buffer_list_pop(reads))) {
++              region = bh_to_region(&ms->rh, bh);
++
++              /*
++               * We can only read balance if the region is in sync.
++               */
++              if (rh_in_sync(&ms->rh, region, 0))     /* 0: the log must not block here */
++                      m = choose_mirror(ms, bh->b_rsector);
++              else
++                      m = ms->mirror + DEFAULT_MIRROR;
++
++              map_buffer(ms, m, bh);
++              generic_make_request(READ, bh);
++      }
++}
++
++/*-----------------------------------------------------------------
++ * Writes.
++ *
++ * We do different things with the write io depending on the
++ * state of the region that it's in:
++ *
++ * SYNC:      increment pending, use dm-io to write to *all* mirrors
++ * RECOVERING:        delay the io until recovery completes
++ * NOSYNC:    increment pending, just write to the default mirror
++ *---------------------------------------------------------------*/
++static void write_callback(unsigned int error, void *context)
++{
++      unsigned int i;
++      int uptodate = 1;
++      struct buffer_head *bh = (struct buffer_head *) context;
++      struct mirror_set *ms;
++
++      ms = bh_get_ms(bh);     /* stored by do_write() */
++      bh_set_ms(bh, NULL);
++
++      /*
++       * NOTE: We don't decrement the pending count here,
++       * instead it is done by the targets endio function.
++       * This way we handle both writes to SYNC and NOSYNC
++       * regions with the same code.
++       */
++
++      if (error) {
++              /*
++               * only error the io if all mirrors failed.
++               * FIXME: bogus
++               */
++              uptodate = 0;
++              for (i = 0; i < ms->nr_mirrors; i++)
++                      if (!test_bit(i, &error)) {     /* NOTE(review): test_bit() on an unsigned int - confirm safe on 64-bit */
++                              uptodate = 1;
++                              break;
++                      }
++      }
++      bh->b_end_io(bh, uptodate);
++}
++
++static void do_write(struct mirror_set *ms, struct buffer_head *bh)
++{
++      unsigned int i;
++      struct io_region io[ms->nr_mirrors];    /* one target region per mirror */
++      struct mirror *m;
++
++      for (i = 0; i < ms->nr_mirrors; i++) {
++              m = ms->mirror + i;
++
++              io[i].dev = m->dev->dev;
++              io[i].sector = m->offset + (bh->b_rsector - ms->ti->begin);
++              io[i].count = bh->b_size >> 9;  /* bytes -> 512-byte sectors */
++      }
++      /* page offset of the data: cast through unsigned long, a pointer need not fit in unsigned int */
++      bh_set_ms(bh, ms);      /* write_callback() reads ms back from the bh */
++      dm_io_async(ms->nr_mirrors, io, WRITE, bh->b_page,
++                  (unsigned long) bh->b_data & ~PAGE_MASK, write_callback, bh);
++}
++
++static void do_writes(struct mirror_set *ms, struct buffer_list *writes)
++{
++      int state;
++      struct buffer_head *bh;
++      struct buffer_list sync, nosync, recover, *this_list = NULL;
++
++      if (!writes->head)
++              return;
++
++      /*
++       * Classify each write.
++       */
++      buffer_list_init(&sync);
++      buffer_list_init(&nosync);
++      buffer_list_init(&recover);
++
++      while ((bh = buffer_list_pop(writes))) {
++              state = rh_state(&ms->rh, bh_to_region(&ms->rh, bh), 1);
++              switch (state) {
++              case RH_CLEAN:
++              case RH_DIRTY:
++                      this_list = &sync;
++                      break;
++
++              case RH_NOSYNC:
++                      this_list = &nosync;
++                      break;
++
++              case RH_RECOVERING:
++                      this_list = &recover;
++                      break;
++              }       /* NOTE(review): no default case; an unknown state would reuse the previous this_list */
++
++              buffer_list_add(this_list, bh);
++      }
++
++      /*
++       * Increment the pending counts for any regions that will
++       * be written to (writes to recover regions are going to
++       * be delayed).
++       */
++      rh_inc_pending(&ms->rh, &sync);
++      rh_inc_pending(&ms->rh, &nosync);
++      rh_flush(&ms->rh);      /* flush the log before any of the io is issued */
++
++      /*
++       * Dispatch io.
++       */
++      while ((bh = buffer_list_pop(&sync)))
++              do_write(ms, bh);       /* goes to every mirror */
++
++      while ((bh = buffer_list_pop(&recover)))
++              rh_delay(&ms->rh, bh);  /* parked on the region's delayed chain */
++
++      while ((bh = buffer_list_pop(&nosync))) {
++              map_buffer(ms, ms->mirror + DEFAULT_MIRROR, bh);
++              generic_make_request(WRITE, bh);        /* default mirror only */
++      }
++}
++
++/*-----------------------------------------------------------------
++ * kmirrord
++ *---------------------------------------------------------------*/
++static LIST_HEAD(_mirror_sets);         /* every mirror_set added by add_mirror_set() */
++static DECLARE_RWSEM(_mirror_sets_lock);        /* read: do_work(); write: add/del_mirror_set() */
++
++static void do_mirror(struct mirror_set *ms)
++{
++      struct buffer_list pending_reads, pending_writes;
++
++      spin_lock(&ms->lock);           /* detach both queues, then work on the private copies */
++      pending_reads = ms->reads;
++      pending_writes = ms->writes;
++      buffer_list_init(&ms->reads);
++      buffer_list_init(&ms->writes);
++      spin_unlock(&ms->lock);
++
++      rh_update_states(&ms->rh);
++      do_recovery(ms);
++      do_reads(ms, &pending_reads);
++      do_writes(ms, &pending_writes);
++      run_task_queue(&tq_disk);
++}
++
++static void do_work(void)
++{
++      struct mirror_set *ms;
++
++      down_read(&_mirror_sets_lock);  /* keeps the list stable while it is walked */
++      list_for_each_entry (ms, &_mirror_sets, list)
++              do_mirror(ms);
++      up_read(&_mirror_sets_lock);
++}
++
++/*-----------------------------------------------------------------
++ * Target functions
++ *---------------------------------------------------------------*/
++static struct mirror_set *alloc_context(unsigned int nr_mirrors,
++                                      sector_t region_size,
++                                      struct dm_target *ti,
++                                      struct dirty_log *dl)
++{
++      size_t len;
++      struct mirror_set *ms = NULL;
++
++      if (array_too_big(sizeof(*ms), sizeof(ms->mirror[0]), nr_mirrors))
++              return NULL;    /* NOTE(review): this path leaves ti->error unset */
++
++      len = sizeof(*ms) + (sizeof(ms->mirror[0]) * nr_mirrors);      /* header plus trailing mirror[] */
++
++      ms = kmalloc(len, GFP_KERNEL);
++      if (!ms) {
++              ti->error = "dm-mirror: Cannot allocate mirror context";
++              return NULL;
++      }
++
++      memset(ms, 0, len);
++      spin_lock_init(&ms->lock);
++
++      ms->ti = ti;
++      ms->nr_mirrors = nr_mirrors;
++      ms->nr_regions = dm_div_up(ti->len, region_size);
++
++      if (rh_init(&ms->rh, ms, dl, region_size, ms->nr_regions)) {
++              ti->error = "dm-mirror: Error creating dirty region hash";
++              kfree(ms);      /* dl is not destroyed here; that is left to the caller */
++              return NULL;
++      }
++
++      return ms;
++}
++
++/*
++ * Tear down a mirror context: release the first m mirror devices,
++ * shut down the region hash and free the structure itself.
++ */
++static void free_context(struct mirror_set *ms, struct dm_target *ti,
++                       unsigned int m)
++{
++      /* Drop device references from slot m - 1 down to slot 0. */
++      for (; m; m--)
++              dm_put_device(ti, ms->mirror[m - 1].dev);
++
++      rh_exit(&ms->rh);
++      kfree(ms);
++}
++
++/*
++ * Returns non-zero if size is an acceptable region size for ti: it
++ * must be non-zero, a multiple of the page size in sectors, a power
++ * of 2, and no larger than the target.
++ */
++static inline int _check_region_size(struct dm_target *ti, sector_t size)
++{
++      /*
++       * Zero slips through the modulo and power-of-2 tests below, but
++       * the region size is later used as a divisor (see mirror_map),
++       * so reject it explicitly.
++       */
++      if (!size)
++              return 0;
++
++      return !(size % (PAGE_SIZE >> 9) || (size & (size - 1)) ||
++               size > ti->len);
++}
++
++/*
++ * Parse one "<path> <offset>" pair from argv and look the device up
++ * into mirror slot 'mirror'.  Returns 0 on success, or a negative
++ * errno with ti->error set.
++ */
++static int get_mirror(struct mirror_set *ms, struct dm_target *ti,
++                    unsigned int mirror, char **argv)
++{
++      sector_t start;
++
++      if (sscanf(argv[1], SECTOR_FORMAT, &start) != 1) {
++              ti->error = "dm-mirror: Invalid offset";
++              return -EINVAL;
++      }
++
++      if (dm_get_device(ti, argv[0], start, ti->len,
++                        dm_table_get_mode(ti->table),
++                        &ms->mirror[mirror].dev) != 0) {
++              ti->error = "dm-mirror: Device lookup failure";
++              return -ENXIO;
++      }
++
++      ms->mirror[mirror].offset = start;
++      return 0;
++}
++
++/*
++ * Append ms to the global list of mirror sets under the write lock,
++ * then wake kmirrord.  Always returns 0.
++ */
++static int add_mirror_set(struct mirror_set *ms)
++{
++      down_write(&_mirror_sets_lock);
++      list_add_tail(&ms->list, &_mirror_sets);
++      up_write(&_mirror_sets_lock);
++      dm_daemon_wake(&_kmirrord);
++
++      return 0;
++}
++
++/*
++ * Unlink ms from the global list of mirror sets under the write lock.
++ */
++static void del_mirror_set(struct mirror_set *ms)
++{
++      down_write(&_mirror_sets_lock);
++      list_del(&ms->list);
++      up_write(&_mirror_sets_lock);
++}
++
++/*
++ * Create dirty log: log_type #log_params <log_params>
++ *
++ * On success returns the log, with the number of argv entries consumed
++ * stored in *args_used.  Returns NULL with ti->error set otherwise.
++ */
++static struct dirty_log *create_dirty_log(struct dm_target *ti,
++                                        unsigned int argc, char **argv,
++                                        unsigned int *args_used)
++{
++      unsigned int nr_params;
++      struct dirty_log *log;
++
++      /* Need at least the log type and the parameter count. */
++      if (argc < 2) {
++              ti->error = "dm-mirror: Insufficient mirror log arguments";
++              return NULL;
++      }
++
++      /* Only a parameter count of exactly 1 is accepted. */
++      if (sscanf(argv[1], "%u", &nr_params) != 1 || nr_params != 1) {
++              ti->error = "dm-mirror: Invalid mirror log argument count";
++              return NULL;
++      }
++
++      *args_used = 2 + nr_params;
++      if (*args_used > argc) {
++              ti->error = "dm-mirror: Insufficient mirror log arguments";
++              return NULL;
++      }
++
++      log = dm_create_dirty_log(argv[0], ti->len, nr_params, argv + 2);
++      if (log == NULL) {
++              ti->error = "dm-mirror: Error creating mirror dirty log";
++              return NULL;
++      }
++
++      if (_check_region_size(ti, log->type->get_region_size(log)))
++              return log;
++
++      ti->error = "dm-mirror: Invalid region size";
++      dm_destroy_dirty_log(log);
++      return NULL;
++}
++
++/*
++ * Construct a mirror mapping:
++ *
++ * log_type #log_params <log_params>
++ * #mirrors [mirror_path offset]{2,}
++ *
++ * For now, #log_params = 1, log_type = "core"
++ *
++ * Returns 0 on success or a negative errno.
++ */
++#define DM_IO_PAGES 64
++static int mirror_ctr(struct dm_target *ti, unsigned int argc, char **argv)
++{
++      int r;
++      unsigned int nr_mirrors, m, args_used;
++      struct mirror_set *ms;
++      struct dirty_log *dl;
++
++      dl = create_dirty_log(ti, argc, argv, &args_used);
++      if (!dl)
++              return -EINVAL;
++
++      argv += args_used;
++      argc -= args_used;
++
++      if (!argc || sscanf(argv[0], "%u", &nr_mirrors) != 1 ||
++          nr_mirrors < 2) {
++              ti->error = "dm-mirror: Invalid number of mirrors";
++              dm_destroy_dirty_log(dl);
++              return -EINVAL;
++      }
++
++      argv++, argc--;
++
++      /*
++       * Each mirror takes a path and an offset.  Compare by halving
++       * argc instead of doubling nr_mirrors, which could wrap around
++       * for a huge user-supplied count.
++       */
++      if ((argc & 1) || argc / 2 != nr_mirrors) {
++              ti->error = "dm-mirror: Wrong number of mirror arguments";
++              dm_destroy_dirty_log(dl);
++              return -EINVAL;
++      }
++
++      ms = alloc_context(nr_mirrors, dl->type->get_region_size(dl), ti, dl);
++      if (!ms) {
++              dm_destroy_dirty_log(dl);
++              return -ENOMEM;
++      }
++
++      /* Get the mirror parameter sets */
++      for (m = 0; m < nr_mirrors; m++) {
++              r = get_mirror(ms, ti, m, argv);
++              if (r) {
++                      /* Only the m devices opened so far are released. */
++                      free_context(ms, ti, m);
++                      return r;
++              }
++              argv += 2;
++              argc -= 2;
++      }
++
++      r = kcopyd_client_create(DM_IO_PAGES, &ms->kcopyd_client);
++      if (r) {
++              ti->error = "dm-mirror: Error creating kcopyd client";
++              free_context(ms, ti, ms->nr_mirrors);
++              return r;
++      }
++
++      /*
++       * Publish the context only once nothing can fail any more, so
++       * ti->private never points at freed memory.
++       */
++      ti->private = ms;
++
++      add_mirror_set(ms);
++      return 0;
++}
++
++/*
++ * Destructor: unhook the mirror set from the global list, destroy its
++ * kcopyd client, then free the context and release all its devices.
++ */
++static void mirror_dtr(struct dm_target *ti)
++{
++      struct mirror_set *ms = ti->private;
++
++      del_mirror_set(ms);
++      kcopyd_client_destroy(ms->kcopyd_client);
++      free_context(ms, ti, ms->nr_mirrors);
++}
++
++/*
++ * Add bh to the mirror set's read or write queue (chosen by rw) and
++ * wake kmirrord if that queue was empty beforehand.
++ */
++static void queue_bh(struct mirror_set *ms, struct buffer_head *bh, int rw)
++{
++      int was_empty;
++      struct buffer_list *queue = &ms->reads;
++
++      if (rw == WRITE)
++              queue = &ms->writes;
++
++      spin_lock(&ms->lock);
++      was_empty = !queue->head;
++      buffer_list_add(queue, bh);
++      spin_unlock(&ms->lock);
++
++      if (was_empty)
++              dm_daemon_wake(&_kmirrord);
++}
++
++/*
++ * Mirror mapping function
++ *
++ * Returns 0 when the buffer has been queued for kmirrord, 1 when it
++ * has been remapped onto a mirror, or a negative errno.
++ */
++static int mirror_map(struct dm_target *ti, struct buffer_head *bh,
++                    int rw, union map_info *map_context)
++{
++      int in_sync;
++      struct mirror *target;
++      struct mirror_set *ms = ti->private;
++
++      /* FIXME: nasty hack, 32 bit sector_t only */
++      map_context->ll = bh->b_rsector / ms->rh.region_size;
++
++      /* Every write is handed to the daemon. */
++      if (rw == WRITE) {
++              queue_bh(ms, bh, rw);
++              return 0;
++      }
++
++      in_sync = ms->rh.log->type->in_sync(ms->rh.log,
++                                          bh_to_region(&ms->rh, bh), 0);
++      if (in_sync == -EWOULDBLOCK)    /* FIXME: ugly */
++              in_sync = 0;
++      else if (in_sync < 0)
++              return in_sync;
++
++      if (in_sync) {
++              /* Region is in sync: remap the read onto one mirror. */
++              target = choose_mirror(ms, bh->b_rsector);
++              if (!target)
++                      return -EIO;
++
++              map_buffer(ms, target, bh);
++              return 1;
++      }
++
++      /*
++       * We don't want to fast track a recovery just for a read
++       * ahead.  So we just let it silently fail.
++       * FIXME: get rid of this.
++       */
++      if (rw == READA)
++              return -EIO;
++
++      /* Pass this io over to the daemon */
++      queue_bh(ms, bh, rw);
++      return 0;
++}
++
++/*
++ * End-io hook: for writes, drop the pending count on the region that
++ * mirror_map() stored in map_context.  Always returns 0.
++ */
++static int mirror_end_io(struct dm_target *ti, struct buffer_head *bh,
++                       int rw, int error, union map_info *map_context)
++{
++      struct mirror_set *ms = ti->private;
++      region_t region = map_context->ll;
++
++      if (rw != WRITE)
++              return 0;
++
++      /* We need to dec pending if this was a write. */
++      rh_dec(&ms->rh, region);
++      return 0;
++}
++
++/* Suspend hook: stop region recovery for this mirror set. */
++static void mirror_suspend(struct dm_target *ti)
++{
++      struct mirror_set *ms = ti->private;
++
++      rh_stop_recovery(&ms->rh);
++}
++
++/* Resume hook: start region recovery for this mirror set again. */
++static void mirror_resume(struct dm_target *ti)
++{
++      struct mirror_set *ms = ti->private;
++
++      rh_start_recovery(&ms->rh);
++}
++
++/*
++ * Report target status into result, which holds maxlen bytes.
++ *
++ * STATUSTYPE_INFO:  #mirrors, each mirror device, sync_count/nr_regions
++ * STATUSTYPE_TABLE: log type, "1", region size, #mirrors, then a
++ *                   "device offset" pair per mirror
++ *
++ * Always returns 0.
++ */
++static int mirror_status(struct dm_target *ti, status_type_t type,
++                       char *result, unsigned int maxlen)
++{
++      unsigned int m, sz = 0;
++      struct mirror_set *ms = (struct mirror_set *) ti->private;
++
++      /*
++       * snprintf() reports the length it wanted to write, so sz can
++       * reach or pass maxlen.  Stop emitting once the buffer is full
++       * rather than letting the unsigned "maxlen - sz" wrap around.
++       */
++      switch (type) {
++      case STATUSTYPE_INFO:
++              if (sz < maxlen)
++                      sz += snprintf(result + sz, maxlen - sz, "%d ",
++                                     ms->nr_mirrors);
++
++              for (m = 0; m < ms->nr_mirrors && sz < maxlen; m++)
++                      sz += snprintf(result + sz, maxlen - sz, "%s ",
++                                     dm_kdevname(ms->mirror[m].dev->dev));
++
++              if (sz < maxlen)
++                      sz += snprintf(result + sz, maxlen - sz, "%lu/%lu",
++                                     ms->sync_count, ms->nr_regions);
++              break;
++
++      case STATUSTYPE_TABLE:
++              if (sz < maxlen)
++                      sz += snprintf(result + sz, maxlen - sz,
++                                     "%s 1 " SECTOR_FORMAT " %d ",
++                                     ms->rh.log->type->name,
++                                     ms->rh.region_size, ms->nr_mirrors);
++
++              /*
++               * The offset is parsed as a sector_t with SECTOR_FORMAT
++               * in get_mirror(); print it the same way.
++               */
++              for (m = 0; m < ms->nr_mirrors && sz < maxlen; m++)
++                      sz += snprintf(result + sz, maxlen - sz,
++                                     "%s " SECTOR_FORMAT " ",
++                                     dm_kdevname(ms->mirror[m].dev->dev),
++                                     ms->mirror[m].offset);
++              break;
++      }
++
++      return 0;
++}
++
++/*
++ * Target type for "mirror", wiring up the functions defined above.
++ * Registered in dm_mirror_init() and unregistered in dm_mirror_exit().
++ */
++static struct target_type mirror_target = {
++      .name    = "mirror",
++      .module  = THIS_MODULE,
++      .ctr     = mirror_ctr,
++      .dtr     = mirror_dtr,
++      .map     = mirror_map,
++      .end_io  = mirror_end_io,
++      .suspend = mirror_suspend,
++      .resume  = mirror_resume,
++      .status  = mirror_status,
++};
++
++/*
++ * Module initialisation: set up the dirty log code, start kmirrord
++ * and register the "mirror" target.  If a later step fails, whatever
++ * was already set up is torn down again in reverse order.
++ */
++static int __init dm_mirror_init(void)
++{
++      int r;
++
++      r = dm_dirty_log_init();
++      if (r)
++              return r;
++
++      r = dm_daemon_start(&_kmirrord, "kmirrord", do_work);
++      if (r) {
++              DMERR("couldn't start kmirrord");
++              goto bad_daemon;
++      }
++
++      r = dm_register_target(&mirror_target);
++      if (r < 0) {
++              DMERR("%s: Failed to register mirror target",
++                    mirror_target.name);
++              goto bad_target;
++      }
++
++      return r;
++
++ bad_target:
++      /* Same order as dm_mirror_exit(): the daemon first, then the log. */
++      dm_daemon_stop(&_kmirrord);
++
++ bad_daemon:
++      dm_dirty_log_exit();
++      return r;
++}
++
++/*
++ * Module exit: unregister the "mirror" target (logging, but otherwise
++ * ignoring, a failure), then stop kmirrord and shut down the dirty
++ * log code.
++ */
++static void __exit dm_mirror_exit(void)
++{
++      int r;
++
++      r = dm_unregister_target(&mirror_target);
++      if (r < 0)
++              DMERR("%s: unregister failed %d", mirror_target.name, r);
++
++      dm_daemon_stop(&_kmirrord);
++      dm_dirty_log_exit();
++}
++
++/* Module hooks */
++module_init(dm_mirror_init);
++module_exit(dm_mirror_exit);
++
++MODULE_DESCRIPTION(DM_NAME " mirror target");
++MODULE_AUTHOR("Heinz Mauelshagen <mge@sistina.com>");
++MODULE_LICENSE("GPL");
+diff -ruN linux-2.4.21-dm-real/drivers/md/dm-snapshot.c linux-2.4.21/drivers/md/dm-snapshot.c
+--- linux-2.4.21-dm-real/drivers/md/dm-snapshot.c      Thu Jan  1 01:00:00 1970
++++ linux-2.4.21/drivers/md/dm-snapshot.c      Sat Jul 12 18:14:05 2003
+@@ -0,0 +1,1235 @@
++/*
++ * dm-snapshot.c
++ *
++ * Copyright (C) 2001-2002 Sistina Software (UK) Limited.
++ *
++ * This file is released under the GPL.
++ */
++
++#include <linux/config.h>
++#include <linux/ctype.h>
++#include <linux/module.h>
++#include <linux/init.h>
++#include <linux/slab.h>
++#include <linux/list.h>
++#include <linux/fs.h>
++#include <linux/blkdev.h>
++#include <linux/mempool.h>
++#include <linux/device-mapper.h>
++#include <linux/vmalloc.h>
++
++#include "dm-snapshot.h"
++#include "kcopyd.h"
++
++/*
++ * FIXME: Remove this before release.
++ */
++#if 0
++#define DMDEBUG(x...) DMWARN( ## x)
++#else
++#define DMDEBUG(x...)
++#endif
++
++/*
++ * The percentage increment we will wake up users at
++ */
++#define WAKE_UP_PERCENT 5
++
++/*
++ * kcopyd priority of snapshot operations
++ */
++#define SNAPSHOT_COPY_PRIORITY 2
++
++/*
++ * Each snapshot reserves this many pages for io
++ * FIXME: calculate this
++ */
++#define SNAPSHOT_PAGES 256
++
++/*
++ * An exception that is still being set up, together with the buffers
++ * that are waiting for it to complete.  The embedded struct exception
++ * is what gets linked into the snapshot's pending hash table.
++ */
++struct pending_exception {
++      struct exception e;
++
++      /*
++       * Origin buffers waiting for this to complete are held
++       * in a list (using b_reqnext).
++       */
++      struct buffer_head *origin_bhs;
++      struct buffer_head *snapshot_bhs;
++
++      /*
++       * Other pending_exceptions that are processing this
++       * chunk.  When this list is empty, we know we can
++       * complete the origins.
++       */
++      struct list_head siblings;
++
++      /* Pointer back to snapshot context */
++      struct dm_snapshot *snap;
++
++      /*
++       * 1 indicates the exception has already been sent to
++       * kcopyd.
++       */
++      int started;
++};
++
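++/*
++ * Slab caches for exceptions and pending exceptions, and the mempool
++ * that pending exceptions are allocated from.  (The hash table mapping
++ * origin volumes to lists of snapshots, and the lock that protects it,
++ * are _origins and _origins_lock further down.)
++ */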
++static kmem_cache_t *exception_cache;
++static kmem_cache_t *pending_cache;
++static mempool_t *pending_pool;
++
++/*
++ * One of these per registered origin, held in the _origins hash
++ */
++struct origin {
++      /* The origin device */
++      kdev_t dev;
++
++      /* Links this origin into its _origins hash bucket */
++      struct list_head hash_list;
++
++      /* List of snapshots for this origin */
++      struct list_head snapshots;
++};
++
++/*
++ * Size of the hash table for origin volumes. If we make this
++ * the size of the minors list then it should be nearly perfect
++ */
++#define ORIGIN_HASH_SIZE 256
++#define ORIGIN_MASK      0xFF
++static struct list_head *_origins;
++static struct rw_semaphore _origins_lock;
++
++/*
++ * Allocate the origin hash table, then initialise every bucket and
++ * the lock protecting the table.  Returns 0, or -ENOMEM if the
++ * allocation fails.
++ */
++static int init_origin_hash(void)
++{
++      struct list_head *bucket;
++
++      _origins = kmalloc(ORIGIN_HASH_SIZE * sizeof(struct list_head),
++                         GFP_KERNEL);
++      if (_origins == NULL) {
++              DMERR("Device mapper: Snapshot: unable to allocate memory");
++              return -ENOMEM;
++      }
++
++      for (bucket = _origins; bucket < _origins + ORIGIN_HASH_SIZE; bucket++)
++              INIT_LIST_HEAD(bucket);
++
++      init_rwsem(&_origins_lock);
++      return 0;
++}
++
++/* Free the origin hash table allocated by init_origin_hash(). */
++static void exit_origin_hash(void)
++{
++      kfree(_origins);
++}
++
++/*
++ * Bucket index for an origin device: the bits of its minor number
++ * selected by ORIGIN_MASK.
++ */
++static inline unsigned int origin_hash(kdev_t dev)
++{
++      return MINOR(dev) & ORIGIN_MASK;
++}
++
++/*
++ * Find the origin entry for a device, or return NULL if there is
++ * none.  No locking is done here; register_snapshot() and
++ * unregister_snapshot() both take _origins_lock before calling.
++ */
++static struct origin *__lookup_origin(kdev_t origin)
++{
++      struct list_head *bucket = &_origins[origin_hash(origin)];
++      struct list_head *pos;
++
++      list_for_each(pos, bucket) {
++              struct origin *o = list_entry(pos, struct origin, hash_list);
++
++              if (o->dev == origin)
++                      return o;
++      }
++
++      return NULL;
++}
++
++/* Add o to the tail of the hash bucket selected by its device. */
++static void __insert_origin(struct origin *o)
++{
++      list_add_tail(&o->hash_list, _origins + origin_hash(o->dev));
++}
++
++/*
++ * Make a note of the snapshot and its origin so we can look it
++ * up when the origin has a write on it.
++ *
++ * An origin entry is created on first use.  Returns 0, or -ENOMEM if
++ * that entry cannot be allocated.
++ */
++static int register_snapshot(struct dm_snapshot *snap)
++{
++      int r = 0;
++      struct origin *o;
++      kdev_t dev = snap->origin->dev;
++
++      down_write(&_origins_lock);
++
++      o = __lookup_origin(dev);
++      if (o == NULL) {
++              /* New origin */
++              o = kmalloc(sizeof(*o), GFP_KERNEL);
++              if (o == NULL) {
++                      r = -ENOMEM;
++                      goto out;
++              }
++
++              /* Initialise the struct */
++              INIT_LIST_HEAD(&o->snapshots);
++              o->dev = dev;
++
++              __insert_origin(o);
++      }
++
++      list_add_tail(&snap->list, &o->snapshots);
++
++ out:
++      up_write(&_origins_lock);
++      return r;
++}
++
++/*
++ * Remove a snapshot from its origin's list, and free the origin entry
++ * itself once no snapshots are left on it.
++ *
++ * NOTE(review): the result of __lookup_origin() is used unchecked;
++ * this assumes s was added earlier by register_snapshot() - confirm.
++ */
++static void unregister_snapshot(struct dm_snapshot *s)
++{
++      struct origin *o;
++
++      down_write(&_origins_lock);
++      o = __lookup_origin(s->origin->dev);
++
++      list_del(&s->list);
++      if (list_empty(&o->snapshots)) {
++              list_del(&o->hash_list);
++              kfree(o);
++      }
++
++      up_write(&_origins_lock);
++}
++
++/*
++ * Implementation of the exception hash tables.
++ */
++/*
++ * Allocate an exception table with 'size' buckets and initialise each
++ * of them.  hash_mask is set to size - 1, which is only a usable bit
++ * mask when size is a power of 2.  Returns 0 or -ENOMEM.
++ */
++static int init_exception_table(struct exception_table *et, uint32_t size)
++{
++      struct list_head *bucket;
++      uint32_t left;
++
++      et->hash_mask = size - 1;
++      et->table = vcalloc(size, sizeof(struct list_head));
++      if (et->table == NULL)
++              return -ENOMEM;
++
++      for (bucket = et->table, left = size; left; bucket++, left--)
++              INIT_LIST_HEAD(bucket);
++
++      return 0;
++}
++
++/*
++ * Free every exception still linked into the table back to the cache
++ * 'mem', then free the bucket array itself.
++ */
++static void exit_exception_table(struct exception_table *et, kmem_cache_t *mem)
++{
++      int bucket;
++      int nr_buckets = et->hash_mask + 1;
++
++      for (bucket = 0; bucket < nr_buckets; bucket++) {
++              struct list_head *head = et->table + bucket;
++              struct list_head *pos, *next;
++
++              /* The _safe variant lets us free the entry we stand on. */
++              list_for_each_safe(pos, next, head)
++                      kmem_cache_free(mem, list_entry(pos, struct exception,
++                                                      hash_list));
++      }
++
++      vfree(et->table);
++}
++
++/*
++ * Map a chunk number to a bucket index by masking it with hash_mask.
++ *
++ * FIXME: check how this hash fn is performing.
++ */
++static inline uint32_t exception_hash(struct exception_table *et, chunk_t chunk)
++{
++      return chunk & et->hash_mask;
++}
++
++/* Link e at the head of the bucket selected by its old_chunk. */
++static void insert_exception(struct exception_table *eh, struct exception *e)
++{
++      list_add(&e->hash_list, eh->table + exception_hash(eh, e->old_chunk));
++}
++
++/* Unlink e from whichever hash bucket it is currently on. */
++static inline void remove_exception(struct exception *e)
++{
++      list_del(&e->hash_list);
++}
++
++/*
++ * Return the exception whose old_chunk matches 'chunk', or NULL if
++ * the chunk has not been remapped in this table.
++ */
++static struct exception *lookup_exception(struct exception_table *et,
++                                        chunk_t chunk)
++{
++      struct list_head *bucket = &et->table[exception_hash(et, chunk)];
++      struct list_head *pos;
++
++      list_for_each(pos, bucket) {
++              struct exception *e;
++
++              e = list_entry(pos, struct exception, hash_list);
++              if (e->old_chunk == chunk)
++                      return e;
++      }
++
++      return NULL;
++}
++
++/*
++ * Allocate an exception from exception_cache: try GFP_NOIO first and
++ * retry once with GFP_ATOMIC.  Returns NULL if both attempts fail.
++ */
++static inline struct exception *alloc_exception(void)
++{
++      struct exception *e = kmem_cache_alloc(exception_cache, GFP_NOIO);
++
++      if (e)
++              return e;
++
++      return kmem_cache_alloc(exception_cache, GFP_ATOMIC);
++}
++
++/* Return an exception to exception_cache. */
++static inline void free_exception(struct exception *e)
++{
++      kmem_cache_free(exception_cache, e);
++}
++
++/* Allocate a pending exception from pending_pool using GFP_NOIO. */
++static inline struct pending_exception *alloc_pending_exception(void)
++{
++      return mempool_alloc(pending_pool, GFP_NOIO);
++}
++
++/* Return a pending exception to pending_pool. */
++static inline void free_pending_exception(struct pending_exception *pe)
++{
++      mempool_free(pe, pending_pool);
++}
++
++/*
++ * Record a completed exception mapping chunk 'old' to chunk 'new' in
++ * the snapshot's complete table.  Returns 0, or -ENOMEM if no
++ * exception could be allocated.
++ */
++int dm_add_exception(struct dm_snapshot *s, chunk_t old, chunk_t new)
++{
++      struct exception *e = alloc_exception();
++
++      if (e == NULL)
++              return -ENOMEM;
++
++      e->new_chunk = new;
++      e->old_chunk = old;
++      insert_exception(&s->complete, e);
++
++      return 0;
++}
++
++/*
++ * Hard coded magic: cap the hash table at 1/50th of physical memory,
++ * expressed as a number of struct list_head buckets.
++ */
++static int calc_max_buckets(void)
++{
++      unsigned long mem;
++
++      /*
++       * Divide the page count before converting it to bytes.  On a
++       * 32 bit machine with 4GB or more of memory the byte count
++       * "num_physpages << PAGE_SHIFT" does not fit in an unsigned
++       * long and the old order could yield a limit of 0.
++       */
++      mem = num_physpages / 50;
++      mem <<= PAGE_SHIFT;
++      mem /= sizeof(struct list_head);
++
++      return mem;
++}
++
++/*
++ * Rounds a number down to a power of 2.  Zero is returned unchanged.
++ */
++static inline uint32_t round_down(uint32_t n)
++{
++      /* Smear the highest set bit into every lower position ... */
++      n |= n >> 1;
++      n |= n >> 2;
++      n |= n >> 4;
++      n |= n >> 8;
++      n |= n >> 16;
++
++      /* ... then strip everything below that highest bit. */
++      return n - (n >> 1);
++}
++
++/*
++ * Allocate room for a suitable hash table.
++ *
++ * Sets up both the table of completed exceptions and the smaller
++ * table of in-flight ones.  Returns 0 or -ENOMEM.
++ */
++static int init_hash_tables(struct dm_snapshot *s)
++{
++      sector_t hash_size, cow_dev_size, origin_dev_size, max_buckets;
++
++      /*
++       * Calculate based on the size of the original volume or
++       * the COW volume...
++       */
++      cow_dev_size = get_dev_size(s->cow->dev);
++      origin_dev_size = get_dev_size(s->origin->dev);
++      max_buckets = calc_max_buckets();
++
++      hash_size = min(origin_dev_size, cow_dev_size) / s->chunk_size;
++      hash_size = min(hash_size, max_buckets);
++
++      /*
++       * A device smaller than one chunk, or a zero bucket limit,
++       * would otherwise give a table with no buckets at all and a
++       * hash mask of all ones.  Always keep a small minimum.
++       */
++      if (hash_size < 64)
++              hash_size = 64;
++
++      /* Round it down to a power of 2 */
++      hash_size = round_down(hash_size);
++      if (init_exception_table(&s->complete, hash_size))
++              return -ENOMEM;
++
++      /*
++       * Allocate hash table for in-flight exceptions
++       * Make this smaller than the real hash table
++       * (hash_size is at least 64 here, so the shift leaves at
++       * least 8 buckets).
++       */
++      hash_size >>= 3;
++
++      if (init_exception_table(&s->pending, hash_size)) {
++              exit_exception_table(&s->complete, exception_cache);
++              return -ENOMEM;
++      }
++
++      return 0;
++}
++
++/*
++ * Round n up to the next multiple of 'size'.  The mask arithmetic is
++ * only correct when size is a power of 2.
++ */
++static inline ulong round_up(ulong n, ulong size)
++{
++      const ulong mask = size - 1;
++
++      return (n + mask) & ~mask;
++}
++
++/*
++ * Construct a snapshot mapping: <origin_dev> <COW-dev> <p/n> <chunk-size>
++ *
++ * Returns 0 on success with ti->private pointing at the new snapshot,
++ * or a negative errno with ti->error set.  The badN labels at the end
++ * unwind in reverse order of setup.
++ */
++static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv)
++{
++      struct dm_snapshot *s;
++      unsigned long chunk_size;
++      int r = -EINVAL;
++      char persistent;
++      char *origin_path;
++      char *cow_path;
++      char *value;
++      int blocksize;
++
++      /*
++       * NOTE(review): the message says "exactly 4" but extra
++       * arguments are accepted and ignored - confirm which is meant.
++       */
++      if (argc < 4) {
++              ti->error = "dm-snapshot: requires exactly 4 arguments";
++              r = -EINVAL;
++              goto bad1;
++      }
++
++      origin_path = argv[0];
++      cow_path = argv[1];
++      persistent = toupper(*argv[2]);
++
++      if (persistent != 'P' && persistent != 'N') {
++              ti->error = "Persistent flag is not P or N";
++              r = -EINVAL;
++              goto bad1;
++      }
++
++      /*
++       * NOTE(review): simple_strtoul() stores an end pointer in
++       * 'value', so the NULL test presumably never fires and trailing
++       * characters after the number are not rejected - confirm intent.
++       */
++      chunk_size = simple_strtoul(argv[3], &value, 10);
++      if (chunk_size == 0 || value == NULL) {
++              ti->error = "Invalid chunk size";
++              r = -EINVAL;
++              goto bad1;
++      }
++
++      s = kmalloc(sizeof(*s), GFP_KERNEL);
++      if (s == NULL) {
++              ti->error = "Cannot allocate snapshot context private "
++                  "structure";
++              r = -ENOMEM;
++              goto bad1;
++      }
++
++      r = dm_get_device(ti, origin_path, 0, ti->len, FMODE_READ, &s->origin);
++      if (r) {
++              ti->error = "Cannot get origin device";
++              goto bad2;
++      }
++
++      /* FIXME: get cow length */
++      r = dm_get_device(ti, cow_path, 0, 0,
++                        FMODE_READ | FMODE_WRITE, &s->cow);
++      if (r) {
++              /* bad3 would also put the COW device, which we never got */
++              dm_put_device(ti, s->origin);
++              ti->error = "Cannot get COW device";
++              goto bad2;
++      }
++
++      /*
++       * Chunk size must be multiple of page size.  Silently
++       * round up if it's not.
++       */
++      chunk_size = round_up(chunk_size, PAGE_SIZE / SECTOR_SIZE);
++
++      /* Validate the chunk size against the device block size */
++      blocksize = get_hardsect_size(s->cow->dev);
++      if (chunk_size % (blocksize / SECTOR_SIZE)) {
++              ti->error = "Chunk size is not a multiple of device blocksize";
++              r = -EINVAL;
++              goto bad3;
++      }
++
++      /* Check the sizes are small enough to fit in one kiovec */
++      if (chunk_size > KIO_MAX_SECTORS) {
++              ti->error = "Chunk size is too big";
++              r = -EINVAL;
++              goto bad3;
++      }
++
++      /* Check chunk_size is a power of 2 */
++      if (chunk_size & (chunk_size - 1)) {
++              ti->error = "Chunk size is not a power of 2";
++              r = -EINVAL;
++              goto bad3;
++      }
++
++      s->chunk_size = chunk_size;
++      s->chunk_mask = chunk_size - 1;
++      s->type = persistent;
++      /*
++       * Count the bits in chunk_size; as it is a power of 2, the
++       * count minus one is log2(chunk_size).  This consumes the
++       * local chunk_size, so only s->chunk_size is used below.
++       */
++      for (s->chunk_shift = 0; chunk_size;
++           s->chunk_shift++, chunk_size >>= 1)
++              ;
++      s->chunk_shift--;
++
++      s->valid = 1;
++      s->have_metadata = 0;
++      s->last_percent = 0;
++      init_rwsem(&s->lock);
++      s->table = ti->table;
++
++      /* Allocate hash table for COW data */
++      if (init_hash_tables(s)) {
++              ti->error = "Unable to allocate hash table space";
++              r = -ENOMEM;
++              goto bad3;
++      }
++
++      /*
++       * Check the persistent flag - done here because we need the iobuf
++       * to check the LV header
++       */
++      s->store.snap = s;
++
++      if (persistent == 'P')
++              r = dm_create_persistent(&s->store, s->chunk_size);
++      else
++              r = dm_create_transient(&s->store, s, blocksize);
++
++      if (r) {
++              /* The store's own error code is replaced by -EINVAL */
++              ti->error = "Couldn't create exception store";
++              r = -EINVAL;
++              goto bad4;
++      }
++
++      r = kcopyd_client_create(SNAPSHOT_PAGES, &s->kcopyd_client);
++      if (r) {
++              ti->error = "Could not create kcopyd client";
++              goto bad5;
++      }
++
++      /* Flush IO to the origin device */
++      fsync_dev(s->origin->dev);
++
++      /* Add snapshot to the list of snapshots for this origin */
++      if (register_snapshot(s)) {
++              r = -EINVAL;
++              ti->error = "Cannot register snapshot origin";
++              goto bad6;
++      }
++
++      ti->private = s;
++      return 0;
++
++      /* Error unwind: each label undoes one setup step, then falls through */
++ bad6:
++      kcopyd_client_destroy(s->kcopyd_client);
++
++ bad5:
++      s->store.destroy(&s->store);
++
++ bad4:
++      exit_exception_table(&s->pending, pending_cache);
++      exit_exception_table(&s->complete, exception_cache);
++
++ bad3:
++      dm_put_device(ti, s->cow);
++      dm_put_device(ti, s->origin);
++
++ bad2:
++      kfree(s);
++
++ bad1:
++      return r;
++}
++
++/*
++ * Destructor: raise a table event, unregister the snapshot from its
++ * origin, then release the hash tables, the exception store, both
++ * devices, the kcopyd client and finally the snapshot itself.
++ */
++static void snapshot_dtr(struct dm_target *ti)
++{
++      struct dm_snapshot *s = (struct dm_snapshot *) ti->private;
++
++      dm_table_event(ti->table);
++
++      unregister_snapshot(s);
++
++      exit_exception_table(&s->pending, pending_cache);
++      exit_exception_table(&s->complete, exception_cache);
++
++      /* Deallocate memory used */
++      s->store.destroy(&s->store);
++
++      dm_put_device(ti, s->origin);
++      dm_put_device(ti, s->cow);
++      kcopyd_client_destroy(s->kcopyd_client);
++      kfree(s);
++}
++
++/*
++ * We hold lists of buffer_heads, using the b_reqnext field.
++ *
++ * queue_buffer() pushes bh onto the front of the list at *queue.
++ */
++static void queue_buffer(struct buffer_head **queue, struct buffer_head *bh)
++{
++      bh->b_reqnext = *queue;
++      *queue = bh;
++}
++
++/*
++ * Append the whole chain bhs to the end of the list at *queue.
++ *
++ * FIXME: inefficient (walks the existing list to find its tail).
++ */
++static void queue_buffers(struct buffer_head **queue, struct buffer_head *bhs)
++{
++      struct buffer_head **tail;
++
++      for (tail = queue; *tail; tail = &(*tail)->b_reqnext)
++              ;
++
++      *tail = bhs;
++}
++
++/*
++ * Flush a list of buffers: submit each one as a WRITE, then run the
++ * disk task queue.
++ */
++static void flush_buffers(struct buffer_head *bh)
++{
++      struct buffer_head *next;
++
++      DMDEBUG("begin flush");
++      for (; bh; bh = next) {
++              /* Detach bh from the chain before submitting it. */
++              next = bh->b_reqnext;
++              bh->b_reqnext = NULL;
++              DMDEBUG("flushing %p", bh);
++              generic_make_request(WRITE, bh);
++      }
++
++      run_task_queue(&tq_disk);
++}
++
++/*
++ * Error a list of buffers: fail each one with buffer_IO_error().
++ */
++static void error_buffers(struct buffer_head *bh)
++{
++      struct buffer_head *next;
++
++      for (; bh; bh = next) {
++              /* Detach bh from the chain before completing it. */
++              next = bh->b_reqnext;
++              bh->b_reqnext = NULL;
++              buffer_IO_error(bh);
++      }
++}
++
++/*
++ * Decide what happens to pe's queued origin buffers.  If pe has no
++ * siblings they are returned for the caller to flush.  Otherwise pe is
++ * unlinked from the sibling list, its origin buffers are appended to
++ * the next sibling's list, and NULL is returned.
++ */
++static struct buffer_head *__flush_bhs(struct pending_exception *pe)
++{
++      struct pending_exception *sibling;
++
++      if (list_empty(&pe->siblings))
++              return pe->origin_bhs;
++
++      /* Pick the next sibling before pe is taken off the list */
++      sibling = list_entry(pe->siblings.next,
++                           struct pending_exception, siblings);
++
++      list_del(&pe->siblings);
++
++      /* FIXME: I think there's a race on SMP machines here, add spin lock */
++      queue_buffers(&sibling->origin_bhs, pe->origin_bhs);
++
++      return NULL;
++}
++
++/*
++ * Finish a pending exception.
++ *
++ * On success the exception is copied into the complete table and the
++ * queued snapshot buffers are submitted.  On failure, including a
++ * failure to allocate the completed exception, the snapshot is marked
++ * invalid and the queued snapshot buffers are errored.  In every case
++ * pe is unlinked from the pending table and freed, and any origin
++ * buffers handed back by __flush_bhs() are submitted.
++ */
++static void pending_complete(struct pending_exception *pe, int success)
++{
++      struct exception *e;
++      struct dm_snapshot *s = pe->snap;
++      struct buffer_head *flush = NULL;
++
++      if (success) {
++              e = alloc_exception();
++              if (!e) {
++                      DMWARN("Unable to allocate exception.");
++                      down_write(&s->lock);
++                      s->store.drop_snapshot(&s->store);
++                      s->valid = 0;
++                      /*
++                       * pe is freed below, so it must not stay linked
++                       * in the pending hash table.
++                       */
++                      remove_exception(&pe->e);
++                      flush = __flush_bhs(pe);
++                      up_write(&s->lock);
++
++                      error_buffers(pe->snapshot_bhs);
++                      goto out;
++              }
++
++              /*
++               * Add a proper exception, and remove the
++               * in-flight exception from the list.
++               */
++              down_write(&s->lock);
++
++              memcpy(e, &pe->e, sizeof(*e));
++              insert_exception(&s->complete, e);
++              remove_exception(&pe->e);
++              flush = __flush_bhs(pe);
++
++              /* Submit any pending write BHs */
++              up_write(&s->lock);
++
++              flush_buffers(pe->snapshot_bhs);
++              DMDEBUG("Exception completed successfully.");
++
++              /* Notify any interested parties */
++              if (s->store.fraction_full) {
++                      sector_t numerator, denominator;
++                      int pc;
++
++                      s->store.fraction_full(&s->store, &numerator,
++                                             &denominator);
++                      pc = numerator * 100 / denominator;
++
++                      /* Raise an event per WAKE_UP_PERCENT step crossed */
++                      if (pc >= s->last_percent + WAKE_UP_PERCENT) {
++                              dm_table_event(s->table);
++                              s->last_percent = pc - pc % WAKE_UP_PERCENT;
++                      }
++              }
++
++      } else {
++              /* Read/write error - snapshot is unusable */
++              down_write(&s->lock);
++              if (s->valid)
++                      DMERR("Error reading/writing snapshot");
++              s->store.drop_snapshot(&s->store);
++              s->valid = 0;
++              remove_exception(&pe->e);
++              flush = __flush_bhs(pe);
++              up_write(&s->lock);
++
++              error_buffers(pe->snapshot_bhs);
++
++              dm_table_event(s->table);
++              DMDEBUG("Exception failed.");
++      }
++
++ out:
++      if (flush)
++              flush_buffers(flush);
++
++      free_pending_exception(pe);
++}
++
++/*
++ * Completion callback handed to the exception store's
++ * commit_exception(); context is the pending exception.
++ */
++static void commit_callback(void *context, int success)
++{
++      pending_complete(context, success);
++}
++
++/*
++ * Called when the copy I/O has finished.  kcopyd actually runs
++ * this code so don't block.
++ *
++ * A failed copy completes the pending exception as an error; a
++ * successful one is passed to the exception store to commit.
++ */
++static void copy_callback(int read_err, unsigned int write_err, void *context)
++{
++      struct pending_exception *pe = context;
++      struct dm_snapshot *s = pe->snap;
++
++      if (read_err || write_err) {
++              pending_complete(pe, 0);
++              return;
++      }
++
++      /* Update the metadata if we are persistent */
++      s->store.commit_exception(&s->store, &pe->e, commit_callback, pe);
++}
++
++/*
++ * Dispatches the copy operation to kcopyd.
++ *
++ * Does nothing if this pending exception has already been started.
++ * The copy is clipped so that it does not run past the end of the
++ * origin device when the device size is known.
++ */
++static inline void start_copy(struct pending_exception *pe)
++{
++      struct dm_snapshot *s = pe->snap;
++      struct io_region src, dest;
++      kdev_t dev = s->origin->dev;
++      int *sizes = blk_size[major(dev)];
++      sector_t dev_size = (sector_t) -1;
++
++      if (pe->started)
++              return;
++
++      /* this is protected by snap->lock */
++      pe->started = 1;
++
++      /*
++       * Widen the int table entry to sector_t before doubling it, so
++       * the shift cannot overflow int for very large devices.
++       */
++      if (sizes && sizes[minor(dev)])
++              dev_size = (sector_t) sizes[minor(dev)] << 1;
++
++      src.dev = dev;
++      src.sector = chunk_to_sector(s, pe->e.old_chunk);
++      src.count = min(s->chunk_size, dev_size - src.sector);
++
++      dest.dev = s->cow->dev;
++      dest.sector = chunk_to_sector(s, pe->e.new_chunk);
++      dest.count = src.count;
++
++      /* Hand over to kcopyd */
++      kcopyd_copy(s->kcopyd_client,
++                  &src, 1, &dest, 0, copy_callback, pe);
++}
++
++/*
++ * Looks to see if this snapshot already has a pending exception
++ * for this chunk, otherwise it allocates a new one and inserts
++ * it into the pending table.
++ *
++ * Returns NULL, and marks the snapshot invalid, if the exception
++ * store cannot prepare a new exception.
++ */
++static struct pending_exception *find_pending_exception(struct dm_snapshot *s,
++                                                      struct buffer_head *bh)
++{
++      struct exception *found;
++      struct pending_exception *pe;
++      chunk_t chunk = sector_to_chunk(s, bh->b_rsector);
++
++      /* Is there a pending exception for this already ? */
++      found = lookup_exception(&s->pending, chunk);
++      if (found)
++              /* cast the exception to a pending exception */
++              return list_entry(found, struct pending_exception, e);
++
++      /* Create a new pending exception */
++      pe = alloc_pending_exception();
++      pe->snap = s;
++      pe->started = 0;
++      pe->e.old_chunk = chunk;
++      pe->origin_bhs = NULL;
++      pe->snapshot_bhs = NULL;
++      INIT_LIST_HEAD(&pe->siblings);
++
++      if (s->store.prepare_exception(&s->store, &pe->e)) {
++              free_pending_exception(pe);
++              s->valid = 0;
++              return NULL;
++      }
++
++      insert_exception(&s->pending, &pe->e);
++      return pe;
++}
++
++/*
++ * Redirects bh to the COW device.  The new sector is the start of
++ * e->new_chunk plus the bits of bh's current sector selected by
++ * s->chunk_mask.
++ */
++static inline void remap_exception(struct dm_snapshot *s, struct exception *e,
++                                 struct buffer_head *bh)
++{
++      sector_t offset = bh->b_rsector & s->chunk_mask;
++
++      bh->b_rdev = s->cow->dev;
++      bh->b_rsector = chunk_to_sector(s, e->new_chunk) + offset;
++}
++
++/*
++ * Map function of the "snapshot" target.
++ *
++ * Writes: if the chunk already has a completed exception, bh is
++ * remapped to the COW device and 1 is returned.  Otherwise bh is
++ * remapped to the new chunk of a pending exception and queued on it,
++ * the copy is started and 0 is returned.  If no pending exception
++ * can be obtained the snapshot is dropped and -EIO is returned.
++ *
++ * Reads: remapped to the COW device if a completed exception exists,
++ * otherwise directed at the origin device; 1 is returned.
++ *
++ * Returns -1 straight away if the snapshot is not valid.
++ */
++static int snapshot_map(struct dm_target *ti, struct buffer_head *bh, int rw,
++                      union map_info *map_context)
++{
++      struct exception *e;
++      struct dm_snapshot *s = (struct dm_snapshot *) ti->private;
++      int r = 1;
++      chunk_t chunk;
++      struct pending_exception *pe;
++
++      chunk = sector_to_chunk(s, bh->b_rsector);
++
++      /* Full snapshots are not usable */
++      if (!s->valid)
++              return -1;
++
++      /*
++       * Write to snapshot - higher level takes care of RW/RO
++       * flags so we should only get this if we are
++       * writeable.
++       */
++      if (rw == WRITE) {
++
++              down_write(&s->lock);
++
++              /* If the block is already remapped - use that, else remap it */
++              e = lookup_exception(&s->complete, chunk);
++              if (e)
++                      remap_exception(s, e, bh);
++
++              else {
++                      pe = find_pending_exception(s, bh);
++
++                      if (!pe) {
++                              /* no room for the exception: give up on the snapshot */
++                              s->store.drop_snapshot(&s->store);
++                              s->valid = 0;
++                              r = -EIO;
++                      } else {
++                              /* bh waits on the pending exception until the copy is done */
++                              remap_exception(s, &pe->e, bh);
++                              queue_buffer(&pe->snapshot_bhs, bh);
++                              start_copy(pe);
++                              r = 0;
++                      }
++              }
++
++              up_write(&s->lock);
++
++      } else {
++              /*
++               * FIXME: this read path scares me because we
++               * always use the origin when we have a pending
++               * exception.  However I can't think of a
++               * situation where this is wrong - ejt.
++               */
++
++              /* Do reads */
++              down_read(&s->lock);
++
++              /* See if it has been remapped */
++              e = lookup_exception(&s->complete, chunk);
++              if (e)
++                      remap_exception(s, e, bh);
++              else
++                      bh->b_rdev = s->origin->dev;
++
++              up_read(&s->lock);
++      }
++
++      return r;
++}
++
++/*
++ * Resume hook of the snapshot target.  The first call asks the
++ * exception store to read its metadata and marks the snapshot invalid
++ * if that fails; later calls return immediately.
++ */
++void snapshot_resume(struct dm_target *ti)
++{
++      struct dm_snapshot *s = (struct dm_snapshot *) ti->private;
++      int failed;
++
++      if (s->have_metadata)
++              return;
++
++      failed = s->store.read_metadata(&s->store);
++      if (failed) {
++              /* invalidate under the snapshot lock */
++              down_write(&s->lock);
++              s->valid = 0;
++              up_write(&s->lock);
++      }
++
++      s->have_metadata = 1;
++}
++
++/*
++ * Status function of the snapshot target; the text is written to
++ * result, which is maxlen bytes long.
++ *
++ * STATUSTYPE_INFO:  "Invalid" for an invalid snapshot, otherwise
++ *                   "<numerator>/<denominator>" as reported by the
++ *                   store's fraction_full(), or "Unknown" if the
++ *                   store does not provide that method.
++ * STATUSTYPE_TABLE: "<origin dev> <cow dev> <type> <chunk size>".
++ *
++ * Always returns 0.
++ */
++static int snapshot_status(struct dm_target *ti, status_type_t type,
++                         char *result, unsigned int maxlen)
++{
++      struct dm_snapshot *snap = (struct dm_snapshot *) ti->private;
++      char cow[16];
++      char org[16];
++
++      switch (type) {
++      case STATUSTYPE_INFO:
++              if (!snap->valid)
++                      snprintf(result, maxlen, "Invalid");
++              else {
++                      if (snap->store.fraction_full) {
++                              sector_t numerator, denominator;
++                              snap->store.fraction_full(&snap->store,
++                                                        &numerator,
++                                                        &denominator);
++                              snprintf(result, maxlen,
++                                       SECTOR_FORMAT "/" SECTOR_FORMAT,
++                                       numerator, denominator);
++                      }
++                      else
++                              snprintf(result, maxlen, "Unknown");
++              }
++              break;
++
++      case STATUSTYPE_TABLE:
++              /*
++               * kdevname returns a static pointer so we need
++               * to make private copies if the output is to
++               * make sense.  The copies are made with snprintf()
++               * rather than strncpy() so that the buffers are
++               * NUL-terminated even if the name gets truncated.
++               */
++              snprintf(cow, sizeof(cow), "%s", dm_kdevname(snap->cow->dev));
++              snprintf(org, sizeof(org), "%s",
++                       dm_kdevname(snap->origin->dev));
++
++              /* chunk_size is a chunk_t, i.e. a sector_t */
++              snprintf(result, maxlen, "%s %s %c " SECTOR_FORMAT, org, cow,
++                       snap->type, snap->chunk_size);
++              break;
++      }
++
++      return 0;
++}
++
++/*-----------------------------------------------------------------
++ * Origin methods
++ *---------------------------------------------------------------*/
++/*
++ * Joins the circular list containing l1 and the circular list
++ * containing l2 into one circular list: l2 is linked in directly
++ * after l1, and the node that used to precede l2 is linked to the
++ * node that used to follow l1.
++ *
++ * NOTE(review): this only yields a single well-formed ring if l1 and
++ * l2 are not already on the same list - confirm against the callers.
++ */
++static void list_merge(struct list_head *l1, struct list_head *l2)
++{
++      struct list_head *l1_n, *l2_p;
++
++      /* remember the two neighbours that are about to be unlinked */
++      l1_n = l1->next;
++      l2_p = l2->prev;
++
++      /* l1 -> l2 */
++      l1->next = l2;
++      l2->prev = l1;
++
++      /* old predecessor of l2 -> old successor of l1 */
++      l2_p->next = l1_n;
++      l1_n->prev = l2_p;
++}
++
++/*
++ * Handles a write of bh to the origin on behalf of every snapshot on
++ * the 'snapshots' list.
++ *
++ * For each valid snapshot that has no completed exception for the
++ * chunk being written, a pending exception is found or created.  The
++ * pending exceptions of the different snapshots are chained together
++ * through their 'siblings' lists.  Afterwards bh is queued on one of
++ * them and the copy is started for every pending exception in the
++ * chain.  A snapshot for which no pending exception can be obtained
++ * is dropped and marked invalid.
++ *
++ * Returns 0 if bh was queued behind at least one pending exception,
++ * 1 if no snapshot needed an exception for this write.
++ */
++static int __origin_write(struct list_head *snapshots, struct buffer_head *bh)
++{
++      int r = 1, first = 1;
++      struct list_head *sl;
++      struct dm_snapshot *snap;
++      struct exception *e;
++      struct pending_exception *pe, *last = NULL;
++      chunk_t chunk;
++
++      /* Do all the snapshots on this origin */
++      list_for_each(sl, snapshots) {
++              snap = list_entry(sl, struct dm_snapshot, list);
++
++              /* Only deal with valid snapshots */
++              if (!snap->valid)
++                      continue;
++
++              down_write(&snap->lock);
++
++              /*
++               * Remember, different snapshots can have
++               * different chunk sizes.
++               */
++              chunk = sector_to_chunk(snap, bh->b_rsector);
++
++              /*
++               * Check exception table to see if block
++               * is already remapped in this snapshot
++               * and trigger an exception if not.
++               */
++              e = lookup_exception(&snap->complete, chunk);
++              if (!e) {
++                      pe = find_pending_exception(snap, bh);
++                      if (!pe) {
++                              snap->store.drop_snapshot(&snap->store);
++                              snap->valid = 0;
++
++                      } else {
++                              /* chain this pe to the ones found so far */
++                              if (last)
++                                      list_merge(&pe->siblings,
++                                                 &last->siblings);
++
++                              last = pe;
++                              r = 0;
++                      }
++              }
++
++              up_write(&snap->lock);
++      }
++
++      /*
++       * Now that we have a complete pe list we can start the copying.
++       */
++      if (last) {
++              pe = last;
++              do {
++                      down_write(&pe->snap->lock);
++                      /* bh is queued once only, on the first pe visited */
++                      if (first)
++                              queue_buffer(&pe->origin_bhs, bh);
++                      start_copy(pe);
++                      up_write(&pe->snap->lock);
++                      first = 0;
++                      pe = list_entry(pe->siblings.next,
++                                      struct pending_exception, siblings);
++
++              } while (pe != last);
++      }
++
++      return r;
++}
++
++/*
++ * Called on a write from the origin driver.  Looks up the origin
++ * entry for the device and passes bh to __origin_write() for its
++ * snapshots, holding _origins_lock for reading throughout.  It is a
++ * BUG() for the device to have no origin entry.
++ */
++int do_origin(struct dm_dev *origin, struct buffer_head *bh)
++{
++      int r;
++      struct origin *o;
++
++      down_read(&_origins_lock);
++
++      o = __lookup_origin(origin->dev);
++      if (o == NULL)
++              BUG();
++
++      r = __origin_write(&o->snapshots, bh);
++
++      up_read(&_origins_lock);
++      return r;
++}
++
++/*
++ * Origin: maps a linear range of a device, with hooks for snapshotting.
++ */
++
++/*
++ * Construct an origin mapping: <dev_path>
++ * The only context kept for an origin is the 'struct dm_dev *' of
++ * the real device, which is stored in ti->private.
++ *
++ * Returns 0 on success; otherwise ti->error is set and -EINVAL (wrong
++ * argument count) or the dm_get_device() error is returned.
++ */
++static int origin_ctr(struct dm_target *ti, unsigned int argc, char **argv)
++{
++      struct dm_dev *dev;
++      int r;
++
++      if (argc != 1) {
++              ti->error = "dm-origin: incorrect number of arguments";
++              return -EINVAL;
++      }
++
++      r = dm_get_device(ti, argv[0], 0, ti->len,
++                        dm_table_get_mode(ti->table), &dev);
++      if (!r) {
++              ti->private = dev;
++              return 0;
++      }
++
++      ti->error = "Cannot get target device";
++      return r;
++}
++
++/* Destructor: puts the device that origin_ctr() stored in ti->private. */
++static void origin_dtr(struct dm_target *ti)
++{
++      dm_put_device(ti, (struct dm_dev *) ti->private);
++}
++
++/*
++ * Map function of the origin target: bh is always pointed at the real
++ * device.  Reads return 1; writes are passed to do_origin() and its
++ * result is returned.
++ */
++static int origin_map(struct dm_target *ti, struct buffer_head *bh, int rw,
++                    union map_info *map_context)
++{
++      struct dm_dev *dev = (struct dm_dev *) ti->private;
++
++      bh->b_rdev = dev->dev;
++
++      /* Only tell snapshots if this is a write */
++      if (rw != WRITE)
++              return 1;
++
++      return do_origin(dev, bh);
++}
++
++/*
++ * Status function of the origin target.  STATUSTYPE_INFO yields an
++ * empty string, STATUSTYPE_TABLE the name of the real device.
++ * Always returns 0.
++ */
++static int origin_status(struct dm_target *ti, status_type_t type, char *result,
++                       unsigned int maxlen)
++{
++      struct dm_dev *dev = (struct dm_dev *) ti->private;
++
++      if (type == STATUSTYPE_INFO)
++              result[0] = '\0';
++      else if (type == STATUSTYPE_TABLE)
++              snprintf(result, maxlen, "%s", dm_kdevname(dev->dev));
++
++      return 0;
++}
++
++/* Target type registered under the name "snapshot-origin". */
++static struct target_type origin_target = {
++      .name   = "snapshot-origin",
++      .module = THIS_MODULE,
++      .ctr    = origin_ctr,
++      .dtr    = origin_dtr,
++      .map    = origin_map,
++      .status = origin_status,
++};
++
++/* Target type registered under the name "snapshot". */
++static struct target_type snapshot_target = {
++      .name   = "snapshot",
++      .module = THIS_MODULE,
++      .ctr    = snapshot_ctr,
++      .dtr    = snapshot_dtr,
++      .map    = snapshot_map,
++      .resume = snapshot_resume,
++      .status = snapshot_status,
++};
++
++/*
++ * Initialisation of the snapshot code: registers the "snapshot" and
++ * "snapshot-origin" targets, sets up the origin hash, and creates the
++ * exception and pending-exception slab caches plus the mempool that
++ * is backed by the pending cache.
++ *
++ * Returns 0 on success.  On failure everything set up so far is torn
++ * down again in reverse order and the error code is returned.
++ */
++int __init dm_snapshot_init(void)
++{
++      int r;
++
++      r = dm_register_target(&snapshot_target);
++      if (r) {
++              DMERR("snapshot target register failed %d", r);
++              return r;
++      }
++
++      r = dm_register_target(&origin_target);
++      if (r < 0) {
++              DMERR("Device mapper: Origin: register failed %d\n", r);
++              goto bad1;
++      }
++
++      r = init_origin_hash();
++      if (r) {
++              DMERR("init_origin_hash failed.");
++              goto bad2;
++      }
++
++      exception_cache = kmem_cache_create("dm-snapshot-ex",
++                                          sizeof(struct exception),
++                                          __alignof__(struct exception),
++                                          0, NULL, NULL);
++      if (!exception_cache) {
++              DMERR("Couldn't create exception cache.");
++              r = -ENOMEM;
++              goto bad3;
++      }
++
++      pending_cache =
++          kmem_cache_create("dm-snapshot-in",
++                            sizeof(struct pending_exception),
++                            __alignof__(struct pending_exception),
++                            0, NULL, NULL);
++      if (!pending_cache) {
++              DMERR("Couldn't create pending cache.");
++              r = -ENOMEM;
++              goto bad4;
++      }
++
++      pending_pool = mempool_create(128, mempool_alloc_slab,
++                                    mempool_free_slab, pending_cache);
++      if (!pending_pool) {
++              DMERR("Couldn't create pending pool.");
++              r = -ENOMEM;
++              goto bad5;
++      }
++
++      return 0;
++
++      /* each label undoes the step before the one that failed, then falls through */
++      bad5:
++      kmem_cache_destroy(pending_cache);
++      bad4:
++      kmem_cache_destroy(exception_cache);
++      bad3:
++      exit_origin_hash();
++      bad2:
++      dm_unregister_target(&origin_target);
++      bad1:
++      dm_unregister_target(&snapshot_target);
++      return r;
++}
++
++/*
++ * Teardown counterpart of dm_snapshot_init(): unregisters both
++ * targets (a failure is logged but otherwise ignored) and then
++ * releases the origin hash, the pending pool and the two slab caches.
++ */
++void dm_snapshot_exit(void)
++{
++      int err;
++
++      err = dm_unregister_target(&snapshot_target);
++      if (err != 0)
++              DMERR("snapshot unregister failed %d", err);
++
++      err = dm_unregister_target(&origin_target);
++      if (err != 0)
++              DMERR("origin unregister failed %d", err);
++
++      exit_origin_hash();
++
++      /* the pool goes before the cache it allocates from */
++      mempool_destroy(pending_pool);
++      kmem_cache_destroy(pending_cache);
++      kmem_cache_destroy(exception_cache);
++}
+diff -ruN linux-2.4.21-dm-real/drivers/md/dm-snapshot.h linux-2.4.21/drivers/md/dm-snapshot.h
+--- linux-2.4.21-dm-real/drivers/md/dm-snapshot.h      Thu Jan  1 01:00:00 1970
++++ linux-2.4.21/drivers/md/dm-snapshot.h      Sat Jul 12 18:14:05 2003
+@@ -0,0 +1,158 @@
++/*
++ * dm-snapshot.h
++ *
++ * Copyright (C) 2001-2002 Sistina Software (UK) Limited.
++ *
++ * This file is released under the GPL.
++ */
++
++#ifndef DM_SNAPSHOT_H
++#define DM_SNAPSHOT_H
++
++#include "dm.h"
++#include <linux/blkdev.h>
++
++/*
++ * A table of exceptions; struct dm_snapshot keeps one for pending and
++ * one for completed exceptions.
++ *
++ * NOTE(review): judging by the field names this is a hash table whose
++ * buckets are list heads selected with hash_mask, but the code that
++ * fills and searches it is not in this header - confirm there.
++ */
++struct exception_table {
++      uint32_t hash_mask;
++      struct list_head *table;
++};
++
++/*
++ * The snapshot code deals with largish chunks of the disk at a
++ * time. Typically 64k - 256k.
++ *
++ * A chunk_t holds a chunk number; see sector_to_chunk() and
++ * chunk_to_sector() for the conversion to and from sectors.
++ */
++/* FIXME: can we get away with limiting these to a uint32_t ? */
++typedef sector_t chunk_t;
++
++/*
++ * An exception is used where an old chunk of data has been
++ * replaced by a new one.
++ */
++struct exception {
++      /* list linkage (presumably the bucket chain of an exception_table) */
++      struct list_head hash_list;
++
++      chunk_t old_chunk;      /* chunk number on the origin device */
++      chunk_t new_chunk;      /* chunk number on the COW device */
++};
++
++/*
++ * Abstraction to handle the meta/layout of exception stores (the
++ * COW device).
++ *
++ * Every method takes the store itself as its first argument.
++ */
++struct exception_store {
++
++      /*
++       * Destroys this object when you've finished with it.
++       */
++      void (*destroy) (struct exception_store *store);
++
++      /*
++       * The target shouldn't read the COW device until this is
++       * called.  A non-zero return is treated as failure and makes
++       * the snapshot invalid.
++       */
++      int (*read_metadata) (struct exception_store *store);
++
++      /*
++       * Find somewhere to store the next exception.  A non-zero
++       * return is treated as failure by the callers.
++       */
++      int (*prepare_exception) (struct exception_store *store,
++                                struct exception *e);
++
++      /*
++       * Update the metadata with this exception.  The callback is
++       * given callback_context and a success flag.
++       */
++      void (*commit_exception) (struct exception_store *store,
++                                struct exception *e,
++                                void (*callback) (void *, int success),
++                                void *callback_context);
++
++      /*
++       * The snapshot is invalid, note this in the metadata.
++       */
++      void (*drop_snapshot) (struct exception_store *store);
++
++      /*
++       * Return how full the snapshot is.  This method is optional:
++       * callers check it for NULL before using it.
++       */
++      void (*fraction_full) (struct exception_store *store,
++                             sector_t *numerator,
++                             sector_t *denominator);
++
++      /* NOTE(review): presumably the snapshot owning this store - confirm */
++      struct dm_snapshot *snap;
++      /* NOTE(review): presumably private data of the store implementation */
++      void *context;
++};
++
++/*
++ * State of one snapshot; the private data of a "snapshot" target.
++ */
++struct dm_snapshot {
++      /*
++       * Taken for write around exception-table updates and changes
++       * to 'valid', and for read on the snapshot read path.
++       */
++      struct rw_semaphore lock;
++      struct dm_table *table;
++
++      struct dm_dev *origin;  /* device the snapshot was taken of */
++      struct dm_dev *cow;     /* device holding the copied chunks */
++
++      /* List of snapshots per Origin */
++      struct list_head list;
++
++      /* Size of data blocks saved - must be a power of 2 */
++      chunk_t chunk_size;
++      /* mask and shift used to convert between sectors and chunks */
++      chunk_t chunk_mask;
++      chunk_t chunk_shift;
++
++      /* You can't use a snapshot if this is 0 (e.g. if full) */
++      int valid;
++      /* set once the store's read_metadata() has been attempted */
++      int have_metadata;
++
++      /* Used for display of table */
++      char type;
++
++      /* The last percentage we notified */
++      int last_percent;
++
++      /* exceptions whose copy is still in progress / has finished */
++      struct exception_table pending;
++      struct exception_table complete;
++
++      /* The on disk metadata handler */
++      struct exception_store store;
++
++      /* client handle passed to kcopyd_copy() for the chunk copies */
++      struct kcopyd_client *kcopyd_client;
++};
++
++/*
++ * Used by the exception stores to load exceptions when
++ * initialising.
++ */
++int dm_add_exception(struct dm_snapshot *s, chunk_t old, chunk_t new);
++
++/*
++ * Constructors for the default persistent store and for the
++ * transient store.
++ */
++int dm_create_persistent(struct exception_store *store, uint32_t chunk_size);
++
++int dm_create_transient(struct exception_store *store,
++                      struct dm_snapshot *s, int blocksize);
++
++/*
++ * Return the number of sectors in the device.
++ *
++ * The size is taken from the driver's blk_size[] table and doubled
++ * to get a count of 512-byte sectors.  Returns 0 if the driver has
++ * no blk_size[] table.
++ */
++static inline sector_t get_dev_size(kdev_t dev)
++{
++      int *sizes;
++
++      sizes = blk_size[MAJOR(dev)];
++      if (sizes)
++              /* widen first so the doubling is not done in (signed) int */
++              return ((sector_t) sizes[MINOR(dev)]) << 1;
++
++      return 0;
++}
++
++/*
++ * Converts a sector number into a chunk number: the bits covered by
++ * s->chunk_mask are cleared and the rest is shifted down by
++ * s->chunk_shift.
++ */
++static inline chunk_t sector_to_chunk(struct dm_snapshot *s, sector_t sector)
++{
++      sector_t chunk_start = sector & ~s->chunk_mask;
++
++      return chunk_start >> s->chunk_shift;
++}
++
++/* Converts a chunk number into the sector at which that chunk starts. */
++static inline sector_t chunk_to_sector(struct dm_snapshot *s, chunk_t chunk)
++{
++      sector_t first_sector = chunk << s->chunk_shift;
++
++      return first_sector;
++}
++
++#endif
+diff -ruN linux-2.4.21-dm-real/drivers/md/dm-stripe.c linux-2.4.21/drivers/md/dm-stripe.c
+--- linux-2.4.21-dm-real/drivers/md/dm-stripe.c        Thu Jan  1 01:00:00 1970
++++ linux-2.4.21/drivers/md/dm-stripe.c        Sat Jul 12 18:14:05 2003
+@@ -0,0 +1,258 @@
++/*
++ * Copyright (C) 2001 Sistina Software (UK) Limited.
++ *
++ * This file is released under the GPL.
++ */
++
++#include "dm.h"
++
++#include <linux/module.h>
++#include <linux/init.h>
++#include <linux/blkdev.h>
++#include <linux/slab.h>
++
++/* One destination of a striped mapping. */
++struct stripe {
++      struct dm_dev *dev;             /* device the stripe lives on */
++      sector_t physical_start;        /* sector on dev where the stripe begins */
++};
++
++/*
++ * Context of a striped target, allocated by alloc_context() with room
++ * for 'stripes' entries in the trailing stripe[] array.
++ */
++struct stripe_c {
++      /* number of entries in stripe[] */
++      uint32_t stripes;
++
++      /* The size of this target / num. stripes */
++      uint32_t stripe_width;
++
++      /* stripe chunk size */
++      uint32_t chunk_shift;   /* log2 of the chunk size */
++      sector_t chunk_mask;    /* chunk size - 1 */
++
++      struct stripe stripe[0];
++};
++
++/*
++ * Allocates (GFP_KERNEL) a stripe_c with room for 'stripes' trailing
++ * struct stripe entries.  Returns NULL if array_too_big() rejects
++ * the size or if the allocation fails.
++ */
++static inline struct stripe_c *alloc_context(unsigned int stripes)
++{
++      size_t len;
++
++      if (!array_too_big(sizeof(struct stripe_c), sizeof(struct stripe),
++                         stripes)) {
++              len = sizeof(struct stripe_c) +
++                  (sizeof(struct stripe) * stripes);
++              return kmalloc(len, GFP_KERNEL);
++      }
++
++      return NULL;
++}
++
++/*
++ * Parse a single <dev> <sector> pair (argv[0] and argv[1]) and record
++ * it as stripe number 'stripe' of sc.
++ *
++ * Returns 0 on success, -EINVAL if the sector does not parse and
++ * -ENXIO if dm_get_device() fails.
++ */
++static int get_stripe(struct dm_target *ti, struct stripe_c *sc,
++                    unsigned int stripe, char **argv)
++{
++      struct stripe *s = &sc->stripe[stripe];
++      sector_t start;
++      int r;
++
++      if (sscanf(argv[1], SECTOR_FORMAT, &start) != 1)
++              return -EINVAL;
++
++      r = dm_get_device(ti, argv[0], start, sc->stripe_width,
++                        dm_table_get_mode(ti->table), &s->dev);
++      if (r)
++              return -ENXIO;
++
++      s->physical_start = start;
++      return 0;
++}
++
++/*
++ * FIXME: Nasty function, only present because we can't link
++ * against __moddi3 and __divdi3.
++ *
++ * Divides a by b using shifts and subtractions.  On return *n holds
++ * the quotient a / b (rounded down); the return value is non-zero
++ * iff a == b * (*n), i.e. iff a is an exact multiple of b.
++ *
++ * NOTE(review): b must not be 0 - with b == 0 neither loop below can
++ * terminate.
++ */
++static int multiple(sector_t a, sector_t b, sector_t *n)
++{
++      sector_t acc, prev, i;
++
++      *n = 0;
++      while (a >= b) {
++              /*
++               * Double acc (starting at b) until it exceeds a; prev is
++               * then the largest b * 2^k not above a and i is 2^(k+1).
++               */
++              for (acc = b, prev = 0, i = 1;
++                   acc <= a;
++                   prev = acc, acc <<= 1, i <<= 1)
++                      ;
++
++              /* take that multiple off a and credit 2^k to the quotient */
++              a -= prev;
++              *n += i >> 1;
++      }
++
++      return a == 0;
++}
++
++/*
++ * Construct a striped mapping.
++ * <number of stripes> <chunk size (2^^n)> [<dev_path> <offset>]+
++ *
++ * Returns 0 with ti->private pointing at the new stripe_c, or a
++ * negative errno with ti->error describing what was wrong.  Devices
++ * acquired before a failure are put again.
++ */
++static int stripe_ctr(struct dm_target *ti, unsigned int argc, char **argv)
++{
++      struct stripe_c *sc;
++      sector_t width;
++      uint32_t stripes;
++      uint32_t chunk_size;
++      char *end;
++      int r;
++      unsigned int i;
++
++      if (argc < 2) {
++              ti->error = "dm-stripe: Not enough arguments";
++              return -EINVAL;
++      }
++
++      /*
++       * A stripe count of zero is meaningless, and multiple() below
++       * would never return if asked to divide by it.
++       */
++      stripes = simple_strtoul(argv[0], &end, 10);
++      if (*end || !stripes) {
++              ti->error = "dm-stripe: Invalid stripe count";
++              return -EINVAL;
++      }
++
++      chunk_size = simple_strtoul(argv[1], &end, 10);
++      if (*end) {
++              ti->error = "dm-stripe: Invalid chunk_size";
++              return -EINVAL;
++      }
++
++      /*
++       * chunk_size is a power of two
++       */
++      if (!chunk_size || (chunk_size & (chunk_size - 1))) {
++              ti->error = "dm-stripe: Invalid chunk size";
++              return -EINVAL;
++      }
++
++      /* width = ti->len / stripes, which must divide exactly */
++      if (!multiple(ti->len, stripes, &width)) {
++              ti->error = "dm-stripe: Target length not divisable by "
++                  "number of stripes";
++              return -EINVAL;
++      }
++
++      /*
++       * Do we have enough arguments for that many stripes ?
++       */
++      if (argc != (2 + 2 * stripes)) {
++              ti->error = "dm-stripe: Not enough destinations specified";
++              return -EINVAL;
++      }
++
++      sc = alloc_context(stripes);
++      if (!sc) {
++              ti->error = "dm-stripe: Memory allocation for striped context "
++                  "failed";
++              return -ENOMEM;
++      }
++
++      sc->stripes = stripes;
++      sc->stripe_width = width;
++
++      /* mask = chunk_size - 1, shift = log2(chunk_size) */
++      sc->chunk_mask = ((sector_t) chunk_size) - 1;
++      for (sc->chunk_shift = 0; chunk_size; sc->chunk_shift++)
++              chunk_size >>= 1;
++      sc->chunk_shift--;
++
++      /*
++       * Get the stripe destinations.
++       */
++      for (i = 0; i < stripes; i++) {
++              /* step over the previous pair (initially the two counts) */
++              argv += 2;
++
++              r = get_stripe(ti, sc, i, argv);
++              if (r < 0) {
++                      ti->error = "dm-stripe: Couldn't parse stripe "
++                          "destination";
++                      /* release the devices obtained so far */
++                      while (i--)
++                              dm_put_device(ti, sc->stripe[i].dev);
++                      kfree(sc);
++                      return r;
++              }
++      }
++
++      ti->private = sc;
++      return 0;
++}
++
++/* Destructor: puts every stripe device and frees the context. */
++static void stripe_dtr(struct dm_target *ti)
++{
++      struct stripe_c *sc = (struct stripe_c *) ti->private;
++      struct stripe *s = sc->stripe;
++      struct stripe *end = s + sc->stripes;
++
++      for (; s != end; s++)
++              dm_put_device(ti, s->dev);
++
++      kfree(sc);
++}
++
++/*
++ * Map function of the striped target.
++ *
++ * The offset of bh within the target is split into a chunk number
++ * and an offset inside the chunk.  The chunk number modulo the number
++ * of stripes selects the stripe, the chunk number divided by the
++ * number of stripes gives the chunk on that stripe.  bh is then
++ * pointed at the stripe's device.  Always returns 1.
++ */
++static int stripe_map(struct dm_target *ti, struct buffer_head *bh, int rw,
++                    union map_info *context)
++{
++      struct stripe_c *sc = (struct stripe_c *) ti->private;
++
++      sector_t offset = bh->b_rsector - ti->begin;
++      uint32_t chunk = (uint32_t) (offset >> sc->chunk_shift);
++      uint32_t stripe = chunk % sc->stripes;  /* 32bit modulus */
++      chunk = chunk / sc->stripes;
++
++      bh->b_rdev = sc->stripe[stripe].dev->dev;
++      /*
++       * Widen chunk before shifting so that the sector offset is
++       * calculated in sector_t rather than being cut off at 32 bits.
++       */
++      bh->b_rsector = sc->stripe[stripe].physical_start +
++          (((sector_t) chunk) << sc->chunk_shift) +
++          (offset & sc->chunk_mask);
++      return 1;
++}
++
++/*
++ * Status function of the striped target; the text is written to
++ * result, which is maxlen bytes long.
++ *
++ * STATUSTYPE_INFO:  empty string.
++ * STATUSTYPE_TABLE: "<stripes> <chunk size>" followed by
++ *                   " <device> <start sector>" for every stripe; the
++ *                   list is cut short once result is full.
++ *
++ * Always returns 0.
++ */
++static int stripe_status(struct dm_target *ti, status_type_t type,
++                       char *result, unsigned int maxlen)
++{
++      struct stripe_c *sc = (struct stripe_c *) ti->private;
++      int offset;
++      unsigned int i;
++
++      switch (type) {
++      case STATUSTYPE_INFO:
++              result[0] = '\0';
++              break;
++
++      case STATUSTYPE_TABLE:
++              offset = snprintf(result, maxlen, "%d " SECTOR_FORMAT,
++                                sc->stripes, sc->chunk_mask + 1);
++              for (i = 0; i < sc->stripes; i++) {
++                      /*
++                       * snprintf() may report the length the output
++                       * would have had, so offset can run past the
++                       * buffer; stop before the unsigned
++                       * 'maxlen - offset' below could wrap around.
++                       */
++                      if (offset < 0 || (unsigned int) offset >= maxlen)
++                              break;
++
++                      offset +=
++                          snprintf(result + offset, maxlen - offset,
++                                   " %s " SECTOR_FORMAT,
++                     dm_kdevname(to_kdev_t(sc->stripe[i].dev->bdev->bd_dev)),
++                                   sc->stripe[i].physical_start);
++              }
++              break;
++      }
++      return 0;
++}
++
++/* Target type registered under the name "striped". */
++static struct target_type stripe_target = {
++      .name   = "striped",
++      .module = THIS_MODULE,
++      .ctr    = stripe_ctr,
++      .dtr    = stripe_dtr,
++      .map    = stripe_map,
++      .status = stripe_status,
++};
++
++/*
++ * Registers the striped target.  Returns the result of
++ * dm_register_target(); a negative result is also logged.
++ */
++int __init dm_stripe_init(void)
++{
++      int r = dm_register_target(&stripe_target);
++
++      if (r < 0)
++              DMWARN("striped target registration failed");
++
++      return r;
++}
++
++/* Unregisters the striped target; a failure is only logged. */
++void dm_stripe_exit(void)
++{
++      int r = dm_unregister_target(&stripe_target);
++
++      if (r)
++              DMWARN("striped target unregistration failed");
++}
+diff -ruN linux-2.4.21-dm-real/drivers/md/dm-table.c linux-2.4.21/drivers/md/dm-table.c
+--- linux-2.4.21-dm-real/drivers/md/dm-table.c Thu Jan  1 01:00:00 1970
++++ linux-2.4.21/drivers/md/dm-table.c Sat Jul 12 18:14:05 2003
+@@ -0,0 +1,687 @@
++/*
++ * Copyright (C) 2001 Sistina Software (UK) Limited.
++ *
++ * This file is released under the GPL.
++ */
++
++#include "dm.h"
++
++#include <linux/module.h>
++#include <linux/vmalloc.h>
++#include <linux/blkdev.h>
++#include <linux/ctype.h>
++#include <linux/slab.h>
++#include <asm/atomic.h>
++
++/*
++ * Geometry of the btree index.  A node is NODE_SIZE (L1_CACHE_BYTES)
++ * bytes big and so holds KEYS_PER_NODE sector_t keys; a node has one
++ * more child than it has keys.  MAX_DEPTH sizes the per-level arrays
++ * in struct dm_table.
++ */
++#define MAX_DEPTH 16
++#define NODE_SIZE L1_CACHE_BYTES
++#define KEYS_PER_NODE (NODE_SIZE / sizeof(sector_t))
++#define CHILDREN_PER_NODE (KEYS_PER_NODE + 1)
++
++/*
++ * A table of targets together with the btree index over them.
++ */
++struct dm_table {
++      /* reference count; the table is destroyed when it drops to zero */
++      atomic_t holders;
++
++      /* btree table */
++      unsigned int depth;
++      unsigned int counts[MAX_DEPTH]; /* in nodes */
++      sector_t *index[MAX_DEPTH];     /* index[l]: the nodes of level l */
++
++      unsigned int num_targets;       /* entries of highs/targets in use */
++      unsigned int num_allocated;     /* entries of highs/targets allocated */
++      /* both arrays are carved out of a single allocation */
++      sector_t *highs;
++      struct dm_target *targets;
++
++      /*
++       * Indicates the rw permissions for the new logical
++       * device.  This should be a combination of FMODE_READ
++       * and FMODE_WRITE.
++       */
++      int mode;
++
++      /* a list of devices used by this table */
++      struct list_head devices;
++
++      /* events get handed up using this callback */
++      void (*event_fn)(void *);
++      void *event_context;
++};
++
++/*
++ * Similar to ceiling(log_base(n)): counts how often dm_div_up(n, base)
++ * has to be applied until n is no longer greater than 1.
++ */
++static unsigned int int_log(unsigned long n, unsigned long base)
++{
++      int result;
++
++      for (result = 0; n > 1; result++)
++              n = dm_div_up(n, base);
++
++      return result;
++}
++
++/*
++ * Calculate the index of the child node of the n'th node k'th key:
++ * the children of node n start at n * CHILDREN_PER_NODE.
++ */
++static inline unsigned int get_child(unsigned int n, unsigned int k)
++{
++      unsigned int first_child = n * CHILDREN_PER_NODE;
++
++      return first_child + k;
++}
++
++/*
++ * Return the n'th node of level l from table t, i.e. a pointer to
++ * the first of its KEYS_PER_NODE keys.
++ */
++static inline sector_t *get_node(struct dm_table *t, unsigned int l,
++                               unsigned int n)
++{
++      sector_t *level = t->index[l];
++
++      return &level[n * KEYS_PER_NODE];
++}
++
++/*
++ * Return the highest key that you could lookup from the n'th
++ * node on level l of the btree.  If the node reached on the bottom
++ * level does not exist, (sector_t) -1 is returned.
++ */
++static sector_t high(struct dm_table *t, unsigned int l, unsigned int n)
++{
++      sector_t *node;
++
++      /* follow the last child down to the bottom level */
++      while (l < t->depth - 1) {
++              n = get_child(n, CHILDREN_PER_NODE - 1);
++              l++;
++      }
++
++      if (n < t->counts[l]) {
++              node = get_node(t, l, n);
++              return node[KEYS_PER_NODE - 1];
++      }
++
++      return (sector_t) - 1;
++}
++
++/*
++ * Fills in a level of the btree based on the highs of the level
++ * below it: key k of node n becomes the highest key reachable
++ * through child k of that node.  Always returns 0.
++ */
++static int setup_btree_index(unsigned int l, struct dm_table *t)
++{
++      unsigned int n, k;
++
++      for (n = 0U; n < t->counts[l]; n++) {
++              sector_t *keys = get_node(t, l, n);
++
++              for (k = 0U; k < KEYS_PER_NODE; k++)
++                      keys[k] = high(t, l + 1, get_child(n, k));
++      }
++
++      return 0;
++}
++
++/*
++ * highs, and targets are managed as dynamic arrays during a
++ * table load.
++ *
++ * Replaces them with arrays of 'num' entries, copying the
++ * t->num_targets entries currently in use and setting every byte of
++ * the remaining highs to 0xff.  Returns 0, or -ENOMEM with the table
++ * left untouched.
++ */
++static int alloc_targets(struct dm_table *t, unsigned int num)
++{
++      int used = t->num_targets;
++      sector_t *highs;
++      struct dm_target *targets;
++
++      /*
++       * Allocate both the target array and offset array at once:
++       * the num highs come first, the num targets follow.
++       */
++      highs = (sector_t *) vcalloc(sizeof(struct dm_target) +
++                                   sizeof(sector_t), num);
++      if (highs == NULL)
++              return -ENOMEM;
++
++      targets = (struct dm_target *) (highs + num);
++
++      if (used != 0) {
++              memcpy(highs, t->highs, used * sizeof(*highs));
++              memcpy(targets, t->targets, used * sizeof(*targets));
++      }
++
++      memset(&highs[used], -1, (num - used) * sizeof(*highs));
++
++      /* the old targets were part of the old highs allocation */
++      vfree(t->highs);
++
++      t->highs = highs;
++      t->targets = targets;
++      t->num_allocated = num;
++
++      return 0;
++}
++
++/*
++ * Creates an empty table with the given mode, holding one reference,
++ * and returns it through *result.  Returns 0 on success or -ENOMEM,
++ * in which case *result is not written.
++ */
++int dm_table_create(struct dm_table **result, int mode)
++{
++      struct dm_table *t;
++
++      t = kmalloc(sizeof(*t), GFP_NOIO);
++      if (t == NULL)
++              return -ENOMEM;
++
++      memset(t, 0, sizeof(*t));
++      INIT_LIST_HEAD(&t->devices);
++      atomic_set(&t->holders, 1);
++      t->mode = mode;
++
++      /* allocate a single nodes worth of targets to begin with */
++      if (alloc_targets(t, KEYS_PER_NODE) != 0) {
++              kfree(t);
++              return -ENOMEM;
++      }
++
++      *result = t;
++      return 0;
++}
++
++/*
++ * Frees every struct dm_dev on the list.  The list head itself is
++ * not touched.
++ */
++static void free_devices(struct list_head *devices)
++{
++      struct list_head *pos = devices->next;
++
++      while (pos != devices) {
++              struct dm_dev *dd = list_entry(pos, struct dm_dev, list);
++
++              /* step on before the entry goes away */
++              pos = pos->next;
++              kfree(dd);
++      }
++}
++
++/*
++ * Frees a table: the btree index, every target (calling its
++ * destructor, if any, and putting its target type), the highs/targets
++ * arrays, any devices still on the device list, and finally the
++ * table itself.
++ */
++void table_destroy(struct dm_table *t)
++{
++      unsigned int i;
++      struct dm_target *tgt;
++
++      /* free the indexes (see dm_table_complete) */
++      if (t->depth >= 2)
++              vfree(t->index[t->depth - 2]);
++
++      /* free the targets */
++      for (i = 0, tgt = t->targets; i < t->num_targets; i++, tgt++) {
++              if (tgt->type->dtr)
++                      tgt->type->dtr(tgt);
++
++              dm_put_target_type(tgt->type);
++      }
++
++      vfree(t->highs);
++
++      /* free the device list */
++      if (!list_empty(&t->devices)) {
++              DMWARN("devices still present during destroy: "
++                     "dm_table_remove_device calls missing");
++
++              free_devices(&t->devices);
++      }
++
++      kfree(t);
++}
++
++/* Takes an additional reference on the table. */
++void dm_table_get(struct dm_table *t)
++{
++      atomic_inc(&t->holders);
++}
++
++/* Drops a reference; the last one to go destroys the table. */
++void dm_table_put(struct dm_table *t)
++{
++      if (!atomic_dec_and_test(&t->holders))
++              return;
++
++      table_destroy(t);
++}
++
++/*
++ * Checks to see if we need to extend highs or targets.  When every
++ * allocated slot is in use the arrays are doubled; the result of
++ * alloc_targets() is returned in that case, 0 otherwise.
++ */
++static inline int check_space(struct dm_table *t)
++{
++      if (t->num_targets < t->num_allocated)
++              return 0;
++
++      return alloc_targets(t, t->num_allocated * 2);
++}
++
++/*
++ * Convert a device path to a dev_t.
++ *
++ * Returns 0 and stores the device number in *dev if path names a
++ * block device.  Otherwise *dev is left alone and an error is
++ * returned: -ENOENT if the dentry has no inode, -ENOTBLK if the inode
++ * is not a block device, or whatever the path walk reported.
++ */
++static int lookup_device(const char *path, kdev_t *dev)
++{
++      int r = 0;
++      struct nameidata nd;
++      struct inode *inode;
++
++      /*
++       * NOTE(review): this follows the usual 2.4 VFS calling
++       * convention, which is not visible in this file.  A zero
++       * return from path_init() means the lookup is already complete
++       * and nd is valid, so it must not be treated as "done" before
++       * *dev has been filled in; only the walk is skipped.
++       */
++      if (path_init(path, LOOKUP_FOLLOW, &nd))
++              r = path_walk(path, &nd);
++
++      /*
++       * A failed path_walk() has already released nd, so return
++       * directly rather than via the path_release() below.
++       */
++      if (r)
++              return r;
++
++      inode = nd.dentry->d_inode;
++      if (!inode) {
++              r = -ENOENT;
++              goto out;
++      }
++
++      if (!S_ISBLK(inode->i_mode)) {
++              r = -ENOTBLK;
++              goto out;
++      }
++
++      *dev = inode->i_rdev;
++
++      out:
++      path_release(&nd);
++      return r;
++}
++
++/*
++ * Looks for 'dev' on the device list 'l'.  Returns the matching
++ * dm_dev, or NULL if the device is not on the list.
++ */
++static struct dm_dev *find_device(struct list_head *l, kdev_t dev)
++{
++      struct list_head *pos;
++      struct dm_dev *dd;
++
++      list_for_each(pos, l) {
++              dd = list_entry(pos, struct dm_dev, list);
++              if (kdev_same(dd->dev, dev))
++                      return dd;
++      }
++
++      return NULL;
++}
++
++/*
++ * Open a device so we can use it as a map destination.
++ *
++ * dd->bdev must be NULL on entry.  Returns -ENOMEM if no block_device
++ * could be obtained, otherwise the result of blkdev_get().
++ */
++static int open_dev(struct dm_dev *dd)
++{
++      if (dd->bdev)
++              BUG();
++
++      dd->bdev = bdget(kdev_t_to_nr(dd->dev));
++      if (dd->bdev)
++              return blkdev_get(dd->bdev, dd->mode, 0, BDEV_RAW);
++
++      return -ENOMEM;
++}
++
++/*
++ * Close a device that we've been using.  Does nothing when the
++ * dm_dev has no block device attached.
++ */
++static void close_dev(struct dm_dev *dd)
++{
++      if (dd->bdev) {
++              blkdev_put(dd->bdev, BDEV_RAW);
++              dd->bdev = NULL;
++      }
++}
++
++/*
++ * If possible (ie. blk_size[major] is set), this checks an area
++ * of a destination device is valid.
++ *
++ * Returns 1 when [start, start + len) fits on the device or when the
++ * device size is not known, 0 when the area does not fit.
++ */
++static int check_device_area(kdev_t dev, sector_t start, sector_t len)
++{
++      int *sizes = blk_size[major(dev)];
++      sector_t dev_size;
++
++      /* we don't know the device details,
++       * so give the benefit of the doubt */
++      if (!sizes)
++              return 1;
++
++      dev_size = sizes[minor(dev)];
++      if (!dev_size)
++              return 1;
++
++      /* convert to 512-byte sectors */
++      dev_size <<= 1;
++
++      if (start >= dev_size)
++              return 0;
++
++      return len <= (dev_size - start);
++}
++
++/*
++ * This upgrades the mode on an already open dm_dev.  The device is
++ * reopened with the combined mode first; only once that succeeded is
++ * the old handle closed.  On failure the dm_dev is put back exactly
++ * as it was.
++ */
++static int upgrade_mode(struct dm_dev *dd, int new_mode)
++{
++      struct dm_dev saved = *dd;
++      int r;
++
++      dd->mode |= new_mode;
++      dd->bdev = NULL;
++
++      r = open_dev(dd);
++      if (r) {
++              /* reopen failed: restore the previous state */
++              *dd = saved;
++              return r;
++      }
++
++      /* the snapshot still holds the old bdev, release it */
++      close_dev(&saved);
++      return 0;
++}
++
++/*
++ * Add a device to the list, or just increment the usage count if
++ * it's already present.
++ *
++ * 'path' is either a "major:minor" pair or the path of a block device
++ * node.  On success the dm_dev is stored in *result and 0 is
++ * returned; otherwise a negative errno is returned.
++ */
++int dm_get_device(struct dm_target *ti, const char *path, sector_t start,
++                sector_t len, int mode, struct dm_dev **result)
++{
++      int r;
++      kdev_t dev;
++      struct dm_dev *dd;
++      unsigned major, minor;
++      struct dm_table *t = ti->table;
++
++      if (!t)
++              BUG();
++
++      /* try the numeric form first, then fall back to a path lookup */
++      if (sscanf(path, "%u:%u", &major, &minor) == 2)
++              dev = mk_kdev(major, minor);
++      else {
++              r = lookup_device(path, &dev);
++              if (r)
++                      return r;
++      }
++
++      dd = find_device(&t->devices, dev);
++      if (dd) {
++              /* known device: reopen only if more access is needed */
++              if ((dd->mode | mode) != dd->mode) {
++                      r = upgrade_mode(dd, mode);
++                      if (r)
++                              return r;
++              }
++      } else {
++              dd = kmalloc(sizeof(*dd), GFP_KERNEL);
++              if (!dd)
++                      return -ENOMEM;
++
++              dd->dev = dev;
++              dd->mode = mode;
++              dd->bdev = NULL;
++
++              r = open_dev(dd);
++              if (r) {
++                      kfree(dd);
++                      return r;
++              }
++
++              atomic_set(&dd->count, 0);
++              list_add(&dd->list, &t->devices);
++      }
++      atomic_inc(&dd->count);
++
++      if (check_device_area(dd->dev, start, len)) {
++              *result = dd;
++              return 0;
++      }
++
++      /* hand back the reference taken above */
++      DMWARN("device %s too small for target", path);
++      dm_put_device(ti, dd);
++      return -EINVAL;
++}
++
++/*
++ * Decrement a device's use count and remove it if necessary: when the
++ * count reaches zero the device is closed, unlinked and freed.
++ */
++void dm_put_device(struct dm_target *ti, struct dm_dev *dd)
++{
++      if (!atomic_dec_and_test(&dd->count))
++              return;
++
++      close_dev(dd);
++      list_del(&dd->list);
++      kfree(dd);
++}
++
++/*
++ * Checks to see if the target joins onto the end of the table: the
++ * first target must begin at sector 0, every later one exactly where
++ * the previous target ends.
++ */
++static int adjoin(struct dm_table *table, struct dm_target *ti)
++{
++      struct dm_target *prev;
++
++      if (table->num_targets) {
++              prev = &table->targets[table->num_targets - 1];
++              return (ti->begin == (prev->begin + prev->len));
++      }
++
++      return !ti->begin;
++}
++
++/*
++ * Destructively splits up the argument list to pass to ctr.
++ *
++ * 'input' is cut into whitespace separated tokens in place; a
++ * backslash quotes the following character.  Pointers to the tokens
++ * are stored in argv and their number in *argc.  Returns -EINVAL if
++ * more than 'max' tokens are present, 0 otherwise.
++ */
++static int split_args(int max, int *argc, char **argv, char *input)
++{
++      char *rd = input, *wr, *tok;
++      int n = 0;
++
++      *argc = 0;
++
++      for (;;) {
++              /* Skip whitespace */
++              for (tok = rd; *tok && isspace(*tok); tok++)
++                      ;
++
++              if (!*tok)
++                      return 0;       /* success, we hit the end */
++
++              /* 'wr' trails 'rd' once back-quotes have been dropped */
++              for (rd = wr = tok; *rd;) {
++                      if (*rd == '\\' && rd[1]) {
++                              /* Everything apart from '\0' can be quoted */
++                              *wr++ = rd[1];
++                              rd += 2;
++                      } else if (isspace(*rd)) {
++                              break;  /* end of token */
++                      } else {
++                              *wr++ = *rd++;
++                      }
++              }
++
++              /* have we already filled the array ? */
++              if (n >= max)
++                      return -EINVAL;
++
++              /* we know this is whitespace */
++              if (*rd)
++                      rd++;
++
++              /* terminate the string and put it in the array */
++              *wr = '\0';
++              argv[n++] = tok;
++              *argc = n;
++      }
++}
++
++/*
++ * Appends a target of the named type covering [start, start + len)
++ * to the table.  'params' is split in place and handed to the type's
++ * constructor.  Returns 0 on success or a negative errno; the reason
++ * for most failures is left in the target's error string and logged.
++ */
++int dm_table_add_target(struct dm_table *t, const char *type,
++                      sector_t start, sector_t len, char *params)
++{
++      char *argv[32];
++      int argc, r;
++      struct dm_target *tgt;
++
++      /* make sure there is a free slot to build the target in */
++      r = check_space(t);
++      if (r)
++              return r;
++
++      tgt = &t->targets[t->num_targets];
++      memset(tgt, 0, sizeof(*tgt));
++
++      tgt->type = dm_get_target_type(type);
++      if (!tgt->type) {
++              tgt->error = "unknown target type";
++              return -EINVAL;
++      }
++
++      tgt->table = t;
++      tgt->begin = start;
++      tgt->len = len;
++      tgt->error = "Unknown error";
++
++      /*
++       * Does this target adjoin the previous one ?
++       */
++      if (!adjoin(t, tgt)) {
++              tgt->error = "Gap in table";
++              r = -EINVAL;
++              goto bad;
++      }
++
++      r = split_args(ARRAY_SIZE(argv), &argc, argv, params);
++      if (r) {
++              tgt->error = "couldn't split parameters";
++              goto bad;
++      }
++
++      r = tgt->type->ctr(tgt, argc, argv);
++      if (r)
++              goto bad;
++
++      /* the slot is now live: record its last sector as the btree key */
++      t->highs[t->num_targets] = tgt->begin + tgt->len - 1;
++      t->num_targets++;
++      return 0;
++
++      bad:
++      printk(KERN_ERR DM_NAME ": %s\n", tgt->error);
++      dm_put_target_type(tgt->type);
++      return r;
++}
++
++/*
++ * Allocates and fills the internal (non-leaf) levels of the btree.
++ * All levels share a single vcalloc()ed block.  Returns 0 or -ENOMEM.
++ */
++static int setup_indexes(struct dm_table *t)
++{
++      int level;
++      unsigned int nodes = 0;
++      sector_t *indexes;
++
++      /* allocate the space for *all* the indexes */
++      for (level = t->depth - 2; level >= 0; level--) {
++              t->counts[level] = dm_div_up(t->counts[level + 1],
++                                           CHILDREN_PER_NODE);
++              nodes += t->counts[level];
++      }
++
++      indexes = (sector_t *) vcalloc(nodes, (unsigned long) NODE_SIZE);
++      if (!indexes)
++              return -ENOMEM;
++
++      /* set up internal nodes, bottom-up */
++      level = t->depth - 2;
++      while (level >= 0) {
++              t->index[level] = indexes;
++              indexes += (KEYS_PER_NODE * t->counts[level]);
++              setup_btree_index(level, t);
++              level--;
++      }
++
++      return 0;
++}
++
++/*
++ * Builds the btree to index the map.  The highs array serves as the
++ * leaf level; internal levels are only built when the tree is at
++ * least two levels deep.  Returns 0 or the error from setup_indexes().
++ */
++int dm_table_complete(struct dm_table *t)
++{
++      /* how many indexes will the btree have ? */
++      unsigned int leaf_nodes = dm_div_up(t->num_targets, KEYS_PER_NODE);
++
++      t->depth = 1 + int_log(leaf_nodes, CHILDREN_PER_NODE);
++
++      /* leaf layer has already been set up */
++      t->counts[t->depth - 1] = leaf_nodes;
++      t->index[t->depth - 1] = t->highs;
++
++      if (t->depth < 2)
++              return 0;
++
++      return setup_indexes(t);
++}
++
++static spinlock_t _event_lock = SPIN_LOCK_UNLOCKED;     /* guards event_fn/event_context */
++void dm_table_event_callback(struct dm_table *t,
++                           void (*fn)(void *), void *context)
++{
++      /* install both fields under the lock so dm_table_event sees a matching pair */
++      spin_lock_irq(&_event_lock);
++      t->event_fn = fn;
++      t->event_context = context;
++      spin_unlock_irq(&_event_lock);
++}
++
++/*
++ * Invokes the table's event callback, if one is installed.  The
++ * callback runs with _event_lock held.
++ */
++void dm_table_event(struct dm_table *t)
++{
++      spin_lock(&_event_lock);
++      if (t->event_fn)
++              t->event_fn(t->event_context);
++      spin_unlock(&_event_lock);
++}
++
++/*
++ * Returns the size of the table in sectors: one past the last sector
++ * of the final target, or 0 for a table without targets.
++ */
++sector_t dm_table_get_size(struct dm_table *t)
++{
++      if (!t->num_targets)
++              return 0;
++
++      return t->highs[t->num_targets - 1] + 1;
++}
++
++/*
++ * Returns the target in slot 'index', or NULL when index does not
++ * name one of the table's targets.
++ */
++struct dm_target *dm_table_get_target(struct dm_table *t, unsigned int index)
++{
++      /* valid slots are 0 .. num_targets - 1 */
++      if (index >= t->num_targets)
++              return NULL;
++
++      return t->targets + index;
++}
++
++/*
++ * Search the btree for the correct target.
++ *
++ * Visits one node per level, from level 0 down to level depth - 1
++ * (the highs array, see dm_table_complete), and in each node picks
++ * the first key that is >= sector.  The node number and key slot
++ * reached on the last level index t->targets.
++ * NOTE(review): nothing here checks that sector lies inside the
++ * table; presumably callers guarantee that -- confirm.
++ */
++struct dm_target *dm_table_find_target(struct dm_table *t, sector_t sector)
++{
++      unsigned int l, n = 0, k = 0;
++      sector_t *node;
++
++      for (l = 0; l < t->depth; l++) {
++              n = get_child(n, k);    /* descend through the slot chosen one level up */
++              node = get_node(t, l, n);
++
++              for (k = 0; k < KEYS_PER_NODE; k++)
++                      if (node[k] >= sector)
++                              break;
++      }
++
++      return &t->targets[(KEYS_PER_NODE * n) + k];
++}
++
++/* Returns the number of targets currently in the table. */
++unsigned int dm_table_get_num_targets(struct dm_table *t)
++{
++      return t->num_targets;
++}
++
++/* Returns the head of the table's list of dm_dev entries. */
++struct list_head *dm_table_get_devices(struct dm_table *t)
++{
++      return &t->devices;
++}
++
++/* Returns the mode the table was created with. */
++int dm_table_get_mode(struct dm_table *t)
++{
++      return t->mode;
++}
++
++/*
++ * Calls the suspend method of every target in the table that
++ * provides one.
++ */
++void dm_table_suspend_targets(struct dm_table *t)
++{
++      /* unsigned to match num_targets, as in table_destroy */
++      unsigned int i;
++
++      for (i = 0; i < t->num_targets; i++) {
++              struct dm_target *ti = t->targets + i;
++
++              if (ti->type->suspend)
++                      ti->type->suspend(ti);
++      }
++}
++
++/*
++ * Calls the resume method of every target in the table that
++ * provides one.
++ */
++void dm_table_resume_targets(struct dm_table *t)
++{
++      /* unsigned to match num_targets, as in table_destroy */
++      unsigned int i;
++
++      for (i = 0; i < t->num_targets; i++) {
++              struct dm_target *ti = t->targets + i;
++
++              if (ti->type->resume)
++                      ti->type->resume(ti);
++      }
++}
++
++EXPORT_SYMBOL(dm_get_device);
++EXPORT_SYMBOL(dm_put_device);
++EXPORT_SYMBOL(dm_table_event);
++EXPORT_SYMBOL(dm_table_get_mode);
+diff -ruN linux-2.4.21-dm-real/drivers/md/dm-target.c linux-2.4.21/drivers/md/dm-target.c
+--- linux-2.4.21-dm-real/drivers/md/dm-target.c        Thu Jan  1 01:00:00 1970
++++ linux-2.4.21/drivers/md/dm-target.c        Sat Jul 12 18:14:05 2003
+@@ -0,0 +1,188 @@
++/*
++ * Copyright (C) 2001 Sistina Software (UK) Limited
++ *
++ * This file is released under the GPL.
++ */
++
++#include "dm.h"
++
++#include <linux/module.h>
++#include <linux/kmod.h>
++#include <linux/slab.h>
++
++struct tt_internal {
++      struct target_type tt;  /* copy of the registered type; must stay first, dm_put_target_type casts back from it */
++
++      struct list_head list;  /* link on _targets */
++      long use;               /* references handed out by get_target_type */
++};
++
++static LIST_HEAD(_targets);           /* every registered tt_internal */
++static DECLARE_RWSEM(_lock);          /* taken around every access to _targets */
++
++#define DM_MOD_NAME_SIZE 32           /* buffer size for the "dm-<name>" module name */
++
++/*
++ * Returns the registered target type called 'name', or NULL.  Only
++ * walks the list; locking is left to the caller.
++ */
++static inline struct tt_internal *__find_target_type(const char *name)
++{
++      struct list_head *pos;
++
++      list_for_each(pos, &_targets) {
++              struct tt_internal *ti;
++
++              ti = list_entry(pos, struct tt_internal, list);
++              if (strcmp(name, ti->tt.name) == 0)
++                      return ti;
++      }
++
++      return NULL;
++}
++
++/*
++ * Looks up a target type by name and takes a reference on it.  The
++ * first reference also bumps the use count of the owning module, if
++ * there is one.  Returns NULL if no such type is registered.
++ */
++static struct tt_internal *get_target_type(const char *name)
++{
++      struct tt_internal *ti;
++
++      down_read(&_lock);
++
++      ti = __find_target_type(name);
++      if (!ti)
++              goto out;
++
++      if (!ti->use && ti->tt.module)
++              __MOD_INC_USE_COUNT(ti->tt.module);
++      ti->use++;
++
++      out:
++      up_read(&_lock);
++
++      return ti;
++}
++
++/*
++ * Asks for the module "dm-<name>" to be loaded.  Names too long for
++ * the fixed-size buffer are silently ignored.
++ */
++static void load_module(const char *name)
++{
++      char module_name[DM_MOD_NAME_SIZE] = "dm-";
++
++      /* leave room for the "dm-" prefix and the terminating NUL */
++      if (strlen(name) <= (DM_MOD_NAME_SIZE - 4)) {
++              strcat(module_name, name);
++              request_module(module_name);
++      }
++}
++
++/*
++ * Returns a referenced target type for 'name'.  If the type is not
++ * registered yet, its module is requested and the lookup is retried
++ * once.  Returns NULL if the type still cannot be found.
++ */
++struct target_type *dm_get_target_type(const char *name)
++{
++      struct tt_internal *ti;
++
++      ti = get_target_type(name);
++      if (ti)
++              return &ti->tt;
++
++      load_module(name);
++      ti = get_target_type(name);
++      if (ti)
++              return &ti->tt;
++
++      return NULL;
++}
++
++/*
++ * Drops a reference obtained from dm_get_target_type().  Dropping the
++ * last one also releases the use count held on the owning module.
++ */
++void dm_put_target_type(struct target_type *t)
++{
++      /* t is the first member of its tt_internal */
++      struct tt_internal *ti = (struct tt_internal *) t;
++
++      down_read(&_lock);
++
++      ti->use--;
++      if (!ti->use && ti->tt.module)
++              __MOD_DEC_USE_COUNT(ti->tt.module);
++
++      /* more puts than gets */
++      if (ti->use < 0)
++              BUG();
++
++      up_read(&_lock);
++}
++
++/*
++ * Allocates a zeroed tt_internal holding a copy of *t.  Returns NULL
++ * if the allocation fails.
++ */
++static struct tt_internal *alloc_target(struct target_type *t)
++{
++      struct tt_internal *ti;
++
++      ti = kmalloc(sizeof(*ti), GFP_KERNEL);
++      if (!ti)
++              return NULL;
++
++      memset(ti, 0, sizeof(*ti));
++      ti->tt = *t;
++      return ti;
++}
++
++/*
++ * Registers a new target type.  Returns 0 on success, -ENOMEM if no
++ * memory is available, or -EEXIST if a type with the same name is
++ * already registered.
++ */
++int dm_register_target(struct target_type *t)
++{
++      struct tt_internal *ti = alloc_target(t);
++      int rv;
++
++      if (!ti)
++              return -ENOMEM;
++
++      down_write(&_lock);
++      if (!__find_target_type(t->name)) {
++              list_add(&ti->list, &_targets);
++              rv = 0;
++      } else {
++              kfree(ti);
++              rv = -EEXIST;
++      }
++      up_write(&_lock);
++
++      return rv;
++}
++
++/*
++ * Removes a target type.  Returns 0 on success, -EINVAL if the type
++ * is not registered, or -ETXTBSY if it is still referenced.
++ */
++int dm_unregister_target(struct target_type *t)
++{
++      struct tt_internal *ti;
++      int r = 0;
++
++      down_write(&_lock);
++
++      ti = __find_target_type(t->name);
++      if (!ti)
++              r = -EINVAL;
++      else if (ti->use)
++              r = -ETXTBSY;
++      else {
++              list_del(&ti->list);
++              kfree(ti);
++      }
++
++      up_write(&_lock);
++      return r;
++}
++
++/*
++ * io-err: always fails an io, useful for bringing
++ * up LVs that have holes in them.
++ *
++ * The constructor takes no arguments and always succeeds.
++ */
++static int io_err_ctr(struct dm_target *ti, unsigned int argc, char **args)
++{
++      return 0;
++}
++
++static void io_err_dtr(struct dm_target *ti)
++{
++      /* empty: the constructor set nothing up */
++}
++
++static int io_err_map(struct dm_target *ti, struct buffer_head *bh, int rw,
++                    union map_info *map_context)
++{
++      return -EIO;    /* every io mapped onto this target fails */
++}
++
++static struct target_type error_target = {
++      .name = "error",        /* the name used to select this type in a table */
++      .ctr  = io_err_ctr,
++      .dtr  = io_err_dtr,
++      .map  = io_err_map,
++};
++
++/* Registers the built-in "error" target. */
++int dm_target_init(void)
++{
++      return dm_register_target(&error_target);
++}
++
++/* Unregisters the built-in "error" target, warning if that fails. */
++void dm_target_exit(void)
++{
++      int r = dm_unregister_target(&error_target);
++
++      if (r)
++              DMWARN("error target unregistration failed");
++}
++
++EXPORT_SYMBOL(dm_register_target);
++EXPORT_SYMBOL(dm_unregister_target);
+diff -ruN linux-2.4.21-dm-real/drivers/md/dm.c linux-2.4.21/drivers/md/dm.c
+--- linux-2.4.21-dm-real/drivers/md/dm.c       Thu Jan  1 01:00:00 1970
++++ linux-2.4.21/drivers/md/dm.c       Sat Jul 12 18:14:05 2003
+@@ -0,0 +1,1115 @@
++/*
++ * Copyright (C) 2001, 2002 Sistina Software (UK) Limited.
++ *
++ * This file is released under the GPL.
++ */
++
++#include "dm.h"
++#include "kcopyd.h"
++
++#include <linux/init.h>
++#include <linux/module.h>
++#include <linux/blk.h>
++#include <linux/blkpg.h>
++#include <linux/mempool.h>
++#include <linux/slab.h>
++#include <linux/major.h>
++#include <linux/kdev_t.h>
++#include <linux/lvm.h>
++
++#include <asm/uaccess.h>
++
++static const char *_name = DM_NAME;
++#define DEFAULT_READ_AHEAD 64
++
++struct dm_io {
++      struct mapped_device *md;       /* device the io was submitted to */
++
++      struct dm_target *ti;           /* target the buffer was mapped to */
++      int rw;
++      union map_info map_context;     /* handed to the target's map and end_io */
++      void (*end_io) (struct buffer_head * bh, int uptodate);        /* original bh->b_end_io */
++      void *context;                  /* original bh->b_private */
++};
++
++struct deferred_io {
++      int rw;
++      struct buffer_head *bh;         /* buffer held back by queue_io */
++      struct deferred_io *next;       /* next entry on md->deferred */
++};
++
++/*
++ * Bits for the md->flags field.
++ */
++#define DMF_BLOCK_IO 0
++#define DMF_SUSPENDED 1
++
++struct mapped_device {
++      struct rw_semaphore lock;       /* read-held while mapping io, write-held to queue deferred io */
++      atomic_t holders;
++
++      kdev_t dev;
++      unsigned long flags;            /* DMF_* bits */
++
++      /*
++       * A list of ios that arrived while we were suspended.
++       */
++      atomic_t pending;               /* ios hooked by __map_buffer and not yet completed */
++      wait_queue_head_t wait;         /* woken when pending drops to zero */
++      struct deferred_io *deferred;
++
++      /*
++       * The current mapping.
++       */
++      struct dm_table *map;
++
++      /*
++       * io objects are allocated from here.
++       */
++      mempool_t *io_pool;
++
++      /*
++       * Event handling.
++       */
++      uint32_t event_nr;
++      wait_queue_head_t eventq;
++};
++
++#define MIN_IOS 256
++static kmem_cache_t *_io_cache;
++
++static struct mapped_device *get_kdev(kdev_t dev);
++static int dm_request(request_queue_t *q, int rw, struct buffer_head *bh);
++static int dm_user_bmap(struct inode *inode, struct lv_bmap *lvb);
++
++/*-----------------------------------------------------------------
++ * In order to avoid the 256 minor number limit we are going to
++ * register more major numbers as necessary.
++ *---------------------------------------------------------------*/
++#define MAX_MINORS (1 << MINORBITS)
++
++struct major_details {
++      unsigned int major;             /* major number this entry is registered under */
++
++      int transient;                  /* set when the major was allocated dynamically */
++      struct list_head transient_list;        /* link on _transients_free */
++
++      unsigned int first_free_minor;  /* where first_free_dev starts scanning */
++      int nr_free_minors;
++
++      struct mapped_device *mds[MAX_MINORS];  /* device per minor, NULL when unused */
++      int blk_size[MAX_MINORS];       /* installed as blk_size[major] */
++      int blksize_size[MAX_MINORS];   /* installed as blksize_size[major] */
++      int hardsect_size[MAX_MINORS];  /* installed as hardsect_size[major] */
++};
++
++static struct rw_semaphore _dev_lock;
++static struct major_details *_majors[MAX_BLKDEV];
++
++/*
++ * This holds a list of majors that non-specified device numbers
++ * may be allocated from.  Only majors with free minors appear on
++ * this list.
++ */
++static LIST_HEAD(_transients_free);
++
++/*
++ * Registers a block major for device-mapper and sets up its
++ * bookkeeping.  Passing 0 asks register_blkdev() to pick a number;
++ * such a major is marked transient and put on _transients_free.
++ *
++ * On success 0 is returned and *result points at the major's
++ * details, including when the requested major was already set up.
++ * Does no locking itself.
++ */
++static int __alloc_major(unsigned int major, struct major_details **result)
++{
++      int r;
++      unsigned int transient = !major;
++      struct major_details *maj;
++
++      /* Major already allocated? */
++      if (major && _majors[major]) {
++              /* success must never leave the caller's pointer unset */
++              *result = _majors[major];
++              return 0;
++      }
++
++      maj = kmalloc(sizeof(*maj), GFP_KERNEL);
++      if (!maj)
++              return -ENOMEM;
++
++      memset(maj, 0, sizeof(*maj));
++      INIT_LIST_HEAD(&maj->transient_list);
++
++      maj->nr_free_minors = MAX_MINORS;
++
++      r = register_blkdev(major, _name, &dm_blk_dops);
++      if (r < 0) {
++              DMERR("register_blkdev failed for %d", major);
++              kfree(maj);
++              return r;
++      }
++      /* a positive return is the dynamically assigned major */
++      if (r > 0)
++              major = r;
++
++      maj->major = major;
++
++      if (transient) {
++              maj->transient = transient;
++              list_add_tail(&maj->transient_list, &_transients_free);
++      }
++
++      _majors[major] = maj;
++
++      blk_size[major] = maj->blk_size;
++      blksize_size[major] = maj->blksize_size;
++      hardsect_size[major] = maj->hardsect_size;
++      read_ahead[major] = DEFAULT_READ_AHEAD;
++
++      blk_queue_make_request(BLK_DEFAULT_QUEUE(major), dm_request);
++
++      *result = maj;
++      return 0;
++}
++
++/*
++ * Undoes __alloc_major(): unhooks the major from the block layer
++ * tables, frees its bookkeeping and unregisters the major.  Does no
++ * locking itself.
++ */
++static void __free_major(struct major_details *maj)
++{
++      /* remember the number, maj is freed before it is last needed */
++      unsigned int major = maj->major;
++
++      list_del(&maj->transient_list);
++
++      read_ahead[major] = 0;
++      blk_size[major] = NULL;
++      blksize_size[major] = NULL;
++      hardsect_size[major] = NULL;
++
++      _majors[major] = NULL;
++      kfree(maj);
++
++      /* report the call that actually failed */
++      if (unregister_blkdev(major, _name) < 0)
++              DMERR("unregister_blkdev failed");
++}
++
++/* Releases every major still registered, highest number first. */
++static void free_all_majors(void)
++{
++      unsigned int major;
++
++      down_write(&_dev_lock);
++
++      for (major = ARRAY_SIZE(_majors); major-- > 0;) {
++              if (_majors[major])
++                      __free_major(_majors[major]);
++      }
++
++      up_write(&_dev_lock);
++}
++
++/*
++ * Gives a device number back.  A major whose minors are all free
++ * again is released; a transient major that just regained its first
++ * free minor goes back on _transients_free.
++ */
++static void free_dev(kdev_t dev)
++{
++      unsigned int major = major(dev);
++      unsigned int minor = minor(dev);
++      struct major_details *maj;
++
++      down_write(&_dev_lock);
++
++      maj = _majors[major];
++      if (maj) {
++              maj->mds[minor] = NULL;
++              maj->nr_free_minors++;
++
++              if (maj->nr_free_minors == MAX_MINORS) {
++                      /* last user gone; maj is freed by this call */
++                      __free_major(maj);
++              } else if (maj->transient) {
++                      if (maj->nr_free_minors == 1)
++                              list_add_tail(&maj->transient_list,
++                                            &_transients_free);
++
++                      /* keep the scan hint at the lowest free minor */
++                      if (minor < maj->first_free_minor)
++                              maj->first_free_minor = minor;
++              }
++      }
++
++      up_write(&_dev_lock);
++}
++
++/*
++ * Binds 'md' to the given minor of 'maj'.  A transient major that has
++ * just handed out its last free minor is taken off _transients_free.
++ */
++static void __alloc_minor(struct major_details *maj, unsigned int minor,
++                        struct mapped_device *md)
++{
++      md->dev = mk_kdev(maj->major, minor);
++      maj->mds[minor] = md;
++
++      maj->nr_free_minors--;
++      if (!maj->nr_free_minors && maj->transient)
++              list_del_init(&maj->transient_list);
++}
++
++/*
++ * See if requested kdev_t is available, registering its major first
++ * if necessary, and bind 'md' to it.
++ *
++ * Returns 0 on success, -EINVAL for a device number out of range,
++ * -EBUSY if the minor is already taken, or the error from
++ * __alloc_major().
++ */
++static int specific_dev(kdev_t dev, struct mapped_device *md)
++{
++      int r = 0;
++      unsigned int major = major(dev);
++      unsigned int minor = minor(dev);
++      struct major_details *maj;
++
++      /* _majors has MAX_BLKDEV entries, so MAX_BLKDEV itself is out */
++      if (!major || (major >= MAX_BLKDEV) || (minor >= MAX_MINORS)) {
++              DMWARN("device number requested out of range (%d, %d)",
++                     major, minor);
++              return -EINVAL;
++      }
++
++      down_write(&_dev_lock);
++      maj = _majors[major];
++
++      /* Register requested major? */
++      if (!maj) {
++              r = __alloc_major(major, &maj);
++              if (r)
++                      goto out;
++      }
++
++      if (maj->mds[minor]) {
++              r = -EBUSY;
++              goto out;
++      }
++
++      __alloc_minor(maj, minor, md);
++
++      out:
++      up_write(&_dev_lock);
++
++      return r;
++}
++
++/*
++ * Find first unused device number, requesting a new major number if
++ * required, and bind 'md' to it.  Returns 0 or the error from
++ * __alloc_major().
++ */
++static int first_free_dev(struct mapped_device *md)
++{
++      int r = 0;
++      unsigned int minor;
++      struct major_details *maj;
++
++      down_write(&_dev_lock);
++
++      if (!list_empty(&_transients_free))
++              maj = list_entry(_transients_free.next, struct major_details,
++                               transient_list);
++      else {
++              r = __alloc_major(0, &maj);
++              if (r)
++                      goto out;
++      }
++
++      /* scan upwards from the hint for an unused slot */
++      minor = maj->first_free_minor;
++      while (maj->mds[minor])
++              minor++;
++      maj->first_free_minor = minor + 1;
++
++      __alloc_minor(maj, minor, md);
++
++      out:
++      up_write(&_dev_lock);
++
++      return r;
++}
++
++/*
++ * Returns the mapped device registered under 'dev' with a reference
++ * taken via dm_get(), or NULL if there is none.
++ */
++static struct mapped_device *get_kdev(kdev_t dev)
++{
++      struct mapped_device *md = NULL;
++      struct major_details *maj;
++
++      down_read(&_dev_lock);
++
++      maj = _majors[major(dev)];
++      if (maj) {
++              md = maj->mds[minor(dev)];
++              if (md)
++                      dm_get(md);
++      }
++
++      up_read(&_dev_lock);
++
++      return md;
++}
++
++/*-----------------------------------------------------------------
++ * init/exit code
++ *---------------------------------------------------------------*/
++
++/*
++ * Sets up the state private to this file: the device lock and the
++ * slab cache for dm_io objects.  Returns 0 or -ENOMEM.
++ */
++static __init int local_init(void)
++{
++      init_rwsem(&_dev_lock);
++
++      /* allocate a slab for the dm_ios */
++      _io_cache = kmem_cache_create("dm io",
++                                    sizeof(struct dm_io), 0, 0, NULL, NULL);
++
++      return _io_cache ? 0 : -ENOMEM;
++}
++
++/* Undoes local_init() and releases any majors still registered. */
++static void local_exit(void)
++{
++      kmem_cache_destroy(_io_cache);
++      free_all_majors();
++
++      DMINFO("cleaned up");
++}
++
++/*
++ * We have a lot of init/exit functions, so it seems easier to
++ * store them in an array.  The disposable macro 'xx'
++ * expands a prefix into a pair of function names.
++ *
++ * dm_init() runs the init functions in array order, and the exit
++ * functions are run in the reverse order.
++ */
++static struct {
++      int (*init) (void);
++      void (*exit) (void);
++
++} _inits[] = {
++#define xx(n) {n ## _init, n ## _exit},
++      xx(local)
++      xx(kcopyd)
++      xx(dm_target)
++      xx(dm_linear)
++      xx(dm_stripe)
++      xx(dm_snapshot)
++      xx(dm_interface)
++#undef xx
++};
++
++/*
++ * Runs every init function in _inits in order.  If one fails, the
++ * exit functions of those that already succeeded are run in reverse
++ * order and the error is returned.
++ */
++static int __init dm_init(void)
++{
++      const int count = ARRAY_SIZE(_inits);
++      int i, r;
++
++      for (i = 0; i < count; i++) {
++              r = _inits[i].init();
++              if (!r)
++                      continue;
++
++              /* unwind the subsystems that did come up */
++              while (i--)
++                      _inits[i].exit();
++
++              return r;
++      }
++
++      return 0;
++}
++
++/* Runs every exit function in _inits, last entry first. */
++static void __exit dm_exit(void)
++{
++      int i;
++
++      for (i = ARRAY_SIZE(_inits); i-- > 0;)
++              _inits[i].exit();
++}
++
++/*
++ * Block device functions
++ */
++/*
++ * Open: takes a reference on the mapped device behind the inode and
++ * keeps it until dm_blk_close().  Returns -ENXIO if no device is
++ * registered under that number.
++ */
++static int dm_blk_open(struct inode *inode, struct file *file)
++{
++      struct mapped_device *md = get_kdev(inode->i_rdev);
++
++      return md ? 0 : -ENXIO;
++}
++
++/*
++ * Close: drops the reference taken by dm_blk_open().
++ * NOTE(review): md is used without a NULL check; presumably the
++ * reference held since open keeps the device registered -- confirm.
++ */
++static int dm_blk_close(struct inode *inode, struct file *file)
++{
++      struct mapped_device *md;
++
++      md = get_kdev(inode->i_rdev);
++      dm_put(md);             /* put the reference gained by dm_blk_open */
++      dm_put(md);             /* put the reference get_kdev just took */
++      return 0;
++}
++
++/* Takes a dm_io from the device's mempool (GFP_NOIO allocation). */
++static inline struct dm_io *alloc_io(struct mapped_device *md)
++{
++      return mempool_alloc(md->io_pool, GFP_NOIO);
++}
++
++/* Returns a dm_io to the mempool of the device it came from. */
++static inline void free_io(struct mapped_device *md, struct dm_io *io)
++{
++      mempool_free(io, md->io_pool);
++}
++
++/* Allocates a deferred_io with GFP_NOIO; the result may be NULL. */
++static inline struct deferred_io *alloc_deferred(void)
++{
++      return kmalloc(sizeof(struct deferred_io), GFP_NOIO);
++}
++
++/* Frees a deferred_io obtained from alloc_deferred(). */
++static inline void free_deferred(struct deferred_io *di)
++{
++      kfree(di);
++}
++
++/* Returns the blk_size entry for 'dev' doubled, i.e. shifted left by one. */
++static inline sector_t volume_size(kdev_t dev)
++{
++      return blk_size[major(dev)][minor(dev)] << 1;
++}
++
++/* FIXME: check this */
++/*
++ * Block device ioctl handler.  Generic requests are passed on to
++ * blk_ioctl(), size queries are answered from volume_size(), and
++ * LV_BMAP goes to dm_user_bmap().  Everything else gets -ENOTTY;
++ * BLKRRPART and BLKPG are deliberately not forwarded.
++ */
++static int dm_blk_ioctl(struct inode *inode, struct file *file,
++                      unsigned int command, unsigned long a)
++{
++      kdev_t dev = inode->i_rdev;
++      long size;
++
++      switch (command) {
++      case BLKROSET:
++      case BLKROGET:
++      case BLKRASET:
++      case BLKRAGET:
++      case BLKFLSBUF:
++      case BLKSSZGET:
++      case BLKELVGET:
++      case BLKELVSET:
++      case BLKBSZGET:
++      case BLKBSZSET:
++              return blk_ioctl(dev, command, a);
++
++      case BLKGETSIZE:
++              size = volume_size(dev);
++              if (copy_to_user((void *) a, &size, sizeof(long)))
++                      return -EFAULT;
++              return 0;
++
++      case BLKGETSIZE64:
++              size = volume_size(dev);
++              if (put_user((u64) ((u64) size) << 9, (u64 *) a))
++                      return -EFAULT;
++              return 0;
++
++      case BLKRRPART:
++              return -ENOTTY;
++
++      case LV_BMAP:
++              return dm_user_bmap(inode, (struct lv_bmap *) a);
++
++      default:
++              break;
++      }
++
++      DMWARN("unknown block ioctl 0x%x", command);
++      return -ENOTTY;
++}
++
++/*
++ * Add the buffer to the list of deferred io.
++ *
++ * Returns 0 if the buffer was queued, 1 if io is no longer being
++ * blocked (nothing is queued in that case), or -ENOMEM.
++ */
++static int queue_io(struct mapped_device *md, struct buffer_head *bh, int rw)
++{
++      struct deferred_io *di = alloc_deferred();
++      int blocked;
++
++      if (!di)
++              return -ENOMEM;
++
++      down_write(&md->lock);
++
++      /* re-check under the write lock, the flag may have been cleared */
++      blocked = test_bit(DMF_BLOCK_IO, &md->flags);
++      if (blocked) {
++              di->bh = bh;
++              di->rw = rw;
++              di->next = md->deferred;
++              md->deferred = di;
++      }
++
++      up_write(&md->lock);
++
++      if (blocked)
++              return 0;       /* deferred successfully */
++
++      free_deferred(di);
++      return 1;
++}
++
++/*
++ * bh->b_end_io routine that decrements the pending count
++ * and then calls the original bh->b_end_io fn.
++ *
++ * The target's end_io hook, if any, runs first: a negative return
++ * turns the completion into a failure, a positive return means the
++ * target keeps the buffer and nothing is completed here.
++ */
++static void dec_pending(struct buffer_head *bh, int uptodate)
++{
++      int r;
++      struct dm_io *io = bh->b_private;
++      dm_endio_fn endio = io->ti->type->end_io;
++
++      if (endio) {
++              r = endio(io->ti, bh, io->rw, uptodate ? 0 : -EIO,
++                        &io->map_context);
++              if (r < 0)
++                      uptodate = 0;
++
++              else if (r > 0)
++                      /* the target wants another shot at the io */
++                      return;
++      }
++
++      if (atomic_dec_and_test(&io->md->pending))
++              /* nudge anyone waiting on suspend queue */
++              wake_up(&io->md->wait);
++
++      /* restore the completion state saved by __map_buffer */
++      bh->b_end_io = io->end_io;
++      bh->b_private = io->context;
++      free_io(io->md, io);
++
++      bh->b_end_io(bh, uptodate);
++}
++
++/*
++ * Do the bh mapping for a given leaf
++ *
++ * Takes over 'io'.  Once the buffer's completion has been hooked,
++ * dec_pending() frees io when the buffer completes; if the buffer
++ * cannot be mapped at all, io is freed here and -EINVAL is returned
++ * with bh untouched.  Otherwise returns whatever the target's map
++ * function returns.
++ */
++static inline int __map_buffer(struct mapped_device *md, int rw,
++                             struct buffer_head *bh, struct dm_io *io)
++{
++      struct dm_target *ti;
++
++      if (!md->map)
++              goto unused;
++
++      ti = dm_table_find_target(md->map, bh->b_rsector);
++      if (!ti->type)
++              goto unused;
++
++      /* hook the end io request fn */
++      atomic_inc(&md->pending);
++      io->md = md;
++      io->ti = ti;
++      io->rw = rw;
++      io->end_io = bh->b_end_io;
++      io->context = bh->b_private;
++      bh->b_end_io = dec_pending;
++      bh->b_private = io;
++
++      return ti->type->map(ti, bh, rw, &io->map_context);
++
++      unused:
++      /* bh was never hooked, so nothing else would return io to the pool */
++      free_io(md, io);
++      return -EINVAL;
++}
++
++/*
++ * Checks to see if we should be deferring io, if so it queues it
++ * and returns 1.
++ *
++ * Returns 0 when io is not being blocked, 1 when the buffer was
++ * queued, and a negative errno when it could not be (-EIO for a
++ * dropped read-ahead, or the error from queue_io()).  md->lock must
++ * be read-held on entry and is read-held again on return, but it is
++ * dropped in between.
++ */
++static inline int __deferring(struct mapped_device *md, int rw,
++                            struct buffer_head *bh)
++{
++      int r;
++
++      /*
++       * If we're suspended we have to queue this io for later.
++       */
++      while (test_bit(DMF_BLOCK_IO, &md->flags)) {
++              /* queue_io takes the lock for writing, so let go of it */
++              up_read(&md->lock);
++
++              /*
++               * There's no point deferring a read ahead
++               * request, just drop it.
++               */
++              if (rw == READA) {
++                      down_read(&md->lock);
++                      return -EIO;
++              }
++
++              r = queue_io(md, bh, rw);
++              down_read(&md->lock);
++
++              if (r < 0)
++                      return r;
++
++              if (r == 0)
++                      return 1;       /* deferred successfully */
++
++              /* r > 0: blocking was lifted meanwhile, test the flag again */
++      }
++
++      return 0;
++}
++
++/*
++ * make_request function for device-mapper queues.
++ *
++ * Looks up the mapped device behind bh->b_rdev and either defers the
++ * buffer (while io is being blocked) or maps it through the current
++ * table.  Buffers that can be neither deferred nor mapped are
++ * completed with an io error and 0 is returned; otherwise the return
++ * value is 0 for a deferred buffer or the result of __map_buffer().
++ */
++static int dm_request(request_queue_t *q, int rw, struct buffer_head *bh)
++{
++      int r;
++      struct dm_io *io;
++      struct mapped_device *md;
++
++      md = get_kdev(bh->b_rdev);
++      if (!md) {
++              buffer_IO_error(bh);
++              return 0;
++      }
++
++      io = alloc_io(md);
++      down_read(&md->lock);
++
++      r = __deferring(md, rw, bh);
++      if (r) {
++              /*
++               * Deferred or failed: io was never attached to bh, so
++               * it has to go back to the pool here or it is lost.
++               */
++              free_io(md, io);
++              if (r < 0)
++                      goto bad;
++
++              r = 0;
++      } else {
++              /* not deferring */
++              r = __map_buffer(md, rw, bh, io);
++              if (r < 0)
++                      goto bad;
++      }
++
++      up_read(&md->lock);
++      dm_put(md);
++      return r;
++
++      bad:
++      buffer_IO_error(bh);
++      up_read(&md->lock);
++      dm_put(md);
++      return 0;
++}
++
++static int check_dev_size(kdev_t dev, unsigned long block)
++{
++      unsigned int major = major(dev);
++      unsigned int minor = minor(dev);
++
++      /* FIXME: check this */
++      unsigned long max_sector = (blk_size[major][minor] << 1) + 1;
++      unsigned long sector = (block + 1) * (blksize_size[major][minor] >> 9);
++
++      return (sector > max_sector) ? 0 : 1;
++}
++
++/*
++ * Creates a dummy buffer head and maps it (for lilo); outputs are written only when 0 is returned.
++ */
++static int __bmap(struct mapped_device *md, kdev_t dev, unsigned long block,
++                kdev_t *r_dev, unsigned long *r_block)
++{
++      struct buffer_head bh;
++      struct dm_target *ti;
++      union map_info map_context;
++      int r;
++
++      if (test_bit(DMF_BLOCK_IO, &md->flags)) {
++              return -EPERM;  /* io is currently blocked on this device */
++      }
++
++      if (!check_dev_size(dev, block)) {
++              return -EINVAL; /* block is past the size check_dev_size() allows */
++      }
++
++      if (!md->map)
++              return -EINVAL; /* no table bound */
++
++      /* setup dummy bh */
++      memset(&bh, 0, sizeof(bh));
++      bh.b_blocknr = block;
++      bh.b_dev = bh.b_rdev = dev;
++      bh.b_size = blksize_size[major(dev)][minor(dev)];
++      bh.b_rsector = block * (bh.b_size >> 9);        /* block number -> 512-byte sectors */
++
++      /* find target */
++      ti = dm_table_find_target(md->map, bh.b_rsector);
++
++      /* do the mapping; NOTE(review): ti->type and ->end_io are not NULL-checked here (__map_buffer checks ti->type) -- confirm */
++      r = ti->type->map(ti, &bh, READ, &map_context);
++      ti->type->end_io(ti, &bh, READ, 0, &map_context);
++
++      if (!r) {
++              *r_dev = bh.b_rdev;
++              *r_block = bh.b_rsector / (bh.b_size >> 9);     /* sectors -> blocks */
++      }
++
++      return r;
++}
++
++/*
++ * Marshals arguments and results between user and kernel space.
++ */
++static int dm_user_bmap(struct inode *inode, struct lv_bmap *lvb)
++{
++      struct mapped_device *md;
++      unsigned long block, r_block;
++      kdev_t r_dev;
++      int r;
++
++      if (get_user(block, &lvb->lv_block))
++              return -EFAULT;
++
++      md = get_kdev(inode->i_rdev);
++      if (!md)
++              return -ENXIO;
++
++      down_read(&md->lock);
++      r = __bmap(md, inode->i_rdev, block, &r_dev, &r_block);
++      up_read(&md->lock);
++      dm_put(md);
++
++      if (!r && (put_user(kdev_t_to_nr(r_dev), &lvb->lv_dev) ||
++                 put_user(r_block, &lvb->lv_block)))
++              r = -EFAULT;
++
++      return r;
++}
++
++static void free_md(struct mapped_device *md)
++{
++      free_dev(md->dev);
++      mempool_destroy(md->io_pool);
++      kfree(md);
++}
++
++/*
++ * Allocate and initialise a blank device; a zero 'dev' picks a free number.  Returns NULL on failure.
++ */
++static struct mapped_device *alloc_md(kdev_t dev)
++{
++      int r;
++      struct mapped_device *md = kmalloc(sizeof(*md), GFP_KERNEL);
++
++      if (!md) {
++              DMWARN("unable to allocate device, out of memory.");
++              return NULL;
++      }
++
++      memset(md, 0, sizeof(*md));
++
++      /* Allocate suitable device number */
++      if (!dev)
++              r = first_free_dev(md);
++      else
++              r = specific_dev(dev, md);
++
++      if (r) {
++              kfree(md);
++              return NULL;
++      }
++
++      md->io_pool = mempool_create(MIN_IOS, mempool_alloc_slab,
++                                   mempool_free_slab, _io_cache);
++      if (!md->io_pool) {
++              free_dev(md->dev);      /* not free_md(): there is no pool to destroy and md must be freed once */
++              kfree(md);
++              return NULL;
++      }
++
++      init_rwsem(&md->lock);
++      atomic_set(&md->holders, 1);    /* the caller owns the initial reference */
++      atomic_set(&md->pending, 0);
++      init_waitqueue_head(&md->wait);
++      init_waitqueue_head(&md->eventq);
++
++      return md;
++}
++
++/*
++ * The hardsect size for a mapped device is the largest hardsect size
++ * from the devices it maps onto.
++ */
++static int __find_hardsect_size(struct list_head *devices)
++{
++      int result = 512, size;
++      struct list_head *tmp;
++
++      list_for_each (tmp, devices) {
++              struct dm_dev *dd = list_entry(tmp, struct dm_dev, list);
++              size = get_hardsect_size(dd->dev);
++              if (size > result)
++                      result = size;
++      }
++
++      return result;
++}
++
++/*
++ * Table event hook (installed by __bind): bumps event_nr and wakes sleepers on md->eventq.
++ */
++static void event_callback(void *context)
++{
++      struct mapped_device *md = (struct mapped_device *) context;
++
++      down_write(&md->lock);
++      md->event_nr++;
++      wake_up_interruptible(&md->eventq);
++      up_write(&md->lock);
++}
++
++static int __bind(struct mapped_device *md, struct dm_table *t)
++{
++      unsigned int minor = minor(md->dev);
++      unsigned int major = major(md->dev);
++      md->map = t;
++
++      /* in k */
++      blk_size[major][minor] = dm_table_get_size(t) >> 1;
++      blksize_size[major][minor] = BLOCK_SIZE;
++      hardsect_size[major][minor] =
++          __find_hardsect_size(dm_table_get_devices(t));
++      register_disk(NULL, md->dev, 1, &dm_blk_dops, blk_size[major][minor]);
++
++      dm_table_event_callback(md->map, event_callback, md);
++      dm_table_get(t);
++      return 0;
++}
++
++static void __unbind(struct mapped_device *md)
++{
++      unsigned int minor = minor(md->dev);
++      unsigned int major = major(md->dev);
++
++      if (md->map) {
++              dm_table_event_callback(md->map, NULL, NULL);
++              dm_table_put(md->map);
++              md->map = NULL;
++
++      }
++
++      blk_size[major][minor] = 0;
++      blksize_size[major][minor] = 0;
++      hardsect_size[major][minor] = 0;
++}
++
++/*
++ * Constructor for a new device.
++ */
++int dm_create(kdev_t dev, struct mapped_device **result)
++{
++      struct mapped_device *md;
++
++      md = alloc_md(dev);
++      if (!md)
++              return -ENXIO;
++
++      __unbind(md);   /* Ensure zero device size */
++
++      *result = md;
++      return 0;
++}
++
++void dm_get(struct mapped_device *md)
++{
++      atomic_inc(&md->holders);
++}
++
++void dm_put(struct mapped_device *md)
++{
++      if (atomic_dec_and_test(&md->holders)) {
++              if (md->map)
++                      dm_table_suspend_targets(md->map);
++              __unbind(md);
++              free_md(md);
++      }
++}
++
++/*
++ * Requeue the deferred io by calling generic_make_request.
++ */
++static void flush_deferred_io(struct deferred_io *c)
++{
++      struct deferred_io *n;
++
++      while (c) {
++              n = c->next;
++              generic_make_request(c->rw, c->bh);
++              free_deferred(c);
++              c = n;
++      }
++}
++
++/*
++ * Swap in a new table (destroying old one).  Returns 0, -EPERM, or the error from __bind().
++ */
++int dm_swap_table(struct mapped_device *md, struct dm_table *table)
++{
++      int r;
++
++      down_write(&md->lock);
++
++      /*
++       * The device must be suspended, or have no table bound yet.
++       */
++      if (md->map && !test_bit(DMF_SUSPENDED, &md->flags)) {
++              up_write(&md->lock);
++              return -EPERM;
++      }
++
++      __unbind(md);
++      r = __bind(md, table);
++
++      /* release the lock on every path, including a failed bind */
++      up_write(&md->lock);
++
++      return r;
++}
++
++/*
++ * We need to be able to change a mapping table under a mounted
++ * filesystem.  For example we might want to move some data in
++ * the background.  Before the table can be swapped with
++ * dm_swap_table, dm_suspend must be called to flush any in
++ * flight io and ensure that any further io gets deferred.
++ */
++int dm_suspend(struct mapped_device *md)
++{
++      int r = 0;
++      DECLARE_WAITQUEUE(wait, current);
++
++      down_write(&md->lock);
++
++      /*
++       * First we set the BLOCK_IO flag so no more ios will be
++       * mapped.
++       */
++      if (test_bit(DMF_BLOCK_IO, &md->flags)) {
++              up_write(&md->lock);
++              return -EINVAL; /* io is already being blocked */
++      }
++
++      set_bit(DMF_BLOCK_IO, &md->flags);
++      add_wait_queue(&md->wait, &wait);
++      up_write(&md->lock);
++
++      /*
++       * Then we wait for the already mapped ios to
++       * complete.
++       */
++      run_task_queue(&tq_disk);
++      while (1) {
++              set_current_state(TASK_INTERRUPTIBLE);
++
++              if (!atomic_read(&md->pending) || signal_pending(current))
++                      break;  /* nothing left in flight, or a signal ended the wait */
++
++              schedule();
++      }
++      set_current_state(TASK_RUNNING);
++
++      down_write(&md->lock);
++      remove_wait_queue(&md->wait, &wait);
++
++      /* did we flush everything ? */
++      if (atomic_read(&md->pending)) {
++              clear_bit(DMF_BLOCK_IO, &md->flags);    /* give up: let io be mapped again */
++              r = -EINTR;
++      } else {
++              set_bit(DMF_SUSPENDED, &md->flags);
++              if (md->map)
++                      dm_table_suspend_targets(md->map);
++      }
++      up_write(&md->lock);
++
++      return r;
++}
++
++int dm_resume(struct mapped_device *md)
++{
++      struct deferred_io *def;
++
++      down_write(&md->lock);
++      if (!test_bit(DMF_SUSPENDED, &md->flags)) {
++              up_write(&md->lock);
++              return -EINVAL;
++      }
++
++      if (md->map)
++              dm_table_resume_targets(md->map);
++
++      clear_bit(DMF_SUSPENDED, &md->flags);
++      clear_bit(DMF_BLOCK_IO, &md->flags);
++      def = md->deferred;
++      md->deferred = NULL;
++      up_write(&md->lock);
++
++      flush_deferred_io(def);
++      run_task_queue(&tq_disk);
++
++      return 0;
++}
++
++struct dm_table *dm_get_table(struct mapped_device *md)
++{
++      struct dm_table *t;
++
++      down_read(&md->lock);
++      t = md->map;
++      if (t)
++              dm_table_get(t);
++      up_read(&md->lock);
++
++      return t;
++}
++
++/*-----------------------------------------------------------------
++ * Event notification.
++ *---------------------------------------------------------------*/
++uint32_t dm_get_event_nr(struct mapped_device *md)
++{
++      uint32_t r;
++
++      down_read(&md->lock);
++      r = md->event_nr;
++      up_read(&md->lock);
++
++      return r;
++}
++
++int dm_add_wait_queue(struct mapped_device *md, wait_queue_t *wq,
++                    uint32_t event_nr)
++{
++      down_write(&md->lock);
++      if (event_nr != md->event_nr) {
++              up_write(&md->lock);
++              return 1;
++      }
++
++      add_wait_queue(&md->eventq, wq);
++      up_write(&md->lock);
++
++      return 0;
++}
++
++const char *dm_kdevname(kdev_t dev)
++{
++      static char buffer[32];
++      sprintf(buffer, "%03d:%03d", MAJOR(dev), MINOR(dev));
++      return buffer;
++}
++
++void dm_remove_wait_queue(struct mapped_device *md, wait_queue_t *wq)
++{
++      down_write(&md->lock);
++      remove_wait_queue(&md->eventq, wq);
++      up_write(&md->lock);
++}
++
++kdev_t dm_kdev(struct mapped_device *md)
++{
++      kdev_t dev;
++
++      down_read(&md->lock);
++      dev = md->dev;
++      up_read(&md->lock);
++
++      return dev;
++}
++
++int dm_suspended(struct mapped_device *md)
++{
++      return test_bit(DMF_SUSPENDED, &md->flags);
++}
++
++struct block_device_operations dm_blk_dops = {
++      .open = dm_blk_open,
++      .release = dm_blk_close,
++      .ioctl = dm_blk_ioctl,
++      .owner = THIS_MODULE
++};
++
++/*
++ * module hooks
++ */
++module_init(dm_init);
++module_exit(dm_exit);
++
++MODULE_DESCRIPTION(DM_NAME " driver");
++MODULE_AUTHOR("Joe Thornber <thornber@sistina.com>");
++MODULE_LICENSE("GPL");
++
++EXPORT_SYMBOL(dm_kdevname);
+diff -ruN linux-2.4.21-dm-real/drivers/md/dm.h linux-2.4.21/drivers/md/dm.h
+--- linux-2.4.21-dm-real/drivers/md/dm.h       Thu Jan  1 01:00:00 1970
++++ linux-2.4.21/drivers/md/dm.h       Sat Jul 12 18:14:05 2003
+@@ -0,0 +1,175 @@
++/*
++ * Internal header file for device mapper
++ *
++ * Copyright (C) 2001, 2002 Sistina Software
++ *
++ * This file is released under the LGPL.
++ */
++
++#ifndef DM_INTERNAL_H
++#define DM_INTERNAL_H
++
++#include <linux/fs.h>
++#include <linux/device-mapper.h>
++#include <linux/list.h>
++#include <linux/blkdev.h>
++
++#define DM_NAME "device-mapper"
++#define DMWARN(f, x...) printk(KERN_WARNING DM_NAME ": " f "\n" , ## x)
++#define DMERR(f, x...) printk(KERN_ERR DM_NAME ": " f "\n" , ## x)
++#define DMINFO(f, x...) printk(KERN_INFO DM_NAME ": " f "\n" , ## x)
++
++/*
++ * FIXME: I think this should be with the definition of sector_t
++ * in types.h.
++ */
++#ifdef CONFIG_LBD
++#define SECTOR_FORMAT "%Lu"
++#else
++#define SECTOR_FORMAT "%lu"
++#endif
++
++#define SECTOR_SHIFT 9
++#define SECTOR_SIZE (1 << SECTOR_SHIFT)
++
++extern struct block_device_operations dm_blk_dops;
++
++/*
++ * List of devices that a metadevice uses and should open/close.
++ */
++struct dm_dev {
++      struct list_head list;
++
++      atomic_t count;
++      int mode;
++      kdev_t dev;
++      struct block_device *bdev;
++};
++
++struct dm_table;
++struct mapped_device;
++
++/*-----------------------------------------------------------------
++ * Functions for manipulating a struct mapped_device.
++ * Drop the reference with dm_put when you finish with the object.
++ *---------------------------------------------------------------*/
++int dm_create(kdev_t dev, struct mapped_device **md);
++
++/*
++ * Reference counting for md.
++ */
++void dm_get(struct mapped_device *md);
++void dm_put(struct mapped_device *md);
++
++/*
++ * A device can still be used while suspended, but I/O is deferred.
++ */
++int dm_suspend(struct mapped_device *md);
++int dm_resume(struct mapped_device *md);
++
++/*
++ * The device must be suspended before calling this method.
++ */
++int dm_swap_table(struct mapped_device *md, struct dm_table *t);
++
++/*
++ * Drop a reference on the table when you've finished with the
++ * result.
++ */
++struct dm_table *dm_get_table(struct mapped_device *md);
++
++/*
++ * Event functions.
++ */
++uint32_t dm_get_event_nr(struct mapped_device *md);
++int dm_add_wait_queue(struct mapped_device *md, wait_queue_t *wq,
++                    uint32_t event_nr);
++void dm_remove_wait_queue(struct mapped_device *md, wait_queue_t *wq);
++
++/*
++ * Info functions.
++ */
++kdev_t dm_kdev(struct mapped_device *md);
++int dm_suspended(struct mapped_device *md);
++
++/*-----------------------------------------------------------------
++ * Functions for manipulating a table.  Tables are also reference
++ * counted.
++ *---------------------------------------------------------------*/
++int dm_table_create(struct dm_table **result, int mode);
++
++void dm_table_get(struct dm_table *t);
++void dm_table_put(struct dm_table *t);
++
++int dm_table_add_target(struct dm_table *t, const char *type,
++                      sector_t start, sector_t len, char *params);
++int dm_table_complete(struct dm_table *t);
++void dm_table_event_callback(struct dm_table *t,
++                           void (*fn)(void *), void *context);
++void dm_table_event(struct dm_table *t);
++sector_t dm_table_get_size(struct dm_table *t);
++struct dm_target *dm_table_get_target(struct dm_table *t, unsigned int index);
++struct dm_target *dm_table_find_target(struct dm_table *t, sector_t sector);
++unsigned int dm_table_get_num_targets(struct dm_table *t);
++struct list_head *dm_table_get_devices(struct dm_table *t);
++int dm_table_get_mode(struct dm_table *t);
++void dm_table_suspend_targets(struct dm_table *t);
++void dm_table_resume_targets(struct dm_table *t);
++
++/*-----------------------------------------------------------------
++ * A registry of target types.
++ *---------------------------------------------------------------*/
++int dm_target_init(void);
++void dm_target_exit(void);
++struct target_type *dm_get_target_type(const char *name);
++void dm_put_target_type(struct target_type *t);
++
++
++/*-----------------------------------------------------------------
++ * Useful inlines.
++ *---------------------------------------------------------------*/
++static inline int array_too_big(unsigned long fixed, unsigned long obj,
++                              unsigned long num)
++{
++      return (num > (ULONG_MAX - fixed) / obj);
++}
++
++/*
++ * ceiling(n / size) * size
++ */
++static inline unsigned long dm_round_up(unsigned long n, unsigned long size)
++{
++      unsigned long r = n % size;
++      return n + (r ? (size - r) : 0);
++}
++
++/*
++ * Ceiling(n / size)
++ */
++static inline unsigned long dm_div_up(unsigned long n, unsigned long size)
++{
++      return dm_round_up(n, size) / size;
++}
++
++const char *dm_kdevname(kdev_t dev);
++
++/*
++ * The device-mapper can be driven through one of two interfaces;
++ * ioctl or filesystem, depending which patch you have applied.
++ */
++int dm_interface_init(void);
++void dm_interface_exit(void);
++
++/*
++ * Targets for linear and striped mappings
++ */
++int dm_linear_init(void);
++void dm_linear_exit(void);
++
++int dm_stripe_init(void);
++void dm_stripe_exit(void);
++
++int dm_snapshot_init(void);
++void dm_snapshot_exit(void);
++
++#endif
+diff -ruN linux-2.4.21-dm-real/drivers/md/kcopyd.c linux-2.4.21/drivers/md/kcopyd.c
+--- linux-2.4.21-dm-real/drivers/md/kcopyd.c   Thu Jan  1 01:00:00 1970
++++ linux-2.4.21/drivers/md/kcopyd.c   Sat Jul 12 18:14:05 2003
+@@ -0,0 +1,650 @@
++/*
++ * Copyright (C) 2002 Sistina Software (UK) Limited.
++ *
++ * This file is released under the GPL.
++ */
++
++#include <asm/atomic.h>
++
++#include <linux/blkdev.h>
++#include <linux/config.h>
++#include <linux/device-mapper.h>
++#include <linux/fs.h>
++#include <linux/init.h>
++#include <linux/list.h>
++#include <linux/locks.h>
++#include <linux/mempool.h>
++#include <linux/module.h>
++#include <linux/pagemap.h>
++#include <linux/slab.h>
++#include <linux/vmalloc.h>
++
++#include "kcopyd.h"
++#include "dm-daemon.h"
++
++/* FIXME: this is only needed for the DMERR macros */
++#include "dm.h"
++
++static struct dm_daemon _kcopyd;
++
++/*-----------------------------------------------------------------
++ * Each kcopyd client has its own little pool of preallocated
++ * pages for kcopyd io.
++ *---------------------------------------------------------------*/
++struct kcopyd_client {
++      struct list_head list;
++
++      spinlock_t lock;
++      struct list_head pages;
++      unsigned int nr_pages;
++      unsigned int nr_free_pages;
++};
++
++static inline void __push_page(struct kcopyd_client *kc, struct page *p)
++{
++      list_add(&p->list, &kc->pages);
++      kc->nr_free_pages++;
++}
++
++static inline struct page *__pop_page(struct kcopyd_client *kc)
++{
++      struct page *p;
++
++      p = list_entry(kc->pages.next, struct page, list);
++      list_del(&p->list);
++      kc->nr_free_pages--;
++
++      return p;
++}
++
++static int kcopyd_get_pages(struct kcopyd_client *kc,
++                          unsigned int nr, struct list_head *pages)
++{
++      struct page *p;
++      INIT_LIST_HEAD(pages);
++
++      spin_lock(&kc->lock);
++      if (kc->nr_free_pages < nr) {
++              spin_unlock(&kc->lock);
++              return -ENOMEM;
++      }
++
++      while (nr--) {
++              p = __pop_page(kc);
++              list_add(&p->list, pages);
++      }
++      spin_unlock(&kc->lock);
++
++      return 0;
++}
++
++static void kcopyd_put_pages(struct kcopyd_client *kc, struct list_head *pages)
++{     /* hand every page on 'pages' back to the client's free pool */
++      struct list_head *tmp, *tmp2;
++
++      spin_lock(&kc->lock);
++      list_for_each_safe (tmp, tmp2, pages)
++              __push_page(kc, list_entry(tmp, struct page, list));    /* re-linked without list_del(), so the 'pages' head is stale afterwards */
++      spin_unlock(&kc->lock);
++}
++
++/*
++ * These three functions resize the page pool.
++ */
++static void release_pages(struct list_head *pages)
++{
++      struct page *p;
++      struct list_head *tmp, *tmp2;
++
++      list_for_each_safe (tmp, tmp2, pages) {
++              p = list_entry(tmp, struct page, list);
++              UnlockPage(p);
++              __free_page(p);
++      }
++}
++
++static int client_alloc_pages(struct kcopyd_client *kc, unsigned int nr)
++{
++      unsigned int i;
++      struct page *p;
++      LIST_HEAD(new);
++
++      for (i = 0; i < nr; i++) {
++              p = alloc_page(GFP_KERNEL);
++              if (!p) {
++                      release_pages(&new);
++                      return -ENOMEM;
++              }
++
++              LockPage(p);
++              list_add(&p->list, &new);
++      }
++
++      kcopyd_put_pages(kc, &new);
++      kc->nr_pages += nr;
++      return 0;
++}
++
++static void client_free_pages(struct kcopyd_client *kc)
++{
++      BUG_ON(kc->nr_free_pages != kc->nr_pages);
++      release_pages(&kc->pages);
++      kc->nr_free_pages = kc->nr_pages = 0;
++}
++
++/*-----------------------------------------------------------------
++ * kcopyd_jobs need to be allocated by the *clients* of kcopyd,
++ * for this reason we use a mempool to prevent the client from
++ * ever having to do io (which could cause a deadlock).
++ *---------------------------------------------------------------*/
++struct kcopyd_job {
++      struct kcopyd_client *kc;
++      struct list_head list;
++      unsigned int flags;
++
++      /*
++       * Error state of the job.
++       */
++      int read_err;
++      unsigned int write_err;
++
++      /*
++       * Either READ or WRITE
++       */
++      int rw;
++      struct io_region source;
++
++      /*
++       * The destinations for the transfer.
++       */
++      unsigned int num_dests;
++      struct io_region dests[KCOPYD_MAX_REGIONS];
++
++      sector_t offset;
++      unsigned int nr_pages;
++      struct list_head pages;
++
++      /*
++       * Set this to ensure you are notified when the job has
++       * completed.  'context' is for callback to use.
++       */
++      kcopyd_notify_fn fn;
++      void *context;
++
++      /*
++       * These fields are only used if the job has been split
++       * into more manageable parts.
++       */
++      struct semaphore lock;
++      atomic_t sub_jobs;
++      sector_t progress;
++};
++
++/* FIXME: this should scale with the number of pages */
++#define MIN_JOBS 512
++
++static kmem_cache_t *_job_cache = NULL;
++static mempool_t *_job_pool = NULL;
++
++/*
++ * We maintain three lists of jobs:
++ *
++ * i)   jobs waiting for pages
++ * ii)  jobs that have pages, and are waiting for the io to be issued.
++ * iii) jobs that have completed.
++ *
++ * All three of these are protected by job_lock.
++ */
++static spinlock_t _job_lock = SPIN_LOCK_UNLOCKED;
++
++static LIST_HEAD(_complete_jobs);
++static LIST_HEAD(_io_jobs);
++static LIST_HEAD(_pages_jobs);
++
++static int jobs_init(void)
++{
++      INIT_LIST_HEAD(&_complete_jobs);
++      INIT_LIST_HEAD(&_io_jobs);
++      INIT_LIST_HEAD(&_pages_jobs);
++
++      _job_cache = kmem_cache_create("kcopyd-jobs",
++                                     sizeof(struct kcopyd_job),
++                                     __alignof__(struct kcopyd_job),
++                                     0, NULL, NULL);
++      if (!_job_cache)
++              return -ENOMEM;
++
++      _job_pool = mempool_create(MIN_JOBS, mempool_alloc_slab,
++                                 mempool_free_slab, _job_cache);
++      if (!_job_pool) {
++              kmem_cache_destroy(_job_cache);
++              return -ENOMEM;
++      }
++
++      return 0;
++}
++
++static void jobs_exit(void)
++{
++      BUG_ON(!list_empty(&_complete_jobs));
++      BUG_ON(!list_empty(&_io_jobs));
++      BUG_ON(!list_empty(&_pages_jobs));
++
++      mempool_destroy(_job_pool);
++      kmem_cache_destroy(_job_cache);
++}
++
++/*
++ * Functions to push and pop a job onto the head of a given job
++ * list.
++ */
++static inline struct kcopyd_job *pop(struct list_head *jobs)
++{
++      struct kcopyd_job *job = NULL;
++      unsigned long flags;
++
++      spin_lock_irqsave(&_job_lock, flags);
++
++      if (!list_empty(jobs)) {
++              job = list_entry(jobs->next, struct kcopyd_job, list);
++              list_del(&job->list);
++      }
++      spin_unlock_irqrestore(&_job_lock, flags);
++
++      return job;
++}
++
++static inline void push(struct list_head *jobs, struct kcopyd_job *job)
++{
++      unsigned long flags;
++
++      spin_lock_irqsave(&_job_lock, flags);
++      list_add_tail(&job->list, jobs);
++      spin_unlock_irqrestore(&_job_lock, flags);
++}
++
++/*
++ * These three functions process 1 item from the corresponding
++ * job list.
++ *
++ * They return:
++ * < 0: error
++ *   0: success
++ * > 0: can't process yet.
++ */
++static int run_complete_job(struct kcopyd_job *job)
++{
++      void *context = job->context;
++      int read_err = job->read_err;
++      unsigned int write_err = job->write_err;
++      kcopyd_notify_fn fn = job->fn;
++
++      kcopyd_put_pages(job->kc, &job->pages);
++      mempool_free(job, _job_pool);
++      fn(read_err, write_err, context);
++      return 0;
++}
++
++static void complete_io(unsigned int error, void *context)
++{     /* dm_io_async() callback: record any error, then queue the job's next stage */
++      struct kcopyd_job *job = (struct kcopyd_job *) context;
++
++      if (error) {
++              if (job->rw == WRITE)
++                      job->write_err |= error;        /* OR the failure bits in so none are lost */
++              else
++                      job->read_err = 1;
++
++              if (!test_bit(KCOPYD_IGNORE_ERROR, &job->flags)) {
++                      push(&_complete_jobs, job);     /* abandon the job and report the error */
++                      dm_daemon_wake(&_kcopyd);
++                      return;
++              }
++      }
++
++      if (job->rw == WRITE)
++              push(&_complete_jobs, job);     /* write phase finished: job is done */
++
++      else {
++              job->rw = WRITE;        /* read phase finished: queue the write phase */
++              push(&_io_jobs, job);
++      }
++
++      dm_daemon_wake(&_kcopyd);
++}
++
++/*
++ * Request io on as many buffer heads as we can currently get for
++ * a particular job.
++ */
++static int run_io_job(struct kcopyd_job *job)
++{
++      int r;
++
++      if (job->rw == READ)
++              r = dm_io_async(1, &job->source, job->rw,
++                              list_entry(job->pages.next, struct page, list),
++                              job->offset, complete_io, job);
++
++      else
++              r = dm_io_async(job->num_dests, job->dests, job->rw,
++                              list_entry(job->pages.next, struct page, list),
++                              job->offset, complete_io, job);
++
++      return r;
++}
++
++#define SECTORS_PER_PAGE (PAGE_SIZE / SECTOR_SIZE)
++static int run_pages_job(struct kcopyd_job *job)
++{
++      int r;
++
++      job->nr_pages = dm_div_up(job->dests[0].count + job->offset,
++                                SECTORS_PER_PAGE);
++      r = kcopyd_get_pages(job->kc, job->nr_pages, &job->pages);
++      if (!r) {
++              /* this job is ready for io */
++              push(&_io_jobs, job);
++              return 0;
++      }
++
++      if (r == -ENOMEM)
++              /* can't complete now */
++              return 1;
++
++      return r;
++}
++
++/*
++ * Run through a list for as long as possible.  Returns the count
++ * of successful jobs.
++ */
++static int process_jobs(struct list_head *jobs, int (*fn) (struct kcopyd_job *))
++{
++      struct kcopyd_job *job;
++      int r, count = 0;
++
++      while ((job = pop(jobs))) {
++
++              r = fn(job);    /* <0 error, 0 success, >0 cannot be processed yet */
++
++              if (r < 0) {
++                      /* error this rogue job */
++                      if (job->rw == WRITE)
++                              job->write_err = (unsigned int) -1;     /* all bits set */
++                      else
++                              job->read_err = 1;
++                      push(&_complete_jobs, job);
++                      break;  /* stop processing this list for now */
++              }
++
++              if (r > 0) {
++                      /*
++                       * We couldn't service this job ATM, so
++                       * push this job back onto the list.
++                       */
++                      push(jobs, job);        /* push() appends, so the job rejoins at the tail */
++                      break;
++              }
++
++              count++;
++      }
++
++      return count;
++}
++
++/*
++ * kcopyd does this every time it's woken up.
++ */
++static void do_work(void)
++{
++      /*
++       * The order that these are called is *very* important.
++       * complete jobs can free some pages for pages jobs.
++       * Pages jobs when successful will jump onto the io jobs
++       * list.  io jobs call wake when they complete and it all
++       * starts again.
++       */
++      process_jobs(&_complete_jobs, run_complete_job);
++      process_jobs(&_pages_jobs, run_pages_job);
++      process_jobs(&_io_jobs, run_io_job);
++      run_task_queue(&tq_disk);
++}
++
++/*
++ * If we are copying a small region we just dispatch a single job
++ * to do the copy, otherwise the io has to be split up into many
++ * jobs.
++ */
++static void dispatch_job(struct kcopyd_job *job)
++{
++      push(&_pages_jobs, job);
++      dm_daemon_wake(&_kcopyd);
++}
++
++#define SUB_JOB_SIZE 128     /* upper bound on the sector count given to one sub job */
++static void segment_complete(int read_err,
++                           unsigned int write_err, void *context)
++{     /* sub-job notify fn: fold errors into the parent (context), then dispatch the next chunk or finish */
++      /* FIXME: tidy this function */
++      sector_t progress = 0;
++      sector_t count = 0;
++      struct kcopyd_job *job = (struct kcopyd_job *) context;
++
++      down(&job->lock);
++
++      /* update the error */
++      if (read_err)
++              job->read_err = 1;
++
++      if (write_err)
++              job->write_err |= write_err;    /* OR the bits in so the error test below can see them */
++
++      /*
++       * Only dispatch more work if there hasn't been an error.
++       */
++      if ((!job->read_err && !job->write_err) ||
++          test_bit(KCOPYD_IGNORE_ERROR, &job->flags)) {
++              /* get the next chunk of work */
++              progress = job->progress;
++              count = job->source.count - progress;
++              if (count) {
++                      if (count > SUB_JOB_SIZE)
++                              count = SUB_JOB_SIZE;
++
++                      job->progress += count;
++              }
++      }
++      up(&job->lock);
++
++      if (count) {
++              int i;
++              struct kcopyd_job *sub_job = mempool_alloc(_job_pool, GFP_NOIO);
++
++              memcpy(sub_job, job, sizeof(*job));     /* start from a copy of the parent job */
++              sub_job->source.sector += progress;
++              sub_job->source.count = count;
++
++              for (i = 0; i < job->num_dests; i++) {
++                      sub_job->dests[i].sector += progress;
++                      sub_job->dests[i].count = count;
++              }
++
++              sub_job->fn = segment_complete; /* completion re-enters this function */
++              sub_job->context = job;
++              dispatch_job(sub_job);
++
++      } else if (atomic_dec_and_test(&job->sub_jobs)) {
++
++              /*
++               * To avoid a race we must keep the job around
++               * until after the notify function has completed.
++               * Otherwise the client may try and stop the job
++               * after we've completed.
++               */
++              job->fn(read_err, write_err, job->context);     /* NOTE(review): passes this call's errors, not the accumulated job->*_err -- confirm intent */
++              mempool_free(job, _job_pool);
++      }
++}
++
++/*
++ * Create some little jobs that will do the move between
++ * them.
++ */
++#define SPLIT_COUNT 8
++static void split_job(struct kcopyd_job *job)
++{
++      int i;
++
++      atomic_set(&job->sub_jobs, SPLIT_COUNT);
++      for (i = 0; i < SPLIT_COUNT; i++)
++              segment_complete(0, 0u, job);
++}
++
++#define SUB_JOB_THRESHOLD (SPLIT_COUNT * SUB_JOB_SIZE)
++int kcopyd_copy(struct kcopyd_client *kc, struct io_region *from,
++              unsigned int num_dests, struct io_region *dests,
++              unsigned int flags, kcopyd_notify_fn fn, void *context)
++{     /* queue a copy of 'from' to each of 'dests'; fn(read_err, write_err, context) reports the result */
++      struct kcopyd_job *job;
++
++      /* job->dests[] has room for at most KCOPYD_MAX_REGIONS regions */
++      if (num_dests > KCOPYD_MAX_REGIONS)
++              return -EINVAL;
++
++      /* Allocate a new job. */
++      job = mempool_alloc(_job_pool, GFP_NOIO);
++
++      /* set up for the read. */
++      job->kc = kc;
++      job->flags = flags;
++      job->read_err = 0;
++      job->write_err = 0;
++      job->rw = READ;
++
++      memcpy(&job->source, from, sizeof(*from));
++
++      job->num_dests = num_dests;
++      memcpy(&job->dests, dests, sizeof(*dests) * num_dests);
++
++      job->offset = 0;
++      job->nr_pages = 0;
++      INIT_LIST_HEAD(&job->pages);
++
++      job->fn = fn;
++      job->context = context;
++
++      if (job->source.count < SUB_JOB_THRESHOLD)
++              dispatch_job(job);      /* small enough to run as a single job */
++
++      else {
++              init_MUTEX(&job->lock); /* lock and progress are only used by split jobs */
++              job->progress = 0;
++              split_job(job);
++      }
++
++      return 0;
++}
++
++/*
++ * Meant to cancel a kcopyd job, eg. when someone is deactivating a
++ * mirror.  Not implemented yet: nothing is cancelled, -1 is returned.
++ */
++int kcopyd_cancel(struct kcopyd_job *job, int block)
++{
++      /* FIXME: finish - both arguments are currently ignored */
++      return -1;
++}
++
++/*-----------------------------------------------------------------
++ * Unit setup
++ *---------------------------------------------------------------*/
++static DECLARE_MUTEX(_client_lock);
++static LIST_HEAD(_clients);
++
++static int client_add(struct kcopyd_client *kc)
++{
++      down(&_client_lock);
++      list_add(&kc->list, &_clients);
++      up(&_client_lock);
++      return 0;
++}
++
++static void client_del(struct kcopyd_client *kc)
++{
++      down(&_client_lock);
++      list_del(&kc->list);
++      up(&_client_lock);
++}
++
++int kcopyd_client_create(unsigned int nr_pages, struct kcopyd_client **result)
++{
++      int r;
++      struct kcopyd_client *kc = kmalloc(sizeof(*kc), GFP_KERNEL);
++
++      if (!kc)
++              return -ENOMEM;
++
++      kc->lock = SPIN_LOCK_UNLOCKED;
++      INIT_LIST_HEAD(&kc->pages);
++      kc->nr_pages = kc->nr_free_pages = 0;
++
++      /* Each step that fails unwinds the steps before it. */
++      r = client_alloc_pages(kc, nr_pages);
++      if (r)
++              goto bad_pages;
++
++      r = dm_io_get(nr_pages);
++      if (r)
++              goto bad_io;
++
++      r = client_add(kc);
++      if (r)
++              goto bad_add;
++
++      *result = kc;
++      return 0;
++
++bad_add:
++      dm_io_put(nr_pages);
++bad_io:
++      client_free_pages(kc);
++bad_pages:
++      kfree(kc);
++      return r;
++}
++
++void kcopyd_client_destroy(struct kcopyd_client *kc)
++{
++      dm_io_put(kc->nr_pages);
++      client_free_pages(kc);
++      client_del(kc);
++      kfree(kc);
++}
++
++
++int __init kcopyd_init(void)
++{
++      int r;
++
++      r = jobs_init();
++      if (r)
++              return r;
++
++      r = dm_daemon_start(&_kcopyd, "kcopyd", do_work);
++      if (r)
++              jobs_exit();
++
++      return r;
++}
++
++void kcopyd_exit(void)
++{
++      dm_daemon_stop(&_kcopyd);       /* stop the worker first ... */
++      jobs_exit();                    /* ... then undo jobs_init() */
++}
++
++EXPORT_SYMBOL(kcopyd_client_create);
++EXPORT_SYMBOL(kcopyd_client_destroy);
++EXPORT_SYMBOL(kcopyd_copy);
++EXPORT_SYMBOL(kcopyd_cancel);
+diff -ruN linux-2.4.21-dm-real/drivers/md/kcopyd.h linux-2.4.21/drivers/md/kcopyd.h
+--- linux-2.4.21-dm-real/drivers/md/kcopyd.h   Thu Jan  1 01:00:00 1970
++++ linux-2.4.21/drivers/md/kcopyd.h   Sat Jul 12 18:14:05 2003
+@@ -0,0 +1,47 @@
++/*
++ * Copyright (C) 2001 Sistina Software
++ *
++ * This file is released under the GPL.
++ */
++
++#ifndef DM_KCOPYD_H
++#define DM_KCOPYD_H
++
++/*
++ * Needed for the definition of sector_t.
++ */
++#include <linux/device-mapper.h>
++#include <linux/iobuf.h>
++
++#include "dm-io.h"
++
++int kcopyd_init(void);
++void kcopyd_exit(void);
++
++/* FIXME: make this configurable */
++#define KCOPYD_MAX_REGIONS 8
++
++#define KCOPYD_IGNORE_ERROR 1
++
++/*
++ * To use kcopyd you must first create a kcopyd client object.
++ */
++struct kcopyd_client;
++int kcopyd_client_create(unsigned int num_pages, struct kcopyd_client **result);
++void kcopyd_client_destroy(struct kcopyd_client *kc);
++
++/*
++ * Submit a copy job to kcopyd.  fn is called with context once
++ * the copy has finished.
++ *
++ * read_err is a boolean,
++ * write_err is a bitset, with 1 bit for each destination region
++ */
++typedef void (*kcopyd_notify_fn)(int read_err,
++                               unsigned int write_err, void *context);
++
++int kcopyd_copy(struct kcopyd_client *kc, struct io_region *from,
++              unsigned int num_dests, struct io_region *dests,
++              unsigned int flags, kcopyd_notify_fn fn, void *context);
++
++#endif
+diff -ruN linux-2.4.21-dm-real/fs/buffer.c linux-2.4.21/fs/buffer.c
+--- linux-2.4.21-dm-real/fs/buffer.c   Fri Jun 13 16:32:48 2003
++++ linux-2.4.21/fs/buffer.c   Sat Jul 12 18:13:56 2003
+@@ -735,6 +735,7 @@
+       bh->b_list = BUF_CLEAN;
+       bh->b_end_io = handler;
+       bh->b_private = private;
++      bh->b_journal_head = NULL;
+ }
+ 
+ static void end_buffer_io_async(struct buffer_head * bh, int uptodate)
+diff -ruN linux-2.4.21-dm-real/fs/jbd/journal.c linux-2.4.21/fs/jbd/journal.c
+--- linux-2.4.21-dm-real/fs/jbd/journal.c      Fri Jun 13 16:32:48 2003
++++ linux-2.4.21/fs/jbd/journal.c      Sat Jul 12 18:13:56 2003
+@@ -1802,9 +1802,9 @@
+ 
+               if (buffer_jbd(bh)) {
+                       /* Someone did it for us! */
+-                      J_ASSERT_BH(bh, bh->b_private != NULL);
++                      J_ASSERT_BH(bh, bh->b_journal_head != NULL);
+                       journal_free_journal_head(jh);
+-                      jh = bh->b_private;
++                      jh = bh->b_journal_head;
+               } else {
+                       /*
+                        * We actually don't need jh_splice_lock when
+@@ -1812,7 +1812,7 @@
+                        */
+                       spin_lock(&jh_splice_lock);
+                       set_bit(BH_JBD, &bh->b_state);
+-                      bh->b_private = jh;
++                      bh->b_journal_head = jh;
+                       jh->b_bh = bh;
+                       atomic_inc(&bh->b_count);
+                       spin_unlock(&jh_splice_lock);
+@@ -1821,7 +1821,7 @@
+       }
+       jh->b_jcount++;
+       spin_unlock(&journal_datalist_lock);
+-      return bh->b_private;
++      return bh->b_journal_head;
+ }
+ 
+ /*
+@@ -1854,7 +1854,7 @@
+                       J_ASSERT_BH(bh, jh2bh(jh) == bh);
+                       BUFFER_TRACE(bh, "remove journal_head");
+                       spin_lock(&jh_splice_lock);
+-                      bh->b_private = NULL;
++                      bh->b_journal_head = NULL;
+                       jh->b_bh = NULL;        /* debug, really */
+                       clear_bit(BH_JBD, &bh->b_state);
+                       __brelse(bh);
+diff -ruN linux-2.4.21-dm-real/include/linux/device-mapper.h linux-2.4.21/include/linux/device-mapper.h
+--- linux-2.4.21-dm-real/include/linux/device-mapper.h Thu Jan  1 01:00:00 1970
++++ linux-2.4.21/include/linux/device-mapper.h Sat Jul 12 18:14:05 2003
+@@ -0,0 +1,104 @@
++/*
++ * Copyright (C) 2001 Sistina Software (UK) Limited.
++ *
++ * This file is released under the LGPL.
++ */
++
++#ifndef _LINUX_DEVICE_MAPPER_H
++#define _LINUX_DEVICE_MAPPER_H
++
++typedef unsigned long sector_t;
++
++struct dm_target;
++struct dm_table;
++struct dm_dev;
++
++typedef enum { STATUSTYPE_INFO, STATUSTYPE_TABLE } status_type_t;
++
++union map_info {
++      void *ptr;
++      unsigned long long ll;
++};
++
++/*
++ * In the constructor the target parameter will already have the
++ * table, type, begin and len fields filled in.
++ */
++typedef int (*dm_ctr_fn) (struct dm_target * target, unsigned int argc,
++                        char **argv);
++
++/*
++ * The destructor doesn't need to free the dm_target, just
++ * anything hidden in ti->private.
++ */
++typedef void (*dm_dtr_fn) (struct dm_target * ti);
++
++/*
++ * The map function must return:
++ * < 0: error
++ * = 0: The target will handle the io by resubmitting it later
++ * > 0: simple remap complete
++ */
++typedef int (*dm_map_fn) (struct dm_target * ti, struct buffer_head * bh,
++                        int rw, union map_info *map_context);
++
++/*
++ * Returns:
++ * < 0 : error (currently ignored)
++ * 0   : ended successfully
++ * 1   : for some reason the io has still not completed (eg,
++ *       multipath target might want to requeue a failed io).
++ */
++typedef int (*dm_endio_fn) (struct dm_target * ti,
++                          struct buffer_head * bh, int rw, int error,
++                          union map_info *map_context);
++typedef void (*dm_suspend_fn) (struct dm_target *ti);
++typedef void (*dm_resume_fn) (struct dm_target *ti);
++typedef int (*dm_status_fn) (struct dm_target * ti, status_type_t status_type,
++                           char *result, unsigned int maxlen);
++
++void dm_error(const char *message);
++
++/*
++ * Constructors should call these functions to ensure destination devices
++ * are opened/closed correctly.
++ * FIXME: too many arguments.
++ */
++int dm_get_device(struct dm_target *ti, const char *path, sector_t start,
++                sector_t len, int mode, struct dm_dev **result);
++void dm_put_device(struct dm_target *ti, struct dm_dev *d);
++
++/*
++ * Information about a target type
++ */
++struct target_type {
++      const char *name;
++      struct module *module;
++      dm_ctr_fn ctr;
++      dm_dtr_fn dtr;
++      dm_map_fn map;
++      dm_endio_fn end_io;
++      dm_suspend_fn suspend;
++      dm_resume_fn resume;
++      dm_status_fn status;
++};
++
++struct dm_target {
++      struct dm_table *table;
++      struct target_type *type;
++
++      /* target limits */
++      sector_t begin;
++      sector_t len;
++
++      /* target specific data */
++      void *private;
++
++      /* Used to provide an error string from the ctr */
++      char *error;
++};
++
++int dm_register_target(struct target_type *t);
++int dm_unregister_target(struct target_type *t);
++
++#endif                                /* _LINUX_DEVICE_MAPPER_H */
+diff -ruN linux-2.4.21-dm-real/include/linux/dm-ioctl.h linux-2.4.21/include/linux/dm-ioctl.h
+--- linux-2.4.21-dm-real/include/linux/dm-ioctl.h      Thu Jan  1 01:00:00 1970
++++ linux-2.4.21/include/linux/dm-ioctl.h      Sat Jul 12 18:14:05 2003
+@@ -0,0 +1,237 @@
++/*
++ * Copyright (C) 2001 - 2003 Sistina Software (UK) Limited.
++ *
++ * This file is released under the LGPL.
++ */
++
++#ifndef _LINUX_DM_IOCTL_H
++#define _LINUX_DM_IOCTL_H
++
++#include <linux/types.h>
++
++#define DM_DIR "mapper"               /* Slashes not supported */
++#define DM_MAX_TYPE_NAME 16
++#define DM_NAME_LEN 128
++#define DM_UUID_LEN 129
++
++/*
++ * A traditional ioctl interface for the device mapper.
++ *
++ * Each device can have two tables associated with it, an
++ * 'active' table which is the one currently used by io passing
++ * through the device, and an 'inactive' one which is a table
++ * that is being prepared as a replacement for the 'active' one.
++ *
++ * DM_VERSION:
++ * Just get the version information for the ioctl interface.
++ *
++ * DM_REMOVE_ALL:
++ * Remove all dm devices, destroy all tables.  Only really used
++ * for debug.
++ *
++ * DM_LIST_DEVICES:
++ * Get a list of all the dm device names.
++ *
++ * DM_DEV_CREATE:
++ * Create a new device, neither the 'active' nor 'inactive' table
++ * slots will be filled.  The device will be in suspended state
++ * after creation, however any io to the device will get errored
++ * since it will be out-of-bounds.
++ *
++ * DM_DEV_REMOVE:
++ * Remove a device, destroy any tables.
++ *
++ * DM_DEV_RENAME:
++ * Rename a device.
++ *
++ * DM_DEV_SUSPEND:
++ * This performs both suspend and resume, depending which flag is
++ * passed in.
++ * Suspend: This command will not return until all pending io to
++ * the device has completed.  Further io will be deferred until
++ * the device is resumed.
++ * Resume: It is no longer an error to issue this command on an
++ * unsuspended device.  If a table is present in the 'inactive'
++ * slot, it will be moved to the active slot, then the old table
++ * from the active slot will be _destroyed_.  Finally the device
++ * is resumed.
++ *
++ * DM_DEV_STATUS:
++ * Retrieves the status for the table in the 'active' slot.
++ *
++ * DM_DEV_WAIT:
++ * Wait for a significant event to occur to the device.  This
++ * could either be caused by an event triggered by one of the
++ * targets of the table in the 'active' slot, or a table change.
++ *
++ * DM_TABLE_LOAD:
++ * Load a table into the 'inactive' slot for the device.  The
++ * device does _not_ need to be suspended prior to this command.
++ *
++ * DM_TABLE_CLEAR:
++ * Destroy any table in the 'inactive' slot (ie. abort).
++ *
++ * DM_TABLE_DEPS:
++ * Return a set of device dependencies for the 'active' table.
++ *
++ * DM_TABLE_STATUS:
++ * Return the targets status for the 'active' table.
++ */
++
++/*
++ * All ioctl arguments consist of a single chunk of memory, with
++ * this structure at the start.  If a uuid is specified any
++ * lookup (eg. for a DM_DEV_STATUS) will be done on that, *not* the
++ * name.
++ */
++struct dm_ioctl {
++      /*
++       * The version number is made up of three parts:
++       * major - no backward or forward compatibility,
++       * minor - only backwards compatible,
++       * patch - both backwards and forwards compatible.
++       *
++       * All clients of the ioctl interface should fill in the
++       * version number of the interface that they were
++       * compiled with.
++       *
++       * All recognised ioctl commands (ie. those that don't
++       * return -ENOTTY) fill out this field, even if the
++       * command failed.
++       */
++      uint32_t version[3];    /* in/out */
++      uint32_t data_size;     /* total size of data passed in
++                               * including this struct */
++
++      uint32_t data_start;    /* offset to start of data
++                               * relative to start of this struct */
++
++      uint32_t target_count;  /* in/out */
++      int32_t open_count;     /* out */
++      uint32_t flags;         /* in/out */
++      uint32_t event_nr;      /* in/out */
++      uint32_t padding;
++
++      uint64_t dev;           /* in/out */
++
++      char name[DM_NAME_LEN]; /* device name */
++      char uuid[DM_UUID_LEN]; /* unique identifier for
++                               * the block device */
++};
++
++/*
++ * Used to specify tables.  These structures appear after the
++ * dm_ioctl.
++ */
++struct dm_target_spec {
++      uint64_t sector_start;
++      uint64_t length;
++      int32_t status;         /* used when reading from kernel only */
++
++      /*
++       * Offset in bytes (from the start of this struct) to
++       * next target_spec.
++       */
++      uint32_t next;
++
++      char target_type[DM_MAX_TYPE_NAME];
++
++      /*
++       * Parameter string starts immediately after this object.
++       * Be careful to add padding after string to ensure correct
++       * alignment of subsequent dm_target_spec.
++       */
++};
++
++/*
++ * Used to retrieve the target dependencies.
++ */
++struct dm_target_deps {
++      uint32_t count;         /* Array size */
++      uint32_t padding;       /* unused */
++      uint64_t dev[0];        /* out */
++};
++
++/*
++ * Used to get a list of all dm devices.
++ */
++struct dm_name_list {
++      uint64_t dev;
++      uint32_t next;          /* offset to the next record from
++                                 the _start_ of this */
++      char name[0];
++};
++
++/*
++ * If you change this make sure you make the corresponding change
++ * to dm-ioctl.c:lookup_ioctl()
++ */
++enum {
++      /* Top level cmds */
++      DM_VERSION_CMD = 0,
++      DM_REMOVE_ALL_CMD,
++      DM_LIST_DEVICES_CMD,
++
++      /* device level cmds */
++      DM_DEV_CREATE_CMD,
++      DM_DEV_REMOVE_CMD,
++      DM_DEV_RENAME_CMD,
++      DM_DEV_SUSPEND_CMD,
++      DM_DEV_STATUS_CMD,
++      DM_DEV_WAIT_CMD,
++
++      /* Table level cmds */
++      DM_TABLE_LOAD_CMD,
++      DM_TABLE_CLEAR_CMD,
++      DM_TABLE_DEPS_CMD,
++      DM_TABLE_STATUS_CMD,
++};
++
++#define DM_IOCTL 0xfd
++
++#define DM_VERSION       _IOWR(DM_IOCTL, DM_VERSION_CMD, struct dm_ioctl)
++#define DM_REMOVE_ALL    _IOWR(DM_IOCTL, DM_REMOVE_ALL_CMD, struct dm_ioctl)
++#define DM_LIST_DEVICES  _IOWR(DM_IOCTL, DM_LIST_DEVICES_CMD, struct dm_ioctl)
++
++#define DM_DEV_CREATE    _IOWR(DM_IOCTL, DM_DEV_CREATE_CMD, struct dm_ioctl)
++#define DM_DEV_REMOVE    _IOWR(DM_IOCTL, DM_DEV_REMOVE_CMD, struct dm_ioctl)
++#define DM_DEV_RENAME    _IOWR(DM_IOCTL, DM_DEV_RENAME_CMD, struct dm_ioctl)
++#define DM_DEV_SUSPEND   _IOWR(DM_IOCTL, DM_DEV_SUSPEND_CMD, struct dm_ioctl)
++#define DM_DEV_STATUS    _IOWR(DM_IOCTL, DM_DEV_STATUS_CMD, struct dm_ioctl)
++#define DM_DEV_WAIT      _IOWR(DM_IOCTL, DM_DEV_WAIT_CMD, struct dm_ioctl)
++
++#define DM_TABLE_LOAD    _IOWR(DM_IOCTL, DM_TABLE_LOAD_CMD, struct dm_ioctl)
++#define DM_TABLE_CLEAR   _IOWR(DM_IOCTL, DM_TABLE_CLEAR_CMD, struct dm_ioctl)
++#define DM_TABLE_DEPS    _IOWR(DM_IOCTL, DM_TABLE_DEPS_CMD, struct dm_ioctl)
++#define DM_TABLE_STATUS  _IOWR(DM_IOCTL, DM_TABLE_STATUS_CMD, struct dm_ioctl)
++
++#define DM_VERSION_MAJOR      4
++#define DM_VERSION_MINOR      0
++#define DM_VERSION_PATCHLEVEL 1
++#define DM_VERSION_EXTRA      "-ioctl (2003-07-12)"
++
++/* Status bits */
++#define DM_READONLY_FLAG      (1 << 0) /* In/Out */
++#define DM_SUSPEND_FLAG               (1 << 1) /* In/Out */
++#define DM_PERSISTENT_DEV_FLAG        (1 << 3) /* In */
++
++/*
++ * Flag passed into ioctl STATUS command to get table information
++ * rather than current status.
++ */
++#define DM_STATUS_TABLE_FLAG  (1 << 4) /* In */
++
++/*
++ * Flags that indicate whether a table is present in either of
++ * the two table slots that a device has.
++ */
++#define DM_ACTIVE_PRESENT_FLAG   (1 << 5) /* Out */
++#define DM_INACTIVE_PRESENT_FLAG (1 << 6) /* Out */
++
++/*
++ * Indicates that the buffer passed in wasn't big enough for the
++ * results.
++ */
++#define DM_BUFFER_FULL_FLAG   (1 << 8) /* Out */
++
++#endif                                /* _LINUX_DM_IOCTL_H */
+diff -ruN linux-2.4.21-dm-real/include/linux/fs.h linux-2.4.21/include/linux/fs.h
+--- linux-2.4.21-dm-real/include/linux/fs.h    Fri Jun 13 16:32:51 2003
++++ linux-2.4.21/include/linux/fs.h    Sat Jul 12 18:13:56 2003
+@@ -263,7 +263,7 @@
+       struct page *b_page;            /* the page this bh is mapped to */
+       void (*b_end_io)(struct buffer_head *bh, int uptodate); /* I/O completion */
+       void *b_private;                /* reserved for b_end_io */
+-
++      void *b_journal_head;           /* ext3 journal_heads */
+       unsigned long b_rsector;        /* Real buffer location on disk */
+       wait_queue_head_t b_wait;
+ 
+diff -ruN linux-2.4.21-dm-real/include/linux/jbd.h linux-2.4.21/include/linux/jbd.h
+--- linux-2.4.21-dm-real/include/linux/jbd.h   Fri Jun 13 16:32:51 2003
++++ linux-2.4.21/include/linux/jbd.h   Sat Jul 12 18:13:56 2003
+@@ -311,7 +311,7 @@
+ 
+ static inline struct journal_head *bh2jh(struct buffer_head *bh)
+ {
+-      return bh->b_private;
++      return bh->b_journal_head;
+ }
+ 
+ #define HAVE_JOURNAL_CALLBACK_STATUS
+diff -ruN linux-2.4.21-dm-real/include/linux/mempool.h linux-2.4.21/include/linux/mempool.h
+--- linux-2.4.21-dm-real/include/linux/mempool.h       Thu Jan  1 01:00:00 1970
++++ linux-2.4.21/include/linux/mempool.h       Sat Jul 12 18:14:17 2003
+@@ -0,0 +1,31 @@
++/*
++ * memory buffer pool support
++ */
++#ifndef _LINUX_MEMPOOL_H
++#define _LINUX_MEMPOOL_H
++
++#include <linux/list.h>
++#include <linux/wait.h>
++
++struct mempool_s;
++typedef struct mempool_s mempool_t;
++
++typedef void * (mempool_alloc_t)(int gfp_mask, void *pool_data);
++typedef void (mempool_free_t)(void *element, void *pool_data);
++
++extern mempool_t * mempool_create(int min_nr, mempool_alloc_t *alloc_fn,
++                               mempool_free_t *free_fn, void *pool_data);
++extern int mempool_resize(mempool_t *pool, int new_min_nr, int gfp_mask);
++extern void mempool_destroy(mempool_t *pool);
++extern void * mempool_alloc(mempool_t *pool, int gfp_mask);
++extern void mempool_free(void *element, mempool_t *pool);
++
++/*
++ * A mempool_alloc_t and mempool_free_t that get the memory from
++ * a slab that is passed in through pool_data.
++ */
++void *mempool_alloc_slab(int gfp_mask, void *pool_data);
++void mempool_free_slab(void *element, void *pool_data);
++
++
++#endif /* _LINUX_MEMPOOL_H */
+diff -ruN linux-2.4.21-dm-real/include/linux/vmalloc.h linux-2.4.21/include/linux/vmalloc.h
+--- linux-2.4.21-dm-real/include/linux/vmalloc.h       Fri Jan 10 16:35:58 2003
++++ linux-2.4.21/include/linux/vmalloc.h       Sat Jul 12 18:14:25 2003
+@@ -26,6 +26,7 @@
+ extern void vmfree_area_pages(unsigned long address, unsigned long size);
+ extern int vmalloc_area_pages(unsigned long address, unsigned long size,
+                               int gfp_mask, pgprot_t prot);
++extern void *vcalloc(unsigned long nmemb, unsigned long elem_size);
+ 
+ /*
+  *    Allocate any pages
+diff -ruN linux-2.4.21-dm-real/kernel/ksyms.c linux-2.4.21/kernel/ksyms.c
+--- linux-2.4.21-dm-real/kernel/ksyms.c        Fri Jun 13 16:32:52 2003
++++ linux-2.4.21/kernel/ksyms.c        Sat Jul 12 18:14:25 2003
+@@ -112,6 +112,7 @@
+ EXPORT_SYMBOL(vfree);
+ EXPORT_SYMBOL(__vmalloc);
+ EXPORT_SYMBOL(vmalloc_to_page);
++EXPORT_SYMBOL(vcalloc);
+ EXPORT_SYMBOL(mem_map);
+ EXPORT_SYMBOL(remap_page_range);
+ EXPORT_SYMBOL(max_mapnr);
+diff -ruN linux-2.4.21-dm-real/mm/Makefile linux-2.4.21/mm/Makefile
+--- linux-2.4.21-dm-real/mm/Makefile   Fri Jan 10 16:36:02 2003
++++ linux-2.4.21/mm/Makefile   Sat Jul 12 18:14:17 2003
+@@ -9,12 +9,12 @@
+ 
+ O_TARGET := mm.o
+ 
+-export-objs := shmem.o filemap.o memory.o page_alloc.o
++export-objs := shmem.o filemap.o memory.o page_alloc.o mempool.o
+ 
+ obj-y  := memory.o mmap.o filemap.o mprotect.o mlock.o mremap.o \
+           vmalloc.o slab.o bootmem.o swap.o vmscan.o page_io.o \
+           page_alloc.o swap_state.o swapfile.o numa.o oom_kill.o \
+-          shmem.o
++          shmem.o mempool.o
+ 
+ obj-$(CONFIG_HIGHMEM) += highmem.o
+ 
+diff -ruN linux-2.4.21-dm-real/mm/filemap.c linux-2.4.21/mm/filemap.c
+--- linux-2.4.21-dm-real/mm/filemap.c  Fri Jun 13 16:33:25 2003
++++ linux-2.4.21/mm/filemap.c  Sat Jul 12 18:14:21 2003
+@@ -1704,8 +1704,10 @@
+                       retval = generic_file_direct_IO(READ, filp, buf, count, pos);
+                       if (retval > 0)
+                               *ppos = pos + retval;
++
+               }
+-              UPDATE_ATIME(filp->f_dentry->d_inode);
++              if (!S_ISBLK(inode->i_mode))
++                      UPDATE_ATIME(filp->f_dentry->d_inode);
+               goto out;
+       }
+ }
+diff -ruN linux-2.4.21-dm-real/mm/mempool.c linux-2.4.21/mm/mempool.c
+--- linux-2.4.21-dm-real/mm/mempool.c  Thu Jan  1 01:00:00 1970
++++ linux-2.4.21/mm/mempool.c  Sat Jul 12 18:14:17 2003
+@@ -0,0 +1,299 @@
++/*
++ *  linux/mm/mempool.c
++ *
++ *  memory buffer pool support. Such pools are mostly used
++ *  for guaranteed, deadlock-free memory allocations during
++ *  extreme VM load.
++ *
++ *  started by Ingo Molnar, Copyright (C) 2001
++ */
++
++#include <linux/mm.h>
++#include <linux/slab.h>
++#include <linux/module.h>
++#include <linux/mempool.h>
++
++struct mempool_s {
++      spinlock_t lock;        /* taken around updates of curr_nr and *elements */
++      int min_nr;             /* capacity of *elements: the guaranteed minimum */
++      int curr_nr;            /* Current nr of elements at *elements */
++      void **elements;        /* reserve, used as a stack */
++
++      void *pool_data;        /* handed unchanged to alloc and free */
++      mempool_alloc_t *alloc;
++      mempool_free_t *free;
++      wait_queue_head_t wait; /* mempool_alloc() sleeps here */
++};
++
++static void add_element(mempool_t *pool, void *element)
++{
++      BUG_ON(pool->curr_nr >= pool->min_nr);
++      pool->elements[pool->curr_nr++] = element;
++}
++
++static void *remove_element(mempool_t *pool)
++{
++      BUG_ON(pool->curr_nr <= 0);
++      return pool->elements[--pool->curr_nr];
++}
++
++static void free_pool(mempool_t *pool)
++{
++      while (pool->curr_nr) {
++              void *element = remove_element(pool);
++              pool->free(element, pool->pool_data);
++      }
++      kfree(pool->elements);
++      kfree(pool);
++}
++
++/**
++ * mempool_create - create a memory pool
++ * @min_nr:    the minimum number of elements guaranteed to be
++ *             allocated for this pool.
++ * @alloc_fn:  user-defined element-allocation function.
++ * @free_fn:   user-defined element-freeing function.
++ * @pool_data: optional private data available to the user-defined functions.
++ *
++ * this function creates and allocates a guaranteed size, preallocated
++ * memory pool. The pool can be used from the mempool_alloc and mempool_free
++ * functions. This function might sleep. Both the alloc_fn() and the free_fn()
++ * functions might sleep - as long as the mempool_alloc function is not called
++ * from IRQ contexts.
++ */
++mempool_t * mempool_create(int min_nr, mempool_alloc_t *alloc_fn,
++                              mempool_free_t *free_fn, void *pool_data)
++{
++      mempool_t *pool;
++
++      pool = kmalloc(sizeof(*pool), GFP_KERNEL);
++      if (!pool)
++              return NULL;
++      memset(pool, 0, sizeof(*pool));
++      pool->elements = kmalloc(min_nr * sizeof(void *), GFP_KERNEL);
++      if (!pool->elements) {
++              kfree(pool);
++              return NULL;
++      }
++      spin_lock_init(&pool->lock);
++      pool->min_nr = min_nr;
++      pool->pool_data = pool_data;
++      init_waitqueue_head(&pool->wait);
++      pool->alloc = alloc_fn;
++      pool->free = free_fn;
++
++      /*
++       * First pre-allocate the guaranteed number of buffers.
++       */
++      while (pool->curr_nr < pool->min_nr) {
++              void *element;
++
++              element = pool->alloc(GFP_KERNEL, pool->pool_data);
++              if (unlikely(!element)) {
++                      free_pool(pool);
++                      return NULL;
++              }
++              add_element(pool, element);
++      }
++      return pool;
++}
++
++/**
++ * mempool_resize - resize an existing memory pool
++ * @pool:       pointer to the memory pool which was allocated via
++ *              mempool_create().
++ * @new_min_nr: the new minimum number of elements guaranteed to be
++ *              allocated for this pool.
++ * @gfp_mask:   the usual allocation bitmask.
++ *
++ * This function shrinks/grows the pool. In the case of growing,
++ * it cannot be guaranteed that the pool will be grown to the new
++ * size immediately, but new mempool_free() calls will refill it.
++ *
++ * Note, the caller must guarantee that no mempool_destroy is called
++ * while this function is running. mempool_alloc() & mempool_free()
++ * might be called (eg. from IRQ contexts) while this function executes.
++ */
++int mempool_resize(mempool_t *pool, int new_min_nr, int gfp_mask)
++{
++      void *element;
++      void **new_elements;
++      unsigned long flags;
++
++      BUG_ON(new_min_nr <= 0);
++      spin_lock_irqsave(&pool->lock, flags);
++      if (new_min_nr < pool->min_nr) {
++              while (pool->curr_nr > new_min_nr) {
++                      element = remove_element(pool);
++                      spin_unlock_irqrestore(&pool->lock, flags);
++                      pool->free(element, pool->pool_data);   /* may sleep */
++                      spin_lock_irqsave(&pool->lock, flags);
++              }
++              pool->min_nr = new_min_nr;
++              goto out_unlock;
++      }
++      spin_unlock_irqrestore(&pool->lock, flags);
++
++      /* Grow the pool: swap in a larger array, then top it up */
++      new_elements = kmalloc(new_min_nr * sizeof(*new_elements), gfp_mask);
++      if (!new_elements)
++              return -ENOMEM;
++
++      spin_lock_irqsave(&pool->lock, flags);
++      memcpy(new_elements, pool->elements,
++                      pool->curr_nr * sizeof(*new_elements));
++      kfree(pool->elements);
++      pool->elements = new_elements;
++      pool->min_nr = new_min_nr;
++      while (pool->curr_nr < pool->min_nr) {
++              spin_unlock_irqrestore(&pool->lock, flags);
++              element = pool->alloc(gfp_mask, pool->pool_data);
++              if (!element)
++                      goto out;       /* not an error: frees refill later */
++              spin_lock_irqsave(&pool->lock, flags);
++              if (pool->curr_nr >= pool->min_nr) {    /* Raced */
++                      spin_unlock_irqrestore(&pool->lock, flags);
++                      pool->free(element, pool->pool_data);   /* not kfree */
++                      goto out;
++              }
++              add_element(pool, element);
++      }
++out_unlock:
++      spin_unlock_irqrestore(&pool->lock, flags);
++out:
++      return 0;
++}
++
++/**
++ * mempool_destroy - deallocate a memory pool
++ * @pool:      pointer to the memory pool which was allocated via
++ *             mempool_create().
++ *
++ * this function only sleeps if the free_fn() function sleeps. The caller
++ * has to guarantee that all elements have been returned to the pool (ie:
++ * freed) prior to calling mempool_destroy().
++ */
++void mempool_destroy(mempool_t *pool)
++{
++      /* Outstanding elements at this point are a caller bug. */
++      BUG_ON(pool->curr_nr != pool->min_nr);
++      free_pool(pool);
++}
++
++/**
++ * mempool_alloc - allocate an element from a specific memory pool
++ * @pool:      pointer to the memory pool which was allocated via
++ *             mempool_create().
++ * @gfp_mask:  the usual allocation bitmask.
++ *
++ * this function only sleeps if the alloc_fn function sleeps or
++ * returns NULL. Note that due to preallocation, this function
++ * *never* fails when called from process contexts. (it might
++ * fail if called from an IRQ context.)
++ */
++void * mempool_alloc(mempool_t *pool, int gfp_mask)
++{
++      void *element;
++      unsigned long flags;
++      int curr_nr;
++      DECLARE_WAITQUEUE(wait, current);
++      int gfp_nowait = gfp_mask & ~(__GFP_WAIT | __GFP_IO);
++
++repeat_alloc:
++      element = pool->alloc(gfp_nowait, pool->pool_data);
++      if (likely(element != NULL))
++              return element;
++
++      /*
++       * If the pool is less than 50% full then try harder
++       * to allocate an element:
++       */
++      if ((gfp_mask != gfp_nowait) && (pool->curr_nr <= pool->min_nr/2)) {
++              element = pool->alloc(gfp_mask, pool->pool_data);
++              if (likely(element != NULL))
++                      return element;
++      }
++
++      /*
++       * Kick the VM at this point.
++       */
++      wakeup_bdflush();
++
++      spin_lock_irqsave(&pool->lock, flags);
++      if (likely(pool->curr_nr)) {
++              element = remove_element(pool);
++              spin_unlock_irqrestore(&pool->lock, flags);
++              return element;
++      }
++      spin_unlock_irqrestore(&pool->lock, flags);
++
++      /* We must not sleep in the GFP_ATOMIC case */
++      if (gfp_mask == gfp_nowait)
++              return NULL;
++
++      run_task_queue(&tq_disk);
++
++      add_wait_queue_exclusive(&pool->wait, &wait);
++      set_task_state(current, TASK_UNINTERRUPTIBLE);
++
++      spin_lock_irqsave(&pool->lock, flags);
++      curr_nr = pool->curr_nr;
++      spin_unlock_irqrestore(&pool->lock, flags);
++
++      if (!curr_nr)
++              schedule();
++
++      current->state = TASK_RUNNING;
++      remove_wait_queue(&pool->wait, &wait);
++
++      goto repeat_alloc;
++}
++
++/**
++ * mempool_free - return an element to the pool.
++ * @element:   pool element pointer.
++ * @pool:      pointer to the memory pool which was allocated via
++ *             mempool_create().
++ *
++ * this function only sleeps if the free_fn() function sleeps.
++ */
++void mempool_free(void *element, mempool_t *pool)
++{
++      unsigned long flags;
++
++      if (pool->curr_nr < pool->min_nr) {
++              spin_lock_irqsave(&pool->lock, flags);
++              if (pool->curr_nr < pool->min_nr) {
++                      add_element(pool, element);
++                      spin_unlock_irqrestore(&pool->lock, flags);
++                      wake_up(&pool->wait);
++                      return;
++              }
++              spin_unlock_irqrestore(&pool->lock, flags);
++      }
++      pool->free(element, pool->pool_data);
++}
++
++/*
++ * A commonly used alloc and free fn pair; pool_data is the slab cache.
++ */
++void *mempool_alloc_slab(int gfp_mask, void *pool_data)
++{
++      kmem_cache_t *cache = pool_data;
++      return kmem_cache_alloc(cache, gfp_mask);
++}
++
++void mempool_free_slab(void *element, void *pool_data)
++{
++      kmem_cache_t *cache = pool_data;
++      kmem_cache_free(cache, element);
++}
++
++
++EXPORT_SYMBOL(mempool_create);
++EXPORT_SYMBOL(mempool_resize);
++EXPORT_SYMBOL(mempool_destroy);
++EXPORT_SYMBOL(mempool_alloc);
++EXPORT_SYMBOL(mempool_free);
++EXPORT_SYMBOL(mempool_alloc_slab);
++EXPORT_SYMBOL(mempool_free_slab);
+diff -ruN linux-2.4.21-dm-real/mm/vmalloc.c linux-2.4.21/mm/vmalloc.c
+--- linux-2.4.21-dm-real/mm/vmalloc.c  Fri Jun 13 16:33:25 2003
++++ linux-2.4.21/mm/vmalloc.c  Sat Jul 12 18:14:25 2003
+@@ -327,3 +327,22 @@
+       read_unlock(&vmlist_lock);
+       return buf - buf_start;
+ }
++
++void *vcalloc(unsigned long nmemb, unsigned long elem_size)
++{
++      unsigned long size;
++      void *addr;
++
++      /*
++       * Reject nmemb * elem_size overflow; elem_size == 0 skips the divide.
++       */
++      if (elem_size && nmemb > (ULONG_MAX / elem_size))
++              return NULL;
++
++      size = nmemb * elem_size;       /* cannot wrap after the check above */
++      addr = vmalloc(size);
++      if (addr)
++              memset(addr, 0, size);  /* hand back zero-filled memory */
++
++      return addr;
++}
author	Arkadiusz Miśkiewicz <arekm@maven.pl>
	Fri, 18 Jul 2003 23:06:16 +0000 (23:06 +0000)
committer	cvs2git <feedback@pld-linux.org>
	Sun, 24 Jun 2012 12:13:13 +0000 (12:13 +0000)