1diff -Naur linux-2002-09-30/drivers/evms/AIXlvm_vge.c evms-2002-09-30/drivers/evms/AIXlvm_vge.c
2--- linux-2002-09-30/drivers/evms/AIXlvm_vge.c Wed Dec 31 18:00:00 1969
3+++ evms-2002-09-30/drivers/evms/AIXlvm_vge.c Fri Sep 27 14:55:45 2002
4@@ -0,0 +1,3681 @@
5+/* -*- linux-c -*- */
6+
7+/*
8+ *
9+ *
10+ * Copyright (c) International Business Machines Corp., 2000
11+ *
12+ * This program is free software; you can redistribute it and/or modify
13+ * it under the terms of the GNU General Public License as published by
14+ * the Free Software Foundation; either version 2 of the License, or
15+ * (at your option) any later version.
16+ *
17+ * This program is distributed in the hope that it will be useful,
18+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
19+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
20+ * the GNU General Public License for more details.
21+ *
22+ * You should have received a copy of the GNU General Public License
23+ * along with this program; if not, write to the Free Software
24+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
25+ *
26+ *
27+ */
28+/*
29+ * linux/drivers/evms/AIXlvm_vge.c
30+ *
31+ * EVMS AIX LVM Volume Group Emulator
32+ *
33+ *
34+ */
35+
36+#define EVMS_DEBUG 1
37+#define EVMS_AIX_DEBUG 1
38+
39+#define AIX_COMMON_SERVICES_MAJOR 0 // Required common services levels for the AIX kernel plugin
40+#define AIX_COMMON_SERVICES_MINOR 5 // These must be incremented if a new function is added to common
41+#define AIX_COMMON_SERVICES_PATCHLEVEL 0 // services and the AIX kernel plugin uses the new function.
42+#define AIX_INCREMENT_REQUEST 1
43+#define AIX_DECREMENT_REQUEST -1
44+#define AIX_RESYNC_BLOCKSIZE 512
45+#define AIX_SYNC_INCOMPLETE 0x01
46+#define AIX_SYNC_COMPLETE 0x00
47+#define AIX_MASTER 0
48+#define AIX_SLAVE_1 1
49+#define AIX_SLAVE_2 2
50+
51+#include <linux/module.h>
52+#include <linux/kernel.h>
53+#include <linux/config.h>
54+
55+#include <linux/genhd.h>
56+#include <linux/string.h>
57+#include <linux/blk.h>
58+#include <linux/init.h>
59+#include <linux/slab.h>
60+
61+#include <linux/evms/evms.h>
62+#include <linux/evms/evms_aix.h>
63+#include <asm/system.h>
64+#include <asm/uaccess.h>
65+
66+#include <linux/sched.h>
67+#include <linux/smp_lock.h>
68+#include <linux/locks.h>
69+#include <linux/delay.h>
70+#include <linux/reboot.h>
71+#include <linux/completion.h>
72+#include <linux/vmalloc.h>
73+
74+#ifdef EVMS_AIX_DEBUG
75+static int AIX_volume_group_dump(void);
76+#endif
77+
78+static struct aix_volume_group *AIXVolumeGroupList = NULL;
79+static struct evms_thread *AIX_mirror_read_retry_thread;
80+static struct evms_thread *AIX_mirror_resync_thread;
81+static struct evms_pool_mgmt *AIX_BH_list_pool = NULL;
82+static struct aix_mirror_bh *AIX_retry_list = NULL;
83+static struct aix_mirror_bh **AIX_retry_tail = NULL;
84+static spinlock_t AIX_retry_list_lock = SPIN_LOCK_UNLOCKED;
85+static spinlock_t AIX_resync_list_lock = SPIN_LOCK_UNLOCKED;
86+static spinlock_t AIX_resync_pp_lock = SPIN_LOCK_UNLOCKED;
87+static int AIXResyncInProgress = FALSE;
88+static struct aix_resync_struct *AIX_resync_list = NULL;
89+
90+// Plugin API prototypes
91+
92+static void AIXiod(void *data);
93+static void AIXresync(void *data);
94+static int discover_aix(struct evms_logical_node **evms_logical_disk_head);
95+static int discover_volume_groups(struct evms_logical_node **);
96+static int discover_logical_volumes(void);
97+static int end_discover_aix(struct evms_logical_node **evms_logical_disk_head);
98+static void read_aix(struct evms_logical_node *node, struct buffer_head *bh);
99+static void write_aix(struct evms_logical_node *node, struct buffer_head *bh);
100+static int ioctl_aix(struct evms_logical_node *logical_node,
101+ struct inode *inode,
102+ struct file *file, unsigned int cmd, unsigned long arg);
103+
104+static int aix_direct_ioctl(struct inode *inode,
105+ struct file *file,
106+ unsigned int cmd, unsigned long args);
107+
108+static int AIX_remap_sector(struct evms_logical_node *node, u64 org_sector, // logical sector to remap
109+ u64 size, // size (in sectors) of request to remap
110+ u64 * new_sector, // remapped sector
111+ u64 * new_size, // new size (in sectors)
112+ struct partition_list_entry **partition, // new node for which new_sector is relative
113+ u32 * le, u32 * offset_in_le);
114+
115+static int validate_build_volume_group_disk_info(struct evms_logical_node
116+ *logical_node,
117+ struct AIXlvm_rec *AIXlvm);
118+
119+static int add_VG_data_to_VG_list(struct evms_logical_node *logical_node,
120+ struct aix_volume_group *new_group,
121+ short int pvNum);
122+static int add_PV_to_volume_group(struct aix_volume_group *group,
123+ struct evms_logical_node *evms_partition,
124+ int pvNum);
125+static struct aix_volume_group *AIX_create_volume_group(struct evms_logical_node
126+ *logical_node,
127+ struct AIXlvm_rec
128+ *AIXlvm);
129+
130+static int AIX_update_volume_group(struct aix_volume_group *AIXVGLptr,
131+ struct evms_logical_node *logical_node,
132+ struct AIXlvm_rec *AIXlvm);
133+
134+static int AIX_evms_cs_notify_lv_io_error(struct evms_logical_node *node);
135+
136+static int AIX_pvh_data_posn(u32 vgda_psn, u32 * pvh_posn, struct partition_list_entry *partition, u32 numpvs);
137+
138+static int AIX_resync_lv_mirrors(struct aix_logical_volume *volume, int force);
139+
140+static int AIX_copy_on_read(struct aix_logical_volume *volume,
141+ struct partition_list_entry *master_part,
142+ struct partition_list_entry *slave1_part,
143+ struct partition_list_entry *slave2_part,
144+ u64 master_offset,
145+ u64 slave1_offset,
146+ u64 slave2_offset, u32 pe_size, int le);
147+
148+static int export_volumes(struct evms_logical_node **evms_logical_disk_head);
149+static int lvm_cleanup(void);
150+static int AIX_copy_header_info(struct vg_header *AIXvgh,
151+ struct vg_header *AIXvgh2);
152+static int build_pe_maps(struct aix_volume_group *volume_group);
153+
154+static struct aix_logical_volume *new_logical_volume(struct lv_entries
155+ *AIXlvent,
156+ struct aix_volume_group
157+ *group, char *lv_name,
158+ u32 stripesize);
159+
160+static int check_log_volume_and_pe_maps(struct aix_volume_group *group);
161+static int check_volume_groups(void);
162+static int init_io_aix(struct evms_logical_node *node, int io_flag, /* 0=read, 1=write */
163+ u64 sect_nr, /* disk LBA */
164+ u64 num_sects, /* # of sectors */
165+ void *buf_addr); /* buffer address */
166+
167+static int delete_logical_volume(struct aix_logical_volume *volume);
168+static int delete_aix_node(struct evms_logical_node *logical_node);
169+static int deallocate_volume_group(struct aix_volume_group *group);
170+
171+static void AIX_handle_read_mirror_drives(struct buffer_head *bh, int uptodate);
172+
173+static void AIX_handle_write_mirror_drives(struct buffer_head *bh,
174+ int uptodate);
175+
176+static void aix_notify_cache_ctor(void *foo, kmem_cache_t * cachep,
177+ unsigned long flags);
178+
179+static void AIX_schedule_resync(struct aix_logical_volume *resync_volume,
180+ int force);
181+static struct aix_logical_volume *AIX_get_volume_data(char *object_name);
182+
183+static void AIX_sync_mirrored_partitions(struct buffer_head *bh, int uptodate);
184+
185+static int AIX_get_set_mirror_offset(struct aix_mirror_bh *tmp_bh,
186+ int index, int offset);
187+
188+static struct aix_mirror_bh *AIX_alloc_rbh(struct evms_logical_node *node,
189+ struct buffer_head *bh,
190+ u32 mirror_copies,
191+ u32 le, u64 org_sector, int cmd);
192+
193+static struct aix_mirror_bh *AIX_alloc_wbh(struct evms_logical_node *node,
194+ struct evms_logical_node *node2,
195+ struct evms_logical_node *node3,
196+ struct buffer_head *bh,
197+ u32 mirror_copies,
198+ u32 le,
199+ u64 new_sector2, u64 new_sector3);
200+
201+static struct aix_mirror_bh *AIX_alloc_sbh(struct aix_logical_volume *volume,
202+ struct partition_list_entry
203+ *master_part,
204+ struct partition_list_entry
205+ *slave1_part,
206+ struct partition_list_entry
207+ *slave2_part, u64 master_offset,
208+ u64 slave1_offset, u64 slave2_offset,
209+ u32 pe_size);
210+
211+static void AIX_free_headers(struct vg_header *AIXvgh,
212+ struct vg_header *AIXvgh2,
213+ struct vg_trailer *AIXvgt,
214+ struct vg_trailer *AIXvgt2);
215+
216+static int remove_group_from_list(struct aix_volume_group *group);
217+
218+//****************************************************************************************************
219+
220+/* END of PROTOTYPES */
221+
222+#define GET_PHYSICAL_PART_SIZE(v1) (1 << v1)
223+
224+#define COMPARE_TIMESTAMPS(t1, t2) ( (t1).tv_sec == (t2).tv_sec && \
225+ (t1).tv_nsec == (t2).tv_nsec )
226+
227+#define COMPARE_UNIQUE_IDS(id1, id2) ( (id1).word1 == (id2).word1 && \
228+ (id1).word2 == (id2).word2 && \
229+ (id1).word3 == (id2).word3 && \
230+ (id1).word4 == (id2).word4 )
231+
232+#define SECTOR_IN_RANGE(s1, s2) ((s2 > s1) && (s2 < s1 + AIX_RESYNC_BLOCKSIZE))
233+
234+#define AIX_PV_STATE_VALID 0 // Both VGDAs are valid and match.
235+#define AIX_PV_STATE_FIRST_VGDA 1 // Only the first VGDA is valid.
236+#define AIX_PV_STATE_SECOND_VGDA 2 // Only the second VGDA is valid.
237+#define AIX_PV_STATE_EITHER_VGDA -1 // Both VGDAs are valid, but do not match each other.
238+#define AIX_PV_STATE_INVALID -2 // We're in an invalid state but there are more PVs in this group
239+
240+#ifndef EVMS_AIX_DEBUG
241+#define AIX_VOLUME_GROUP_DUMP()
242+#else
243+#define AIX_VOLUME_GROUP_DUMP() LOG_DEBUG("Called line:%d \n",__LINE__); \
244+ AIX_volume_group_dump()
245+#endif
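
As an aside on GET_PHYSICAL_PART_SIZE() above: AIX records the physical-partition size as a base-2 exponent of the size in bytes, so the macro is just a shift. A minimal user-space sketch of the conversion AIX_create_volume_group performs later, assuming the usual 512-byte AIX_SECTOR_SIZE (the pp_size value of 24 is hypothetical):

#include <stdio.h>

int main(void)
{
	unsigned int pp_size = 24;		/* hypothetical on-disk exponent (16 MB PPs) */
	unsigned int bytes = 1U << pp_size;	/* GET_PHYSICAL_PART_SIZE(pp_size) */
	unsigned int pe_size = bytes / 512;	/* sectors per PE */

	printf("PP = %u bytes = %u sectors\n", bytes, pe_size);	/* 16777216, 32768 */
	return 0;
}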
246+
247+// Global LVM data structures
248+
249+static struct evms_plugin_fops AIXlvm_fops = {
250+ .discover = discover_aix,
251+ .end_discover = end_discover_aix,
252+ .delete = delete_aix_node,
253+ .read = read_aix,
254+ .write = write_aix,
255+ .init_io = init_io_aix,
256+ .ioctl = ioctl_aix,
257+ .direct_ioctl = aix_direct_ioctl
258+};
259+
260+static struct evms_plugin_header plugin_header = {
261+ .id = SetPluginID(IBM_OEM_ID,
262+ EVMS_REGION_MANAGER,
263+ EVMS_AIX_FEATURE_ID),
264+ .version = {
265+ .major = 1,
266+ .minor = 1,
267+ .patchlevel = 1},
268+ .required_services_version = {
269+ .major = AIX_COMMON_SERVICES_MAJOR,
270+ .minor = AIX_COMMON_SERVICES_MINOR,
271+ .patchlevel =
272+ AIX_COMMON_SERVICES_PATCHLEVEL},
273+ .fops = &AIXlvm_fops
274+};
275+
276+/*
277+ * Function: remap sector
278+ * Common function to remap a volume LBA to a partition LBA in the appropriate PE
279+ */
280+static int
281+AIX_remap_sector(struct evms_logical_node *node, u64 org_sector, // logical sector to remap
282+ u64 size, // size (in sectors) of request to remap
283+ u64 * new_sector, // remapped sector
284+ u64 * new_size, // new size (in sectors)
285+ struct partition_list_entry **partition, // new node for which new_sector is relative
286+ u32 * le, u32 * offset_in_le)
287+{
288+ struct aix_logical_volume *volume;
289+
290+ u32 sectors_per_stripe;
291+ u32 partition_to_use;
292+ u32 column;
293+ u32 stripe_in_column;
294+
295+ u32 org_sector32; // Until striping is 64-bit enabled.
296+
297+ volume = (struct aix_logical_volume *) node->private;
298+
299+#ifdef EVMS_DEBUG
300+ LOG_DEBUG("-- %s volume:%p lv:%d size:" PFU64 " Name:%s\n",
301+ __FUNCTION__, volume, volume->lv_number, size, volume->name);
302+ LOG_DEBUG(" node %p node_name [%s] org_sector:" PFU64 "\n", node,
303+ node->name, org_sector);
304+ LOG_DEBUG(" mirror_copies:%d volume->lv_size:" PFU64 "\n",
305+ volume->mirror_copies, volume->lv_size);
306+#endif
307+
308+ org_sector32 = org_sector;
309+
310+ *(new_size) = size;
311+
312+ // Check if volume is striped. Reset the size if the request
313+ // crosses a stripe boundary.
314+ if (volume->stripes > 1) {
315+#ifdef EVMS_DEBUG
316+ LOG_DEBUG(" *** STRIPED ***\n");
317+ LOG_DEBUG(" ------- volume->stripe_size:%d org_sector:%d volume_stripes:%d\n",
318+ volume->stripe_size, org_sector32, volume->stripes);
319+#endif
320+
321+ *(le) = org_sector >> volume->pe_size_shift; // 64-bit safe
322+ *(offset_in_le) = org_sector & (volume->pe_size - 1); // 64-bit safe
323+
324+#ifdef EVMS_DEBUG
325+ LOG_DEBUG("OLD - le:%d -- offset_in_le:%d \n", *(le),
326+ *(offset_in_le));
327+#endif
328+
329+ sectors_per_stripe = volume->stripe_size / AIX_SECTOR_SIZE;
330+ partition_to_use =
331+ (org_sector32 / sectors_per_stripe) % volume->stripes;
332+ stripe_in_column =
333+ ((((org_sector32 / volume->stripe_size) / volume->stripes) *
334+ volume->stripe_size) +
335+ (org_sector32 % sectors_per_stripe));
336+ column =
337+ ((org_sector32 / sectors_per_stripe) / volume->stripes) *
338+ sectors_per_stripe;
339+
340+#ifdef EVMS_DEBUG
341+ LOG_DEBUG("offset_in_le:%d org_sector:" PFU64
342+ " pe_shift:%d stripe_shift:%d\n", *(offset_in_le),
343+ org_sector, volume->pe_size_shift,
344+ volume->stripe_size_shift);
345+
346+ LOG_DEBUG(" org_sector:%d sectors_per_stripe:%d partition_to_use:%d stripe_in_column:%d column:%d\n",
347+ org_sector32, sectors_per_stripe, partition_to_use,
348+ stripe_in_column, column);
349+ LOG_DEBUG(" offset_in_le + size:" PFU64
350+ " volume->pe_size:%d volume->lv_size:" PFU64 "\n",
351+ (*(offset_in_le) + size), volume->pe_size,
352+ volume->lv_size);
353+#endif
354+
355+ if (*(offset_in_le) + size > volume->pe_size) {
356+ *new_size = volume->pe_size - *(offset_in_le);
357+ LOG_DEBUG(" new_size " PFU64 "\n", *new_size);
358+ }
359+
360+ }
361+ // Non-striped volume. Just find LE and offset. Reset the size
362+ // if the request crosses an LE boundary.
363+ else {
364+#ifdef EVMS_DEBUG
365+ LOG_DEBUG(" *** NON-STRIPED ***\n");
366+#endif
367+
368+ *(le) = org_sector >> volume->pe_size_shift; // 64-bit safe
369+ *(offset_in_le) = org_sector & (volume->pe_size - 1); // 64-bit safe
370+ // Reset the size here as well so init_io_aix can split requests
 // that span an LE boundary (mirrors the clamp in the striped branch).
 if (*(offset_in_le) + size > volume->pe_size) {
 *new_size = volume->pe_size - *(offset_in_le);
 }
371+ }
372+
373+#ifdef EVMS_DEBUG
374+ LOG_DEBUG(" offset_in_le:%d org_sector:" PFU64 " shift:%d\n",
375+ *(offset_in_le), org_sector, volume->pe_size_shift);
376+
377+ if (*(le) >= volume->num_le) {
378+ LOG_DEBUG(" le Memory Overwrite !! le:%d vs volume->num_le:%d\n",
379+ *(le), volume->num_le);
380+ return -EINVAL;
381+ }
382+#endif
383+
384+ *(new_sector) = volume->le_to_pe_map[*(le)].pe_sector_offset + *(offset_in_le);
385+ *(partition) = volume->le_to_pe_map[*(le)].owning_pv;
386+
387+#ifdef EVMS_DEBUG
388+ LOG_DEBUG(" new_sector:" PFU64 "\n", *(new_sector));
389+ LOG_DEBUG(" Owning Part %p\n", *(partition));
390+ LOG_DEBUG(" End %s\n", __FUNCTION__);
391+#endif
392+
393+ return (0);
394+}
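
A worked pass through the striped-branch arithmetic above can make it easier to audit. The sketch below recomputes le, offset_in_le, and partition_to_use for one request using assumed geometry (32768-sector PEs, 128 KB stripes, 4 stripes); it mirrors the formulas in AIX_remap_sector rather than defining them authoritatively, and leaves out the stripe_in_column/column bookkeeping:

#include <stdio.h>

int main(void)
{
	unsigned long long org_sector = 70000;	/* request start, chosen for illustration */
	unsigned int pe_size = 32768;		/* sectors per PE (assumed) */
	unsigned int pe_size_shift = 15;	/* log2(pe_size) */
	unsigned int stripe_size = 131072;	/* stripe size in bytes (assumed) */
	unsigned int stripes = 4;
	unsigned int sectors_per_stripe = stripe_size / 512;

	unsigned int le = org_sector >> pe_size_shift;			/* 2 */
	unsigned int offset_in_le = org_sector & (pe_size - 1);		/* 4464 */
	unsigned int partition_to_use =
	    (org_sector / sectors_per_stripe) % stripes;		/* 1 */

	printf("le=%u offset_in_le=%u partition_to_use=%u\n",
	       le, offset_in_le, partition_to_use);
	return 0;
}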
395+
396+/*
397+ * Function: read_aix
398+ */
399+static void
400+read_aix(struct evms_logical_node *node, struct buffer_head *bh)
401+{
402+ struct partition_list_entry *partition;
403+ u64 org_sector;
404+ u64 new_sector;
405+ u64 new_size;
406+ struct aix_logical_volume *volume;
407+ struct aix_mirror_bh *tmp_bh;
408+ u32 le, offset_in_le, count;
409+ int flags = 0;
410+
411+ volume = (struct aix_logical_volume *) node->private;
412+//#ifdef EVMS_DEBUG
413+// LOG_DEBUG(" ***** %s ***** bh:%p volume->iter:%d\n", __FUNCTION__, bh,
414+// volume->mirror_iterations);
415+//#endif
416+
417+#ifdef EVMS_DEBUG
418+ LOG_DEBUG(" node->total_vsectors:" PFU64 "\n", node->total_vsectors);
419+ LOG_DEBUG(" rsector:%lu rsize:%u node_flags:%u\n", bh->b_rsector,
420+ bh->b_size, node->flags);
421+#endif
422+
423+ // Check if I/O goes past end of logical volume.
424+ if (bh->b_rsector + (bh->b_size >> EVMS_VSECTOR_SIZE_SHIFT) >
425+ node->total_vsectors) {
426+ LOG_CRITICAL(" read_aix ERROR %d\n", __LINE__);
427+ buffer_IO_error(bh);
428+ return;
429+ }
430+
431+ // Logical-to-physical remapping.
432+ if (AIX_remap_sector
433+ (node, bh->b_rsector, (bh->b_size >> EVMS_VSECTOR_SIZE_SHIFT),
434+ &new_sector, &new_size, &partition, &le, &offset_in_le)
435+ || (!partition || !new_sector)) {
436+ LOG_CRITICAL(" read_aix bh: ERROR %d\n", __LINE__);
437+ buffer_IO_error(bh);
438+ return;
439+ }
440+
441+ org_sector = bh->b_rsector;
442+ bh->b_rsector = new_sector;
443+ //bh->b_size = new_size;
444+
445+#ifdef EVMS_DEBUG
446+ LOG_DEBUG(" read_aix Mirror_Copies:%d\n", volume->mirror_copies);
447+#endif
448+
449+ if (volume->mirror_copies > AIX_DEFAULT_MIRRORING) {
450+
451+ tmp_bh =
452+ AIX_alloc_rbh(node, bh, 1, le, new_sector, AIX_LV_READ);
453+
454+ if (!tmp_bh) {
455+ buffer_IO_error(bh);
456+ return;
457+ }
458+
459+ if (volume->le_to_pe_map_mir1) {
460+ tmp_bh->mir_node1 =
461+ volume->le_to_pe_map_mir1[le].owning_pv->
462+ logical_node;
463+ tmp_bh->mir_sector1 =
464+ volume->le_to_pe_map_mir1[le].pe_sector_offset +
465+ offset_in_le;
466+ }
467+
468+ if (volume->mirror_copies == AIX_MAX_MIRRORS) {
469+ tmp_bh->mir_node2 =
470+ volume->le_to_pe_map_mir2[le].owning_pv->
471+ logical_node;
472+ tmp_bh->mir_sector2 =
473+ volume->le_to_pe_map_mir2[le].pe_sector_offset +
474+ offset_in_le;
475+ }
476+
477+ if (evms_cs_volume_request_in_progress
478+ (tmp_bh->bh_req.b_rdev, AIX_INCREMENT_REQUEST, &count)) {
479+ buffer_IO_error(bh);
480+ return;
481+ }
482+
483+ if (AIXResyncInProgress) {
484+ if (SECTOR_IN_RANGE
485+ (tmp_bh->bh_req.b_rsector,
486+ AIX_resync_list->master_offset)) {
487+ spin_lock_irqsave(&AIX_resync_list_lock, flags);
488+ }
489+ }
490+
491+ R_IO(partition->logical_node, &tmp_bh->bh_req);
492+
493+ if (AIXResyncInProgress) {
494+ if (SECTOR_IN_RANGE
495+ (tmp_bh->bh_req.b_rsector,
496+ AIX_resync_list->master_offset)) {
497+ spin_unlock_irqrestore(&AIX_resync_list_lock,
498+ flags);
499+ }
500+ }
501+
502+ } else {
503+
504+ R_IO(partition->logical_node, bh);
505+ }
506+
507+#ifdef EVMS_DEBUG
508+ LOG_DEBUG(" ***** %s ***** returning\n", __FUNCTION__);
509+#endif
510+ return;
511+}
512+
513+/*
514+ * Function: write_aix
515+ */
516+static void
517+write_aix(struct evms_logical_node *node, struct buffer_head *bh)
518+{
519+ struct partition_list_entry *partition;
520+ u64 new_sector, new_sector2 = 0, new_sector3 = 0;
521+ u64 org_sector;
522+ u64 new_size;
523+ struct aix_logical_volume *volume;
524+ struct aix_mirror_bh *tmp_bh;
525+ struct evms_logical_node *node2 = NULL, *node3 = NULL;
526+ u32 le, offset_in_le, count;
527+ int flags = 0;
528+
529+ volume = (struct aix_logical_volume *) node->private;
530+
531+#ifdef EVMS_DEBUG
532+// LOG_DEBUG(" ***** %s ***** bh:%p volume->iter:%d\n", __FUNCTION__, bh,
533+// volume->mirror_iterations);
534+ LOG_DEBUG(" write_aix rsector:%lu rsize:%u\n", bh->b_rsector,
535+ bh->b_size);
536+ LOG_DEBUG(" write_aix total_sectors:" PFU64 "\n", node->total_vsectors);
537+#endif
538+
539+ if (volume->lv_access & EVMS_LV_INCOMPLETE) { //No writes allowed on incomplete volumes
540+ LOG_CRITICAL(" write_aix incomplete volume ERROR %d\n",
541+ __LINE__);
542+ buffer_IO_error(bh);
543+ return;
544+ }
545+
546+ // Check if I/O goes past end of logical volume.
547+ if (bh->b_rsector + (bh->b_size >> EVMS_VSECTOR_SIZE_SHIFT) >
548+ node->total_vsectors) {
549+ LOG_CRITICAL(" write_aix ERROR %d\n", __LINE__);
550+ buffer_IO_error(bh);
551+ return;
552+ }
553+ // Logical-to-Physical remapping
554+ if (AIX_remap_sector
555+ (node, bh->b_rsector, (bh->b_size >> EVMS_VSECTOR_SIZE_SHIFT),
556+ &new_sector, &new_size, &partition, &le, &offset_in_le)
557+ || (!new_sector || !partition)) {
558+ LOG_CRITICAL(" write_aix ERROR %d\n", __LINE__);
559+ buffer_IO_error(bh);
560+ return;
561+ }
562+
563+ org_sector = bh->b_rsector;
564+ bh->b_rsector = new_sector;
565+ //bh->b_size = new_size;
566+
567+#ifdef EVMS_DEBUG
568+ LOG_DEBUG(" write_aix Mirror_Copies:%d\n", volume->mirror_copies);
569+#endif
570+
571+ if (volume->mirror_copies > AIX_DEFAULT_MIRRORING) {
572+
573+ if (volume->le_to_pe_map_mir1) {
574+ new_sector2 =
575+ volume->le_to_pe_map_mir1[le].pe_sector_offset +
576+ offset_in_le;
577+ node2 =
578+ volume->le_to_pe_map_mir1[le].owning_pv->
579+ logical_node;
580+ }
581+
582+ if (volume->mirror_copies == AIX_MAX_MIRRORS) {
583+
584+ new_sector3 =
585+ volume->le_to_pe_map_mir2[le].pe_sector_offset +
586+ offset_in_le;
587+ node3 =
588+ volume->le_to_pe_map_mir2[le].owning_pv->
589+ logical_node;
590+ }
591+
592+ tmp_bh =
593+ AIX_alloc_wbh(partition->logical_node, node2, node3, bh,
594+ volume->mirror_copies, le, new_sector2,
595+ new_sector3);
596+
597+ if (!tmp_bh) {
598+ buffer_IO_error(bh);
599+ return;
600+ }
601+ tmp_bh->node = node;
602+
603+ tmp_bh = tmp_bh->mirror_bh_list;
604+
605+ if (evms_cs_volume_request_in_progress
606+ (tmp_bh->bh_req.b_rdev, AIX_INCREMENT_REQUEST, &count)) {
607+ buffer_IO_error(bh);
608+ // free memory here
609+ return;
610+ }
611+
612+ if (AIXResyncInProgress) {
613+ if (SECTOR_IN_RANGE
614+ (tmp_bh->bh_req.b_rsector,
615+ AIX_resync_list->master_offset)) {
616+ spin_lock_irqsave(&AIX_resync_list_lock, flags);
617+ }
618+ }
619+
620+ W_IO(tmp_bh->node, &tmp_bh->bh_req);
621+
622+ if (AIXResyncInProgress) {
623+ if (SECTOR_IN_RANGE
624+ (tmp_bh->bh_req.b_rsector,
625+ AIX_resync_list->master_offset)) {
626+ spin_unlock_irqrestore(&AIX_resync_list_lock,
627+ flags);
628+ }
629+ }
630+
631+ tmp_bh = tmp_bh->next_r1;
632+
633+ if (tmp_bh) {
634+ W_IO(tmp_bh->node, &tmp_bh->bh_req);
635+ tmp_bh = tmp_bh->next_r1;
636+ }
637+
638+ if (tmp_bh) {
639+ W_IO(tmp_bh->node, &tmp_bh->bh_req);
640+ }
641+
642+ } else {
643+
644+ W_IO(partition->logical_node, bh);
645+ }
646+
647+#ifdef EVMS_DEBUG
648+ LOG_DEBUG(" ***** %s returning *****\n", __FUNCTION__);
649+#endif
650+ return;
651+}
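
The mirrored branch of write_aix fans one logical write out to as many as three physical writes by walking the next_r1 chain that AIX_alloc_wbh builds. A schematic of that traversal; the struct and submit() here are stand-ins for the real aix_mirror_bh and W_IO(), not the plugin's types:

#include <stddef.h>

struct wbh {
	struct wbh *next_r1;	/* one element per mirror copy */
};

static void submit(struct wbh *req)	/* stands in for W_IO(node, &req->bh_req) */
{
	(void) req;
}

static void fan_out(struct wbh *head)
{
	/* write_aix unrolls this walk by hand because the chain never
	 * holds more than AIX_MAX_MIRRORS (three) entries */
	struct wbh *cur;

	for (cur = head; cur; cur = cur->next_r1)
		submit(cur);
}

int main(void)
{
	struct wbh third = { NULL }, second = { &third }, first = { &second };

	fan_out(&first);	/* issues all three copies */
	return 0;
}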
652+
653+/*
654+ * Function: ioctl_aix
655+ *
656+ */
657+static int
658+ioctl_aix(struct evms_logical_node *logical_node,
659+ struct inode *inode,
660+ struct file *file, unsigned int cmd, unsigned long arg)
661+{
662+ struct aix_logical_volume *volume =
663+ (struct aix_logical_volume *) (logical_node->private);
664+ int rc = 0;
665+
666+ LOG_EXTRA(" Ioctl %u\n", cmd);
667+
668+ switch (cmd) {
669+
670+ case HDIO_GETGEO:
671+ {
672+ // Fixed geometry for all LVM volumes
673+ unsigned char heads = 64;
674+ unsigned char sectors = 32;
675+ long start = 0;
676+ struct hd_geometry *hd = (struct hd_geometry *) arg;
677+ short cylinders;
678+ // Divide before narrowing to short; truncating first made the
 // result wrong for any volume larger than 16 MB.
 cylinders = ((unsigned long) logical_node->total_vsectors / heads) / sectors;
680+
681+ if (hd == NULL) {
682+ return -EINVAL;
683+ }
684+
685+ if (copy_to_user
686+ ((char *) (&hd->heads), &heads, sizeof (heads)) != 0
687+ || copy_to_user((char *) (&hd->sectors), &sectors,
688+ sizeof (sectors)) != 0
689+ || copy_to_user((short *) (&hd->cylinders),
690+ &cylinders, sizeof (cylinders)) != 0
691+ || copy_to_user((long *) (&hd->start), &start,
692+ sizeof (start)) != 0) {
693+ return -EFAULT;
694+ }
695+ }
696+ break;
697+
698+ case EVMS_QUIESCE_VOLUME:
699+ break;
700+
701+ case EVMS_GET_DISK_LIST:
702+ case EVMS_CHECK_MEDIA_CHANGE:
703+ case EVMS_REVALIDATE_DISK:
704+ case EVMS_OPEN_VOLUME:
705+ case EVMS_CLOSE_VOLUME:
706+ case EVMS_CHECK_DEVICE_STATUS:
707+ {
708+ // These six ioctls all need to be broadcast to all PVs.
709+ struct aix_volume_group *group = volume->group;
710+ struct partition_list_entry *partition;
711+ for (partition = group->partition_list; partition;
712+ partition = partition->next) {
713+ rc |=
714+ IOCTL(partition->logical_node, inode, file,
715+ cmd, arg);
716+ }
717+ }
718+ break;
719+
720+ default:
721+ // Currently the VGE does not send any ioctl's down to the
722+ // partitions. Which partition would they go to?
723+ rc = -ENOTTY;
724+ }
725+
726+ return rc;
727+}
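
HDIO_GETGEO above always reports a synthetic 64-head, 32-sector geometry, so one cylinder is 2048 sectors (1 MiB) and the short cylinders field tops out near 32 GiB. A quick check of the arithmetic with an illustrative 1 GiB volume:

#include <stdio.h>

int main(void)
{
	unsigned long long total_vsectors = 2097152;	/* 1 GiB volume, for illustration */
	unsigned int heads = 64, sectors = 32;
	unsigned long long cylinders = total_vsectors / heads / sectors;

	printf("cylinders = %llu\n", cylinders);	/* 1024 */
	return 0;
}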
728+
729+/* Function: aix_direct_ioctl
730+ *
731+ * This function provides a method for user-space to communicate directly
732+ * with a plugin in the kernel.
733+ */
734+static int
735+aix_direct_ioctl(struct inode *inode,
736+ struct file *file, unsigned int cmd, unsigned long args)
737+{
738+ struct aix_logical_volume *volume = NULL;
739+ struct evms_plugin_ioctl_pkt argument;
740+ int rc = 0;
741+
742+ MOD_INC_USE_COUNT;
743+ LOG_DEBUG(" Function:%s cmd:%d \n", __FUNCTION__, cmd);
744+
745+ // Copy user's parameters to kernel space
746+ if (copy_from_user
747+ (&argument, (struct evms_plugin_ioctl *) args, sizeof (argument))) {
748+ MOD_DEC_USE_COUNT;
749+ return -EFAULT;
750+ }
751+ // Make sure this is supposed to be our ioctl.
752+ if (argument.feature_id != plugin_header.id) {
753+ MOD_DEC_USE_COUNT;
754+ return -EINVAL;
755+ }
756+
758+
759+ switch (argument.feature_command) {
760+
761+ case EVMS_AIX_RESYNC_MIRRORS:
762+ {
763+ struct aix_volume_resync_ioctl aix_lv_resync;
764+
765+ if (copy_from_user
766+ (&aix_lv_resync,
767+ (struct aix_volume_resync_ioctl *) argument.
768+ feature_ioctl_data, sizeof (aix_lv_resync))) {
769+ rc = -EINVAL;
770+ break;
771+ }
772+
773+ volume = AIX_get_volume_data(aix_lv_resync.object_name);
774+
775+ if (volume) {
776+ AIX_schedule_resync(volume, FALSE);
777+ } else {
778+ LOG_DEBUG
779+ (" Function:%s object_name:%s -- no match found\n",
780+ __FUNCTION__, aix_lv_resync.object_name);
781+ rc = -EINVAL;
782+ }
783+
784+ }
785+ break;
786+
787+ default:
788+ rc = -EINVAL;
789+ break;
790+ }
791+
792+ argument.status = rc;
793+ if (copy_to_user((struct evms_plugin_ioctl *) args, &argument,
794+ sizeof (argument))) {
 rc = -EFAULT;
 }
795+ MOD_DEC_USE_COUNT;
796+ return rc;
797+}
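
For orientation, the user-space side of aix_direct_ioctl looks roughly like the sketch below. It is a hedged reconstruction: the packet fields match what the function reads (feature_id, feature_command, status, feature_ioctl_data), but the ioctl number, the 128-byte name buffer, and the device path are placeholders for whatever the real EVMS user headers define:

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <unistd.h>

/* All of the definitions below are illustrative stand-ins; in a real
 * build they come from the EVMS headers. */
#define EVMS_AIX_RESYNC_MIRRORS	1
#define EVMS_PLUGIN_IOCTL	_IOWR('E', 26, struct evms_plugin_ioctl_pkt)

struct aix_volume_resync_ioctl {
	char object_name[128];
};

struct evms_plugin_ioctl_pkt {
	unsigned long feature_id;	/* must equal the AIX plugin_header.id */
	unsigned long feature_command;
	int status;			/* the plugin writes its rc back here */
	void *feature_ioctl_data;
};

int main(int argc, char **argv)
{
	struct aix_volume_resync_ioctl resync = { "" };
	struct evms_plugin_ioctl_pkt pkt = { 0 };
	int fd;

	if (argc < 2)
		return 1;
	strncpy(resync.object_name, argv[1], sizeof (resync.object_name) - 1);
	pkt.feature_command = EVMS_AIX_RESYNC_MIRRORS;
	pkt.feature_ioctl_data = &resync;

	fd = open("/dev/evms/block_device", O_RDONLY);	/* path is illustrative */
	if (fd < 0)
		return 1;
	if (ioctl(fd, EVMS_PLUGIN_IOCTL, &pkt) < 0 || pkt.status)
		fprintf(stderr, "resync request failed\n");
	close(fd);
	return 0;
}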
798+
799+/* Function: AIX_get_volume_data
800+ *
801+ * Look up a discovered logical volume by object name, searching the
802+ * volume list of every group on the global list.
803+ */
804+static struct aix_logical_volume *
805+AIX_get_volume_data(char *object_name)
806+{
807+
808+ struct aix_volume_group *VG_ptr;
809+ struct aix_logical_volume *volume = NULL;
810+ int i;
811+
812+ LOG_DEBUG(" Function:%s object_name:%s \n", __FUNCTION__, object_name);
813+
814+ if (!object_name || !strlen(object_name)) {
815+ return NULL;
816+ }
817+
818+ for (VG_ptr = AIXVolumeGroupList; VG_ptr; VG_ptr = VG_ptr->next) {
819+ for (i = 0; VG_ptr->volume_list[i]; i++) {
820+ if (!strcmp(VG_ptr->volume_list[i]->name, object_name)) {
821+ LOG_DEBUG
822+ (" Function:%s FOUND!! volume_name:%s \n",
823+ __FUNCTION__,
824+ VG_ptr->volume_list[i]->name);
825+ volume = VG_ptr->volume_list[i];
826+ break;
827+ }
828+ }
829+ }
830+
831+ if (!volume) {
832+ LOG_DEBUG(" Function:%s object_name:%s NOT FOUND !! volume:%p \n",
833+ __FUNCTION__, object_name, volume);
834+ }
835+
836+ return volume;
837+}
838+
839+/*
840+ * Function: init_io_aix
841+ *
842+ */
843+static int
844+init_io_aix(struct evms_logical_node *node, int io_flag, /* 0=read, 1=write */
845+ u64 sect_nr, /* disk LBA */
846+ u64 num_sects, /* # of sectors */
847+ void *buf_addr)
848+{ /* buffer address */
849+ struct partition_list_entry *partition;
850+ u64 new_sector = 0;
851+ u64 new_size = 0;
852+ int rc = 0;
853+ u32 le, offset;
854+
855+ LOG_DEBUG(" ************ init_io_aix() num_sects:" PFU64
856+ " node:%p sect_nr:" PFU64 "\n", num_sects, node, sect_nr);
857+
858+ // Init IO needs to deal with the possibility that a request can come
859+ // in that spans PEs or stripes. This is possible because there is no
860+ // limit on num_sects. To fix this, we loop through AIX_remap_sector and
861+ // INIT_IO until num_sects reaches zero.
862+
863+ while (num_sects > 0) {
864+
865+ if (AIX_remap_sector(node, sect_nr, num_sects, &new_sector, &new_size,
866+ &partition, &le, &offset) || (!new_sector || !partition)) {
867+ LOG_CRITICAL("--- Error returned from AIX_remap_sector %d\n",
868+ __LINE__);
869+ return -EIO;
870+ }
871+
872+ LOG_DEBUG(" init_io_aix() line:%d logical_node:%p io_flag:%d new_sector:"
873+ PFU64 " new_size:" PFU64 "\n", __LINE__,
874+ partition->logical_node, io_flag, new_sector, new_size);
875+
876+ rc = INIT_IO(partition->logical_node, io_flag, new_sector,
877+ new_size, buf_addr);
878+ num_sects -= new_size;
879+ sect_nr += new_size;
880+ buf_addr = (void *) (((unsigned long) buf_addr) +
881+ (unsigned long) (new_size << EVMS_VSECTOR_SIZE_SHIFT));
882+ }
883+
884+ return rc;
885+}
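
To see why init_io_aix loops: with 32768-sector PEs (an assumed size), a 1000-sector request starting 100 sectors short of a PE boundary has to become two INIT_IO calls. A user-space trace of the same split, using the clamp AIX_remap_sector applies:

#include <stdio.h>

int main(void)
{
	unsigned long long sect_nr = 32668, num_sects = 1000;	/* crosses one PE boundary */
	unsigned int pe_size = 32768;				/* sectors per PE (assumed) */

	while (num_sects > 0) {
		unsigned long long offset = sect_nr & (pe_size - 1);
		unsigned long long chunk = num_sects;

		if (offset + chunk > pe_size)	/* same clamp as AIX_remap_sector */
			chunk = pe_size - offset;
		printf("INIT_IO at %llu for %llu sectors\n", sect_nr, chunk);
		sect_nr += chunk;
		num_sects -= chunk;
	}
	return 0;	/* prints 32668/100, then 32768/900 */
}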
886+
887+/*
888+ * Function: AIXlvm_vge_init
889+ *
890+ */
891+int __init
892+AIXlvm_vge_init(void)
893+{
894+
895+ LOG_DEBUG(" %s --------\n", __FUNCTION__);
896+
897+ MOD_INC_USE_COUNT;
898+ return evms_cs_register_plugin(&plugin_header); /* register with EVMS */
899+}
900+
901+module_init(AIXlvm_vge_init);
902+
903+/********** Required Plugin Functions **********/
904+
905+/*
906+ * Function: discover_aix
907+ *
908+ * This is the entry point into the LVM discovery process.
909+ */
910+static int
911+discover_aix(struct evms_logical_node **evms_logical_disk_head)
912+{
913+ int rc = 0, count = 0;
914+
915+ MOD_INC_USE_COUNT;
916+ LOG_DEBUG("[%s] discover_volume_groups\n", __FUNCTION__);
917+
918+ rc = discover_volume_groups(evms_logical_disk_head);
919+
920+ if (rc) {
921+ LOG_ERROR("[%s] discover_volume_groups rc=%d\n", __FUNCTION__,rc);
922+ }
923+
924+ if (AIXVolumeGroupList && !rc) {
925+
926+ LOG_DEBUG("[%s] discover_logical_volumes\n", __FUNCTION__);
927+
928+ rc = discover_logical_volumes();
929+
930+ if (rc) {
931+ LOG_ERROR("[%s] discover_logical_volumes rc=%d\n",
932+ __FUNCTION__, rc);
933+ }
934+
935+ LOG_DEBUG("[%s] export_volumes\n", __FUNCTION__);
936+
937+ count = export_volumes(evms_logical_disk_head);
938+
939+ LOG_DEBUG("[%s] export_volumes count=%d\n", __FUNCTION__,
940+ count);
941+ }
942+
943+ MOD_DEC_USE_COUNT;
944+ return (count);
945+}
946+
947+static int
948+discover_volume_groups(struct evms_logical_node **evms_logical_disk_head)
949+{
950+ struct evms_logical_node *logical_node;
951+ struct evms_logical_node *next_node;
952+ struct aix_ipl_rec_area *AIXpv;
953+ struct AIXlvm_rec *AIXlvm; // Temp holder for the LVM on disk rec
954+
955+ LOG_DEBUG(" Begin %s\n", __FUNCTION__);
956+
957+ AIXpv = kmalloc(AIX_SECTOR_SIZE, GFP_KERNEL);
958+ if (!AIXpv) {
959+ return -ENOMEM;
960+ }
961+
962+ // We'll create at least one volume entry; if we don't find any AIX volumes, we'll clean it up later
963+
964+ AIXlvm = kmalloc(sizeof (struct AIXlvm_rec), GFP_KERNEL);
965+ if (!AIXlvm) {
966+ kfree(AIXpv);
967+ return -ENOMEM;
968+ }
969+
970+ for (logical_node = *evms_logical_disk_head; logical_node;
971+ logical_node = next_node) {
972+
973+ // Grab the next list item in case we remove this partition from the global list.
974+ next_node = logical_node->next;
975+
976+ // Read the first sector and see if it has a valid AIX PV signature.
977+
978+ if (INIT_IO(logical_node, 0, 0, 1, AIXpv)) {
979+ // On an I/O error, continue on to the next
980+ // partition. The group that this partition
981+ // belongs to will be incomplete, but we still
982+ // need to discover any other groups.
983+
984+ LOG_ERROR(" Error reading PV [%p]\n", logical_node);
985+ continue;
986+ }
987+
988+ if (AIXpv->IPL_record_id == IPLRECID) {
989+
990+ // This partition is definitely a PV,
991+ // but is it part of a valid VG?
992+ LOG_DEBUG(" DVG removing node from list logical_node %p\n",
993+ logical_node);
994+
995+ if (INIT_IO(logical_node, 0, PSN_LVM_REC, 1, AIXlvm)) {
996+ LOG_ERROR(" Error reading PV [%p]\n",logical_node);
997+ continue;
998+ }
999+
1000+ if (AIXlvm->lvm_id == AIX_LVM_LVMID) {
1001+
1002+ if (validate_build_volume_group_disk_info(
1003+ logical_node, AIXlvm)) {
1004+ // Again, continue on and we'll
1005+ // clean up later.
1006+ continue;
1007+ }
1008+
1009+ evms_cs_remove_logical_node_from_list(
1010+ evms_logical_disk_head, logical_node);
1011+
1012+ } else {
1013+ LOG_DEBUG(" Found an AIX PV with no parent LVM (LVM ID: %d)\n",
1014+ AIXlvm->lvm_id);
1015+ continue;
1016+ }
1017+ } else {
1018+ LOG_DEBUG(" Found a PV not belonging to AIX [%p]\n",
1019+ logical_node);
1020+ }
1021+ }
1022+
1023+ AIX_VOLUME_GROUP_DUMP();
1024+
1025+ kfree(AIXpv);
1026+ kfree(AIXlvm);
1027+
1028+ if (check_volume_groups()) {
1029+ return -EINVAL;
1030+ }
1031+
1032+ return 0;
1033+}
1034+
1035+/*
1036+ * Function: validate_build_volume_group_disk_info
1037+ *
1038+ * Creates and validates the volume groups found on the disk structures.
1039+ *
1040+ */
1041+static int
1042+validate_build_volume_group_disk_info(struct evms_logical_node *logical_node,
1043+ struct AIXlvm_rec *AIXlvm)
1044+{
1045+
1046+ struct aix_volume_group *AIXVGLptr = AIXVolumeGroupList;
1047+
1048+ LOG_DEBUG(" VBVGDI pv_num:%d\n", AIXlvm->pv_num);
1049+
1050+ while (AIXVGLptr) {
1051+ if (COMPARE_UNIQUE_IDS(AIXlvm->vg_id, AIXVGLptr->vg_id)) {
1052+ break;
1053+ }
1054+ AIXVGLptr = AIXVGLptr->next; // There is more than one so walk the list
1055+ }
1056+
1057+ if (!AIXVGLptr) {
1058+ LOG_DEBUG(" VBVGDI AIXVGLptr:%p line:%d\n", AIXVGLptr,__LINE__);
1059+ AIXVGLptr = AIX_create_volume_group(logical_node, AIXlvm);
1060+ if (AIXVGLptr) {
1061+ AIXVGLptr->next = AIXVolumeGroupList;
1062+ AIXVolumeGroupList = AIXVGLptr;
1063+ }
1064+ } else {
1065+ LOG_DEBUG(" VBVGDI Rediscover AIXVGLptr:%p line:%d\n",
1066+ AIXVGLptr, __LINE__);
1067+
1068+ if (AIX_update_volume_group(AIXVGLptr, logical_node, AIXlvm)) {
1069+ LOG_DEBUG
1070+ (" VBVGDI ERROR on Rediscover AIXVGLptr:%p line:%d\n",
1071+ AIXVGLptr, __LINE__);
1072+ }
1073+ }
1074+
1075+ if (!AIXVGLptr) {
1076+
1077+ LOG_DEBUG(" VBVGDI AIXVGLptr:%p line:%d\n", AIXVGLptr,
1078+ __LINE__);
1079+ LOG_DEBUG(" VBVGDI flags:%d\n", AIXVGLptr->flags);
1080+ LOG_CRITICAL("Unable to allocate volume group data struct Volume Group Corruption !!\n");
1081+ return -EINVAL;
1082+ } else {
1083+
1084+ LOG_DEBUG(" VBVGDI AIXVolumeGroupList:%p line:%d\n",
1085+ AIXVolumeGroupList, __LINE__);
1086+ LOG_DEBUG(" VBVGDI AIXVGLptr:%p line:%d\n", AIXVGLptr,
1087+ __LINE__);
1088+ LOG_DEBUG(" VBVGDI flags:%d\n", AIXVGLptr->flags);
1089+
1090+ if (add_PV_to_volume_group(AIXVGLptr, logical_node, AIXlvm->pv_num)) {
1091+ return -EINVAL;
1092+ }
1093+ }
1094+
1095+ return 0;
1096+}
1097+
1098+/*
1099+ * Function: add_VG_data_to_VG_list
1100+ *
1101+ * Allocate space for a new LVM volume group and all of its sub-fields.
1102+ * Initialize the appropriate fields.
1103+ */
1104+
1105+static int
1106+add_VG_data_to_VG_list(struct evms_logical_node *logical_node,
1107+ struct aix_volume_group *new_group, short int pvNum)
1108+{
1109+// int pvh_pos;
1110+
1111+// struct pv_header *AIXpvh;
1112+
1113+ // The array of pointer to the logical volumes.
1114+ // Leave this allocation at the max permitted; the LV numbering may not be sequential, so there may be
1115+ // gaps in the array, e.g. 1,2,3,4,5,6,7,8,11,15,21,33, even though there are only 12 LVs.
1116+
1117+ LOG_DEBUG(" AVGDVGL Entering pvNum:%d vgda_PSN:%d\n", pvNum,
1118+ new_group->vgda_psn);
1119+
1120+// pvh_pos = AIX_PVH_DATA_PSN(new_group->vgda_psn, pvNum);
1121+
1122+/* AIXpvh = kmalloc(AIX_SECTOR_SIZE, GFP_KERNEL);
1123+ if (!AIXpvh) {
1124+ return -ENOMEM;
1125+ }
1126+
1127+ memset(AIXpvh, 0, AIX_SECTOR_SIZE);
1128+
1129+ LOG_DEBUG(" AVGDVGL pvh_pos:%d\n", pvh_pos);
1130+
1131+ if (INIT_IO(logical_node, 0, pvh_pos, 1, AIXpvh)) {
1132+ return -EIO;
1133+ }
1134+
1135+ LOG_DEBUG(" AVGDVGL AIXpvh->pv_num:%d\n", pvNum);
1136+*/
1137+ if (!new_group->volume_list) {
1138+ new_group->volume_list =
1139+ kmalloc(LVM_MAXLVS * sizeof (struct aix_logical_volume *),
1140+ GFP_KERNEL);
1141+ if (!new_group->volume_list) {
1142+// kfree(AIXpvh);
1143+ return -ENOMEM;
1144+ }
1145+ memset(new_group->volume_list, 0,
1146+ (LVM_MAXLVS * sizeof (struct aix_logical_volume *)));
1147+ }
1148+
1149+ new_group->vg_id.word1 = new_group->AIXvgh->vg_id.word1;
1150+ new_group->vg_id.word2 = new_group->AIXvgh->vg_id.word2;
1151+ new_group->vg_id.word3 = new_group->AIXvgh->vg_id.word3;
1152+ new_group->vg_id.word4 = new_group->AIXvgh->vg_id.word4;
1153+// new_group->numpvs = new_group->AIXvgh->numpvs;
1154+// new_group->numlvs = new_group->AIXvgh->numlvs;
1155+// new_group->lv_max = new_group->AIXvgh->maxlvs;
1156+ new_group->pe_size = GET_PHYSICAL_PART_SIZE(new_group->AIXvgh->pp_size) /
1157+ AIX_SECTOR_SIZE;
1158+
1159+// new_group->block_size = 0;
1160+// new_group->hard_sect_size = 0;
1161+ new_group->flags |= AIX_VG_DIRTY;
1162+
1163+// kfree(AIXpvh);
1164+
1165+ LOG_DEBUG(" AVGDVGL Vol Group ID %x\n", new_group->vg_id.word2);
1166+
1167+ return 0;
1168+}
1169+
1170+/*
1171+ * Function: add_PV_to_volume_group
1172+ *
1173+ * Create a new partition_list_entry for the specified volume group.
1174+ * Initialize the new partition with the evms node and lvm pv information,
1175+ * and add the new partition to the group's list.
1176+ */
1177+
1178+static int
1179+add_PV_to_volume_group(struct aix_volume_group *group,
1180+ struct evms_logical_node *evms_partition, int pvNum)
1181+{
1182+ struct partition_list_entry *new_partition;
1183+
1184+ LOG_DEBUG(" APVVG Entering pvNum:%d\n", pvNum);
1185+
1186+ group->flags |= AIX_VG_DIRTY;
1187+
1188+ for (new_partition = group->partition_list; new_partition != NULL;
1189+ new_partition = new_partition->next) {
1190+ if (new_partition->logical_node == evms_partition) {
1191+ return 0;
1192+ }
1193+ }
1194+
1195+ new_partition =
1196+ kmalloc(sizeof (struct partition_list_entry), GFP_KERNEL);
1197+ if (!new_partition) {
1198+ return -ENOMEM;
1199+ }
1200+
1201+ memset(new_partition, 0, sizeof (struct partition_list_entry));
1202+
1203+ // Add this partition to this group's list.
1204+ new_partition->logical_node = evms_partition;
1205+ new_partition->pv_number = pvNum;
1206+
1207+ if (evms_partition->hardsector_size > group->hard_sect_size) {
1208+ group->hard_sect_size = evms_partition->hardsector_size;
1209+ }
1210+ if (evms_partition->block_size > group->block_size) {
1211+ group->block_size = evms_partition->block_size;
1212+ }
1213+
1214+ // Add this partition to the beginning of its group's list.
1215+ new_partition->next = group->partition_list;
1216+ group->partition_list = new_partition;
1217+ group->partition_count++;
1218+
1219+ LOG_DEBUG(" APVVG partition_count:%d pv_num:%d\n",
1220+ group->partition_count, pvNum);
1221+
1222+ return 0;
1223+}
1224+
1225+/****************************************************
1226+*
1227+*
1228+*
1229+*****************************************************/
1230+static struct aix_volume_group *
1231+AIX_create_volume_group(struct evms_logical_node *logical_node,
1232+ struct AIXlvm_rec *AIXlvm)
1233+{
1234+ struct vg_header *AIXvgh = NULL, *AIXvgh2 = NULL;
1235+ struct vg_trailer *AIXvgt = NULL, *AIXvgt2 = NULL;
1236+ struct aix_volume_group *AIXVGLptr;
1237+
1238+ AIXvgh = kmalloc(AIX_SECTOR_SIZE, GFP_KERNEL);
1239+ if (!AIXvgh) {
1240+ return NULL;
1241+ }
1242+
1243+ AIXvgh2 = kmalloc(AIX_SECTOR_SIZE, GFP_KERNEL);
1244+ if (!AIXvgh2) {
1245+ AIX_free_headers(AIXvgh, AIXvgh2, AIXvgt, AIXvgt2);
1246+ return NULL;
1247+ }
1248+
1249+ AIXvgt = kmalloc(AIX_SECTOR_SIZE, GFP_KERNEL);
1250+ if (!AIXvgt) {
1251+ AIX_free_headers(AIXvgh, AIXvgh2, AIXvgt, AIXvgt2);
1252+ return NULL;
1253+ }
1254+
1255+ AIXvgt2 = kmalloc(AIX_SECTOR_SIZE, GFP_KERNEL);
1256+ if (!AIXvgt2) {
1257+ AIX_free_headers(AIXvgh, AIXvgh2, AIXvgt, AIXvgt2);
1258+ return NULL;
1259+ }
1260+
1261+ memset(AIXvgh, 0, AIX_SECTOR_SIZE);
1262+ memset(AIXvgh2, 0, AIX_SECTOR_SIZE);
1263+ memset(AIXvgt, 0, AIX_SECTOR_SIZE);
1264+ memset(AIXvgt2, 0, AIX_SECTOR_SIZE);
1265+
1266+ // First time through we want to read this in; we may have only one PV in this group, and all
1267+ // others may be corrupt, etc. If the info were clean we wouldn't get here.
1268+
1269+ if (INIT_IO(logical_node, 0, AIXlvm->vgda_psn[0], 1, AIXvgh)) {
1270+ AIX_free_headers(AIXvgh, AIXvgh2, AIXvgt, AIXvgt2);
1271+ return NULL;
1272+ }
1273+
1274+ if (INIT_IO(logical_node, 0, AIXlvm->vgda_psn[1], 1, AIXvgh2)) {
1275+ AIX_free_headers(AIXvgh, AIXvgh2, AIXvgt, AIXvgt2);
1276+ return NULL;
1277+ }
1278+
1279+ if (INIT_IO(logical_node, 0, (AIXlvm->vgda_psn[0] + AIXlvm->vgda_len - 1), 1,
1280+ AIXvgt)) {
1281+ AIX_free_headers(AIXvgh, AIXvgh2, AIXvgt, AIXvgt2);
1282+ return NULL;
1283+ }
1284+
1285+ if (INIT_IO(logical_node, 0, (AIXlvm->vgda_psn[1] + AIXlvm->vgda_len - 1), 1,
1286+ AIXvgt2)) {
1287+ AIX_free_headers(AIXvgh, AIXvgh2, AIXvgt, AIXvgt2);
1288+ return NULL;
1289+ }
1290+
1291+ LOG_DEBUG("CVG AIXvgh->vgda_psn[%d]:%d\n", 0, AIXlvm->vgda_psn[0]);
1292+ LOG_DEBUG("CVG AIXvgh->vgda_psn[%d]:%d\n", 1, AIXlvm->vgda_psn[1]);
1293+ LOG_DEBUG("CVG AIXvgt psn[%d]:%d\n", 0,(AIXlvm->vgda_psn[0] + AIXlvm->vgda_len - 1));
1294+ LOG_DEBUG("CVG AIXvgt psn[%d]:%d\n", 1,(AIXlvm->vgda_psn[1] + AIXlvm->vgda_len - 1));
1295+ LOG_DEBUG("CVG Allocating AIXVGLptr:size:%d \n",(int) sizeof (struct aix_volume_group));
1296+
1297+ AIXVGLptr = kmalloc(sizeof (struct aix_volume_group), GFP_KERNEL);
1298+ if (!AIXVGLptr) {
1299+ AIX_free_headers(AIXvgh, AIXvgh2, AIXvgt, AIXvgt2);
1300+ return NULL;
1301+ }
1302+ memset(AIXVGLptr, 0, sizeof (struct aix_volume_group));
1303+
1304+ AIXVGLptr->CleanVGInfo = AIX_PV_STATE_INVALID;
1305+ AIXVGLptr->flags |= AIX_VG_DIRTY;
1306+
1307+ LOG_DEBUG("CVG AIXVGLptr:%p line %d\n", AIXVGLptr, __LINE__);
1308+
1309+ AIXVGLptr->AIXvgh = kmalloc(sizeof (struct vg_header), GFP_KERNEL);
1310+ if (!AIXVGLptr->AIXvgh) {
1311+ kfree(AIXVGLptr);
1312+ AIX_free_headers(AIXvgh, AIXvgh2, AIXvgt, AIXvgt2);
1313+ return NULL;
1314+ }
1315+ memset(AIXVGLptr->AIXvgh, 0, sizeof (struct vg_header));
1316+
1317+ LOG_DEBUG("CVG COMP TS AIXVGLptr->CleanVGInfo:%d \n",
1318+ AIXVGLptr->CleanVGInfo);
1319+
1320+ if (AIXVGLptr->CleanVGInfo == AIX_PV_STATE_INVALID) {
1321+ if (COMPARE_TIMESTAMPS(AIXvgh->vg_timestamp, AIXvgt->timestamp)) {
1322+ if (COMPARE_TIMESTAMPS
1323+ (AIXvgh2->vg_timestamp, AIXvgt2->timestamp)) {
1324+ if (COMPARE_TIMESTAMPS
1325+ (AIXvgh->vg_timestamp,
1326+ AIXvgh2->vg_timestamp)) {
1327+ // All timestamps match. Yea!
1328+ AIXVGLptr->CleanVGInfo =
1329+ AIX_PV_STATE_VALID;
1330+ } else {
1331+ // Both VGDAs are good, but timestamps are
1332+ // different. Can't tell yet which one is
1333+ // correct.
1334+ AIXVGLptr->CleanVGInfo =
1335+ AIX_PV_STATE_EITHER_VGDA;
1336+ }
1337+ } else {
1338+ // First VGDA is good, second is bad.
1339+ AIXVGLptr->CleanVGInfo =
1340+ AIX_PV_STATE_FIRST_VGDA;
1341+ }
1342+ } else {
1343+ if (COMPARE_TIMESTAMPS
1344+ (AIXvgh2->vg_timestamp, AIXvgt2->timestamp)) {
1345+ // First VGDA is bad, second is good.
1346+ AIXVGLptr->CleanVGInfo =
1347+ AIX_PV_STATE_SECOND_VGDA;
1348+ } else if (AIXvgh->numpvs == 1) { // We only have 1 PV in this group, mismatch or not this will have to do
1349+ AIXVGLptr->CleanVGInfo = AIX_PV_STATE_VALID;
1350+ } else {
1351+ // This should never happen.
1352+ LOG_DEBUG("All four VG timestamps for %d are different. What happened?!?\n",
1353+ AIXVGLptr->vg_id.word2);
1354+ AIXVGLptr->CleanVGInfo = AIX_PV_STATE_INVALID;
1355+
1356+ }
1357+ }
1358+
1359+ LOG_DEBUG("CVG SWITCH TS AIXVGLptr->CleanVGInfo:%d \n",
1360+ AIXVGLptr->CleanVGInfo);
1361+
1362+ switch (AIXVGLptr->CleanVGInfo) {
1363+ case AIX_PV_STATE_VALID:
1364+ case AIX_PV_STATE_FIRST_VGDA:
1365+
1366+ LOG_DEBUG("CVG SWITCH VALID %d size:%d\n",
1367+ AIXVGLptr->CleanVGInfo,
1368+ (int) sizeof (struct vg_header));
1369+
1370+ AIX_copy_header_info(AIXVGLptr->AIXvgh, AIXvgh); // Get the info. we need
1371+
1372+ AIXVGLptr->vgda_psn = AIXlvm->vgda_psn[0];
1373+ AIXVGLptr->vgda_len = AIXlvm->vgda_len;
1374+ break;
1375+
1376+ case AIX_PV_STATE_SECOND_VGDA:
1377+ LOG_DEBUG("CVG SWITCH SECOND VGDA %d size:%d\n",
1378+ AIXVGLptr->CleanVGInfo,
1379+ (int) sizeof (struct vg_header));
1380+
1381+ AIX_copy_header_info(AIXVGLptr->AIXvgh, AIXvgh2); // Get the info. we need
1382+
1383+ AIXVGLptr->vgda_psn = AIXlvm->vgda_psn[1];
1384+ AIXVGLptr->vgda_len = AIXlvm->vgda_len;
1385+ break;
1386+
1387+ case AIX_PV_STATE_EITHER_VGDA:
1388+ LOG_DEBUG("CVG SWITCH EITHER VGDA %d size:%d\n",
1389+ AIXVGLptr->CleanVGInfo,(int) sizeof (struct vg_header));
1390+ if (COMPARE_UNIQUE_IDS(AIXvgh->vg_id, AIXvgh2->vg_id)) {
1391+
1392+ AIX_copy_header_info(AIXVGLptr->AIXvgh, AIXvgh); // Get the info. we need
1393+
1394+ AIXVGLptr->vgda_psn = AIXlvm->vgda_psn[0];
1395+ AIXVGLptr->vgda_len = AIXlvm->vgda_len;
1396+ } else {
1397+ AIXVGLptr->CleanVGInfo = AIX_PV_STATE_INVALID;
1398+ // Not sure where this PV belongs. It thinks it is
1399+ // supposed to be in two different containers. We will
1400+ // probably need to put this on a separate, temporary
1401+ // list, and determine later which container is missing
1402+ // a PV.
1403+ }
1404+ break;
1405+
1406+ default:
1407+ LOG_ERROR("Invalid PV state (%d) for %d\n",
1408+ AIXVGLptr->CleanVGInfo,
1409+ AIXVGLptr->vg_id.word2);
1410+ AIXVGLptr->CleanVGInfo = AIX_PV_STATE_INVALID;
1411+ break;
1412+ }
1413+
1414+ }
1415+
1416+ // Currently AIX Big VGDA is not supported - cleanup and return NULL so this VG doesn't get added
1417+
1418+ if (AIXVGLptr->AIXvgh->bigvg != 0) {
1419+ LOG_SERIOUS("Error creating Volume Group AIX Big VGDA is not currently supported\n");
1420+ if (AIXVGLptr->AIXvgh) {
1421+ kfree(AIXVGLptr->AIXvgh);
1422+ AIXVGLptr->AIXvgh = NULL;
1423+ }
1424+
1425+ if (AIXVGLptr) {
1426+ kfree(AIXVGLptr);
1427+ AIXVGLptr = NULL;
1428+ }
1429+
1430+ AIX_free_headers(AIXvgh, AIXvgh2, AIXvgt, AIXvgt2);
1431+ return NULL;
1432+ }
1433+
1434+ add_VG_data_to_VG_list(logical_node, AIXVGLptr, AIXlvm->pv_num);
1435+
1436+ AIX_free_headers(AIXvgh, AIXvgh2, AIXvgt, AIXvgt2);
1437+
1438+ LOG_DEBUG("CVG Exiting CleanVGInfo:%d\n", AIXVGLptr->CleanVGInfo);
1439+
1440+ return AIXVGLptr;
1441+}
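
The nested timestamp comparisons in AIX_create_volume_group (and repeated in AIX_update_volume_group below) are easier to audit as a table. This restates the logic above without changing it, with H1/T1 the first VGDA's header/trailer timestamps and H2/T2 the second's:

/*
 *  H1==T1  H2==T2  H1==H2   resulting CleanVGInfo
 *  ------  ------  ------   ---------------------
 *   yes     yes     yes     AIX_PV_STATE_VALID        (first VGDA used)
 *   yes     yes     no      AIX_PV_STATE_EITHER_VGDA  (first used if vg_ids match)
 *   yes     no       -      AIX_PV_STATE_FIRST_VGDA
 *   no      yes      -      AIX_PV_STATE_SECOND_VGDA
 *   no      no       -      VALID if numpvs == 1, otherwise INVALID
 */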
1442+
1443+/****************************************************
1444+*
1445+*
1446+*
1447+*****************************************************/
1448+static int
1449+AIX_update_volume_group(struct aix_volume_group *AIXVGLptr,
1450+ struct evms_logical_node *logical_node,
1451+ struct AIXlvm_rec *AIXlvm)
1452+{
1453+ struct vg_header *AIXvgh = NULL, *AIXvgh2 = NULL;
1454+ struct vg_trailer *AIXvgt = NULL, *AIXvgt2 = NULL;
1455+
1456+ AIXvgh = kmalloc(AIX_SECTOR_SIZE, GFP_KERNEL);
1457+ if (!AIXvgh) {
1458+ return -ENOMEM;
1459+ }
1460+
1461+ AIXvgh2 = kmalloc(AIX_SECTOR_SIZE, GFP_KERNEL);
1462+ if (!AIXvgh2) {
1463+ AIX_free_headers(AIXvgh, AIXvgh2, AIXvgt, AIXvgt2);
1464+ return -ENOMEM;
1465+ }
1466+
1467+ AIXvgt = kmalloc(AIX_SECTOR_SIZE, GFP_KERNEL);
1468+ if (!AIXvgt) {
1469+ AIX_free_headers(AIXvgh, AIXvgh2, AIXvgt, AIXvgt2);
1470+ return -ENOMEM;
1471+ }
1472+
1473+ AIXvgt2 = kmalloc(AIX_SECTOR_SIZE, GFP_KERNEL);
1474+ if (!AIXvgt2) {
1475+ AIX_free_headers(AIXvgh, AIXvgh2, AIXvgt, AIXvgt2);
1476+ return -ENOMEM;
1477+ }
1478+
1479+ // First time through we want to read this in; we may have only one PV in this group, and all
1480+ // others may be corrupt, etc. If the info were clean we wouldn't get here.
1481+
1482+ if (INIT_IO(logical_node, 0, AIXlvm->vgda_psn[0], 1, AIXvgh)) {
1483+ AIX_free_headers(AIXvgh, AIXvgh2, AIXvgt, AIXvgt2);
1484+ return -ENOMEM;
1485+ }
1486+
1487+ if (INIT_IO(logical_node, 0, AIXlvm->vgda_psn[1], 1, AIXvgh2)) {
1488+ AIX_free_headers(AIXvgh, AIXvgh2, AIXvgt, AIXvgt2);
1489+ return -ENOMEM;
1490+ }
1491+
1492+ if (INIT_IO(logical_node, 0, (AIXlvm->vgda_psn[0] + AIXlvm->vgda_len - 1), 1,
1493+ AIXvgt)) {
1494+ AIX_free_headers(AIXvgh, AIXvgh2, AIXvgt, AIXvgt2);
1495+ return -ENOMEM;
1496+ }
1497+
1498+ if (INIT_IO(logical_node, 0, (AIXlvm->vgda_psn[1] + AIXlvm->vgda_len - 1), 1,
1499+ AIXvgt2)) {
1500+ AIX_free_headers(AIXvgh, AIXvgh2, AIXvgt, AIXvgt2);
1501+ return -ENOMEM;
1502+ }
1503+
1504+ LOG_DEBUG("UVG AIXvgh->vgda_psn[%d]:%d\n", 0, AIXlvm->vgda_psn[0]);
1505+ LOG_DEBUG("UVG AIXvgh->vgda_psn[%d]:%d\n", 1, AIXlvm->vgda_psn[1]);
1506+ LOG_DEBUG("UVG AIXvgt psn[%d]:%d\n", 0,(AIXlvm->vgda_psn[0] + AIXlvm->vgda_len - 1));
1507+ LOG_DEBUG("UVG AIXvgt psn[%d]:%d\n", 1,(AIXlvm->vgda_psn[1] + AIXlvm->vgda_len - 1));
1508+
1509+ AIXVGLptr->CleanVGInfo = AIX_PV_STATE_INVALID;
1510+ AIXVGLptr->flags |= AIX_VG_DIRTY;
1511+
1512+ LOG_DEBUG("UVG AIXVGLptr:%p line %d\n", AIXVGLptr, __LINE__);
1513+
1514+ AIXVGLptr->AIXvgh = kmalloc(sizeof (struct vg_header), GFP_KERNEL);
1515+ if (!AIXVGLptr->AIXvgh) {
1516+ AIX_free_headers(AIXvgh, AIXvgh2, AIXvgt, AIXvgt2);
1517+ return -ENOMEM;
1518+ }
1519+ memset(AIXVGLptr->AIXvgh, 0, sizeof (struct vg_header));
1520+
1521+ LOG_DEBUG("UVG COMP TS AIXVGLptr->CleanVGInfo:%d \n",AIXVGLptr->CleanVGInfo);
1522+
1523+ if (AIXVGLptr->CleanVGInfo == AIX_PV_STATE_INVALID) {
1524+ if (COMPARE_TIMESTAMPS(AIXvgh->vg_timestamp, AIXvgt->timestamp)) {
1525+ if (COMPARE_TIMESTAMPS
1526+ (AIXvgh2->vg_timestamp, AIXvgt2->timestamp)) {
1527+ if (COMPARE_TIMESTAMPS
1528+ (AIXvgh->vg_timestamp,
1529+ AIXvgh2->vg_timestamp)) {
1530+ // All timestamps match. Yea!
1531+ AIXVGLptr->CleanVGInfo =
1532+ AIX_PV_STATE_VALID;
1533+ } else {
1534+ // Both VGDAs are good, but timestamps are
1535+ // different. Can't tell yet which one is
1536+ // correct.
1537+ AIXVGLptr->CleanVGInfo =
1538+ AIX_PV_STATE_EITHER_VGDA;
1539+ }
1540+ } else {
1541+ // First VGDA is good, second is bad.
1542+ AIXVGLptr->CleanVGInfo =
1543+ AIX_PV_STATE_FIRST_VGDA;
1544+ }
1545+ } else {
1546+ if (COMPARE_TIMESTAMPS
1547+ (AIXvgh2->vg_timestamp, AIXvgt2->timestamp)) {
1548+ // First VGDA is bad, second is good.
1549+ AIXVGLptr->CleanVGInfo =
1550+ AIX_PV_STATE_SECOND_VGDA;
1551+ } else if (AIXvgh->numpvs == 1) { // We only have 1 PV in this group, mismatch or not this will have to do
1552+ AIXVGLptr->CleanVGInfo = AIX_PV_STATE_VALID;
1553+ } else {
1554+ // This should never happen.
1555+ LOG_DEBUG
1556+ ("All four VG timestamps for %d are different. What happened?!?\n",
1557+ AIXVGLptr->vg_id.word2);
1558+ AIXVGLptr->CleanVGInfo = AIX_PV_STATE_INVALID;
1559+
1560+ }
1561+ }
1562+
1563+ LOG_DEBUG("UVG SWITCH TS AIXVGLptr->CleanVGInfo:%d \n",
1564+ AIXVGLptr->CleanVGInfo);
1565+
1566+ switch (AIXVGLptr->CleanVGInfo) {
1567+ case AIX_PV_STATE_VALID:
1568+ case AIX_PV_STATE_FIRST_VGDA:
1569+
1570+ LOG_DEBUG("UVG SWITCH VALID %d size:%d\n",
1571+ AIXVGLptr->CleanVGInfo,
1572+ (int) sizeof (struct vg_header));
1573+
1574+ AIX_copy_header_info(AIXVGLptr->AIXvgh, AIXvgh); // Get the info. we need
1575+
1576+ AIXVGLptr->vgda_psn = AIXlvm->vgda_psn[0];
1577+ AIXVGLptr->vgda_len = AIXlvm->vgda_len;
1578+ break;
1579+
1580+ case AIX_PV_STATE_SECOND_VGDA:
1581+ LOG_DEBUG("UVG SWITCH SECOND VGDA %d size:%d\n",
1582+ AIXVGLptr->CleanVGInfo,
1583+ (int) sizeof (struct vg_header));
1584+
1585+ AIX_copy_header_info(AIXVGLptr->AIXvgh, AIXvgh2); // Get the info. we need
1586+
1587+ AIXVGLptr->vgda_psn = AIXlvm->vgda_psn[1];
1588+ AIXVGLptr->vgda_len = AIXlvm->vgda_len;
1589+ break;
1590+
1591+ case AIX_PV_STATE_EITHER_VGDA:
1592+ LOG_DEBUG("UVG SWITCH EITHER VGDA %d size:%d\n",
1593+ AIXVGLptr->CleanVGInfo,
1594+ (int) sizeof (struct vg_header));
1595+ if (COMPARE_UNIQUE_IDS(AIXvgh->vg_id, AIXvgh2->vg_id)) {
1596+
1597+ AIX_copy_header_info(AIXVGLptr->AIXvgh, AIXvgh); // Get the info. we need
1598+
1599+ AIXVGLptr->vgda_psn = AIXlvm->vgda_psn[0];
1600+ AIXVGLptr->vgda_len = AIXlvm->vgda_len;
1601+ } else {
1602+ AIXVGLptr->CleanVGInfo = AIX_PV_STATE_INVALID;
1603+ // Not sure where this PV belongs. It thinks it is
1604+ // supposed to be in two different containers. We will
1605+ // probably need to put this on a separate, temporary
1606+ // list, and determine later which container is missing
1607+ // a PV.
1608+ }
1609+ break;
1610+
1611+ default:
1612+ LOG_ERROR("UVG Invalid PV state (%d) for %d\n",
1613+ AIXVGLptr->CleanVGInfo,
1614+ AIXVGLptr->vg_id.word2);
1615+ AIXVGLptr->CleanVGInfo = AIX_PV_STATE_INVALID;
1616+ break;
1617+ }
1618+
1619+ }
1620+
1621+// add_VG_data_to_VG_list(logical_node, AIXVGLptr, AIXlvm->pv_num);
1622+ AIXVGLptr->flags |= AIX_VG_DIRTY;
1623+
1624+ AIX_free_headers(AIXvgh, AIXvgh2, AIXvgt, AIXvgt2);
1625+
1626+ LOG_DEBUG("UVG Exiting CleanVGInfo:%d\n", AIXVGLptr->CleanVGInfo);
1627+
1628+ return 0;
1629+}
1630+
1631+/****************************************************
1632+* Function: check_volume_groups
1633+*
1634+* We just want to make sure the volume groups have found
1635+* all their drives.
1636+*
1637+* If not, we'll continue and build what we can
1638+*****************************************************/
1639+static int
1640+check_volume_groups(void)
1641+{
1642+ struct aix_volume_group *group;
1643+ struct aix_volume_group *next_group;
1644+// struct partition_list_entry *partitions;
1645+// int NumPVS = 0;
1646+
1647+ LOG_DEBUG("CHVG Checking volume groups:\n");
1648+
1649+
1650+ for (group = AIXVolumeGroupList; group; group = next_group) {
1651+ next_group = group->next;
1652+
1653+ if (group->flags & AIX_VG_DIRTY){
1654+ if (group->AIXvgh->numlvs == 0) {
1655+ remove_group_from_list(group);
1656+ deallocate_volume_group(group);
1657+ } else {
1658+ if (group->partition_count != group->AIXvgh->numpvs) {
1659+ group->flags |= AIX_VG_INCOMPLETE;
1660+ LOG_ERROR("CHVG Found incomplete VG !! flags:%x\n",
1661+ group->flags);
1662+ LOG_ERROR("CHVG Found %d PVs should have %d PVs\n",
1663+ group->partition_count, group->AIXvgh->numpvs);
1664+ }
1665+ }
1666+ }
1667+ }
1668+
1669+ LOG_DEBUG("CHVG Finished Checking volume groups:\n");
1670+ return 0;
1671+
1672+}
1673+
1674+/************************************************************************
1675+ * Function: discover_logical_volumes
1676+ *
1677+ * After all PVs have been claimed and added to the appropriate VG list,
1678+ * the volumes for each VG must be constructed.
1679+ *
1680+ *
1681+ */
1682+static int
1683+discover_logical_volumes(void)
1684+{
1685+
1686+ struct aix_volume_group *AIXVGLPtr;
1687+ struct aix_logical_volume *new_LV;
1688+ struct partition_list_entry *partition;
1689+ struct evms_logical_node *node;
1690+ struct lv_entries *AIXlvent, *AIXlventHead;
1691+ int j, lv_found, all_lvs_found, rc;
1692+ struct namelist *AIXnamelist;
1693+ char *NameBuffer;
1694+
1695+ AIXlventHead =
1696+ kmalloc(MAX_SECTORS_LV_ENTRIES * AIX_SECTOR_SIZE, GFP_KERNEL);
1697+ if (!AIXlventHead) {
1698+ return -ENOMEM;
1699+ }
1700+
1701+ memset(AIXlventHead, 0, (MAX_SECTORS_LV_ENTRIES * AIX_SECTOR_SIZE));
1702+
1703+ NameBuffer =
1704+ kmalloc(MAX_SECTORS_NAMELIST * AIX_SECTOR_SIZE, GFP_KERNEL);
1705+ if (!NameBuffer) {
1706+ kfree(AIXlventHead);
1707+ return -ENOMEM;
1708+ }
1709+
1710+ memset(NameBuffer, 0, (MAX_SECTORS_NAMELIST * AIX_SECTOR_SIZE));
1711+
1712+ for (AIXVGLPtr = AIXVolumeGroupList; AIXVGLPtr;
1713+ AIXVGLPtr = AIXVGLPtr->next ) {
1714+
1715+ partition = AIXVGLPtr->partition_list;
1716+
1717+ if (!(AIXVGLPtr->flags & AIX_VG_DIRTY)) {
1718+ continue;
1719+ }
1720+
1721+ if (partition == NULL) {
1722+ continue;
1723+ }
1724+
1725+ node = partition->logical_node;
1726+
1727+ if (node == NULL) {
1728+ continue;
1729+ }
1730+
1731+ LOG_DEBUG("DLV INIT_IO AIXNameList position:%d\n",
1732+ ((AIXVGLPtr->vgda_psn + AIXVGLPtr->vgda_len) - 1 -
1733+ MAX_SECTORS_NAMELIST));
1734+ LOG_DEBUG("AIXVGLPTR:%p partition:%p node:%p \n", AIXVGLPtr,
1735+ partition, node);
1736+
1737+ if (INIT_IO(node, 0,
1738+ ((AIXVGLPtr->vgda_psn + AIXVGLPtr->vgda_len) - 1 -
1739+ MAX_SECTORS_NAMELIST), MAX_SECTORS_NAMELIST,
1740+ NameBuffer)) {
1741+ continue;
1742+ }
1743+
1744+ LOG_DEBUG("DLV INIT_IO AIXNameList\n");
1745+
1746+ if (INIT_IO(node, 0, AIXVGLPtr->vgda_psn + PSN_LVE_REC,
1747+ MAX_SECTORS_LV_ENTRIES, AIXlventHead)) {
1748+ continue;
1749+ }
1750+ AIXlvent = AIXlventHead;
1751+ AIXnamelist = (struct namelist *) NameBuffer;
1752+
1753+ LOG_DEBUG("DLV INIT_IO AIXlvent\n");
1754+ // Search through the LV structs for valid LV entries
1755+ // We're just going to search until all valid LVs are found
1756+ // The max. allowable LVs is 256, and we don't want to
1757+ // search all 256 slots if only 8 are defined; however, there
1758+ // could be gaps in the LV numbering, e.g. 1,2,3,4,5,6,7,8,27,43, etc.
1759+
1760+ for (j = 0, lv_found = 0, all_lvs_found = FALSE;
1761+ !all_lvs_found && j < LVM_MAXLVS; j++, AIXlvent++) {
1762+
1763+ LOG_DEBUG(" ** DVIG:lv_size:%d lvname:[%s] j:%d lv_number:%d ** \n",
1764+ AIXlvent->num_lps, AIXnamelist->name[j], j,
1765+ AIXlvent->lvname);
1766+ LOG_DEBUG(" DVIG:stripe_exp:%u stripesize:%u lv_status:%d\n",
1767+ AIXlvent->striping_width,
1768+ GET_PHYSICAL_PART_SIZE(AIXlvent->stripe_exp),
1769+ AIXlvent->lv_state);
1770+ LOG_DEBUG(" DVIG Group:%x.Access:%x\n",
1771+ (unsigned int) AIXVGLPtr->vg_id.word2,
1772+ AIXlvent->permissions);
1773+ LOG_DEBUG(" DVIG mirror:%d mirror_policy:%d mirwrt:%d \n",
1774+ AIXlvent->mirror, AIXlvent->mirror_policy,
1775+ AIXlvent->mirwrt_consist);
1776+
1777+ // This is the same check we used in "diskedit" and "readdisk"
1778+ if (AIXlvent->lv_state == 0 ||
1779+ AIXlvent->permissions > 0x10) {
1780+ continue;
1781+ }
1782+
1783+ lv_found++;
1784+ if (lv_found == AIXVGLPtr->AIXvgh->numlvs) {
1785+ all_lvs_found = TRUE;
1786+ }
1787+
1788+ LOG_DEBUG(" DVIG lv_found:%d all_lvs_found:%d \n",
1789+ lv_found, all_lvs_found);
1790+
1791+ // Create a new logical volume and place it in the appropriate
1792+ // spot in this VG's volume list. For re-discovery, make sure
1793+ // this volume does not already exist.
1794+ if (!AIXVGLPtr->volume_list[AIXlvent->lvname]) {
1795+				new_LV = new_logical_volume(AIXlvent,
1796+						AIXVGLPtr,
1797+						AIXnamelist->name[j],
1798+						GET_PHYSICAL_PART_SIZE(AIXlvent->stripe_exp));
1803+ if (!new_LV) {
1804+ continue;
1805+ }
1806+ LOG_DEBUG(" DVIG Adding new logical volume %d to group:%x \n",
1807+ new_LV->lv_number,AIXVGLPtr->vg_id.word2);
1808+
1809+ AIXVGLPtr->volume_list[new_LV->lv_number] = new_LV;
1810+ } else {
1811+ LOG_DEBUG("DVIG Updating Vol Exists\n");
1812+ }
1813+ }
1814+
1815+ // Build the le_to_pe_map for each volume that was discovered above.
1816+ // This has to be done after all volumes in the group are discovered
1817+ if ((rc = build_pe_maps(AIXVGLPtr))) {
1818+ continue;
1819+ }
1820+
1821+ check_log_volume_and_pe_maps(AIXVGLPtr);
1822+ }
1823+
1824+ kfree(NameBuffer);
1825+ kfree(AIXlventHead);
1826+
1827+ return 0;
1828+}
1829+
1830+/*
1831+ * Function: new_logical_volume
1832+ *
1833+ * Allocate space for a new LVM logical volume, including space for the
1834+ * PE map
1835+ */
1836+static struct aix_logical_volume *
1837+new_logical_volume(struct lv_entries *AIXlvent,
1838+ struct aix_volume_group *volume_group,
1839+ char *lv_name, u32 stripesize)
1840+{
1841+
1842+ struct aix_logical_volume *new_volume;
1843+ const char *name = "evms_AIXiod";
1844+ const char *resync_name = "evms_AIXresync";
1845+
1846+ LOG_DEBUG(" NLV: lv_number:%d lv_allocated_le:%d lv_size:%d\n",
1847+ AIXlvent->lvname, AIXlvent->num_lps,
1848+ AIXlvent->num_lps * volume_group->pe_size);
1849+
1850+ // Allocate space for the new logical volume.
1851+ new_volume = kmalloc(sizeof (struct aix_logical_volume), GFP_KERNEL);
1852+ if (!new_volume) {
1853+ return NULL;
1854+ }
1855+ memset(new_volume, 0, sizeof (struct aix_logical_volume));
1856+
1857+ // Allocate space for the LE to PE mapping table
1858+ // We add 1 for the allocated le to ease mapping later on, all AIX le are 1 based
1859+ new_volume->le_to_pe_map =
1860+ kmalloc((AIXlvent->num_lps + 1) * sizeof (struct pe_table_entry),
1861+ GFP_KERNEL);
1862+ if (!new_volume->le_to_pe_map) {
1863+ delete_logical_volume(new_volume);
1864+ return NULL;
1865+ }
1866+
1867+ memset(new_volume->le_to_pe_map, 0,
1868+ (AIXlvent->num_lps + 1) * sizeof (struct pe_table_entry));
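+	// Illustrative note: the extra entry gives the table num_lps + 1
+	// slots, so an LV with, say, 4 LPs allocates slots 0..4. The
+	// build_pe_maps() pass below actually stores LEs 0-based
+	// (lp_num - 1), leaving the spare slot as slack.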
1869+
1870+ if (AIXlvent->mirror > AIX_DEFAULT_MIRRORING) {
1871+ new_volume->le_to_pe_map_mir1 =
1872+ kmalloc((AIXlvent->num_lps +
1873+ 1) * sizeof (struct pe_table_entry), GFP_KERNEL);
1874+ if (!new_volume->le_to_pe_map_mir1) {
1875+ delete_logical_volume(new_volume);
1876+ return NULL;
1877+ }
1878+ memset(new_volume->le_to_pe_map_mir1, 0,
1879+ (AIXlvent->num_lps +
1880+ 1) * sizeof (struct pe_table_entry));
1881+ }
1882+
1883+ if (AIXlvent->mirror == AIX_MAX_MIRRORS) {
1884+ new_volume->le_to_pe_map_mir2 =
1885+ kmalloc((AIXlvent->num_lps + 1)
1886+ * sizeof (struct pe_table_entry), GFP_KERNEL);
1887+ if (!new_volume->le_to_pe_map_mir2) {
1888+ delete_logical_volume(new_volume);
1889+ return NULL;
1890+ }
1891+ memset(new_volume->le_to_pe_map_mir2, 0,
1892+ (AIXlvent->num_lps +1)
1893+ * sizeof (struct pe_table_entry));
1894+ }
1895+
1896+ // Initialize the rest of the new volume.
1897+ new_volume->lv_number = AIXlvent->lvname;
1898+ new_volume->lv_size = AIXlvent->num_lps * (volume_group->pe_size);
1899+ new_volume->lv_access = AIXlvent->permissions | EVMS_LV_NEW; // All volumes start new.
1900+ new_volume->lv_status = AIXlvent->lv_state;
1901+ //new_volume->lv_minor = MINOR(1);
1902+ new_volume->mirror_copies = AIXlvent->mirror;
1903+// new_volume->mirror_iterations = AIX_DEFAULT_MIRRORING;
1904+ new_volume->stripes = AIXlvent->striping_width;
1905+ new_volume->stripe_size = stripesize;
1906+ new_volume->stripe_size_shift = evms_cs_log2(stripesize);
1907+ new_volume->pe_size = volume_group->pe_size;
1908+ new_volume->pe_size_shift = evms_cs_log2(volume_group->pe_size);
1909+ new_volume->num_le = AIXlvent->num_lps;
1910+// new_volume->new_volume = TRUE;
1911+ new_volume->group = volume_group;
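+	// Note on the *_shift fields above: when stripe_size and pe_size are
+	// powers of two (e.g. a hypothetical 32768-sector / 16MB PE giving
+	// pe_size_shift = 15), the I/O path can use "sector >> shift" and
+	// "sector & (size - 1)" in place of divide and modulo.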
1912+
1913+ volume_group->numlvs++;
1914+
1915+ sprintf(new_volume->name, "aix/%s", lv_name);
1916+
1917+ if (!AIX_BH_list_pool
1918+ && new_volume->mirror_copies > AIX_DEFAULT_MIRRORING) {
1919+
1920+ // We only need the ReSync thread if we have at least one mirrored LV.
1921+ // You can't ReSync a non-mirrored drive
1922+
1923+ AIX_BH_list_pool =
1924+ evms_cs_create_pool(sizeof (struct aix_mirror_bh),
1925+ "EVMS_AIX_BH", aix_notify_cache_ctor,
1926+ NULL);
1927+		if (!AIX_BH_list_pool) {
1928+			return NULL;
1929+		}
1930+
1931+		AIX_mirror_read_retry_thread =
1932+		    evms_cs_register_thread(AIXiod, NULL, name);
1933+
1934+		AIX_mirror_resync_thread =
1935+		    evms_cs_register_thread(AIXresync, NULL,
1936+					    resync_name);
1937+ }
1938+
1939+ LOG_DEBUG("NLV lv_number:%d name:%s lv_size " PFU64 " \n",
1940+ new_volume->lv_number, new_volume->name, new_volume->lv_size);
1941+ LOG_DEBUG("NLV stripe_size:%d stripe_size_shift:%d\n",
1942+ new_volume->stripe_size, new_volume->stripe_size_shift);
1943+
1944+ return new_volume;
1945+}
1946+
1947+/*
1948+ * Function: aix_notify_cache_ctor
1949+ * this function initializes the b_wait field in the buffer heads
1950+ * in our private buffer head pool.
1951+ */
1952+static void
1953+aix_notify_cache_ctor(void *foo, kmem_cache_t * cachep, unsigned long flags)
1954+{
1955+ if ((flags & (SLAB_CTOR_VERIFY | SLAB_CTOR_CONSTRUCTOR)) ==
1956+ SLAB_CTOR_CONSTRUCTOR) {
1957+ struct aix_mirror_bh *rbh = (struct aix_mirror_bh *) foo;
1958+ memset(rbh, 0, sizeof (struct aix_mirror_bh));
1959+ rbh->remaining = (atomic_t) ATOMIC_INIT(0);
1960+ init_waitqueue_head(&rbh->bh_req.b_wait);
1961+ }
1962+}
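+/*
+ * Under the 2.4 slab interface the constructor can also be invoked for
+ * cache verification passes, so the SLAB_CTOR_VERIFY/SLAB_CTOR_CONSTRUCTOR
+ * test above makes sure we only initialize objects when a real
+ * construction pass is taking place.
+ */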
1963+
1964+/*
1965+ * Function: build_pe_maps
1966+ *
1967+ * After all logical volumes have been discovered, the mappings from
1968+ * logical extents to physical extents must be constructed. Each PV
1969+ * contains a map on-disk of its PEs. Each PE map entry contains the
1970+ * logical volume number and the logical extent number on that volume.
1971+ * Our internal map is the reverse of this map for each volume, listing
1972+ * the PV node and sector offset for every logical extent on the volume.
1973+ */
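+/*
+ * Sketch of the inversion (hypothetical numbers): if PV 2's on-disk PP
+ * map says PP #7 belongs to lv_index 3 at lp_num 5, then this pass sets
+ * volume_list[2]->le_to_pe_map[4].owning_pv to PV 2 and records PP #7's
+ * sector offset; one entry per LE, per mirror copy.
+ */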
1974+static int
1975+ build_pe_maps(struct aix_volume_group *volume_group)
1976+{
1977+ struct partition_list_entry *partition;
1978+ struct partition_list_entry *mirror_partition;
1979+ struct pp_entries *AIXppent, *AIXppent_buff;
1980+ struct pv_header *AIXpvh;
1981+ u64 offset;
1982+ u32 le_number;
1983+ u32 j, pp_count, pvh_pos;
1984+ u32 MirrorFound;
1985+ u32 pvh_posn[LVM_MAXPVS];
1986+ u32 rc;
1987+#ifdef EVMS_DEBUG_MIRRORS
1988+ u32 lv_found, all_lvs_found;
1989+ u32 mirs = 0;
1990+#endif
1991+
1992+ LOG_DEBUG(" *** BPEM ***\n");
1993+ // For every partition in this VG
1994+
1995+ AIXppent_buff = kmalloc(AIX_SECTOR_SIZE * PHYS_VOL_OFFSET, GFP_KERNEL);
1996+ if (!AIXppent_buff) {
1997+ return -ENOMEM;
1998+ }
1999+
2000+ memset(AIXppent_buff, 0, AIX_SECTOR_SIZE * PHYS_VOL_OFFSET);
2001+	memset(pvh_posn, 0, LVM_MAXPVS * sizeof(u32));
2002+
2003+ AIXpvh = kmalloc(AIX_SECTOR_SIZE, GFP_KERNEL);
2004+ if (!AIXpvh) {
2005+ kfree(AIXppent_buff);
2006+ return -ENOMEM;
2007+ }
2008+
2009+ memset(AIXpvh, 0, AIX_SECTOR_SIZE);
2010+
2011+ LOG_DEBUG(" BPEM AIXppent_buff:%d \n",
2012+ (AIX_SECTOR_SIZE * PHYS_VOL_OFFSET));
2013+
2014+	// This next section calculates the sector spacing between the PV
2015+	// info areas for the VG. AIX doesn't always space the info the same
2016+	// way; it can be 17 or 34 sectors apart depending on the PE size selected.
2017+
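+	// (The spacing itself falls out of AIX_pvh_data_posn() below, which
+	// rounds each PV's pp_entries area up to a whole number of 512-byte
+	// sectors, so the per-PV slot size changes with the PP count a given
+	// PE size implies.)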
2018+ rc = AIX_pvh_data_posn(volume_group->vgda_psn, pvh_posn, volume_group->partition_list, volume_group->AIXvgh->numpvs);
2019+
2020+ if (rc != 0) {
2021+ kfree(AIXppent_buff);
2022+ kfree(AIXpvh);
2023+ return (rc);
2024+ }
2025+
2026+ for (partition = volume_group->partition_list; partition;
2027+ partition = partition->next) {
2028+
2029+ LOG_DEBUG(" BPEM partition:%p next:%p\n", partition,
2030+ partition->next);
2031+
2032+ pvh_pos = pvh_posn[partition->pv_number];
2033+
2034+ LOG_DEBUG(" BPEM pvh_pos:%d pv_number:%d\n", pvh_pos, partition->pv_number);
2035+
2036+ if (INIT_IO(partition->logical_node, 0, pvh_pos, 1, AIXpvh)) {
2037+ kfree(AIXppent_buff);
2038+ kfree(AIXpvh);
2039+ return -EIO;
2040+ }
2041+ // For every entry in the PE map, calculate the PE's sector offset
2042+ // and update the correct LV's PE map. LV number of 0 marks an unused PE.
2043+ // For re-discovery, only compute entries for new volumes.
2044+
2045+ if (INIT_IO(partition->logical_node, 0, pvh_pos, AIX_PVHPP_LENGTH,
2046+ AIXppent_buff)) {
2047+ kfree(AIXppent_buff);
2048+ kfree(AIXpvh);
2049+ return -EIO;
2050+ }
2051+
2052+ AIXppent = AIXppent_buff;
2053+ AIXppent++;
2054+
2055+ pp_count = AIXpvh->pp_count;
2056+
2057+ LOG_DEBUG("BPEM AIXpvh data: pp_count:%d psn_part1:%d pv_id1:%d pv_id2:%d pv_id3:%d pv_id4:%d pv_num:%d pv_state:%d vgdas:%d res1:%d res2:%d\n", AIXpvh->pp_count,
2058+ AIXpvh->psn_part1,
2059+ AIXpvh->pv_id.word1,
2060+ AIXpvh->pv_id.word2,
2061+ AIXpvh->pv_id.word3,
2062+ AIXpvh->pv_id.word4,
2063+ AIXpvh->pv_num,
2064+ AIXpvh->pv_state, AIXpvh->pvnum_vgdas, AIXpvh->res1, AIXpvh->res2);
2065+
2066+ LOG_DEBUG(" PE Map: volgrp:%x AIXpvh->pv_num:%d partition:%p next:%p lv_index:%d pp_count:%d\n",
2067+ volume_group->vg_id.word2, AIXpvh->pv_num, partition,
2068+ partition->next, AIXppent->lv_index, pp_count);
2069+
2070+ for (j = 0; j < pp_count; j++,AIXppent++) {
2071+ if (!AIXppent->lv_index || AIXppent->pp_state == AIX_LVM_LVUNDEF) {
2072+ continue;
2073+ }
2074+
2075+			if (!volume_group->volume_list[AIXppent->lv_index - 1]) {
2076+				LOG_SERIOUS("Failed attempt to access volume without memory allocation lv:%d\n",
2077+					    AIXppent->lv_index - 1);
2078+				continue;
2079+			}
2080+
2081+			LOG_EXTRA(" -- pv:%x pp:%d st:%d nm:%s lv:%d lp:%d cp:%d fst v:%d fst p:%d snd v:%d snd p:%d \n",
2082+				  volume_group->vg_id.word2, j + 1,
2083+				  AIXppent->pp_state,
2084+				  volume_group->volume_list[AIXppent->lv_index -1]->name,
2085+				  AIXppent->lv_index, AIXppent->lp_num,
2086+				  AIXppent->copy, AIXppent->fst_alt_vol,
2087+				  AIXppent->fst_alt_part,
2088+				  AIXppent->snd_alt_vol,
2089+				  AIXppent->snd_alt_part);
2090+
2091+			le_number = AIXppent->lp_num - 1;	// AIX lp's start @ 1, we want a 0 index
2092+			offset = ((j * (volume_group->pe_size)) + AIXpvh->psn_part1);
2093+
2094+			LOG_DEBUG(" PE Map: le_number:%d partition:%p lv_index:%d lv_name:%s\n",
2095+				  le_number, partition, AIXppent->lv_index,
2096+				  volume_group->volume_list[AIXppent->lv_index -1]->name);
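+			// Worked example (hypothetical numbers): with a PE of
+			// 32768 sectors and psn_part1 = 4608, PP j=2 lives at
+			// sector 2 * 32768 + 4608 = 70144 on this PV, and the
+			// LV's LE (lp_num - 1) is pointed at that offset.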
2097+
2098+ if (volume_group->volume_list[AIXppent->lv_index -1]->le_to_pe_map
2099+ && le_number <= volume_group->volume_list[AIXppent->lv_index - 1]->num_le) {
2100+
2101+ volume_group->volume_list[AIXppent->lv_index -1]->le_to_pe_map[le_number].owning_pv = partition;
2102+ volume_group->volume_list[AIXppent->lv_index -1]->le_to_pe_map[le_number].pe_sector_offset = offset;
2103+ volume_group->volume_list[AIXppent->lv_index -1]->le_to_pe_map[le_number].pp_state = AIXppent->pp_state;
2104+ }
2105+
2106+ if (volume_group->volume_list[AIXppent->lv_index -1]->mirror_copies >
2107+ AIX_DEFAULT_MIRRORING) {
2108+
2109+ LOG_EXTRA(" PE Map: Mirror found lv:%d -- \n",
2110+ AIXppent->lv_index);
2111+
2112+ for (mirror_partition = volume_group->partition_list,
2113+ MirrorFound = FALSE;
2114+ mirror_partition && !MirrorFound;
2115+ mirror_partition = mirror_partition->next) {
2116+
2117+ if (mirror_partition->pv_number == AIXppent->fst_alt_vol) {
2118+
2119+ offset = (((AIXppent->fst_alt_part - 1) * (volume_group->pe_size)) + AIXpvh->psn_part1);
2120+
2121+ volume_group->volume_list[AIXppent->lv_index -1]->le_to_pe_map_mir1[le_number].owning_pv = mirror_partition;
2122+ volume_group->volume_list[AIXppent->lv_index -1]->le_to_pe_map_mir1[le_number].pe_sector_offset = offset;
2123+ volume_group->volume_list[AIXppent->lv_index -1]->le_to_pe_map_mir1[le_number].pp_state = AIXppent->pp_state;
2124+
2125+ LOG_EXTRA(" PE Map: mirror_partition:%p \n",
2126+ mirror_partition);
2127+					LOG_EXTRA(" PE Map: mirror_sector_offset:%d\n",
2128+ AIXppent->fst_alt_part);
2129+
2130+ MirrorFound = TRUE;
2131+ }
2132+ }
2133+
2134+ if (volume_group->volume_list[AIXppent->lv_index -1]->mirror_copies == AIX_MAX_MIRRORS) {
2135+
2136+ for (mirror_partition = volume_group->partition_list,
2137+ MirrorFound = FALSE;
2138+ mirror_partition && !MirrorFound;
2139+ mirror_partition = mirror_partition->next) {
2140+
2141+ if (mirror_partition->pv_number == AIXppent->snd_alt_vol) {
2142+
2143+ offset = (((AIXppent->snd_alt_part - 1) * (volume_group->pe_size)) + AIXpvh->psn_part1);
2144+
2145+ volume_group->volume_list[AIXppent->lv_index-1]->le_to_pe_map_mir2[le_number].owning_pv = mirror_partition;
2146+ volume_group->volume_list[AIXppent->lv_index-1]->le_to_pe_map_mir2[le_number].pe_sector_offset = offset;
2147+ volume_group->volume_list[AIXppent->lv_index-1]->le_to_pe_map_mir2[le_number].pp_state = AIXppent->pp_state;
2148+
2149+ LOG_EXTRA(" PE Map: mirror_partition2:%p \n",
2150+ mirror_partition);
2151+						LOG_EXTRA(" PE Map: mirror_sector_offset2:%d\n",
2152+ AIXppent->snd_alt_part);
2153+
2154+ MirrorFound = TRUE;
2155+ }
2156+ }
2157+ }
2158+
2159+ } // End of if mirroring is enabled
2160+ }
2161+ }
2162+
2163+// LOG_EXTRA(" PE Map: PE maps:%d Mirror count:%d -- \n", lvs, mirs);
2164+
2165+#ifdef EVMS_DEBUG_MIRRORS
2166+ for (mirs = 0, lv_found = 0, all_lvs_found = FALSE;
2167+ !all_lvs_found && mirs < LVM_MAXLVS; mirs++) {
2168+
2169+ if (volume_group->volume_list[mirs] != NULL) {
2170+ if (volume_group->volume_list[mirs]->lv_status ==
2171+ LV_ACTIVE) {
2172+
2173+ lv_found++;
2174+
2175+ LOG_DEBUG(" PE Map: owning part lv %d -- %p\n",
2176+ mirs,
2177+ volume_group->volume_list[mirs]->
2178+ le_to_pe_map[0].owning_pv);
2179+ if (volume_group->volume_list[mirs]->
2180+ mirror_copies > AIX_DEFAULT_MIRRORING) {
2181+ LOG_DEBUG(" PE Map: mirror_partition lv %d -- %p \n",
2182+ mirs,
2183+ volume_group->volume_list[mirs]->
2184+ le_to_pe_map_mir1[0].owning_pv);
2185+ }
2186+ if (volume_group->volume_list[mirs]->
2187+ mirror_copies == AIX_MAX_MIRRORS) {
2188+ LOG_DEBUG(" PE Map: mirror_partition lv %d -- %p \n",
2189+ mirs,
2190+ volume_group->volume_list[mirs]->
2191+ le_to_pe_map_mir2[0].owning_pv);
2192+ }
2193+ }
2194+ if (lv_found == volume_group->AIXvgh->numlvs) {
2195+ all_lvs_found = TRUE;
2196+ LOG_DEBUG(" PE Map: all_lvs_found\n");
2197+ }
2198+ }
2199+ }
2200+#endif
2201+
2202+ kfree(AIXpvh);
2203+ kfree(AIXppent_buff);
2204+
2205+ return 0;
2206+}
2207+
2208+/*
2209+ * Function: check_log_volume_and_pe_maps
2210+ *
2211+ * Make sure all volumes in this group have valid LE-to-PE maps.
2212+ * Any volume that doesn't is deleted. This is safe for re-discovery
2213+ * because only new volumes could have corrupted PE maps.
2214+ */
2215+static int
2216+check_log_volume_and_pe_maps(struct aix_volume_group *group)
2217+{
2218+ struct aix_logical_volume *volume;
2219+ int i, j, lv_found, all_lvs_found;
2220+
2221+ LOG_DEBUG(" check_pe_map.\n");
2222+
2223+ for (i = 0, all_lvs_found = FALSE, lv_found = 0;
2224+ !all_lvs_found && i < LVM_MAXLVS; i++) {
2225+ if (!group->volume_list[i]) {
2226+ LOG_DEBUG(" CPEM No Volume %d found \n", i);
2227+ continue;
2228+ }
2229+
2230+ volume = group->volume_list[i];
2231+ if (!volume->le_to_pe_map) {
2232+ LOG_DEBUG(" CPEM Volume %s has no PE map.\n",
2233+ volume->name);
2234+ delete_logical_volume(volume);
2235+ continue;
2236+ }
2237+
2238+ LOG_DEBUG(" CPEM volume %s num_le: %d \n", volume->name,
2239+ volume->num_le);
2240+
2241+ lv_found++;
2242+
2243+ if (lv_found == group->AIXvgh->numlvs) {
2244+ all_lvs_found = TRUE;
2245+ }
2246+
2247+ for (j = 0; j < volume->num_le; j++) {
2248+ if (!volume->le_to_pe_map[j].owning_pv ||
2249+ !volume->le_to_pe_map[j].pe_sector_offset) {
2250+ LOG_SERIOUS(" CPEM Volume (%s) incomplete PE map (LE %d) \n",
2251+ volume->name, j);
2252+ volume->lv_access |= EVMS_LV_INCOMPLETE;
2253+ }
2254+
2255+ if (volume->mirror_copies > AIX_DEFAULT_MIRRORING) {
2256+			if (!volume->le_to_pe_map_mir1[j].owning_pv ||
2257+			    !volume->le_to_pe_map_mir1[j].pe_sector_offset) {
2259+ LOG_SERIOUS(" CPEM Volume (%s) incomplete PE mirror map 1 (LE %d) \n",
2260+ volume->name, j);
2261+ volume->lv_access |= EVMS_LV_INCOMPLETE;
2262+ }
2263+
2264+ if (volume->mirror_copies == AIX_MAX_MIRRORS) {
2265+				if (!volume->le_to_pe_map_mir2[j].owning_pv ||
2266+				    !volume->le_to_pe_map_mir2[j].pe_sector_offset) {
2269+ LOG_SERIOUS(" CPEM Volume (%s) incomplete PE mirror map 2 (LE %d) \n",
2270+ volume->name, j);
2271+ volume->lv_access |= EVMS_LV_INCOMPLETE;
2272+ }
2273+ }
2274+ }
2275+ }
2276+ }
2277+
2278+ LOG_EXTRA(" Leaving check_pe_map.\n");
2279+ return 0;
2280+}
2281+
2282+/*
2283+ * Function: export_volumes
2284+ *
2285+ * The last thing this VGE must do is take each constructed volume and
2286+ * place it back on the evms logical partition list.
2287+ */
2288+static int
2289+export_volumes(struct evms_logical_node **evms_partition_list)
2290+{
2291+ struct aix_volume_group *AIXVGLPtr;
2292+ struct evms_logical_node *new_node;
2293+ struct aix_logical_volume *volume;
2294+ int j, lv_found, all_lvs_found;
2295+ int count = 0;
2296+
2297+ for (AIXVGLPtr = AIXVolumeGroupList; AIXVGLPtr; AIXVGLPtr = AIXVGLPtr->next) {
2298+
2299+ if (!(AIXVGLPtr->flags & AIX_VG_DIRTY)) {
2300+ LOG_DEBUG(" EV Existing group(%d), not dirty, skipping\n",
2301+ AIXVGLPtr->vg_id.word2);
2302+ continue;
2303+ }
2304+ LOG_DEBUG(" Exporting all new volumes numpvs:%d numlvs:%d \n",
2305+ AIXVGLPtr->AIXvgh->numpvs, AIXVGLPtr->numlvs);
2306+
2307+ // Export every valid volume in the group. For re-discovery,
2308+ // make sure we are only exporting "new" volumes.
2309+
2310+ for (j = 0, all_lvs_found = FALSE, lv_found = 0;
2311+ !all_lvs_found && j < LVM_MAXLVS; j++) {
2312+ if (AIXVGLPtr->volume_list[j] != NULL) {
2313+ if (AIXVGLPtr->volume_list[j]->lv_access & EVMS_LV_NEW) {
2314+
2315+ LOG_DEBUG(" EV Checking LV:[%d] volume:%p\n",
2316+ j,AIXVGLPtr->volume_list[j]);
2317+
2318+ volume = AIXVGLPtr->volume_list[j];
2319+ lv_found++;
2320+
2321+ if (lv_found == AIXVGLPtr->AIXvgh->numlvs) {
2322+ all_lvs_found = TRUE;
2323+ }
2324+ // For new volumes, create a new EVMS node and
2325+ // initialize the appropriate fields.
2326+ if (evms_cs_allocate_logical_node(&new_node)) {
2327+ LOG_DEBUG(" Export Vol Error allocating node !!\n");
2328+ continue;
2329+ } else {
2330+ LOG_DEBUG(" EV Node allocated OK\n");
2331+ }
2332+
2333+// volume->new_volume = 0;
2334+ volume->volume_node = new_node;
2335+ volume->lv_access &= (~EVMS_LV_NEW);
2336+ new_node->hardsector_size = AIXVGLPtr->hard_sect_size;
2337+ new_node->block_size = AIXVGLPtr->block_size;
2338+ new_node->plugin = &plugin_header;
2339+ new_node->private = volume;
2340+ new_node->total_vsectors = volume->lv_size;
2341+
2342+ LOG_DEBUG(" EV volume->name:[%s]\n",
2343+ volume->name);
2344+
2345+ strncpy(new_node->name,volume->name,
2346+ EVMS_VOLUME_NAME_SIZE + 1);
2347+
2348+ // Is the volume read-only?
2349+ if (!(volume->lv_access & AIX_LV_WRITE)
2350+ || volume->lv_access & EVMS_LV_INCOMPLETE)
2351+ {
2352+ new_node->flags |= EVMS_VOLUME_SET_READ_ONLY;
2353+ LOG_DEBUG(" EV Read Only volume->lv_access:%d\n",
2354+ volume->lv_access);
2355+ }
2356+
2357+ evms_cs_add_logical_node_to_list(evms_partition_list,
2358+ new_node);
2359+ count++;
2360+
2361+ LOG_DEBUG(" Exporting LVM volume %p new_node:%p ESD->volume_name[%s]\n",
2362+ volume, new_node,new_node->name);
2363+ } else {
2364+ evms_cs_add_logical_node_to_list(evms_partition_list,
2365+ AIXVGLPtr->volume_list[j]->volume_node);
2366+ count++;
2367+ LOG_DEBUG(" ELV vol_list[%d]%p\n", j,
2368+ AIXVGLPtr->volume_list[j]);
2369+ }
2370+ } else {
2371+ LOG_DEBUG(" EV Checking LV:[%d] == NULL\n",j);
2372+ }
2373+ } // end checking all lvs
2374+
2375+ AIXVGLPtr->flags &= ~AIX_VG_DIRTY;
2376+ }
2377+
2378+ return count;
2379+}
2380+
2381+/*
2382+ * Function: delete_logical_volume
2383+ *
2384+ * This function deletes the in-memory representation of a single LVM
2385+ * logical volume, including its PE map and any snapshot data. It does
2386+ * not alter the parent volume group, except to remove this volume from
2387+ * its volume list.
2388+ */
2389+static int
2390+delete_logical_volume(struct aix_logical_volume *volume)
2391+{
2392+ struct aix_volume_group *group = volume->group;
2393+
2394+ LOG_DEBUG(" Deleting volume %s\n", volume->name);
2395+
2396+ // Now free up all the memory. This includes the LE-to-PE map, any
2397+ // mirror PEs, etc.
2398+ if (volume->le_to_pe_map) {
2399+ kfree(volume->le_to_pe_map);
2400+ volume->le_to_pe_map = NULL;
2401+ }
2402+
2403+ if (volume->le_to_pe_map_mir1) {
2404+ kfree(volume->le_to_pe_map_mir1);
2405+ volume->le_to_pe_map_mir1 = NULL;
2406+ }
2407+
2408+ if (volume->le_to_pe_map_mir2) {
2409+ kfree(volume->le_to_pe_map_mir2);
2410+ volume->le_to_pe_map_mir2 = NULL;
2411+ }
2412+ // Remove this volume from the volume-group's list.
2413+ if (group && group->volume_list[volume->lv_number] == volume) {
2414+ group->volume_list[volume->lv_number] = NULL;
2415+ group->numlvs--;
2416+ }
2417+
2418+ kfree(volume);
2419+
2420+ return 0;
2421+}
2422+
2423+/* Function: remove_group_from_list
2424+ *
2425+ * Remove an LVM volume group from the global LVM list.
2426+ */
2427+static int
2428+remove_group_from_list(struct aix_volume_group *group)
2429+{
2430+ struct aix_volume_group **p_group;
2431+
2432+ for (p_group = &AIXVolumeGroupList; *p_group;
2433+ p_group = &(*p_group)->next) {
2434+ if (*p_group == group) {
2435+ *p_group = (*p_group)->next;
2436+ group->next = NULL;
2437+ break;
2438+ }
2439+ }
2440+ return 0;
2441+}
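+/*
+ * The double-pointer walk above lets the head and interior cases share
+ * one unlink path: p_group always addresses the link that points at the
+ * current group, so "*p_group = (*p_group)->next" also works when the
+ * group is AIXVolumeGroupList itself.
+ */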
2442+
2443+/*
2444+ * Function: delete_aix_node
2445+ *
2446+ * This function deletes the in-memory representation of an LVM
2447+ * logical volume. Right now it makes a lot of assumptions about
2448+ * the data in the group not being corrupted. It would be possible
2449+ * to put in a lot of consistency checks before deleting everything
2450+ * to indicate if problems have occurred during the lifetime of the
2451+ * volume and its volume group.
2452+ */
2453+static int
2454+delete_aix_node(struct evms_logical_node *logical_node)
2455+{
2456+ struct aix_logical_volume *volume =
2457+ (struct aix_logical_volume *) (logical_node->private);
2458+ struct aix_volume_group *group = volume->group;
2459+
2460+ if (delete_logical_volume(volume)) {
2461+ return -EINVAL;
2462+ }
2463+ // If we just removed the last volume from this group, the entire group
2464+ // can also be deleted.
2465+ if (group && group->numlvs == 0) {
2466+ remove_group_from_list(group);
2467+ deallocate_volume_group(group);
2468+ }
2469+ // Free the logical node.
2470+ evms_cs_deallocate_logical_node(logical_node);
2471+
2472+ return 0;
2473+}
2474+
2475+/* Function: deallocate_volume_group
2476+ *
2477+ * This function deletes the entire in-memory representation of an LVM
2478+ * volume group, including all partitions and logical volumes. If this
2479+ * group is on the VGE's volume group list, it is removed.
2480+ */
2481+static int
2482+deallocate_volume_group(struct aix_volume_group *group)
2483+{
2484+ struct partition_list_entry *partition;
2485+ struct partition_list_entry *next_part;
2486+ int i;
2487+
2488+ LOG_DEBUG(" Deleting volume group %x\n", group->vg_id.word2);
2489+
2490+ // Delete all partitions from the group's list.
2491+ for (partition = group->partition_list; partition;
2492+ partition = next_part) {
2493+
2494+ next_part = partition->next;
2495+
2496+ if (partition->logical_node) {
2497+ // Send a delete command down to the partition manager.
2498+ LOG_DEBUG(" Deleting PV %d from group %x\n",
2499+ partition->pv_number, group->vg_id.word2);
2500+ DELETE(partition->logical_node);
2501+ }
2502+ kfree(partition);
2503+ }
2504+
2505+ // Delete all logical volumes, and the array of pointers.
2506+ for (i = 0; i < LVM_MAXLVS; i++) {
2507+ if (group->volume_list[i]) {
2508+ delete_logical_volume(group->volume_list[i]);
2509+ }
2510+ }
2511+
2512+ kfree(group);
2513+
2514+ return 0;
2515+}
2516+
2517+/* Function: end_discover_aix
2518+ *
2519+ * The discovery process at the region-manager level is now iterative,
2520+ * much like the EVMS feature level. To accomplish this correctly, and
2521+ * also to accomplish partial volume discovery, a second discover
2522+ * entry point is needed, so EVMS can tell the region managers that
2523+ * discovery is over, and to finish up any discovery that is not yet
2524+ * complete. When this function is called, it should be assumed that
2525+ * the node list has had nothing new added to it since the last call
2526+ * of the regular discover function. Therefore, when this function is
2527+ * called, we do not need to try to discovery any additional volume
2528+ * groups. We will, however, look for logical volumes once more. This
2529+ * gives us the ability to export (read-only) volumes that have
2530+ * partially corrupted LE maps due to missing PVs in their VG.
2531+ */
2532+static int
2533+end_discover_aix(struct evms_logical_node **evms_logical_disk_head)
2534+{
2535+
2536+ int rc;
2537+
2538+ MOD_INC_USE_COUNT;
2539+ LOG_DEBUG("Final Discovery:\n");
2540+
2541+ rc = discover_logical_volumes();
2542+
2543+ if (!rc) {
2544+ rc = export_volumes(evms_logical_disk_head);
2545+
2546+ lvm_cleanup();
2547+ }
2548+
2549+ MOD_DEC_USE_COUNT;
2550+ return rc;
2551+}
2552+
2553+/****************************************************
2554+* Function: AIX_alloc_wbh
2555+*
2556+* Allocate buffer heads from the pool and return them as a linked list
2557+*
2558+*
2559+*****************************************************/
2560+static struct aix_mirror_bh *
2561+AIX_alloc_wbh(struct evms_logical_node *node,
2562+ struct evms_logical_node *node2,
2563+ struct evms_logical_node *node3,
2564+ struct buffer_head *bh,
2565+ u32 mirror_copies, u32 le, u64 new_sector2, u64 new_sector3)
2566+{
2567+ struct aix_mirror_bh *tmp_bh = NULL, *head_bh = NULL;
2568+ int i;
2569+
2570+ head_bh = evms_cs_allocate_from_pool(AIX_BH_list_pool, EVMS_BLOCKABLE);
2571+
2572+ if (!head_bh) {
2573+ LOG_SERIOUS("Unable to allocate memory for mirror pool line:%d\n",
2574+ __LINE__);
2575+ return NULL;
2576+ }
2577+
2578+ head_bh->master_bh = bh;
2579+ head_bh->mirror_bh_list = NULL;
2580+ head_bh->remaining = (atomic_t) ATOMIC_INIT(0);
2581+
2582+ for (i = AIX_DEFAULT_MIRRORING; i <= mirror_copies; i++) {
2583+
2584+ tmp_bh =
2585+ evms_cs_allocate_from_pool(AIX_BH_list_pool,
2586+ EVMS_BLOCKABLE);
2587+		if (!tmp_bh) {
2588+			LOG_SERIOUS("Unable to allocate memory for mirror pool line:%d\n",
2589+				    __LINE__);
2590+			// Give back anything already chained before bailing out.
2591+			while ((tmp_bh = head_bh->mirror_bh_list)) {
+				head_bh->mirror_bh_list = tmp_bh->next_r1;
+				evms_cs_deallocate_to_pool(AIX_BH_list_pool, tmp_bh);
+			}
+			evms_cs_deallocate_to_pool(AIX_BH_list_pool, head_bh);
+			return NULL;
+		}
2592+
2593+ tmp_bh->next_r1 = head_bh->mirror_bh_list;
2594+ head_bh->mirror_bh_list = tmp_bh;
2595+ atomic_inc(&head_bh->remaining);
2596+
2597+ memcpy(&tmp_bh->bh_req, bh, sizeof (struct buffer_head));
2598+ tmp_bh->remaining = (atomic_t) ATOMIC_INIT(0);
2599+ init_waitqueue_head(&tmp_bh->bh_req.b_wait);
2600+ //tmp_bh->bh_req.b_size = bh->b_size;
2601+
2602+ switch (i) {
2603+
2604+ case AIX_DEFAULT_MIRRORING:
2605+ tmp_bh->node = node;
2606+ tmp_bh->bh_req.b_rsector = bh->b_rsector;
2607+ break;
2608+
2609+ case AIX_FIRST_MIRROR:
2610+ tmp_bh->node = node2;
2611+ tmp_bh->bh_req.b_rsector = new_sector2;
2612+ break;
2613+
2614+ case AIX_MAX_MIRRORS:
2615+ tmp_bh->node = node3;
2616+ tmp_bh->bh_req.b_rsector = new_sector3;
2617+ break;
2618+ }
2619+
2620+ tmp_bh->bh_req.b_end_io = AIX_handle_write_mirror_drives; //setup callback routine
2621+ tmp_bh->bh_req.b_private = (void *) head_bh;
2622+
2623+ }
2624+
2625+ return head_bh;
2626+
2627+}
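+/*
+ * Resulting layout (sketch): head_bh keeps the caller's master_bh and a
+ * `remaining' count, with one cloned buffer head per mirror copy pushed
+ * onto mirror_bh_list (most recently built copy first). Each clone's
+ * b_end_io is AIX_handle_write_mirror_drives, which completes the master
+ * once `remaining' reaches zero.
+ */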
2628+
2629+/****************************************************
2630+* Function: AIX_handle_write_mirror_drives
2631+*
2632+* Handles a write from a set of mirrored AIX LVs
2633+*
2634+*
2635+*
2636+*****************************************************/
2637+static void
2638+AIX_handle_write_mirror_drives(struct buffer_head *bh, int uptodate)
2639+{
2640+ struct aix_logical_volume *volume;
2641+ struct evms_logical_node *node;
2642+ struct aix_mirror_bh *tmp_bh = NULL, *tmp_bh2 = NULL;
2643+ kdev_t tmp_b_rdev;
2644+ u32 count, le = 0;
2645+
2646+ tmp_bh = (struct aix_mirror_bh *) bh->b_private;
2647+ tmp_b_rdev = tmp_bh->master_bh->b_rdev;
2648+ node = tmp_bh->node;
2649+ volume = (struct aix_logical_volume *) node->private;
2650+
2651+ LOG_DEBUG("AHWMD node:%p bh_flags:%lu uptodate:%d mirror_copies:%d \n",
2652+ node, bh->b_state, uptodate, volume->mirror_copies);
2653+
2654+ if (!uptodate) {
2655+ le = tmp_bh->le;
2656+
2657+ switch (tmp_bh->iteration) {
2658+ case AIX_DEFAULT_MIRRORING:
2659+ volume->le_to_pe_map[le].pp_state += AIX_LVM_LVSTALE;
2660+ break;
2661+
2662+ case AIX_FIRST_MIRROR:
2663+ volume->le_to_pe_map_mir1[le].pp_state +=
2664+ AIX_LVM_LVSTALE;
2665+ break;
2666+
2667+ case AIX_MAX_MIRRORS:
2668+ volume->le_to_pe_map_mir2[le].pp_state +=
2669+ AIX_LVM_LVSTALE;
2670+ break;
2671+ }
2672+
2673+ AIX_evms_cs_notify_lv_io_error(node);
2674+ }
2675+
2676+ if (atomic_dec_and_test(&tmp_bh->remaining)) {
2677+ tmp_bh->master_bh->b_end_io(tmp_bh->master_bh, uptodate);
2678+ tmp_bh2 = tmp_bh->mirror_bh_list;
2679+ evms_cs_deallocate_to_pool(AIX_BH_list_pool, tmp_bh);
2680+
2681+ while (tmp_bh2) {
2682+ tmp_bh = tmp_bh2->next_r1;
2683+ evms_cs_deallocate_to_pool(AIX_BH_list_pool, tmp_bh2);
2684+ tmp_bh2 = tmp_bh;
2685+ }
2686+
2687+ evms_cs_volume_request_in_progress(tmp_b_rdev,
2688+ AIX_DECREMENT_REQUEST,
2689+ &count);
2690+ }
2691+
2692+ return;
2693+}
2694+
2695+/****************************************************
2696+* Function: AIX_alloc_rbh
2697+*
2698+* Allocate a buffer head from the pool for a mirrored read or resync request
2699+*
2700+*
2701+*****************************************************/
2702+static struct aix_mirror_bh *
2703+AIX_alloc_rbh(struct evms_logical_node *node,
2704+ struct buffer_head *bh,
2705+ u32 mirror_copies, u32 le, u64 org_sector, int cmd)
2706+{
2707+ struct aix_mirror_bh *tmp_bh = NULL;
2708+
2709+ tmp_bh = evms_cs_allocate_from_pool(AIX_BH_list_pool, EVMS_BLOCKABLE);
2710+
2711+ if (!tmp_bh) {
2712+ LOG_SERIOUS
2713+ ("Unable to allocate memory for mirror pool line:%d\n",
2714+ __LINE__);
2715+ return NULL;
2716+ }
2717+
2718+ memcpy(&tmp_bh->bh_req, bh, sizeof (struct buffer_head));
2719+ tmp_bh->remaining = (atomic_t) ATOMIC_INIT(0);
2720+ tmp_bh->node = node;
2721+ tmp_bh->master_bh = bh;
2722+ tmp_bh->iteration = AIX_FIRST_MIRROR;
2723+ //tmp_bh->eio.rsector = eio->rsector;
2724+ //tmp_bh->eio.rsize = eio->rsize;
2725+ tmp_bh->le = le;
2726+ //tmp_bh->eio.bh = &tmp_bh->bh_req;
2727+
2728+ if (cmd == AIX_LV_READ) {
2729+ tmp_bh->bh_req.b_end_io = AIX_handle_read_mirror_drives; //setup callback routine
2730+ } else {
2731+ tmp_bh->bh_req.b_end_io = AIX_sync_mirrored_partitions; //setup callback routine
2732+ }
2733+
2734+ tmp_bh->bh_req.b_private = (void *) tmp_bh;
2735+
2736+ tmp_bh->cmd = cmd;
2737+ tmp_bh->next_r1 = NULL;
2738+ tmp_bh->node = node;
2739+
2740+ return tmp_bh;
2741+
2742+}
2743+
2744+/****************************************************
2745+* Function: AIX_reschedule_retry
2746+*
2747+* reschedule a read of one of our mirror copies
2748+*
2749+*
2750+*****************************************************/
2751+static void
2752+AIX_reschedule_retry(struct aix_mirror_bh *aix_bh)
2753+{
2754+ unsigned long flags;
2755+
2756+ spin_lock_irqsave(&AIX_retry_list_lock, flags);
2757+ if (AIX_retry_list == NULL)
2758+ AIX_retry_tail = &AIX_retry_list;
2759+ *AIX_retry_tail = aix_bh;
2760+ AIX_retry_tail = &aix_bh->next_r1;
2761+ aix_bh->next_r1 = NULL;
2762+ spin_unlock_irqrestore(&AIX_retry_list_lock, flags);
2763+ evms_cs_wakeup_thread(AIX_mirror_read_retry_thread);
2764+}
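+/*
+ * The retry queue is a head/tail-pointer FIFO: AIX_retry_tail always
+ * addresses the last node's next_r1 link (or AIX_retry_list itself when
+ * the queue is empty), so enqueueing is O(1) and AIXiod pops from the
+ * head under the same spinlock.
+ */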
2765+
2766+/****************************************************
2767+* Function: AIX_handle_read_mirror_drives
2768+*
2769+* Handles a read from a set of mirrored AIX LVs
2770+*
2771+*
2772+*
2773+*****************************************************/
2774+static void
2775+AIX_handle_read_mirror_drives(struct buffer_head *bh, int uptodate)
2776+{
2777+ struct aix_logical_volume *volume;
2778+ struct evms_logical_node *node;
2779+ struct aix_mirror_bh *tmp_bh;
2780+ kdev_t tmp_b_rdev;
2781+ u32 count, le = 0;
2782+
2783+ tmp_bh = (struct aix_mirror_bh *) bh->b_private;
2784+ tmp_b_rdev = tmp_bh->master_bh->b_rdev;
2785+ volume = (struct aix_logical_volume *) tmp_bh->node->private;
2786+ node = tmp_bh->node;
2787+ le = tmp_bh->le;
2788+
2789+ LOG_DEBUG("AHRMD node:%p bh_flags:%lu uptodate:%d mirror_copies:%d \n",
2790+ node, bh->b_state, uptodate, volume->mirror_copies);
2791+
2792+	// Read the PP state from the map that matches the copy we tried.
2793+	switch (tmp_bh->iteration) {
2794+	case AIX_DEFAULT_MIRRORING:
2795+		count = volume->le_to_pe_map[le].pp_state;
2796+		break;
2797+
2798+	case AIX_FIRST_MIRROR:
2799+		count = volume->le_to_pe_map_mir1[le].pp_state;
2800+		break;
2801+
2802+	case AIX_MAX_MIRRORS:
2803+		count = volume->le_to_pe_map_mir2[le].pp_state;
2804+		break;
2805+	}
2805+
2806+ if (count == (AIX_LVM_LVSTALE + AIX_LVM_LVDEFINED)) {
2807+ uptodate = 0;
2808+ count = 0;
2809+ }
2810+
2811+ if (!uptodate && tmp_bh->iteration < volume->mirror_copies) {
2812+ AIX_evms_cs_notify_lv_io_error(node);
2813+ AIX_reschedule_retry(tmp_bh);
2814+ } else {
2815+ tmp_bh->master_bh->b_end_io(tmp_bh->master_bh, uptodate);
2816+ evms_cs_deallocate_to_pool(AIX_BH_list_pool, tmp_bh);
2817+ evms_cs_volume_request_in_progress(tmp_b_rdev,
2818+ AIX_DECREMENT_REQUEST,
2819+ &count);
2820+
2821+ }
2822+
2823+ return;
2824+}
2825+
2826+/****************************************************
2827+* This is a temporary function until a common EVMS
2828+* notification function can be created.
2829+*
2830+*****************************************************/
2831+static int
2832+AIX_evms_cs_notify_lv_io_error(struct evms_logical_node *node)
2833+{
2834+ struct aix_logical_volume *volume;
2835+
2836+ volume = (struct aix_logical_volume *) node->private;
2837+
2838+ LOG_CRITICAL("Notify_ERROR !! node:%p volume->lv_status:%d volume->name:[%s]\n",
2839+ node, volume->lv_status, volume->name);
2840+
2841+ return 0;
2842+}
2843+
2844+/* Function: lvm_cleanup
2845+ *
2846+ * This function runs through the entire lvm data structure, removing
2847+ * all items that are not needed at runtime. Currently, this is just the
2848+ * vg_disk_t structure and the pv_disk_t structure for each PV. Also, any
2849+ * groups that don't contain any volumes are deleted. All of the other
2850+ * volume_group, logical_volume and evms_logical_node structures will be
2851+ * kept around at run-time.
2852+ */
2853+static int
2854+lvm_cleanup(void)
2855+{
2856+ struct aix_volume_group *group;
2857+
2858+ group = AIXVolumeGroupList;
2859+
2860+ while (group) {
2861+
2862+ if (group->AIXvgh) {
2863+ kfree(group->AIXvgh);
2864+ group->AIXvgh = NULL;
2865+ }
2866+
2867+ group = group->next;
2868+ }
2869+
2870+ return 0;
2871+}
2872+
2873+/****************************************************
2874+* Function: AIX_copy_header_info
2875+*
2876+* Copy the disk header info into the volume struct
2877+* so we can use it later.
2878+*
2879+*
2880+*
2881+*****************************************************/
2882+static int
2883+AIX_copy_header_info(struct vg_header *AIXvgh, struct vg_header *AIXvgh2)
2884+{
2885+
2886+ LOG_DEBUG("CHI AIXvgh:%p AIXvgh2:%p\n", AIXvgh, AIXvgh2);
2887+
2888+ if (AIXvgh) {
2889+
2890+ AIXvgh->vg_timestamp.tv_sec = AIXvgh2->vg_timestamp.tv_sec;
2891+ AIXvgh->vg_timestamp.tv_nsec = AIXvgh2->vg_timestamp.tv_nsec;
2892+ AIXvgh->vg_id.word1 = AIXvgh2->vg_id.word1;
2893+ AIXvgh->vg_id.word2 = AIXvgh2->vg_id.word2;
2894+ AIXvgh->vg_id.word3 = AIXvgh2->vg_id.word3;
2895+ AIXvgh->vg_id.word4 = AIXvgh2->vg_id.word4;
2896+ AIXvgh->numlvs = AIXvgh2->numlvs;
2897+ AIXvgh->maxlvs = AIXvgh2->maxlvs;
2898+ AIXvgh->pp_size = AIXvgh2->pp_size;
2899+ AIXvgh->numpvs = AIXvgh2->numpvs;
2900+ AIXvgh->total_vgdas = AIXvgh2->total_vgdas;
2901+ AIXvgh->vgda_size = AIXvgh2->vgda_size;
2902+ AIXvgh->bigvg = AIXvgh2->bigvg;
2903+ AIXvgh->quorum = AIXvgh2->quorum;
2904+ AIXvgh->auto_varyon = AIXvgh2->auto_varyon;
2905+ AIXvgh->checksum = AIXvgh2->checksum;
2906+ AIXvgh->bigda_size = AIXvgh2->bigda_size;
2907+
2908+ } else {
2909+ return -ENOMEM;
2910+ }
2911+
2912+ LOG_DEBUG("Returning CHI AIXvgh:%p AIXvgh2:%p\n", AIXvgh, AIXvgh2);
2913+
2914+ return 0;
2915+}
2916+
2917+/****************************************************
2918+* Function: AIX_free_headers
2919+*
2920+* Free the temporary VG header/trailer buffers used during discovery.
2921+*
2922+*
2923+*
2924+*****************************************************/
2925+static void
2926+AIX_free_headers(struct vg_header *AIXvgh, struct vg_header *AIXvgh2,
2927+ struct vg_trailer *AIXvgt, struct vg_trailer *AIXvgt2)
2928+{
2929+
2930+ if (AIXvgh) {
2931+ kfree(AIXvgh);
2932+ AIXvgh = NULL;
2933+ }
2934+
2935+ if (AIXvgh2) {
2936+ kfree(AIXvgh2);
2937+ AIXvgh2 = NULL;
2938+ }
2939+
2940+ if (AIXvgt) {
2941+ kfree(AIXvgt);
2942+ AIXvgt = NULL;
2943+ }
2944+
2945+ if (AIXvgt2) {
2946+ kfree(AIXvgt2);
2947+ AIXvgt2 = NULL;
2948+ }
2949+
2950+}
2951+
2952+/****************************************************
2953+* Function: AIXiod
2954+*
2955+* This is a kernel thread that handles read of mirrors
2956+* This shouldn't ever run on a non-mirrored LV read
2957+*
2958+*
2959+*****************************************************/
2960+static void
2961+AIXiod(void *data)
2962+{
2963+ struct aix_mirror_bh *r1_bh;
2964+ struct evms_logical_node *node;
2965+ unsigned long flags;
2966+
2967+ while (1) {
2968+
2969+ spin_lock_irqsave(&AIX_retry_list_lock, flags);
2970+ if (AIX_retry_list == NULL) {
2971+ spin_unlock_irqrestore(&AIX_retry_list_lock, flags);
2972+ break;
2973+ }
2974+ r1_bh = AIX_retry_list;
2975+ AIX_retry_list = r1_bh->next_r1;
2976+ spin_unlock_irqrestore(&AIX_retry_list_lock, flags);
2977+ r1_bh->next_r1 = NULL; // for mark
2978+
2979+ switch (r1_bh->cmd) {
2980+ case AIX_LV_READ:
2981+
2982+ r1_bh->iteration++;
2983+ LOG_DEBUG("Report from thread AIXiod READ\n");
2984+
2985+ if (r1_bh->iteration == AIX_FIRST_MIRROR) {
2986+ node = r1_bh->mir_node1;
2987+ r1_bh->bh_req.b_rsector = r1_bh->mir_sector1;
2988+ } else {
2989+ node = r1_bh->mir_node2;
2990+ r1_bh->bh_req.b_rsector = r1_bh->mir_sector2;
2991+ }
2992+
2993+ R_IO(node, &r1_bh->bh_req);
2994+
2995+ break;
2996+
2997+ default:
2998+ LOG_DEBUG("AIXiod unknown cmd passed to thread:%d\n",
2999+ r1_bh->cmd);
3000+ break;
3001+ }
3002+
3003+ }
3004+ return;
3005+}
3006+
3007+/****************************************************
3008+* Function: AIX_schedule_resync
3009+*
3010+* schedule a resync of one of our lv mirror copies
3011+*
3012+*
3013+*****************************************************/
3014+static void
3015+AIX_schedule_resync(struct aix_logical_volume *resync_volume, int force)
3016+{
3017+ unsigned long flags;
3018+
3019+ LOG_DEBUG("Function %s volume: %s \n", __FUNCTION__,
3020+ resync_volume->name);
3021+
3022+ spin_lock_irqsave(&AIX_resync_list_lock, flags);
3023+
3024+ if (!AIX_resync_list) {
3025+ AIX_resync_list =
3026+ kmalloc(sizeof (struct aix_resync_struct), GFP_ATOMIC);
3027+		if (!AIX_resync_list) {
3028+			spin_unlock_irqrestore(&AIX_resync_list_lock, flags);
3029+			return;
+		}
3030+ memset(AIX_resync_list, 0, sizeof (struct aix_resync_struct));
3031+ }
3032+
3033+ AIX_resync_list->resync_vol = resync_volume;
3034+ AIX_resync_list->next_resync_vol = NULL;
3035+
3036+ spin_unlock_irqrestore(&AIX_resync_list_lock, flags);
3037+ evms_cs_wakeup_thread(AIX_mirror_resync_thread);
3038+}
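+/*
+ * Note that despite the next_resync_vol link, only a single pending
+ * request is kept here: a second call before the resync thread runs
+ * simply overwrites resync_vol in the same list element.
+ */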
3039+
3040+/****************************************************
3041+* Function: AIXresync
3042+*
3043+* This is a kernel thread that handles resync of mirrors
3044+* This shouldn't ever run on a non-mirrored LV
3045+*
3046+*
3047+*****************************************************/
3048+static void
3049+AIXresync(void *data)
3050+{
3051+
3052+ struct aix_logical_volume *volume = NULL;
3053+ int force = FALSE; // Currently we don't force a resync of non-stale pe's
3054+
3055+ if (AIX_resync_list == NULL) {
3056+ LOG_ERROR("No Volumes on list to resync\n");
3057+ return;
3058+ }
3059+
3060+	volume = AIX_resync_list->resync_vol;
3061+
3062+	if (!volume) {
3063+		LOG_ERROR("Invalid volume passed to sync\n");
3064+		return;
3065+	}
3066+	LOG_DEBUG("Function %s volume: %s \n", __FUNCTION__, volume->name);
3067+
3068+ if (AIXResyncInProgress) {
3069+ LOG_ERROR("Unable to resync multiple LVs concurrently %s\n",
3070+ volume->name);
3071+ return;
3072+ }
3073+
3074+ if (volume->mirror_copies == AIX_DEFAULT_MIRRORING) {
3075+ LOG_ERROR("Unable to resync non-mirrored LV %s \n",
3076+ volume->name);
3077+ return;
3078+ }
3079+
3080+ AIXResyncInProgress = TRUE;
3081+
3082+ AIX_resync_lv_mirrors(volume, force);
3083+
3084+ return;
3085+}
3086+
3087+/****************************************************
3088+* Function: AIX_resync_lv_mirrors
3089+*
3090+*
3091+*
3092+*
3093+*
3094+*****************************************************/
3095+static int
3096+AIX_resync_lv_mirrors(struct aix_logical_volume *volume, int force)
3097+{
3098+
3099+ int i;
3100+ char pp_stale = FALSE;
3101+
3102+ struct partition_list_entry *master_part = NULL;
3103+ struct partition_list_entry *slave1_part = NULL;
3104+ struct partition_list_entry *slave2_part = NULL;
3105+
3106+ u64 master_offset = 0;
3107+ u64 slave1_offset = 0;
3108+ u64 slave2_offset = 0;
3109+
3110+ LOG_DEBUG("Function %s volume: %s \n", __FUNCTION__, volume->name);
3111+
3112+ for (i = 0; i < volume->num_le; i++, pp_stale = FALSE) {
3113+
3114+ // We need to see which mirror has a valid non-stale copy.
3115+ // The first non-stale copy will be our master and we'll
3116+ // copy to the slave(s).
3117+
3118+ if ((volume->le_to_pe_map[i].pp_state & AIX_LVM_LVSTALE)) {
3119+ pp_stale = TRUE;
3120+ }
3121+
3122+ if (volume->le_to_pe_map_mir1 != NULL) {
3123+ if ((volume->le_to_pe_map_mir1[i].
3124+ pp_state & AIX_LVM_LVSTALE)) {
3125+ pp_stale = TRUE;
3126+ }
3127+ }
3128+
3129+ if (volume->le_to_pe_map_mir2 != NULL) {
3130+ if ((volume->le_to_pe_map_mir2[i].
3131+ pp_state & AIX_LVM_LVSTALE)) {
3132+ pp_stale = TRUE;
3133+ }
3134+ }
3135+
3136+ LOG_DEBUG("Function %s pp_stale:%d force:%d \n", __FUNCTION__,
3137+ pp_stale, force);
3138+
3139+ if (pp_stale || force) {
3140+ if (!(volume->le_to_pe_map[i].pp_state & AIX_LVM_LVSTALE)) {
3141+
3142+ master_part = volume->le_to_pe_map[i].owning_pv;
3143+ master_offset = volume->le_to_pe_map[i].pe_sector_offset;
3144+
3145+ if (volume->le_to_pe_map_mir1 != NULL) {
3146+ slave1_part = volume->le_to_pe_map_mir1[i].owning_pv;
3147+ slave1_offset = volume->le_to_pe_map_mir1[i].pe_sector_offset;
3148+ }
3149+
3150+ if (volume->le_to_pe_map_mir2 != NULL) {
3151+ slave2_part = volume->le_to_pe_map_mir2[i].owning_pv;
3152+ slave2_offset = volume->le_to_pe_map_mir2[i].pe_sector_offset;
3153+ }
3154+			} else if (volume->le_to_pe_map_mir1 &&
3155+			    !(volume->le_to_pe_map_mir1[i].pp_state & AIX_LVM_LVSTALE)) {
3156+ master_part = volume->le_to_pe_map_mir1[i].owning_pv;
3157+ master_offset = volume->le_to_pe_map_mir1[i].pe_sector_offset;
3158+
3159+ if (volume->le_to_pe_map != NULL) {
3160+ slave1_part = volume->le_to_pe_map[i].owning_pv;
3161+ slave1_offset = volume->le_to_pe_map[i].pe_sector_offset;
3162+ }
3163+
3164+ if (volume->le_to_pe_map_mir2 != NULL) {
3165+ slave2_part = volume->le_to_pe_map_mir2[i].owning_pv;
3166+ slave2_offset = volume->le_to_pe_map_mir2[i].pe_sector_offset;
3167+ }
3168+			} else if (volume->le_to_pe_map_mir2 &&
3169+			    !(volume->le_to_pe_map_mir2[i].pp_state & AIX_LVM_LVSTALE)) {
3170+ master_part = volume->le_to_pe_map_mir2[i].owning_pv;
3171+ master_offset = volume->le_to_pe_map_mir2[i].pe_sector_offset;
3172+
3173+ if (volume->le_to_pe_map != NULL) {
3174+ slave1_part = volume->le_to_pe_map[i].owning_pv;
3175+ slave1_offset = volume->le_to_pe_map[i].pe_sector_offset;
3176+ }
3177+
3178+ if (volume->le_to_pe_map_mir1 != NULL) {
3179+ slave2_part = volume->le_to_pe_map_mir1[i].owning_pv;
3180+ slave2_offset = volume->le_to_pe_map_mir1[i].pe_sector_offset;
3181+ }
3182+ }
3183+
3184+ if (AIX_copy_on_read(volume, master_part, slave1_part, slave2_part,
3185+ master_offset, slave1_offset, slave2_offset,
3186+ volume->pe_size, i)) {
3187+
3188+ LOG_CRITICAL("ReSync of logical Volume %s FAILED !!\n",
3189+ volume->name);
3190+ AIX_evms_cs_notify_lv_io_error(volume->
3191+ volume_node);
3192+ break;
3193+ }
3194+
3195+ }
3196+
3197+ }
3198+
3199+ return 0;
3200+}
3201+
3202+/****************************************************
3203+* Function: AIX_copy_on_read
3204+*
3205+*
3206+*
3207+*
3208+*
3209+*****************************************************/
3210+static int
3211+AIX_copy_on_read(struct aix_logical_volume *volume,
3212+ struct partition_list_entry *master_part,
3213+ struct partition_list_entry *slave1_part,
3214+ struct partition_list_entry *slave2_part,
3215+ u64 master_offset,
3216+ u64 slave1_offset, u64 slave2_offset, u32 pe_size, int le)
3217+{
3218+ unsigned long flags;
3219+ struct aix_mirror_bh *tmp_bh = NULL;
3220+
3221+ // Check for valid partitions we need at least 2 good partitions so slave2 doesn't have to be valid
3222+
3223+ if (!master_part || !slave1_part) {
3224+ LOG_ERROR("Invalid partitions for resync master part:%p slave1_part:%p slave2_part:%p\n",
3225+ master_part, slave1_part, slave2_part);
3226+ return -EINVAL;
3227+ }
3228+
3229+	LOG_DEBUG("Function %s volume:%s master_part:%d, slave1_part:%d, slave2_part:%d master_offset:"
3230+		  PFU64 ", slave1_offset:" PFU64 " slave2_offset:" PFU64 ", \n",
3231+		  __FUNCTION__, volume->name, master_part->pv_number,
3232+		  slave1_part->pv_number,
+		  slave2_part ? slave2_part->pv_number : -1,
3233+		  master_offset, slave1_offset, slave2_offset);
3234+
3235+ LOG_DEBUG("pe_size:%d le:%d\n", pe_size, le);
3236+
3237+ tmp_bh =
3238+ AIX_alloc_sbh(volume, master_part, slave1_part, slave2_part,
3239+ master_offset, slave1_offset, slave2_offset, pe_size);
3240+
3241+	if (!tmp_bh) {
3242+		// tmp_bh is NULL here, so there is no buffer head to fail
3243+		// with buffer_IO_error(); just report the allocation error.
3244+		return -ENOMEM;
+	}
3245+
3246+/* if (evms_cs_volume_request_in_progress
3247+ (tmp_bh->bh_req.b_rdev, AIX_INCREMENT_REQUEST, &count)) {
3248+ buffer_IO_error(&tmp_bh->bh_req);
3249+ return -EIO;
3250+ } */
3251+
3252+ spin_lock_irqsave(&AIX_resync_pp_lock, flags);
3253+
3254+ LOG_DEBUG("Function:%s kicking off read node:%p\n", __FUNCTION__,
3255+ master_part->logical_node);
3256+
3257+ R_IO(master_part->logical_node, &tmp_bh->bh_req);
3258+
3259+ spin_unlock_irqrestore(&AIX_resync_pp_lock, flags);
3260+
3261+ return 0;
3262+}
3263+
3264+/****************************************************
3265+* Function: AIX_alloc_sbh
3266+*
3267+* Allocate the chain of buffer heads used to resync one block across the mirror copies
3268+*
3269+*
3270+*****************************************************/
3271+static struct aix_mirror_bh *
3272+AIX_alloc_sbh(struct aix_logical_volume *volume,
3273+ struct partition_list_entry *master_part,
3274+ struct partition_list_entry *slave1_part,
3275+ struct partition_list_entry *slave2_part,
3276+ u64 master_offset,
3277+ u64 slave1_offset, u64 slave2_offset, u32 pe_size)
3278+{
3279+ struct aix_mirror_bh *tmp_bh = NULL, *head_bh = NULL;
3280+ unsigned long flags;
3281+
3282+ LOG_DEBUG("Function:%s Enter\n", __FUNCTION__);
3283+
3284+ head_bh = evms_cs_allocate_from_pool(AIX_BH_list_pool, EVMS_BLOCKABLE);
3285+ if (!head_bh) {
3286+ LOG_SERIOUS
3287+ ("Unable to allocate memory for mirror pool line:%d\n",
3288+ __LINE__);
3289+ return NULL;
3290+ }
3291+ // Update buffer so we block on a read/write on the normal IO path
3292+ // if we're trying to sync the same sector on the disk
3293+ // We don't want to block if it's different sectors
3294+
3295+ spin_lock_irqsave(&AIX_resync_list_lock, flags);
3296+
3297+ AIX_resync_list->master_part = master_part;
3298+ AIX_resync_list->slave1_part = slave1_part;
3299+ AIX_resync_list->slave2_part = slave2_part;
3300+ AIX_resync_list->master_offset = master_offset;
3301+ AIX_resync_list->slave1_offset = slave1_offset;
3302+ AIX_resync_list->slave2_offset = slave2_offset;
3303+
3304+ head_bh->bh_req.b_data = kmalloc(AIX_RESYNC_BLOCKSIZE + 1, GFP_NOIO);
3305+	if (!head_bh->bh_req.b_data) {
3306+		evms_cs_deallocate_to_pool(AIX_BH_list_pool, head_bh);
3307+		LOG_SERIOUS
3308+		    ("Unable to allocate memory for mirror pool line:%d\n",
3309+		     __LINE__);
3310+		spin_unlock_irqrestore(&AIX_resync_list_lock, flags);
3311+		return NULL;
+	}
3312+
3313+ memset(head_bh->bh_req.b_data, 0, AIX_RESYNC_BLOCKSIZE + 1);
3314+
3315+ head_bh->remaining = (atomic_t) ATOMIC_INIT(0);
3316+ head_bh->bh_req.b_rsector = master_offset;
3317+ head_bh->bh_req.b_size = AIX_RESYNC_BLOCKSIZE;
3318+ head_bh->sync_flag = AIX_SYNC_INCOMPLETE;
3319+ head_bh->bh_req.b_end_io = AIX_sync_mirrored_partitions;
3320+ head_bh->bh_req.b_page = virt_to_page(head_bh->bh_req.b_data);
3321+ head_bh->bh_req.b_state = 0;
3322+ set_bit(BH_Dirty, &head_bh->bh_req.b_state);
3323+ set_bit(BH_Lock, &head_bh->bh_req.b_state);
3324+ set_bit(BH_Req, &head_bh->bh_req.b_state);
3325+ set_bit(BH_Mapped, &head_bh->bh_req.b_state);
3326+ head_bh->master_bh = NULL;
3327+ head_bh->mirror_bh_list = NULL;
3328+
3329+ tmp_bh = evms_cs_allocate_from_pool(AIX_BH_list_pool, EVMS_BLOCKABLE);
3330+	if (!tmp_bh) {
3331+		LOG_SERIOUS
3332+		    ("Unable to allocate memory for mirror pool line:%d\n",
3333+		     __LINE__);
3334+		kfree(head_bh->bh_req.b_data);
3335+		evms_cs_deallocate_to_pool(AIX_BH_list_pool, head_bh);
+		spin_unlock_irqrestore(&AIX_resync_list_lock, flags);
+		return NULL;
+	}
3336+
3337+ head_bh->next_r1 = tmp_bh;
3338+	memcpy(&tmp_bh->bh_req, &head_bh->bh_req, sizeof (struct buffer_head));
3339+ tmp_bh->remaining = (atomic_t) ATOMIC_INIT(0);
3340+ tmp_bh->bh_req.b_end_io = NULL;
3341+
3342+ if (volume->mirror_copies == AIX_MAX_MIRRORS) {
3343+ tmp_bh->next_r1 =
3344+ evms_cs_allocate_from_pool(AIX_BH_list_pool,
3345+ EVMS_BLOCKABLE);
3346+		if (!tmp_bh->next_r1) {
3347+			LOG_SERIOUS
3348+			    ("Unable to allocate memory for mirror pool line:%d\n",
3349+			     __LINE__);
3350+			kfree(head_bh->bh_req.b_data);
3351+			evms_cs_deallocate_to_pool(AIX_BH_list_pool, tmp_bh);
+			evms_cs_deallocate_to_pool(AIX_BH_list_pool, head_bh);
+			spin_unlock_irqrestore(&AIX_resync_list_lock, flags);
+			return NULL;
+		}
3352+
3353+		memcpy(&tmp_bh->next_r1->bh_req, &head_bh->bh_req,
3354+		       sizeof (struct buffer_head));
3355+ tmp_bh->next_r1->bh_req.b_end_io = NULL;
3356+ tmp_bh->next_r1->remaining = (atomic_t) ATOMIC_INIT(0);
3357+ }
3358+
3359+ init_waitqueue_head(&head_bh->bh_req.b_wait);
3360+
3361+ spin_unlock_irqrestore(&AIX_resync_list_lock, flags);
3362+
3363+ LOG_DEBUG("Function:%s Exit head_bh:%p\n", __FUNCTION__, head_bh);
3364+
3365+ return head_bh;
3366+}
3367+
3368+/****************************************************
3369+* Function: AIX_sync_mirrored_partitions
3370+*
3371+*
3372+*
3373+*
3374+*
3375+*****************************************************/
3376+static void
3377+AIX_sync_mirrored_partitions(struct buffer_head *bh, int uptodate)
3378+{
3379+ struct aix_logical_volume *volume = NULL;
3380+ struct aix_mirror_bh *tmp_bh, *head_bh;
3381+
3382+ head_bh = tmp_bh = (struct aix_mirror_bh *) bh->b_private;
3383+ volume = (struct aix_logical_volume *) tmp_bh->node->private;
3384+
3385+ LOG_DEBUG("Function:%s Enter uptodate:%d\n", __FUNCTION__, uptodate);
3386+
3387+ if (!uptodate) {
3388+
3389+ AIX_evms_cs_notify_lv_io_error(tmp_bh->node);
3390+ }
3391+
3392+ tmp_bh = head_bh->next_r1;
3393+
3394+ LOG_DEBUG("Function:%s line:%d write to mirror:%p\n", __FUNCTION__,
3395+ __LINE__, tmp_bh);
3396+
3397+ if (tmp_bh) {
3398+ W_IO(tmp_bh->node, &tmp_bh->bh_req);
3399+ AIX_get_set_mirror_offset(tmp_bh, AIX_SLAVE_1,
3400+ AIX_RESYNC_BLOCKSIZE);
3401+ }
3402+
3403+	tmp_bh = tmp_bh ? tmp_bh->next_r1 : NULL;
3404+ LOG_DEBUG("Function:%s line:%d write to mirror:%p\n", __FUNCTION__,
3405+ __LINE__, tmp_bh);
3406+
3407+ if (tmp_bh) {
3408+ W_IO(tmp_bh->node, &tmp_bh->bh_req);
3409+ AIX_get_set_mirror_offset(tmp_bh, AIX_SLAVE_2,
3410+ AIX_RESYNC_BLOCKSIZE);
3411+ }
3412+
3413+ LOG_DEBUG("Function:%s line:%d read from master:%p\n", __FUNCTION__,
3414+ __LINE__, head_bh);
3415+
3416+ if (head_bh && head_bh->sync_flag) {
3417+ AIX_get_set_mirror_offset(head_bh, AIX_MASTER,
3418+ AIX_RESYNC_BLOCKSIZE);
3419+ if (head_bh->sync_flag == AIX_SYNC_INCOMPLETE) {
3420+ R_IO(head_bh->node, &head_bh->bh_req);
3421+ }
3422+ }
3423+
3424+ LOG_DEBUG("Function:%s line:%d head_bh->sync_flag:%d\n", __FUNCTION__,
3425+ __LINE__, head_bh->sync_flag);
3426+
3427+ if (!head_bh->sync_flag) {
3428+		tmp_bh = head_bh;
3429+
3430+		while (tmp_bh != NULL) {
3431+			head_bh = tmp_bh->next_r1;
3432+			evms_cs_deallocate_to_pool(AIX_BH_list_pool, tmp_bh);
3433+			tmp_bh = head_bh;
3434+		}
3435+
3436+ AIXResyncInProgress = FALSE;
3437+/* evms_cs_volume_request_in_progress(tmp_bh->bh_req.b_rdev,
3438+ AIX_DECREMENT_REQUEST,
3439+ &count); */
3440+
3441+		if (AIX_resync_list) {
3442+			kfree(AIX_resync_list);
3443+			AIX_resync_list = NULL;
+		}
3444+ }
3445+
3446+ return;
3447+}
3448+
3449+/****************************************************
3450+* Function: AIX_get_set_mirror_offset
3451+*
3452+*
3453+*
3454+*
3455+*
3456+*****************************************************/
3457+static int
3458+AIX_get_set_mirror_offset(struct aix_mirror_bh *tmp_bh, int index, int offset)
3459+{
3460+	unsigned long flags;	/* spin_lock_irqsave() needs unsigned long */
3461+
3462+ if (!tmp_bh) {
3463+ return -EINVAL;
3464+ }
3465+
3466+ LOG_DEBUG("Function:%s Enter offset:%d\n", __FUNCTION__, offset);
3467+
3468+	tmp_bh->bh_req.b_rsector += offset;
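+	// Each completed pass advances the window by `offset' (callers pass
+	// AIX_RESYNC_BLOCKSIZE), so the resync walks the mirror copies in
+	// fixed-size steps until b_rsector runs past total_vsectors, which
+	// is checked just below.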
3469+
3470+ if (tmp_bh->bh_req.b_rsector > tmp_bh->node->total_vsectors) {
3471+ tmp_bh->sync_flag = AIX_SYNC_COMPLETE;
3472+ return -EIO;
3473+ }
3474+ // Update buffer so we block on a read/write on the normal IO path
3475+ // if we're trying to sync the same sector on the disk
3476+ // We don't want to block if it's different sectors
3477+
3478+ spin_lock_irqsave(&AIX_resync_list_lock, flags);
3479+
3480+ if (AIX_resync_list->master_part->logical_node == tmp_bh->node) {
3481+ AIX_resync_list->master_offset += offset;
3482+ }
3483+
3484+ if (AIX_resync_list->slave1_part->logical_node == tmp_bh->node) {
3485+ AIX_resync_list->slave1_offset += offset;
3486+ }
3487+
3488+	if (AIX_resync_list->slave2_part &&
3489+	    AIX_resync_list->slave2_part->logical_node == tmp_bh->node) {
3490+		AIX_resync_list->slave2_offset += offset;
+	}
3491+
3492+ spin_unlock_irqrestore(&AIX_resync_list_lock, flags);
3493+
3494+ return 0;
3495+
3496+}
3497+
3498+static int AIX_pvh_data_posn(u32 vgda_psn, u32 * pvh_posn, struct partition_list_entry *partition, u32 numpvs)
3499+{
3500+ struct partition_list_entry * pv;
3501+ struct pv_header * AIXpvh;
3502+ int posn = 0;
3503+ int num_pps;
3504+ int tmp,i;
3505+
3506+ LOG_DEBUG("APDP - vgda_psn:%d numpvs:%d \n", vgda_psn, numpvs);
3507+
3508+ AIXpvh = kmalloc(AIX_SECTOR_SIZE, GFP_KERNEL);
3509+ if (!AIXpvh) {
3510+ return -ENOMEM;
3511+ }
3512+
3513+	memset(AIXpvh, 0, AIX_SECTOR_SIZE);
3514+
3515+ // Adjust this because when AIX VGs/Volumes are created on Intel platforms, the
3516+ // pp_count could be anything since we don't give up the entire physical drive.
3517+ // This is for calculation purposes only.
3518+
3519+ pvh_posn[0] = 0;
3520+ pv = partition;
3521+
3522+ for (i = 1; i <= numpvs; i++) {
3523+		for (pv = partition; pv && pv->pv_number != i; pv = pv->next);
3524+		if (!pv) {
+			// No partition carries this pv_number; bail out rather
+			// than walking off the end of the list.
+			kfree(AIXpvh);
+			return -EINVAL;
+		}
+
3525+ LOG_DEBUG("APDP line:%d pp_count:%d \n", __LINE__, AIXpvh->pp_count);
3526+
3527+ num_pps = AIXpvh->pp_count;
3528+ num_pps++; // Account for the pv_header on the front
3529+
3530+ while ((num_pps * sizeof(struct pp_entries)) % AIX_SECTOR_SIZE) {
3531+ LOG_EXTRA("num_pps:%d \n", num_pps);
3532+ num_pps++;
3533+ }
3534+
3535+ tmp = (num_pps * sizeof(struct pp_entries)) / AIX_SECTOR_SIZE;
3536+
3537+ LOG_DEBUG("APDP tmp:%d num_pps:%d \n", tmp,num_pps);
3538+
3539+ posn = ((vgda_psn + PSN_PPH_OFFSET) + ((pv->pv_number -1) * tmp));
3540+
3541+ pvh_posn[pv->pv_number] = posn;
3542+
3543+ if (INIT_IO(pv->logical_node, 0, posn, 1, AIXpvh)) {
3544+ kfree(AIXpvh);
3545+ return -EIO;
3546+ }
3547+
3548+ pv = partition;
3549+ }
3550+
3551+ kfree(AIXpvh);
3552+
3553+ return 0;
3554+}
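+/*
+ * Worked example (hypothetical struct sizes): if a pp_entries record is
+ * 32 bytes, 16 of them fill one 512-byte sector; a PV with pp_count = 15
+ * plus its leading pv_header rounds num_pps up to 16, giving tmp = 1
+ * sector per PV slot, so PV n's header lands at
+ * vgda_psn + PSN_PPH_OFFSET + (n - 1) * 1.
+ */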
3555+
3556+/****************************************************
3557+* Function: AIX_volume_group_dump
3558+*
3559+* This is for debug purposes and will walk the volume group list
3560+* and LV's within the volume groups
3561+*
3562+* It can be called at anytime however the output to the display is large
3563+*
3564+*****************************************************/
3565+#ifdef EVMS_AIX_DEBUG
3566+static int
3567+AIX_volume_group_dump(void)
3568+{
3569+ struct aix_volume_group *AIXVGLDebugPtr;
3570+ struct partition_list_entry *DebugPartitionList;
3571+ struct aix_logical_volume *DebugLVList;
3572+ int i;
3573+
3574+ AIXVGLDebugPtr = AIXVolumeGroupList;
3575+
3576+ if (!AIXVGLDebugPtr) {
3577+ LOG_DEBUG("***********************************************\n");
3578+ LOG_DEBUG("ERROR Nothing built in the list to check !!! \n");
3579+ LOG_DEBUG("***********************************************\n");
3580+ return 0;
3581+ }
3582+
3583+ LOG_DEBUG("*********************************************** \n");
3584+ LOG_DEBUG("Begin Volume Group Dump \n");
3585+ LOG_DEBUG("*********************************************** \n");
3586+
3587+ while (AIXVGLDebugPtr) {
3588+
3589+ LOG_DEBUG("vg_number %x\n", AIXVGLDebugPtr->vg_id.word2);
3590+		LOG_DEBUG("numpartitions %d\n", AIXVGLDebugPtr->partition_count);
3591+ LOG_DEBUG("numlvs %d\n", AIXVGLDebugPtr->numlvs);
3592+ LOG_DEBUG("hard_sect_size %d\n", AIXVGLDebugPtr->hard_sect_size);
3593+ LOG_DEBUG("block_size %d\n", AIXVGLDebugPtr->block_size);
3594+ LOG_DEBUG("flags %d\n", AIXVGLDebugPtr->flags);
3595+// LOG_DEBUG("lv_max %d\n", AIXVGLDebugPtr->lv_max);
3596+ LOG_DEBUG("pe_size %d\n", AIXVGLDebugPtr->pe_size);
3597+ LOG_DEBUG("CleanVGInfo %d\n", AIXVGLDebugPtr->CleanVGInfo);
3598+
3599+ DebugPartitionList = AIXVGLDebugPtr->partition_list;
3600+
3601+ LOG_DEBUG("********* Begin Volume Partition Dump ********* \n");
3602+
3603+ if (!DebugPartitionList) {
3604+ LOG_DEBUG("No partitions to check !! \n");
3605+ }
3606+
3607+ while (DebugPartitionList) {
3608+ LOG_DEBUG("logical_node %p\n",
3609+ DebugPartitionList->logical_node);
3610+ LOG_DEBUG("pv_number %d\n",
3611+ DebugPartitionList->pv_number);
3612+ LOG_DEBUG("block_size %d\n",
3613+ DebugPartitionList->block_size);
3614+ LOG_DEBUG("hard_sect_size %d\n",
3615+ DebugPartitionList->hard_sect_size);
3616+ LOG_DEBUG("-------------------------------------------------------------\n");
3617+ DebugPartitionList = DebugPartitionList->next;
3618+ }
3619+
3620+ LOG_DEBUG("********* End Volume Partition Dump **********\n");
3621+
3622+ LOG_DEBUG("********** Begin Logical Volume Partition Dump **********\n");
3623+
3624+ DebugLVList = AIXVGLDebugPtr->volume_list[0];
3625+
3626+ if (!DebugLVList) {
3627+ LOG_DEBUG("No logical volumes to check !! \n");
3628+ }
3629+
3630+ for (i = 0; i < LVM_MAXLVS && DebugLVList; i++) {
3631+
3632+ DebugLVList = AIXVGLDebugPtr->volume_list[i];
3633+
3634+ if (DebugLVList) {
3635+ LOG_DEBUG("volume_list # %d \n", i);
3636+ LOG_DEBUG("lv_number %d \n",
3637+ DebugLVList->lv_number);
3638+ LOG_DEBUG("LV name %s \n",
3639+ DebugLVList->name);
3640+ LOG_DEBUG("lv_size " PFU64 " \n",
3641+ DebugLVList->lv_size);
3642+ LOG_DEBUG("lv_access %d \n",
3643+ DebugLVList->lv_access);
3644+ LOG_DEBUG("lv_status %d \n",
3645+ DebugLVList->lv_status);
3646+// LOG_DEBUG("lv_minor %d \n",
3647+// DebugLVList->lv_minor);
3648+ LOG_DEBUG("mirror_copies %d \n",
3649+ DebugLVList->mirror_copies);
3650+// LOG_DEBUG("mirror_number %d \n",
3651+// DebugLVList->mirror_number);
3652+ LOG_DEBUG("stripes %d \n",
3653+ DebugLVList->stripes);
3654+ LOG_DEBUG("stripe_size %d \n",
3655+ DebugLVList->stripe_size);
3656+				LOG_DEBUG("stripe_size_shift %d \n",
3657+ DebugLVList->stripe_size_shift);
3658+ LOG_DEBUG("pe_size %d \n",
3659+ DebugLVList->pe_size);
3660+ LOG_DEBUG("pe_size_shift %d \n",
3661+ DebugLVList->pe_size_shift);
3662+ LOG_DEBUG("num_le %d \n",
3663+ DebugLVList->num_le);
3664+// LOG_DEBUG("new_volume %d \n",
3665+// DebugLVList->new_volume);
3666+ LOG_DEBUG("group %p \n",
3667+ DebugLVList->group);
3668+ }
3669+
3670+ }
3671+
3672+ AIXVGLDebugPtr = AIXVGLDebugPtr->next;
3673+
3674+ LOG_DEBUG("********** End Logical Volume Partition Dump **********\n");
3675+
3676+ }
3677+
3678+ LOG_DEBUG("***********************************************\n");
3679+ LOG_DEBUG("End Volume Group Dump \n");
3680+ LOG_DEBUG("***********************************************\n");
3681+
3682+ return 0;
3683+
3684+}
3685+#endif
3686diff -Naur linux-2002-09-30/drivers/evms/Config.in evms-2002-09-30/drivers/evms/Config.in
3687--- linux-2002-09-30/drivers/evms/Config.in Wed Dec 31 18:00:00 1969
3688+++ evms-2002-09-30/drivers/evms/Config.in Mon Sep 16 15:55:24 2002
3689@@ -0,0 +1,60 @@
3690+#
3691+# Copyright (c) International Business Machines Corp., 2000
3692+#
3693+# This program is free software; you can redistribute it and/or modify
3694+# it under the terms of the GNU General Public License as published by
3695+# the Free Software Foundation; either version 2 of the License, or
3696+# (at your option) any later version.
3697+#
3698+# This program is distributed in the hope that it will be useful,
3699+# but WITHOUT ANY WARRANTY; without even the implied warranty of
3700+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
3701+# the GNU General Public License for more details.
3702+#
3703+# You should have received a copy of the GNU General Public License
3704+# along with this program; if not, write to the Free Software
3705+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
3706+#
3707+#
3708+# EVMS driver configuration
3709+#
3710+
3711+mainmenu_option next_comment
3712+comment 'Enterprise Volume Management System'
3713+
3714+tristate 'EVMS Kernel Runtime' CONFIG_EVMS
3715+dep_tristate ' EVMS Local Device Manager' CONFIG_EVMS_LOCAL_DEV_MGR $CONFIG_EVMS
3716+dep_tristate ' EVMS DOS Segment Manager' CONFIG_EVMS_DOS_SEGMENT_MGR $CONFIG_EVMS
3717+dep_tristate ' EVMS GPT Segment Manager' CONFIG_EVMS_GPT_SEGMENT_MGR $CONFIG_EVMS
3718+if [ "$CONFIG_ARCH_S390" = "y" ]; then
3719+dep_tristate ' EVMS S/390 Segment Manager' CONFIG_EVMS_S390_SEGMENT_MGR $CONFIG_EVMS
3720+fi
3721+dep_tristate ' EVMS SnapShot Feature' CONFIG_EVMS_SNAPSHOT $CONFIG_EVMS
3722+dep_tristate ' EVMS DriveLink Feature' CONFIG_EVMS_DRIVELINK $CONFIG_EVMS
3723+dep_tristate ' EVMS Bad Block Relocation (BBR) Feature' CONFIG_EVMS_BBR $CONFIG_EVMS
3724+dep_tristate ' EVMS Linux LVM Package' CONFIG_EVMS_LVM $CONFIG_EVMS
3725+dep_tristate ' EVMS Linux MD Package' CONFIG_EVMS_MD $CONFIG_EVMS
3726+dep_tristate ' EVMS MD Linear (append) mode' CONFIG_EVMS_MD_LINEAR $CONFIG_EVMS_MD
3727+dep_tristate ' EVMS MD RAID-0 (stripe) mode' CONFIG_EVMS_MD_RAID0 $CONFIG_EVMS_MD
3728+dep_tristate ' EVMS MD RAID-1 (mirroring) mode' CONFIG_EVMS_MD_RAID1 $CONFIG_EVMS_MD
3729+dep_tristate ' EVMS MD RAID-4/RAID-5 mode' CONFIG_EVMS_MD_RAID5 $CONFIG_EVMS_MD
3730+dep_tristate ' EVMS AIX LVM Package' CONFIG_EVMS_AIX $CONFIG_EVMS
3731+dep_tristate ' EVMS OS/2 LVM Package' CONFIG_EVMS_OS2 $CONFIG_EVMS
3732+#dep_tristate ' EVMS Clustering Package' CONFIG_EVMS_ECR $CONFIG_EVMS
3733+
3734+if [ "$CONFIG_EVMS" != "n" ]; then
3735+ choice ' EVMS Debug Level' \
3736+ "Critical CONFIG_EVMS_INFO_CRITICAL \
3737+ Serious CONFIG_EVMS_INFO_SERIOUS \
3738+ Error CONFIG_EVMS_INFO_ERROR \
3739+ Warning CONFIG_EVMS_INFO_WARNING \
3740+ Default CONFIG_EVMS_INFO_DEFAULT \
3741+ Details CONFIG_EVMS_INFO_DETAILS \
3742+ Debug CONFIG_EVMS_INFO_DEBUG \
3743+ Extra CONFIG_EVMS_INFO_EXTRA \
3744+ Entry_Exit CONFIG_EVMS_INFO_ENTRY_EXIT \
3745+ Everything CONFIG_EVMS_INFO_EVERYTHING" Default
3746+fi
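#
# Note: the choice group above sets exactly one CONFIG_EVMS_INFO_* symbol
# to 'y'; the EVMS Makefile (next hunk) maps it onto
# EXTRA_CFLAGS=-DEVMS_INFO_LEVEL=EVMS_INFO_<level>, with Default as the
# fallback when no other level is selected.
#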
3747+
3748+endmenu
3749+
3750diff -Naur linux-2002-09-30/drivers/evms/Makefile evms-2002-09-30/drivers/evms/Makefile
3751--- linux-2002-09-30/drivers/evms/Makefile Wed Dec 31 18:00:00 1969
3752+++ evms-2002-09-30/drivers/evms/Makefile Mon Sep 16 15:55:24 2002
3753@@ -0,0 +1,64 @@
3754+#
3755+# Makefile for the kernel EVMS driver and modules.
3756+#
3757+# 08 March 2001, Mark Peloquin <peloquin@us.ibm.com>
3758+#
3759+
3760+O_TARGET := evmsdrvr.o
3761+
3762+export-objs := evms.o evms_passthru.o ldev_mgr.o dos_part.o lvm_vge.o \
3763+ snapshot.o evms_drivelink.o evms_bbr.o AIXlvm_vge.o \
3764+ os2lvm_vge.o evms_ecr.o md_core.o md_linear.o md_raid0.o \
3765+ md_raid1.o md_raid5.o md_xor.o s390_part.o gpt_part.o
3766+
3767+# Link order is important! Plugins must come first, then the EVMS core.
3768+
3769+obj-$(CONFIG_EVMS_LOCAL_DEV_MGR) += ldev_mgr.o
3770+obj-$(CONFIG_EVMS_DOS_SEGMENT_MGR) += dos_part.o
3771+obj-$(CONFIG_EVMS_GPT_SEGMENT_MGR) += gpt_part.o
3772+obj-$(CONFIG_EVMS_S390_SEGMENT_MGR) += s390_part.o
3773+obj-$(CONFIG_EVMS_MD) += md_core.o
3774+obj-$(CONFIG_EVMS_MD_LINEAR) += md_linear.o
3775+obj-$(CONFIG_EVMS_MD_RAID0) += md_raid0.o
3776+obj-$(CONFIG_EVMS_MD_RAID1) += md_raid1.o
3777+obj-$(CONFIG_EVMS_MD_RAID5) += md_raid5.o md_xor.o
3778+obj-$(CONFIG_EVMS_LVM) += lvm_vge.o
3779+obj-$(CONFIG_EVMS_AIX) += AIXlvm_vge.o
3780+obj-$(CONFIG_EVMS_OS2) += os2lvm_vge.o
3781+obj-$(CONFIG_EVMS_DRIVELINK) += evms_drivelink.o
3782+obj-$(CONFIG_EVMS_BBR) += evms_bbr.o
3783+obj-$(CONFIG_EVMS_SNAPSHOT) += snapshot.o
3784+obj-$(CONFIG_EVMS_ECR) += evms_ecr.o
3785+obj-$(CONFIG_EVMS) += evms_passthru.o evms.o
3786+
3787+EXTRA_CFLAGS=-DEVMS_INFO_LEVEL=EVMS_INFO_DEFAULT
3788+ifeq ($(CONFIG_EVMS_INFO_CRITICAL),y)
3789+ EXTRA_CFLAGS=-DEVMS_INFO_LEVEL=EVMS_INFO_CRITICAL
3790+endif
3791+ifeq ($(CONFIG_EVMS_INFO_SERIOUS),y)
3792+ EXTRA_CFLAGS=-DEVMS_INFO_LEVEL=EVMS_INFO_SERIOUS
3793+endif
3794+ifeq ($(CONFIG_EVMS_INFO_ERROR),y)
3795+ EXTRA_CFLAGS=-DEVMS_INFO_LEVEL=EVMS_INFO_ERROR
3796+endif
3797+ifeq ($(CONFIG_EVMS_INFO_WARNING),y)
3798+ EXTRA_CFLAGS=-DEVMS_INFO_LEVEL=EVMS_INFO_WARNING
3799+endif
3800+ifeq ($(CONFIG_EVMS_INFO_DETAILS),y)
3801+ EXTRA_CFLAGS=-DEVMS_INFO_LEVEL=EVMS_INFO_DETAILS
3802+endif
3803+ifeq ($(CONFIG_EVMS_INFO_DEBUG),y)
3804+ EXTRA_CFLAGS=-DEVMS_INFO_LEVEL=EVMS_INFO_DEBUG
3805+endif
3806+ifeq ($(CONFIG_EVMS_INFO_EXTRA),y)
3807+ EXTRA_CFLAGS=-DEVMS_INFO_LEVEL=EVMS_INFO_EXTRA
3808+endif
3809+ifeq ($(CONFIG_EVMS_INFO_ENTRY_EXIT),y)
3810+ EXTRA_CFLAGS=-DEVMS_INFO_LEVEL=EVMS_INFO_ENTRY_EXIT
3811+endif
3812+ifeq ($(CONFIG_EVMS_INFO_EVERYTHING),y)
3813+ EXTRA_CFLAGS=-DEVMS_INFO_LEVEL=EVMS_INFO_EVERYTHING
3814+endif
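#
# Each matching ifeq above overwrites EXTRA_CFLAGS outright; since the
# Config.in choice group allows only one CONFIG_EVMS_INFO_* to be 'y',
# the cascade yields a single -DEVMS_INFO_LEVEL definition, with
# EVMS_INFO_DEFAULT as the initial fallback.
#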
3815+
3816+include $(TOPDIR)/Rules.make
3817+
3818diff -Naur linux-2002-09-30/drivers/evms/dos_part.c evms-2002-09-30/drivers/evms/dos_part.c
3819--- linux-2002-09-30/drivers/evms/dos_part.c Wed Dec 31 18:00:00 1969
3820+++ evms-2002-09-30/drivers/evms/dos_part.c Fri Sep 13 16:09:55 2002
3821@@ -0,0 +1,1452 @@
3822+/* -*- linux-c -*- */
3823+/*
3824+ *
3825+ *
3826+ * Copyright (c) International Business Machines Corp., 2000
3827+ *
3828+ * This program is free software; you can redistribute it and/or modify
3829+ * it under the terms of the GNU General Public License as published by
3830+ * the Free Software Foundation; either version 2 of the License, or
3831+ * (at your option) any later version.
3832+ *
3833+ * This program is distributed in the hope that it will be useful,
3834+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
3835+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
3836+ * the GNU General Public License for more details.
3837+ *
3838+ * You should have received a copy of the GNU General Public License
3839+ * along with this program; if not, write to the Free Software
3840+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
3841+ *
3842+ *
3843+ */
3844+/*
3845+ * linux/drivers/evms/dos_part.c
3846+ *
3847+ * EVMS DOS partition manager
3848+ *
3849+ * Partial code extracted from
3850+ *
3851+ * linux/fs/partitions/msdos.c
3852+ *
3853+ */
3854+
3855+#include <linux/config.h>
3856+#include <linux/module.h>
3857+#include <linux/kernel.h>
3858+#include <linux/config.h>
3859+#include <linux/fs.h>
3860+#include <linux/genhd.h>
3861+#include <linux/string.h>
3862+#include <linux/blk.h>
3863+#include <linux/init.h>
3864+#include <linux/iobuf.h> /* for kiobuf stuffs */
3865+
3866+#ifdef CONFIG_BLK_DEV_IDE
3867+#include <linux/ide.h> /* IDE xlate */
3868+#endif /* CONFIG_BLK_DEV_IDE */
3869+
3870+#include <linux/evms/evms.h>
3871+#include <linux/evms/evms_os2.h>
3872+
3873+#include <asm/system.h>
3874+#include <asm/uaccess.h>
3875+
3876+/* prefix used in logging messages */
3877+#define LOG_PREFIX "dos_part: "
3878+
3879+/* #include "msdos.h" */
3880+#define MSDOS_LABEL_MAGIC 0xAA55
3881+#define GPT_ENTIRE_DISK_INDICATOR 0xEE
3882+#define GPT_ESP_INDICATOR 0xEF
3883+
3884+/**
3885+ * struct mbr_ebr - Skeletal MBR/EBR structure useful for our purposes
3886+ * @unused1: skip IPL record code
3887+ * @partitions: partition table
3888+ * @signature: DOS magic
3889+ *
3890+ * skeletal access to partition table in MBR/EBR
3891+ **/
3892+struct mbr_ebr {
3893+ u8 unused1[0x1be];
3894+ struct partition partitions[4];
3895+ u16 signature;
3896+};
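/* Editorial note: assuming the kernel's packed struct partition layout,
 * this overlay matches a 512-byte boot sector exactly: 0x1be (446) bytes
 * of IPL/boot code, four 16-byte partition entries at offset 446, and the
 * 2-byte 0xAA55 signature at offset 510. */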
3897+
3898+/**
3899+ * struct dos_private - Private data structure for this plugin
3900+ * @source_disk: object this IO will get remapped to
3901+ * @start_sect: source object relative starting address in 512 byte units
3902+ * @nr_sects: partition size in 512 byte units
3903+ * @type: partition type or filesystem format indicator
3904+ *
3905+ * private copy of the just the fields we require to remap IO requests
3906+ * to the underlying object.
3907+ **/
3908+struct dos_private {
3909+ struct evms_logical_node *source_disk;
3910+ u64 start_sect;
3911+ u64 nr_sects;
3912+ unsigned char type;
3913+};
3914+
3915+/**
3916+ * struct extended_part - Structure used to track progress traversing an EBR chain
3917+ * @extended: partition table in the extended boot record
3918+ * @start_sect: address of the extended boot record in 512 byte units
3919+ * @next_ebr_start: address of next ebr in the chain
3920+ * @done: progress flag
3921+ *
3922+ * struct used to track extended boot record chain traversals.
3923+ **/
3924+struct extended_part {
3925+ struct partition *extended;
3926+ u64 start_sect;
3927+ u64 next_ebr_start;
3928+ int done;
3929+};
3930+
3931+/* Global variables */
3932+static int cur_comp_part_num; /* used to track non-primary
3933+ * partition numbers
3934+ */
3935+static int exported_nodes; /* total # of exported segments
3936+ * produced during this discovery.
3937+ */
3938+
3939+/* External references */
3940+#if defined(CONFIG_BLK_DEV_MD) && defined(CONFIG_AUTODETECT_RAID)
3941+extern void md_autodetect_dev(kdev_t dev);
3942+#endif
3943+
3944+/* Prototypes */
3945+static int mbr_ebr_partition_discover(struct evms_logical_node **);
3946+static int mbr_ebr_partition_delete(struct evms_logical_node *);
3947+static void mbr_ebr_partition_read(struct evms_logical_node *,
3948+ struct buffer_head *);
3949+static void mbr_ebr_partition_write(struct evms_logical_node *,
3950+ struct buffer_head *);
3951+static int mbr_ebr_partition_ioctl(struct evms_logical_node *, struct inode *,
3952+ struct file *, unsigned int, unsigned long);
3953+static int mbr_ebr_partition_init_io(struct evms_logical_node *,
3954+ int, u64, u64, void *);
3955+
3956+static struct evms_plugin_fops fops = {
3957+ .discover = mbr_ebr_partition_discover,
3958+ .delete = mbr_ebr_partition_delete,
3959+ .read = mbr_ebr_partition_read,
3960+ .write = mbr_ebr_partition_write,
3961+ .init_io = mbr_ebr_partition_init_io,
3962+ .ioctl = mbr_ebr_partition_ioctl
3963+};
3964+
3965+#define EVMS_MSDOS_PARTITION_MANAGER_ID 1
3966+
3967+static struct evms_plugin_header plugin_header = {
3968+ .id = SetPluginID(IBM_OEM_ID,
3969+ EVMS_SEGMENT_MANAGER,
3970+ EVMS_MSDOS_PARTITION_MANAGER_ID),
3971+ .version = {
3972+ .major = 1,
3973+ .minor = 1,
3974+ .patchlevel = 1
3975+ },
3976+ .required_services_version = {
3977+ .major = 0,
3978+ .minor = 5,
3979+ .patchlevel = 0
3980+ },
3981+ .fops = &fops
3982+};
3983+
3984+/*
3985+ * Many architectures don't like unaligned accesses, which is
3986+ * frequently the case with the nr_sects and start_sect partition
3987+ * table entries.
3988+ */
3989+#include <asm/unaligned.h>
3990+
3991+#define SYS_IND(p) (get_unaligned(&p->sys_ind))
3992+#define NR_SECTS(p) (u64)({ __typeof__(p->nr_sects) __a = \
3993+ get_unaligned(&p->nr_sects); \
3994+ le32_to_cpu(__a); \
3995+ })
3996+
3997+#define START_SECT(p) (u64)({ __typeof__(p->start_sect) __a = \
3998+ get_unaligned(&p->start_sect); \
3999+ le32_to_cpu(__a); \
4000+ })
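/* Editorial note: partition entries begin at byte offset 446, so their
 * 32-bit start_sect/nr_sects fields are not naturally aligned;
 * get_unaligned() keeps the loads safe on architectures that trap on
 * unaligned access, and le32_to_cpu() converts from the on-disk
 * little-endian byte order. */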
4001+
4002+/******************************************/
4003+/* List Support - Variables, & Functions */
4004+/******************************************/
4005+
4006+/* Typedefs */
4007+
4008+struct segment_list_node {
4009+ struct evms_logical_node *segment;
4010+ struct segment_list_node *next;
4011+};
4012+
4013+struct disk_list_node {
4014+ struct evms_logical_node *disk;
4015+ struct segment_list_node *segment_list;
4016+ struct disk_list_node *next;
4017+};
4018+
4019+/* Variables */
4020+
4021+static struct disk_list_node *my_disk_list;
4022+
4023+/* Functions */
4024+
4025+static struct disk_list_node **
4026+lookup_disk(struct evms_logical_node *disk)
4027+{
4028+ struct disk_list_node **ldln;
4029+
4030+ ldln = &my_disk_list;
4031+ while (*ldln) {
4032+ if ((*ldln)->disk == disk)
4033+ break;
4034+ ldln = &(*ldln)->next;
4035+ }
4036+ return (ldln);
4037+}
4038+
4039+static struct segment_list_node **
4040+lookup_segment(struct disk_list_node *disk, struct evms_logical_node *segment)
4041+{
4042+ struct segment_list_node **lsln;
4043+
4044+ lsln = &disk->segment_list;
4045+ while (*lsln) {
4046+ if ((*lsln)->segment == segment)
4047+ break;
4048+ lsln = &(*lsln)->next;
4049+ }
4050+ return (lsln);
4051+}
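/* Editorial note: both lookup helpers return the address of the link that
 * points at the matching node (or at the terminating NULL when no match
 * exists). Callers can therefore insert with '*ldln = new_disk' or unlink
 * with '*lsln = (*lsln)->next' without re-walking the list, as
 * add_segment_to_disk() and remove_segment_from_disk() do below. */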
4052+
4053+static struct evms_logical_node *
4054+find_segment_on_disk(struct evms_logical_node *disk,
4055+ u64 start_sect, u64 nr_sects)
4056+{
4057+ struct evms_logical_node *rc = NULL;
4058+ struct disk_list_node **ldln;
4059+ struct segment_list_node **lsln;
4060+ struct dos_private *dos_prv;
4061+
4062+ ldln = lookup_disk(disk);
4063+ if (*ldln) {
4064+ /* disk found in list */
4065+ /* attempt to find segment */
4066+
4067+ lsln = &(*ldln)->segment_list;
4068+ while (*lsln) {
4069+ dos_prv = (*lsln)->segment->private;
4070+ if (dos_prv->start_sect == start_sect)
4071+ if (dos_prv->nr_sects == nr_sects)
4072+ break;
4073+ lsln = &(*lsln)->next;
4074+ }
4075+ if (*lsln)
4076+ rc = (*lsln)->segment;
4077+ }
4078+ return (rc);
4079+}
4080+
4081+/* function description: add_segment_to_disk
4082+ *
4083+ * this function attempts to add a segment to the segment
4084+ * list of a disk. if the specified disk is not found, it
4085+ * will first be added to the global disk list.
4086+ *
4087+ * return values:
4088+ *   0       - the specified segment was added to the
4089+ *             disk's segment list
4090+ *   -ENOMEM - a disk or segment list node could not
4091+ *             be allocated
4092+ *   -1      - the segment was already present in the
4093+ *             disk's segment list; in this case the
4094+ *             caller's duplicate segment can be thrown away
4095+ */
4096+static int
4097+add_segment_to_disk(struct evms_logical_node *disk,
4098+ struct evms_logical_node *segment)
4099+{
4100+ int rc = 0;
4101+ struct disk_list_node **ldln, *new_disk;
4102+ struct segment_list_node **lsln, *new_segment;
4103+
4104+ ldln = lookup_disk(disk);
4105+ if (*ldln == NULL) {
4106+ /* disk not in list, add disk */
4107+ new_disk = kmalloc(sizeof (*new_disk), GFP_KERNEL);
4108+ if (new_disk) {
4109+ memset(new_disk, 0, sizeof (*new_disk));
4110+ new_disk->disk = disk;
4111+ *ldln = new_disk;
4112+ } else {
4113+ rc = -ENOMEM;
4114+ }
4115+ }
4116+ if (!rc) {
4117+ /* attempt to add segment */
4118+ lsln = lookup_segment(*ldln, segment);
4119+ if (*lsln == NULL) {
4120+ /* segment not in list, add segment */
4121+ new_segment =
4122+ kmalloc(sizeof (*new_segment), GFP_KERNEL);
4123+ if (new_segment) {
4124+ memset(new_segment, 0, sizeof (*new_segment));
4125+ new_segment->segment = segment;
4126+ *lsln = new_segment;
4127+ } else {
4128+ rc = -ENOMEM;
4129+ }
4130+ } else
4131+ rc = -1;
4132+ }
4133+ return (rc);
4134+}
4135+
4136+static int
4137+remove_segment_from_disk(struct evms_logical_node *disk,
4138+ struct evms_logical_node *segment,
4139+ struct evms_logical_node **empty_disk)
4140+{
4141+ int rc = 0;
4142+ struct disk_list_node **ldln, *tmp_disk_node;
4143+ struct segment_list_node **lsln, *tmp_segment_node;
4144+
4145+ *empty_disk = NULL;
4146+ ldln = lookup_disk(disk);
4147+ if (*ldln == NULL) {
4148+ rc = -1;
4149+ } else {
4150+ /* disk found in list */
4151+ /* attempt to add segment */
4152+ lsln = lookup_segment(*ldln, segment);
4153+ if (*lsln == NULL) {
4154+ rc = -2;
4155+ } else {
4156+ tmp_segment_node = *lsln;
4157+ /* remove segment from list */
4158+ *lsln = (*lsln)->next;
4159+ /* free the segment list node */
4160+ kfree(tmp_segment_node);
4161+
4162+ if ((*ldln)->segment_list == NULL) {
4163+ tmp_disk_node = *ldln;
4164+ *empty_disk = tmp_disk_node->disk;
4165+ /* remove disk from list */
4166+ *ldln = (*ldln)->next;
4167+ /* free the disk list node */
4168+ kfree(tmp_disk_node);
4169+ }
4170+ }
4171+ }
4172+ return (rc);
4173+}
4174+
4175+static inline int
4176+is_extended_partition(struct partition *p)
4177+{
4178+ return (SYS_IND(p) == DOS_EXTENDED_PARTITION ||
4179+ SYS_IND(p) == WIN98_EXTENDED_PARTITION ||
4180+ SYS_IND(p) == LINUX_EXTENDED_PARTITION);
4181+}
4182+
4183+static inline u64
4184+part_start(struct partition *part, u64 ext_start, u64 ebr_start)
4185+{
4186+ u64 pstart = START_SECT(part);
4187+ pstart += (is_extended_partition(part)) ? ext_start : ebr_start;
4188+ return (pstart);
4189+}
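/* Editorial note: DOS partition-table addressing is relative. An entry
 * describing a nested extended partition is relative to the start of the
 * outermost extended partition (ext_start), while an entry describing a
 * logical drive is relative to the EBR that contains it (ebr_start);
 * part_start() folds both cases into a disk-relative LBA. */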
4190+
4191+static int
4192+validate_mbr_ebr(struct evms_logical_node *node,
4193+ struct mbr_ebr *mbr_ebr, u64 ext_start,
4194+ u64 ebr_start)
4195+{
4196+ int valid_mbr_ebr, i, j, mbr_flag;
4197+ struct partition *pi, *pj;
4198+ u64 pi_start, pi_end, pj_start, pj_end;
4199+
4200+ /* assume an MBR */
4201+ mbr_flag = TRUE;
4202+
4203+ /* assume its valid */
4204+ valid_mbr_ebr = TRUE;
4205+
4206+ /* check for valid signature */
4207+ if (mbr_ebr->signature != cpu_to_le16(MSDOS_LABEL_MAGIC)) {
4208+ LOG_DEBUG("%s: invalid signature on '%s'!\n",
4209+ __FUNCTION__, node->name);
4210+ valid_mbr_ebr = FALSE;
4211+ }
4212+
4213+ /* check for an AIX IPL signature */
4214+#define IPLRECID 0xc9c2d4c1 /* Value is EBCDIC 'IBMA' */
4215+ if (*(unsigned int *) mbr_ebr == IPLRECID) {
4216+ LOG_DEBUG("%s: found an AIX IPL signature on '%s'\n",
4217+ __FUNCTION__, node->name);
4218+ valid_mbr_ebr = FALSE;
4219+ }
4220+
4221+ /* check for boot sector fields */
4222+
4223+#if 0 //Remove checking of the first byte
4224+
4225+ /* attempt to make some initial assumptions about
4226+ * what type of data structure this could be. we
4227+ * start by checking the 1st byte. we can tell a
4228+ * few things based on what is or isn't there.
4229+ */
4230+ if (valid_mbr_ebr == TRUE)
4231+ switch (*(u_char *) mbr_ebr) {
4232+ /* check for JMP as 1st instruction
4233+ * if found, assume (for now), that
4234+ * this is a boot sector.
4235+ */
4236+ /* Removed the JMP opcode check because it's not enough to determine
4237+ * that this sector does not have a valid MBR.
4238+ * Note: To avoid going thru validation process of partition table,
4239+ * it's necessary to have a better boot sector check
4240+ * (eg. JMP opcode && other conditions) */
4241+ /*
4242+ case 0xEB:
4243+ LOG_DEBUG("%s: boot sector detected!\n", __FUNCTION__);
4244+ valid_mbr_ebr = FALSE;
4245+ */
4246+ /* let this fall thru to pick up the
4247+ * mbr_flag == FALSE.
4248+ */
4249+
4250+ /* the MBR should contain boot strap
4251+ * code, so we don't expect the 1st
4252+ * byte to be a 0x0. If the 1st byte
4253+ * IS 0x0, its assumed (for now) to
4254+ * be an EBR.
4255+ */
4256+ case 0:
4257+ mbr_flag = FALSE;
4258+ break;
4259+ }
4260+#endif //Remove checking of the first byte
4261+
4262+ if (valid_mbr_ebr == TRUE) {
4263+ /* dump the partition table entries in debug mode */
4264+ LOG_DEBUG
4265+ ("%s: disk relative starts: ext_part("PFU64"), ebr("PFU64").\n",
4266+ __FUNCTION__, ext_start, ebr_start);
4267+ for (i = 0; i < 4; i++) {
4268+ pi = &mbr_ebr->partitions[i];
4269+ LOG_DEBUG
4270+ ("%s: Partition: index(%d), start("PFU64"), size("PFU64"), sys(0x%x).\n",
4271+ __FUNCTION__, i, START_SECT(pi), NR_SECTS(pi),
4272+ SYS_IND(pi));
4273+ }
4274+
4275+ /* check for PMBR (Protected Master Boot Record)
4276+ * and skip this node if found
4277+ */
4278+ for (i = 0; i < 4; i++) {
4279+ pi = &mbr_ebr->partitions[i];
4280+
4281+ if (SYS_IND(pi) == 0xEE) {
4282+ valid_mbr_ebr = FALSE;
4283+ LOG_DETAILS
4284+ ("%s: detected PMBR on '%s', skipping.\n",
4285+ __FUNCTION__, node->name);
4286+ break;
4287+ }
4288+ }
4289+
4290+		/* check if this segment is marked as non-divisible
4291+		 * and skip it if so
4292+		 */
4293+ if (node->iflags & EVMS_TOP_SEGMENT) {
4294+ valid_mbr_ebr = FALSE;
4295+ }
4296+ }
4297+
4298+ if (valid_mbr_ebr == TRUE) {
4299+ /* check for mbr/ebr partition table validity */
4300+ for (i = 0; i < 4; i++) {
4301+ pi = &mbr_ebr->partitions[i];
4302+ if (NR_SECTS(pi)) {
4303+ /* check for partition extending past end of node */
4304+ pi_start = part_start(pi, ext_start, ebr_start);
4305+ pi_end = pi_start + NR_SECTS(pi) - 1;
4306+ if (pi_end >= node->total_vsectors) {
4307+ LOG_DEBUG
4308+ ("%s: partition(%d) ends("PFU64") beyond the end of the disk(%s,"PFU64")!\n",
4309+ __FUNCTION__, i, pi_end,
4310+ node->name, node->total_vsectors);
4311+ valid_mbr_ebr = FALSE;
4312+ }
4313+ if (valid_mbr_ebr == FALSE)
4314+ break;
4315+
4316+ /* check for partition overlap */
4317+ for (j = i + 1; j < 4; j++) {
4318+ pj = &mbr_ebr->partitions[j];
4319+ if (NR_SECTS(pj)) {
4320+ pj_start =
4321+ part_start(pj, ext_start,
4322+ ebr_start);
4323+ pj_end =
4324+ pj_start + NR_SECTS(pj) - 1;
4325+ if (pi_start == pj_start) {
4326+ valid_mbr_ebr = FALSE;
4327+ } else if (pi_start < pj_start) {
4328+ if (pi_end >= pj_start)
4329+ valid_mbr_ebr =
4330+ FALSE;
4331+ } else if (pi_start <= pj_end)
4332+ valid_mbr_ebr = FALSE;
4333+
4334+ if (valid_mbr_ebr == FALSE) {
4335+ LOG_DEBUG
4336+ ("%s: overlapping partitions(%d,%d) detected on '%s'!\n",
4337+ __FUNCTION__, i, j,
4338+ node->name);
4339+ break;
4340+ }
4341+ }
4342+ }
4343+ if (valid_mbr_ebr == FALSE)
4344+ break;
4345+ }
4346+ }
4347+ }
4348+ if (valid_mbr_ebr == TRUE) {
4349+ LOG_DEBUG("%s: valid %cBR detected on '%s'!\n", __FUNCTION__,
4350+ (mbr_flag == TRUE) ? 'M' : 'E', node->name);
4351+ } else {
4352+ LOG_DEBUG("%s: no valid MBR/EBR detected on '%s'!\n",
4353+ __FUNCTION__, node->name);
4354+ }
4355+ return (valid_mbr_ebr);
4356+}
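/* Editorial sketch, not part of the original patch: the pairwise overlap
 * test unrolled in validate_mbr_ebr() above is equivalent to the usual
 * closed-interval intersection check.
 */
static inline int lba_ranges_overlap(u64 s1, u64 e1, u64 s2, u64 e2)
{
	/* [s1,e1] and [s2,e2] (inclusive, non-empty) intersect iff each
	 * range starts no later than the other one ends */
	return (s1 <= e2) && (s2 <= e1);
}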
4357+
4358+/*
4359+ * Function: add_segment
4360+ */
4361+static int
4362+mbr_ebr_process_segment(struct evms_logical_node **discover_list,
4363+ struct evms_logical_node *node,
4364+ u64 start_sect,
4365+ u64 nr_sects,
4366+ unsigned char type, int part_num, char *partition_name)
4367+{
4368+ struct dos_private *dos_prv = NULL;
4369+ struct evms_logical_node *segment;
4370+ int rc = 0;
4371+
4372+ segment = find_segment_on_disk(node, start_sect, nr_sects);
4373+ if (segment) {
4374+ LOG_DETAILS("exporting segment '%s'.\n", segment->name);
4375+ } else {
4376+ dos_prv = kmalloc(sizeof (*dos_prv), GFP_KERNEL);
4377+ if (dos_prv) {
4378+ memset(dos_prv, 0, sizeof (*dos_prv));
4379+ dos_prv->source_disk = node;
4380+ dos_prv->start_sect = start_sect;
4381+ dos_prv->nr_sects = nr_sects;
4382+ dos_prv->type = type;
4383+ rc = evms_cs_allocate_logical_node(&segment);
4384+ } else {
4385+ rc = -ENOMEM;
4386+ }
4387+ if (!rc) {
4388+ segment->plugin = &plugin_header;
4389+ segment->system_id = (unsigned int) type;
4390+ segment->total_vsectors = nr_sects;
4391+ segment->block_size = node->block_size;
4392+ segment->hardsector_size = node->hardsector_size;
4393+ segment->private = dos_prv;
4394+ segment->flags = node->flags;
4395+ if (partition_name)
4396+ strcpy(segment->name, partition_name);
4397+ else {
4398+ strcpy(segment->name, node->name);
4399+ if (GetPluginType(node->plugin->id) ==
4400+ EVMS_SEGMENT_MANAGER) {
4401+ strcat(segment->name, ".");
4402+ }
4403+ sprintf(segment->name + strlen(segment->name),
4404+ "%d", part_num);
4405+ }
4406+			/* watch for a super-floppy-format GPT system partition
4407+			 * and don't let it be subdivided
4408+			 */
4409+ if (segment->system_id == GPT_ESP_INDICATOR) {
4410+ node->iflags |= EVMS_TOP_SEGMENT;
4411+ }
4412+ LOG_DETAILS("creating segment '%s'.\n", segment->name);
4413+ rc = add_segment_to_disk(node, segment);
4414+ if (rc) {
4415+ LOG_ERROR
4416+ ("%s: error(%d) adding segment '%s'!\n",
4417+ __FUNCTION__, rc, segment->name);
4418+ rc = 0;
4419+ } else {
4420+ MOD_INC_USE_COUNT;
4421+ }
4422+ }
4423+ if (rc) {
4424+ if (dos_prv)
4425+ kfree(dos_prv);
4426+ if (segment)
4427+ evms_cs_deallocate_logical_node(segment);
4428+ }
4429+ }
4430+ if (!rc) {
4431+ evms_cs_add_logical_node_to_list(discover_list, segment);
4432+ exported_nodes++;
4433+ }
4434+ return rc;
4435+}
4436+
4437+static void
4438+print_partition_info(char *leading_comment, struct partition *p)
4439+{
4440+ LOG_EXTRA
4441+ ("%s: boot_ind(0x%02x), sys_ind(0x%02x), startCHS(%u,%u,%u), endCHS(%u,%u,%u), startLBA("PFU64"), sizeLBA("PFU64")\n",
4442+ leading_comment, p->boot_ind, p->sys_ind, p->cyl, p->head,
4443+ p->sector, p->end_cyl, p->end_head, p->end_sector, START_SECT(p),
4444+ NR_SECTS(p));
4445+}
4446+
4447+#ifdef CONFIG_BSD_DISKLABEL
4448+#define BSD_DISKLABEL_PART_TABLE_SECTOR_OFFSET 1
4449+static void
4450+print_bsd_partition_info(char *leading_comment, struct bsd_partition *p)
4451+{
4452+ LOG_EXTRA
4453+ ("%s: p_size(%u), p_offset(%u), p_fsize(%u), p_fstype(0x%02X), p_frag(0x%02X), p_cpg(%u)\n",
4454+ leading_comment, p->p_size, p->p_offset, p->p_fsize, p->p_fstype,
4455+ p->p_frag, p->p_cpg);
4456+}
4457+
4458+/*
4459+ * bsd_disklabel_partition
4460+ *
4461+ * Return:
4462+ * - 0 for 0 partition
4463+ * - (positive) number for number of BSD partitions found
4464+ * - (negative) error code
4465+ */
4466+static int
4467+bsd_disklabel_partition(struct evms_logical_node **discover_list,
4468+ struct evms_logical_node *node, struct partition *bsd)
4469+{
4470+ struct bsd_disklabel *l;
4471+ struct bsd_partition *p;
4472+ int max_partitions;
4473+ char *data;
4474+ int rc = 0;
4475+ int count = 0;
4476+
4477+ data = kmalloc(node->hardsector_size, GFP_KERNEL);
4478+ if (data)
4479+ rc = INIT_IO(node,
4480+ 0,
4481+ START_SECT(bsd) +
4482+ BSD_DISKLABEL_PART_TABLE_SECTOR_OFFSET, 1, data);
4483+ else
4484+ rc = -ENOMEM;
4485+ if (!rc) {
4486+
4487+ l = (struct bsd_disklabel *) data;
4488+ if (l->d_magic == BSD_DISKMAGIC) {
4489+
4490+ max_partitions =
4491+ ((SYS_IND(bsd) ==
4492+ OPENBSD_PARTITION) ? OPENBSD_MAXPARTITIONS :
4493+ BSD_MAXPARTITIONS);
4494+ if (l->d_npartitions < max_partitions)
4495+ max_partitions = l->d_npartitions;
4496+ for (p = l->d_partitions;
4497+ p - l->d_partitions < max_partitions; p++) {
4498+ if (p->p_fstype != BSD_FS_UNUSED) {
4499+ evmsLOG2(EVMS_INFO_EXTRA,
4500+ (print_bsd_partition_info
4501+ (__FUNCTION__, p)));
4502+ rc = mbr_ebr_process_segment
4503+ (discover_list, node,
4504+ (u64) p->p_offset,
4505+ (u64) p->p_size, p->p_fstype,
4506+ cur_comp_part_num++, NULL);
4507+ if (rc)
4508+ break;
4509+ count++;
4510+ }
4511+ }
4512+ }
4513+ }
4514+ if (data)
4515+ kfree(data);
4516+ if (!rc)
4517+ rc = count;
4518+ LOG_DETAILS("%s: exported (%d) partitions\n", __FUNCTION__, rc);
4519+ return rc;
4520+}
4521+#endif
4522+
4523+#ifdef CONFIG_UNIXWARE_DISKLABEL
4524+#define UNIXWARE_PART_TABLE_SECTOR_OFFSET 29
4525+
4526+/*
4527+ * unixware_partition
4528+ *
4529+ * Return:
4530+ * - 0 for 0 partition
4531+ * - (positive) number for number of UNIXWARE partitions found
4532+ * - (negative) error code
4533+ */
4534+static int
4535+unixware_partition(struct evms_logical_node **discover_list,
4536+ struct evms_logical_node *node,
4537+ struct partition *unixware_part)
4538+{
4539+ struct unixware_disklabel *l;
4540+ struct unixware_slice *p;
4541+ char *data = NULL;
4542+ int rc = 0;
4543+ int count = 0;
4544+
4545+ data = kmalloc(node->hardsector_size, GFP_KERNEL);
4546+ if (data)
4547+ rc = INIT_IO(node,
4548+ 0,
4549+ START_SECT(unixware_part) +
4550+ UNIXWARE_PART_TABLE_SECTOR_OFFSET, 1, data);
4551+ else
4552+ rc = -ENOMEM;
4553+ if (!rc) {
4554+ l = (struct unixware_disklabel *) data;
4555+ if (le32_to_cpu(l->d_magic) == UNIXWARE_DISKMAGIC &&
4556+ le32_to_cpu(l->vtoc.v_magic) == UNIXWARE_DISKMAGIC2) {
4557+ p = &l->vtoc.v_slice[1]; /* The 0th slice is the same as whole disk. */
4558+ while (p - &l->vtoc.v_slice[0] < UNIXWARE_NUMSLICE) {
4559+ if (p->s_label != UNIXWARE_FS_UNUSED) {
4560+ rc = mbr_ebr_process_segment
4561+ (discover_list, node, START_SECT(p),
4562+ NR_SECTS(p), UNIXWARE_PARTITION,
4563+ cur_comp_part_num++, NULL);
4564+ if (rc)
4565+ break;
4566+ count++;
4567+ }
4568+ p++;
4569+ }
4570+ }
4571+ }
4572+ if (data)
4573+ kfree(data);
4574+ if (!rc)
4575+ rc = count;
4576+ LOG_DETAILS("%s: exported (%d) partitions\n", __FUNCTION__, rc);
4577+ return rc;
4578+}
4579+#endif
4580+
4581+#ifdef CONFIG_SOLARIS_X86_PARTITION
4582+#define SOLARIS_X86_PART_TABLE_SECTOR_OFFSET 1
4583+/*
4584+ * solaris_x86_partition
4585+ *
4586+ * Return:
4587+ * - 0 for 0 partition
4588+ * - (positive) number for number of solaris partitions found
4589+ * - (negative) error code
4590+ */
4591+static int
4592+solaris_x86_partition(struct evms_logical_node **discover_list,
4593+ struct evms_logical_node *node,
4594+ struct partition *solaris_x86, int probe_only)
4595+{ /* if TRUE, do not add segments */
4596+ long offset = START_SECT(solaris_x86);
4597+ struct solaris_x86_vtoc *v;
4598+ struct solaris_x86_slice *s;
4599+ int i;
4600+ char *data = NULL;
4601+ int rc = 0;
4602+ int count = 0;
4603+
4604+ data = kmalloc(node->hardsector_size, GFP_KERNEL);
4605+ if (data)
4606+ rc = INIT_IO(node,
4607+ 0,
4608+ START_SECT(solaris_x86) +
4609+ SOLARIS_X86_PART_TABLE_SECTOR_OFFSET, 1, data);
4610+ else
4611+ rc = -ENOMEM;
4612+ if (!rc) {
4613+
4614+ v = (struct solaris_x86_vtoc *) data;
4615+
4616+ if (v->v_sanity == SOLARIS_X86_VTOC_SANE) {
4617+ if (v->v_version != 1) {
4618+ LOG_WARNING
4619+				    ("%s: cannot handle version %d vtoc\n",
4620+ __FUNCTION__, v->v_version);
4621+ } else {
4622+ for (i = 0; i < v->v_nparts; i++) {
4623+ s = &v->v_slice[i];
4624+ LOG_EXTRA
4625+ ("s[%d] s_tag(%u), s_flag(%u), s_start(%u), s_size(%u), last_sector(%u)\n",
4626+ i, s->s_tag, s->s_flag, s->s_start,
4627+ s->s_size,
4628+ s->s_start + s->s_size - 1);
4629+
4630+ if ((s->s_size == 0)
4631+ || (s->s_tag == 0x05))
4632+ continue;
4633+ if (!probe_only) {
4634+ rc = mbr_ebr_process_segment
4635+ (discover_list, node,
4636+ (u64) (s->s_start +
4637+ offset),
4638+ (u64) s->s_size,
4639+ SOLARIS_X86_PARTITION,
4640+ cur_comp_part_num++, NULL);
4641+ if (rc)
4642+ break;
4643+ }
4644+ count++;
4645+ }
4646+ }
4647+ }
4648+ }
4649+ if (data)
4650+ kfree(data);
4651+ if (!rc)
4652+ rc = count;
4653+ LOG_DETAILS("%s: %s (%d) partitions\n",
4654+		    __FUNCTION__, probe_only ? "probed" : "exported", rc);
4655+ return rc;
4656+}
4657+#endif
4658+
4659+/*
4660+ * os2lvm_partition() looks for DLAT at last sector of the track containing MBR/EBR
4661+ *
4662+ * Returns: 1 - os2 DLAT was found
4663+ * 0 otherwise
4664+ *
4665+ */
4666+static int
4667+os2lvm_partition(u64 MBR_EBR_sect,
4668+ struct evms_logical_node *node, struct dla_table_sector *dlat)
4669+{
4670+ struct hd_geometry geometry;
4671+ int rc;
4672+ u32 crc_hold;
4673+
4674+ rc = evms_cs_kernel_ioctl(node, HDIO_GETGEO, (unsigned long) &geometry);
4675+ if (rc) {
4676+ LOG_SERIOUS("%s: ioctl failed(%u) on '%s'\n",
4677+ __FUNCTION__, rc, node->name);
4678+ } else
4679+ if (!INIT_IO(node, 0, MBR_EBR_sect + geometry.sectors - 1, 1, dlat))
4680+ {
4681+ if ((dlat->DLA_Signature1 == cpu_to_le32(DLA_TABLE_SIGNATURE1))
4682+ && (dlat->DLA_Signature2 ==
4683+ cpu_to_le32(DLA_TABLE_SIGNATURE2))) {
4684+ crc_hold = le32_to_cpu(dlat->DLA_CRC);
4685+ dlat->DLA_CRC = 0;
4686+ if (evms_cs_calculate_crc
4687+ (EVMS_INITIAL_CRC, (void *) dlat,
4688+ node->hardsector_size) == crc_hold)
4689+ return 1;
4690+ }
4691+ }
4692+ return 0;
4693+}
4694+
4695+static int
4696+mbr_ebr_process_logical_drive(struct evms_logical_node **discover_list,
4697+ struct evms_logical_node *node,
4698+ struct extended_part *ext_info,
4699+ int i,
4700+ struct partition *p,
4701+ int os2lvm, struct dla_table_sector *dlat)
4702+{
4703+ int rc = 0;
4704+ char tmp_buf[EVMS_VOLUME_NAME_SIZE], *partition_name;
4705+
4706+ LOG_EXTRA("%s: PartitionTableIndex(%i), Start("PFU64"), Size("PFU64")\n",
4707+ __FUNCTION__, i, START_SECT(p), NR_SECTS(p));
4708+
4709+ if (NR_SECTS(p)) {
4710+ if (is_extended_partition(p)) {
4711+ ext_info->next_ebr_start =
4712+ (u64) (START_SECT(p) +
4713+ START_SECT(ext_info->extended));
4714+ ext_info->done = FALSE; /* not done yet */
4715+ } else {
4716+ partition_name = NULL;
4717+ if (os2lvm && p->sys_ind != LVM_PARTITION_INDICATOR &&
4718+ le32_to_cpu(dlat->DLA_Array[i].Partition_Start) ==
4719+ (ext_info->start_sect + START_SECT(p))
4720+ && le32_to_cpu(dlat->DLA_Array[i].Partition_Size) ==
4721+ NR_SECTS(p)
4722+ && dlat->DLA_Array[i].Drive_Letter != '\0') {
4723+ sprintf(tmp_buf, "os2/%c",
4724+ dlat->DLA_Array[i].Drive_Letter);
4725+ partition_name = tmp_buf;
4726+ }
4727+ evmsLOG2(EVMS_INFO_EXTRA,
4728+ (print_partition_info(__FUNCTION__, p)));
4729+
4730+ rc = mbr_ebr_process_segment(discover_list,
4731+ node,
4732+ ext_info->start_sect +
4733+ START_SECT(p), NR_SECTS(p),
4734+ p->sys_ind,
4735+ cur_comp_part_num++,
4736+ partition_name);
4737+ }
4738+ }
4739+ return (rc);
4740+}
4741+
4742+static int
4743+mbr_ebr_process_ebr(struct evms_logical_node **discover_list,
4744+ struct evms_logical_node *node,
4745+ struct extended_part *ext_info, struct mbr_ebr *ebr)
4746+{
4747+ int rc = 0, i, os2lvm;
4748+ struct partition *p;
4749+ struct dla_table_sector *dlat = NULL;
4750+
4751+ /* allocate space for the OS2 DLAT info */
4752+ dlat = kmalloc(node->hardsector_size, GFP_KERNEL);
4753+ if (dlat) {
4754+ /* read the dlat for this mbr */
4755+ os2lvm = os2lvm_partition(ext_info->start_sect, node, dlat);
4756+
4757+ /* walk thru the partition table in the mbr
4758+ * processing each partition record.
4759+ */
4760+ for (i = 0; i < 4; i++) {
4761+ p = &ebr->partitions[i];
4762+ rc = mbr_ebr_process_logical_drive(discover_list,
4763+ node,
4764+ ext_info,
4765+ i, p, os2lvm, dlat);
4766+ }
4767+ } else {
4768+ rc = -ENOMEM;
4769+ }
4770+
4771+ /* free the space used for OS2 DLAT info */
4772+ if (dlat)
4773+ kfree(dlat);
4774+
4775+ return (rc);
4776+}
4777+
4778+static int
4779+mbr_ebr_probe_for_ebr(struct evms_logical_node **discover_list,
4780+ struct evms_logical_node *node,
4781+ struct extended_part *ext_info)
4782+{
4783+ int rc = 0;
4784+ u_char *sector_buffer = NULL;
4785+ struct mbr_ebr *ebr = NULL;
4786+
4787+ /* allocate a sector size buffer */
4788+ sector_buffer = kmalloc(node->hardsector_size, GFP_KERNEL);
4789+ if (sector_buffer)
4790+ /* read the location of the mbr sector */
4791+ rc = INIT_IO(node, 0, ext_info->start_sect, 1, sector_buffer);
4792+ else
4793+ rc = -ENOMEM;
4794+
4795+ if (!rc) {
4796+ ebr = (struct mbr_ebr *) sector_buffer;
4797+ if (validate_mbr_ebr(node, ebr,
4798+ START_SECT(ext_info->extended),
4799+ ext_info->start_sect) == TRUE)
4800+ rc = mbr_ebr_process_ebr(discover_list,
4801+ node, ext_info, ebr);
4802+ }
4803+
4804+ if (sector_buffer)
4805+ kfree(sector_buffer);
4806+
4807+ return (rc);
4808+}
4809+
4810+static int
4811+mbr_ebr_process_extended_partition(struct evms_logical_node **discover_list,
4812+ struct evms_logical_node *node,
4813+ struct partition *p)
4814+{
4815+ int rc = 0;
4816+ struct extended_part ext_info;
4817+
4818+ memset(&ext_info, 0, sizeof (ext_info));
4819+ ext_info.done = FALSE;
4820+ ext_info.extended = p;
4821+ ext_info.next_ebr_start = START_SECT(p);
4822+ while (ext_info.done == FALSE) {
4823+ ext_info.done = TRUE; /* assume done, unless we find another EBR */
4824+ ext_info.start_sect = ext_info.next_ebr_start;
4825+ rc = mbr_ebr_probe_for_ebr(discover_list, node, &ext_info);
4826+ }
4827+ return rc;
4828+}
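/* Editorial note: logical drives are stored as a singly linked chain of
 * EBRs. Each EBR typically carries one logical-drive entry plus one link
 * entry pointing at the next EBR; the loop above assumes it is done, then
 * keeps probing as long as mbr_ebr_process_logical_drive() finds another
 * link entry and clears ext_info.done. */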
4829+
4830+/*
4831+ * is_non_dos_extended
4832+ *
4833+ * This function returns TRUE if the partition entry represents a non-DOS
4834+ * extended partition such as UnixWare, Solaris x86 and BSD
4835+ */
4836+static int
4837+is_non_dos_extended(struct evms_logical_node **discover_list,
4838+ struct evms_logical_node *node, struct partition *p)
4839+{
4840+ if (NR_SECTS(p)) {
4841+#ifdef CONFIG_BSD_DISKLABEL
4842+ if (SYS_IND(p) == BSD_PARTITION ||
4843+ SYS_IND(p) == NETBSD_PARTITION ||
4844+ SYS_IND(p) == OPENBSD_PARTITION)
4845+ return TRUE;
4846+#endif
4847+
4848+#ifdef CONFIG_UNIXWARE_DISKLABEL
4849+ if (SYS_IND(p) == UNIXWARE_PARTITION)
4850+ return TRUE;
4851+#endif
4852+
4853+#ifdef CONFIG_SOLARIS_X86_PARTITION
4854+ if ((SYS_IND(p) == SOLARIS_X86_PARTITION) &&
4855+ (solaris_x86_partition(discover_list, node, p, TRUE) > 0))
4856+ return TRUE;
4857+#endif
4858+ }
4859+ return (FALSE);
4860+}
4861+
4862+/*
4863+ * mbr_ebr_process_other_primary_partition
4864+ * This function processes other (non-DOS) primary partitions such as
4865+ * UnixWare, Solaris x86 and BSD
4866+ */
4867+static int
4868+mbr_ebr_process_other_primary_partition(struct evms_logical_node
4869+ **discover_list,
4870+ struct evms_logical_node *node,
4871+ struct partition *p)
4872+{
4873+ if (NR_SECTS(p)) {
4874+#ifdef CONFIG_BSD_DISKLABEL
4875+ if (SYS_IND(p) == BSD_PARTITION ||
4876+ SYS_IND(p) == NETBSD_PARTITION ||
4877+ SYS_IND(p) == OPENBSD_PARTITION)
4878+ return bsd_disklabel_partition(discover_list, node, p);
4879+#endif
4880+
4881+#ifdef CONFIG_UNIXWARE_DISKLABEL
4882+ if (SYS_IND(p) == UNIXWARE_PARTITION)
4883+ return unixware_partition(discover_list, node, p);
4884+#endif
4885+
4886+#ifdef CONFIG_SOLARIS_X86_PARTITION
4887+ if (SYS_IND(p) == SOLARIS_X86_PARTITION)
4888+ return solaris_x86_partition(discover_list, node, p,
4889+ FALSE);
4890+#endif
4891+ }
4892+ return (0);
4893+}
4894+
4895+static int
4896+mbr_ebr_process_dos_primary_partition(struct evms_logical_node **discover_list,
4897+ struct evms_logical_node *node,
4898+ int i,
4899+ struct partition *p,
4900+ int os2lvm, struct dla_table_sector *dlat)
4901+{
4902+ int rc = 0;
4903+ char tmp_buf[EVMS_VOLUME_NAME_SIZE], *partition_name;
4904+
4905+ LOG_EVERYTHING("%s: PartitionTableIndex(%i), Start("PFU64"), Size("PFU64")\n",
4906+ __FUNCTION__, i, START_SECT(p), NR_SECTS(p));
4907+
4908+ if (NR_SECTS(p)) {
4909+
4910+ if (is_extended_partition(p))
4911+ rc = mbr_ebr_process_extended_partition(discover_list,
4912+ node, p);
4913+
4914+ else {
4915+ partition_name = NULL;
4916+ if (os2lvm && p->sys_ind != LVM_PARTITION_INDICATOR &&
4917+ le32_to_cpu(dlat->DLA_Array[i].Partition_Start) ==
4918+ START_SECT(p)
4919+ && le32_to_cpu(dlat->DLA_Array[i].Partition_Size) ==
4920+ NR_SECTS(p)
4921+ && dlat->DLA_Array[i].Drive_Letter != '\0') {
4922+ sprintf(tmp_buf, "os2/%c",
4923+ dlat->DLA_Array[i].Drive_Letter);
4924+ partition_name = tmp_buf;
4925+ }
4926+ evmsLOG2(EVMS_INFO_EXTRA,
4927+ (print_partition_info(__FUNCTION__, p)));
4928+
4929+ rc = mbr_ebr_process_segment(discover_list,
4930+ node,
4931+ START_SECT(p),
4932+ NR_SECTS(p),
4933+ p->sys_ind,
4934+ i + 1, partition_name);
4935+ }
4936+ }
4937+ return (rc);
4938+}
4939+
4940+static int
4941+mbr_ebr_process_mbr(struct evms_logical_node **discover_list,
4942+ struct evms_logical_node *node, struct mbr_ebr *mbr)
4943+{
4944+ int rc = 0, i, os2lvm;
4945+ struct partition *p;
4946+ struct dla_table_sector *dlat = NULL;
4947+
4948+ cur_comp_part_num = 5; /* set this value for each disk */
4949+
4950+ /* allocate space for the OS2 DLAT info */
4951+ dlat = kmalloc(node->hardsector_size, GFP_KERNEL);
4952+ if (dlat) {
4953+ /* read the dlat for this mbr */
4954+ os2lvm = os2lvm_partition(0, node, dlat);
4955+
4956+ /* Pass 1: walk thru the partition table in the mbr
4957+ * processing each partition record.
4958+ */
4959+ for (i = 0; i < 4; i++) {
4960+ p = &mbr->partitions[i];
4961+ if (is_non_dos_extended(discover_list, node, p)) {
4962+ LOG_DETAILS
4963+				    (" Found and skipped a non-DOS extended partition.\n");
4964+ continue;
4965+ }
4966+
4967+ mbr_ebr_process_dos_primary_partition(discover_list,
4968+ node,
4969+ i,
4970+ p, os2lvm, dlat);
4971+ }
4972+
4973+ /* Pass 2: walk thru the partition table in the mbr
4974+ * processing each partition record for non-DOS extended partitions
4975+ */
4976+ for (i = 0; i < 4; i++) {
4977+ p = &mbr->partitions[i];
4978+ mbr_ebr_process_other_primary_partition(discover_list,
4979+ node, p);
4980+ }
4981+
4982+ } else {
4983+ rc = -ENOMEM;
4984+ }
4985+
4986+ /* free the space used for OS2 DLAT info */
4987+ if (dlat)
4988+ kfree(dlat);
4989+
4990+ return (rc);
4991+}
4992+
4993+static int
4994+mbr_ebr_probe_for_mbr(struct evms_logical_node **discover_list,
4995+ struct evms_logical_node *node)
4996+{
4997+ int rc = 0;
4998+ u_char *sector_buffer = NULL;
4999+ struct mbr_ebr *mbr = NULL;
5000+
5001+ LOG_DEBUG("%s: probing (%s).\n", __FUNCTION__, node->name);
5002+
5003+ /* allocate a sector size buffer */
5004+ sector_buffer = kmalloc(node->hardsector_size, GFP_KERNEL);
5005+ if (sector_buffer)
5006+ /* read the location of the mbr sector */
5007+ rc = INIT_IO(node, 0, 0, 1, sector_buffer);
5008+ else
5009+ rc = -ENOMEM;
5010+ if (rc) {
5011+ LOG_ERROR("%s: read error(%d) on '%s'.\n",
5012+ __FUNCTION__, rc, node->name);
5013+ } else {
5014+ mbr = (struct mbr_ebr *) sector_buffer;
5015+ if (validate_mbr_ebr(node, mbr, 0, 0) == TRUE) {
5016+ /* since it looks like this disk has a
5017+ * valid MBR, remove the disk node from
5018+ * the discover list. it may already be
5019+ * on the global list, or it will be
5020+ * added to it. in the case of an mbr
5021+ * with no partitions, it is simply
5022+ * removed and forgotten. when one or
5023+ * more partitions are created, the
5024+ * disk will be examined and handled
5025+ * properly during the following
5026+ * rediscover operation.
5027+ */
5028+ evms_cs_remove_logical_node_from_list(discover_list,
5029+ node);
5030+
5031+ rc = mbr_ebr_process_mbr(discover_list, node, mbr);
5032+ }
5033+ }
5034+
5035+ if (sector_buffer)
5036+ kfree(sector_buffer);
5037+
5038+ return (rc);
5039+}
5040+
5041+/*
5042+ * Function: mbr_ebr_partition_discover
5043+ *
5044+ */
5045+static int
5046+mbr_ebr_partition_discover(struct evms_logical_node **discover_list)
5047+{
5048+ int rc = 0;
5049+ struct evms_logical_node *node, *next_node;
5050+
5051+ MOD_INC_USE_COUNT;
5052+ LOG_ENTRY_EXIT("%s: ENTRY\n", __FUNCTION__);
5053+
5054+ /* initialize global variable */
5055+ exported_nodes = 0;
5056+
5057+ /* examine each node on the discover list */
5058+ next_node = *discover_list;
5059+ while (next_node) {
5060+ node = next_node;
5061+ next_node = node->next;
5062+ if (node->plugin->id == plugin_header.id)
5063+ /* don't recurse into our own objects
5064+ */
5065+ continue;
5066+ mbr_ebr_probe_for_mbr(discover_list, node);
5067+ }
5068+
5069+ LOG_ENTRY_EXIT("%s: EXIT(exported nodes:%d, error code:%d)\n",
5070+ __FUNCTION__, exported_nodes, rc);
5071+ if (exported_nodes)
5072+ rc = exported_nodes;
5073+ MOD_DEC_USE_COUNT;
5074+ return (rc);
5075+}
5076+
5077+/*
5078+ * Function: mbr_ebr_partition_delete
5079+ *
5080+ */
5081+static int
5082+mbr_ebr_partition_delete(struct evms_logical_node *segment)
5083+{
5084+ int rc = 0;
5085+ struct dos_private *dos_prv;
5086+ struct evms_logical_node *empty_disk = NULL;
5087+
5088+	if (!segment) {
5089+		rc = -ENODEV;
5090+	} else {
5091+		/* log only after the NULL check; segment is dereferenced here */
5092+		LOG_DETAILS("deleting segment '%s'.\n", segment->name);
5093+ dos_prv = segment->private;
5094+ if (dos_prv) {
5095+ /* remove the segment from the
5096+ * disk's segment list
5097+ */
5098+ rc = remove_segment_from_disk(dos_prv->source_disk,
5099+ segment, &empty_disk);
5100+ /* free the local instance data */
5101+ kfree(dos_prv);
5102+ }
5103+ /* free the segment node */
5104+ evms_cs_deallocate_logical_node(segment);
5105+ MOD_DEC_USE_COUNT;
5106+ /* if the last segment on the disk was
5107+ * deleted, delete the disk node too
5108+ */
5109+ if (empty_disk)
5110+ DELETE(empty_disk);
5111+ }
5112+ return (rc);
5113+}
5114+
5115+/*
5116+ * function: mbr_ebr_partition_io_error
5117+ *
5118+ * this function was primarily created because the function
5119+ * buffer_IO_error is inline and kgdb doesn't allow breakpoints
5120+ * to be set on inline functions. Since this was an error path
5121+ * and not mainline, I decided to add a trace statement to help
5122+ * report on the failing condition.
5123+ *
5124+ */
5125+static void
5126+mbr_ebr_partition_io_error(struct evms_logical_node *node,
5127+ int io_flag, struct buffer_head *bh)
5128+{
5129+ LOG_SERIOUS
5130+ ("attempt to %s beyond partition boundary("PFU64") on (%s), rsector("PFU64").\n",
5131+ (io_flag) ? "WRITE" : "READ", node->total_vsectors - 1, node->name,
5132+ (u64) bh->b_rsector);
5133+
5134+ bh->b_end_io(bh, 0);
5135+}
5136+
5137+/*
5138+ * Function: mbr_ebr_partition_read
5139+ *
5140+ */
5141+static void
5142+mbr_ebr_partition_read(struct evms_logical_node *partition,
5143+ struct buffer_head *bh)
5144+{
5145+ struct dos_private *dos_prv = partition->private;
5146+
5147+ if ((bh->b_rsector + (bh->b_size >> EVMS_VSECTOR_SIZE_SHIFT)) <=
5148+ partition->total_vsectors) {
5149+ bh->b_rsector += dos_prv->start_sect;
5150+ R_IO(dos_prv->source_disk, bh);
5151+ } else
5152+ mbr_ebr_partition_io_error(partition, READ, bh);
5153+}
5154+
5155+/*
5156+ * Function: mbr_ebr_partition_write
5157+ *
5158+ */
5159+static void
5160+mbr_ebr_partition_write(struct evms_logical_node *partition,
5161+ struct buffer_head *bh)
5162+{
5163+ struct dos_private *dos_prv = partition->private;
5164+
5165+ if ((bh->b_rsector + (bh->b_size >> EVMS_VSECTOR_SIZE_SHIFT)) <=
5166+ partition->total_vsectors) {
5167+ bh->b_rsector += dos_prv->start_sect;
5168+ W_IO(dos_prv->source_disk, bh);
5169+ } else
5170+ mbr_ebr_partition_io_error(partition, WRITE, bh);
5171+}
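/* Editorial note: both I/O paths above remap in place after a bounds
 * check: a request at partition-relative sector N is forwarded to the
 * underlying disk at sector N + dos_prv->start_sect, while requests that
 * would run past total_vsectors are failed via
 * mbr_ebr_partition_io_error(). */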
5172+
5173+/*
5174+ * Function: mbr_ebr_partition_init_io
5175+ *
5176+ */
5177+static int
5178+mbr_ebr_partition_init_io(struct evms_logical_node *partition, int io_flag, /* 0=read, 1=write */
5179+ u64 sect_nr, /* disk LBA */
5180+ u64 num_sects, /* # of sectors */
5181+ void *buf_addr)
5182+{ /* buffer address */
5183+ int rc;
5184+ struct dos_private *dos_prv = partition->private;
5185+
5186+ if ((sect_nr + num_sects) <= partition->total_vsectors) {
5187+ rc = INIT_IO(dos_prv->source_disk, io_flag,
5188+ sect_nr + dos_prv->start_sect, num_sects,
5189+ buf_addr);
5190+ } else {
5191+ LOG_SERIOUS
5192+ ("init_io: attempt to %s beyond partition(%s) boundary("PFU64") at sector("PFU64") for count("PFU64").\n",
5193+ (io_flag) ? "WRITE" : "READ", partition->name,
5194+ (dos_prv->nr_sects - 1), sect_nr, num_sects);
5195+ rc = -EINVAL;
5196+ }
5197+
5198+ return (rc);
5199+}
5200+
5201+/*
5202+ * Function: mbr_ebr_partition_ioctl
5203+ *
5204+ */
5205+static int
5206+mbr_ebr_partition_ioctl(struct evms_logical_node *partition,
5207+ struct inode *inode,
5208+ struct file *file, unsigned int cmd, unsigned long arg)
5209+{
5210+ struct dos_private *dos_prv;
5211+ struct hd_geometry hd_geo;
5212+ int rc;
5213+
5214+ rc = 0;
5215+ dos_prv = partition->private;
5216+ if (!inode)
5217+ return -EINVAL;
5218+ switch (cmd) {
5219+ case HDIO_GETGEO:
5220+ {
5221+ rc = IOCTL(dos_prv->source_disk, inode, file, cmd, arg);
5222+ if (rc)
5223+ break;
5224+ if (copy_from_user
5225+ (&hd_geo, (void *) arg,
5226+ sizeof (struct hd_geometry)))
5227+ rc = -EFAULT;
5228+ if (rc)
5229+ break;
5230+ hd_geo.start = dos_prv->start_sect;
5231+ if (copy_to_user
5232+ ((void *) arg, &hd_geo,
5233+ sizeof (struct hd_geometry)))
5234+ rc = -EFAULT;
5235+ }
5236+ break;
5237+ case EVMS_GET_BMAP:
5238+ {
5239+ struct evms_get_bmap_pkt *bmap =
5240+ (struct evms_get_bmap_pkt *) arg;
5241+ bmap->rsector += dos_prv->start_sect;
5242+ /* intentionally fall thru to
5243+ * default ioctl down to device
5244+ * manager.
5245+ */
5246+ }
5247+ default:
5248+ rc = IOCTL(dos_prv->source_disk, inode, file, cmd, arg);
5249+ }
5250+ return rc;
5251+}
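/* Editorial note: for HDIO_GETGEO the geometry is obtained from the
 * underlying disk, but hd_geo.start is then overwritten with the
 * partition's own offset; EVMS_GET_BMAP adjusts rsector by the same
 * offset and deliberately falls through to the default forwarding case. */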
5252+
5253+/*
5254+ * Function: dos_part_init
5255+ *
5256+ */
5257+static int __init
5258+dos_part_init(void)
5259+{
5260+ return evms_cs_register_plugin(&plugin_header); /* register with EVMS */
5261+}
5262+
5263+static void __exit
5264+dos_part_exit(void)
5265+{
5266+ evms_cs_unregister_plugin(&plugin_header);
5267+}
5268+
5269+module_init(dos_part_init);
5270+module_exit(dos_part_exit);
5271+#ifdef MODULE_LICENSE
5272+MODULE_LICENSE("GPL");
5273+#endif
5274diff -Naur linux-2002-09-30/drivers/evms/evms.c evms-2002-09-30/drivers/evms/evms.c
5275--- linux-2002-09-30/drivers/evms/evms.c Wed Dec 31 18:00:00 1969
5276+++ evms-2002-09-30/drivers/evms/evms.c Thu Sep 26 11:55:45 2002
5277@@ -0,0 +1,5865 @@
5278+/* -*- linux-c -*- */
5279+/*
5280+ *
5281+ *
5282+ * Copyright (c) International Business Machines Corp., 2000
5283+ *
5284+ * This program is free software; you can redistribute it and/or modify
5285+ * it under the terms of the GNU General Public License as published by
5286+ * the Free Software Foundation; either version 2 of the License, or
5287+ * (at your option) any later version.
5288+ *
5289+ * This program is distributed in the hope that it will be useful,
5290+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
5291+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
5292+ * the GNU General Public License for more details.
5293+ *
5294+ * You should have received a copy of the GNU General Public License
5295+ * along with this program; if not, write to the Free Software
5296+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
5297+ *
5298+ *
5299+ */
5300+/*
5301+ *
5302+ * linux/drivers/evms/evms.c
5303+ *
5304+ * EVMS Base and Common Services
5305+ *
5306+ */
5307+
5308+#define DEVICE_NR(device) MINOR(device) /* evms has no partition bits */
5309+#define DEVICE_NAME "evms" /* name for messaging */
5310+#define DEVICE_NO_RANDOM /* no entropy to contribute */
5311+#define DEVICE_OFF(d) /* do nothing */
5312+
5313+//#define LOCAL_DEBUG 1
5314+
5315+#include <linux/config.h>
5316+#include <linux/module.h>
5317+#include <linux/errno.h>
5318+#include <linux/kernel.h>
5319+#include <linux/init.h>
5320+#include <linux/fs.h>
5321+#include <linux/slab.h>
5322+#include <asm/uaccess.h>
5323+#include <linux/blk.h> /* must be included by all block drivers */
5324+#include <linux/blkdev.h>
5325+#include <linux/blkpg.h>
5326+#include <linux/iobuf.h>
5327+#include <linux/genhd.h>
5328+#include <linux/sched.h>
5329+#include <linux/completion.h>
5330+#include <linux/version.h>
5331+#include <linux/swap.h>
5332+#include <net/checksum.h>
5333+#include <linux/sysctl.h>
5334+#include <linux/smp_lock.h>
5335+#include <linux/reboot.h>
5336+#include <linux/compiler.h>
5337+#include <linux/evms/evms.h>
5338+
5339+//#define VFS_PATCH_PRESENT
5340+
5341+/* prefix used in logging messages */
5342+#define LOG_PREFIX
5343+
5344+struct evms_registered_plugin {
5345+ struct evms_plugin_header *plugin;
5346+ struct evms_registered_plugin *next;
5347+};
5348+static struct evms_registered_plugin *registered_plugin_head = NULL;
5349+
5350+static struct evms_list_node *evms_global_device_list = NULL;
5351+static struct evms_list_node *evms_global_feature_node_list = NULL;
5352+static struct evms_list_node *evms_global_notify_list = NULL;
5353+
5354+int evms_info_level = EVMS_INFO_LEVEL;
5355+struct proc_dir_entry *evms_proc_dir = NULL;
5356+EXPORT_SYMBOL(evms_info_level);
5357+static struct evms_logical_volume *evms_logical_volumes;
5358+static int evms_volumes = 0;
5359+/* a few variables to aid in detecting memory leaks.
5360+ * these variables are always in use, regardless of
5361+ * the state of EVMS_MEM_DEBUG.
5362+ */
5363+static atomic_t evms_allocs = (atomic_t) ATOMIC_INIT(0);
5364+static atomic_t evms_logical_nodes = (atomic_t) ATOMIC_INIT(0);
5365+
5366+u8 *evms_primary_string = "primary";
5367+EXPORT_SYMBOL(evms_primary_string);
5368+u8 *evms_secondary_string = "secondary";
5369+EXPORT_SYMBOL(evms_secondary_string);
5370+
5371+static struct evms_version evms_svc_version = {
5372+ .major = EVMS_COMMON_SERVICES_MAJOR,
5373+ .minor = EVMS_COMMON_SERVICES_MINOR,
5374+ .patchlevel = EVMS_COMMON_SERVICES_PATCHLEVEL
5375+};
5376+
5377+/* Handles for "private" EVMS object pools */
5378+static struct evms_pool_mgmt *evms_io_notify_pool;
5379+
5380+/* Handles for "public" EVMS object pools */
5381+struct evms_pool_mgmt *evms_bh_pool;
5382+EXPORT_SYMBOL(evms_bh_pool);
5383+
5384+/* Handle for the devfs directory entry */
5385+devfs_handle_t evms_dir_devfs_handle;
5386+devfs_handle_t evms_blk_devfs_handle;
5387+
5388+/**********************************************************/
5389+/* SYSCTL - EVMS folder */
5390+/**********************************************************/
5391+
5392+#ifdef CONFIG_PROC_FS
5393+static struct ctl_table_header *evms_table_header;
5394+static int evms_info_level_min = EVMS_INFO_CRITICAL;
5395+static int evms_info_level_max = EVMS_INFO_EVERYTHING;
5396+
5397+static ctl_table evms_table[] = {
5398+ {DEV_EVMS_INFO_LEVEL, "evms_info_level",
5399+ &evms_info_level, sizeof (int), 0644, NULL,
5400+ &proc_dointvec_minmax, &sysctl_intvec,
5401+ NULL, &evms_info_level_min, &evms_info_level_max},
5402+ {0}
5403+};
5404+
5405+static ctl_table evms_dir_table[] = {
5406+ {DEV_EVMS, "evms", NULL, 0, 0555, evms_table},
5407+ {0}
5408+};
5409+
5410+static ctl_table dev_dir_table[] = {
5411+ {CTL_DEV, "dev", NULL, 0, 0555, evms_dir_table},
5412+ {0}
5413+};
5414+#endif
5415+
5416+/**********************************************************/
5417+/* START -- arch ioctl32 support */
5418+/**********************************************************/
5419+#if defined(CONFIG_PPC64) || defined(CONFIG_SPARC64)
5420+#include <linux/evms/evms_bbr_k.h>
5421+#include <linux/raid/md.h>
5422+
5423+extern asmlinkage long
5424+sys_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg);
5425+
5426+extern int
5427+register_ioctl32_conversion(unsigned int cmd, void *handler);
5428+
5429+extern int
5430+unregister_ioctl32_conversion(unsigned int cmd);
5431+
5432+#define uvirt_to_kernel(__x) ((unsigned long)(__x))
5433+typedef unsigned int __uvirt_addr;
5434+
5435+struct evms_sector_io32 {
5436+ u64 disk_handle;
5437+ s32 io_flag;
5438+ u64 starting_sector;
5439+ u64 sector_count;
5440+ __uvirt_addr buffer_address;
5441+ s32 status;
5442+};
5443+
5444+struct evms_rediscover32 {
5445+ s32 status;
5446+ u32 drive_count;
5447+ __uvirt_addr drive_array;
5448+};
5449+
5450+struct evms_compute_csum32 {
5451+ __uvirt_addr buffer_address;
5452+ s32 buffer_size;
5453+ u32 insum;
5454+ u32 outsum;
5455+ s32 status;
5456+};
5457+
5458+struct evms_plugin_ioctl32 {
5459+ u32 feature_id;
5460+ s32 feature_command;
5461+ s32 status;
5462+ __uvirt_addr feature_ioctl_data;
5463+};
5464+
5465+struct evms_notify_bbr32 {
5466+ char object_name[EVMS_VOLUME_NAME_SIZE+1];
5467+ u64 count;
5468+ u64 start_sect;
5469+ u64 nr_sect;
5470+ __uvirt_addr buffer;
5471+ s32 rw;
5472+};
5473+
5474+#define EVMS_MD_ID 4
5475+#define EVMS_MD_PERS_IOCTL_CMD 1
5476+#define EVMS_MD_ADD 2
5477+#define EVMS_MD_REMOVE 3
5478+#define EVMS_MD_ACTIVATE 4
5479+#define EVMS_MD_DEACTIVATE 5
5480+#define EVMS_MD_GET_ARRAY_INFO 6
5481+#define EVMS_MD_RAID5_INIT_IO 1
5482+
5483+struct evms_md_ioctl {
5484+ int mddev_idx;
5485+ int cmd;
5486+ void *arg;
5487+};
5488+
5489+struct evms_md_ioctl32 {
5490+ u32 mddev_idx;
5491+ u32 cmd;
5492+ __uvirt_addr arg;
5493+};
5494+
5495+struct evms_md_array_info {
5496+ unsigned long state;
5497+ mdp_super_t *sb;
5498+};
5499+
5500+struct evms_md_array_info32 {
5501+ u32 state;
5502+ __uvirt_addr sb;
5503+};
5504+
5505+struct raid5_ioctl_init_io {
5506+ int rw;
5507+ u64 lsn;
5508+ u64 nr_sects;
5509+ void *data;
5510+};
5511+
5512+struct raid5_ioctl_init_io32 {
5513+ s32 rw;
5514+ u64 lsn;
5515+ u64 nr_sects;
5516+ __uvirt_addr data;
5517+};
5518+
5519+#define EVMS_MD_PLUGIN_ID ((IBM_OEM_ID << 16) | \
5520+ (EVMS_REGION_MANAGER << 12) | EVMS_MD_ID)
5521+#define EVMS_BBR_PLUGIN_ID ((IBM_OEM_ID << 16) | \
5522+ (EVMS_FEATURE << 12) | EVMS_BBR_FEATURE_ID)
5523+
5524+#define EVMS_SECTOR_IO_32 _IOWR(EVMS_MAJOR, \
5525+ EVMS_SECTOR_IO_NUMBER, \
5526+ struct evms_sector_io32)
5527+#define EVMS_REDISCOVER_VOLUMES_32 _IOWR(EVMS_MAJOR, \
5528+ EVMS_REDISCOVER_VOLUMES_NUMBER, \
5529+ struct evms_rediscover32)
5530+#define EVMS_COMPUTE_CSUM_32 _IOWR(EVMS_MAJOR, \
5531+ EVMS_COMPUTE_CSUM_NUMBER, \
5532+ struct evms_compute_csum32)
5533+#define EVMS_PLUGIN_IOCTL_32 _IOR(EVMS_MAJOR, \
5534+ EVMS_PLUGIN_IOCTL_NUMBER, \
5535+ struct evms_plugin_ioctl32)
5536+
5537+static int evms_sector_io(unsigned int fd,
5538+ unsigned int cmd,
5539+ unsigned long arg)
5540+{
5541+ mm_segment_t old_fs = get_fs();
5542+ struct evms_sector_io32 parms32;
5543+ struct evms_sector_io_pkt parms;
5544+ unsigned int kcmd;
5545+ void *karg;
5546+ int rc = 0;
5547+
5548+ if (copy_from_user(&parms32, (struct evms_sector_io32 *)arg,
5549+ sizeof(struct evms_sector_io32)))
5550+ return -EFAULT;
5551+
5552+ parms.disk_handle = parms32.disk_handle;
5553+ parms.io_flag = parms32.io_flag;
5554+ parms.starting_sector = parms32.starting_sector;
5555+ parms.sector_count = parms32.sector_count;
5556+ parms.buffer_address = (u8 *)uvirt_to_kernel(parms32.buffer_address);
5557+ parms.status = 0;
5558+
5559+ kcmd = EVMS_SECTOR_IO;
5560+ karg = &parms;
5561+
5562+ set_fs(KERNEL_DS);
5563+ rc = sys_ioctl(fd, kcmd, (unsigned long)karg);
5564+ set_fs(old_fs);
5565+
5566+ parms32.status = parms.status;
5567+
5568+ if (copy_to_user((struct evms_sector_io32 *)arg, &parms32,
5569+ sizeof(struct evms_sector_io32)))
5570+ return -EFAULT;
5571+
5572+ return rc;
5573+}
5574+
5575+static int evms_rediscover(unsigned int fd,
5576+ unsigned int cmd,
5577+ unsigned long arg)
5578+{
5579+ mm_segment_t old_fs = get_fs();
5580+ struct evms_rediscover32 parms32;
5581+ struct evms_rediscover_pkt parms;
5582+ unsigned int kcmd;
5583+ void *karg;
5584+ int rc = 0;
5585+
5586+ if (copy_from_user(&parms32, (struct evms_rediscover32 *)arg,
5587+ sizeof(struct evms_rediscover32)))
5588+ return -EFAULT;
5589+
5590+ parms.drive_count = parms32.drive_count;
5591+ parms.drive_array = (void *)uvirt_to_kernel(parms32.drive_array);
5592+ parms.status = 0;
5593+
5594+ kcmd = EVMS_REDISCOVER_VOLUMES;
5595+ karg = &parms;
5596+
5597+ set_fs(KERNEL_DS);
5598+ rc = sys_ioctl(fd, kcmd, (unsigned long)karg);
5599+ set_fs(old_fs);
5600+
5601+ parms32.status = parms.status;
5602+
5603+ if (copy_to_user((struct evms_rediscover32 *)arg, &parms32,
5604+ sizeof(struct evms_rediscover32)))
5605+ return -EFAULT;
5606+
5607+ return rc;
5608+}
5609+
5610+static int evms_compute_csum(unsigned int fd,
5611+ unsigned int cmd,
5612+ unsigned long arg)
5613+{
5614+ mm_segment_t old_fs = get_fs();
5615+ struct evms_compute_csum32 parms32;
5616+ struct evms_compute_csum_pkt parms;
5617+ unsigned int kcmd;
5618+ void *karg;
5619+ int rc = 0;
5620+
5621+ if (copy_from_user(&parms32, (struct evms_compute_csum32 *)arg,
5622+ sizeof(struct evms_compute_csum32)))
5623+ return -EFAULT;
5624+
5625+ parms.insum = parms32.insum;
5626+ parms.outsum = parms32.outsum;
5627+ parms.buffer_size = parms32.buffer_size;
5628+ parms.buffer_address = (void *)uvirt_to_kernel(parms32.buffer_address);
5629+ parms.status = 0;
5630+
5631+ kcmd = EVMS_COMPUTE_CSUM;
5632+ karg = &parms;
5633+
5634+ set_fs(KERNEL_DS);
5635+ rc = sys_ioctl(fd, kcmd, (unsigned long)karg);
5636+ set_fs(old_fs);
5637+
5638+ parms32.status = parms.status;
5639+ parms32.outsum = parms.outsum;
5640+
5641+ if (copy_to_user((struct evms_compute_csum32 *)arg, &parms32,
5642+ sizeof(struct evms_compute_csum32)))
5643+ return -EFAULT;
5644+
5645+ return rc;
5646+}
5647+
5648+static int evms_bbr_plugin_ioctl(unsigned int fd,
5649+ unsigned int cmd,
5650+ unsigned long arg)
5651+{
5652+ mm_segment_t old_fs = get_fs();
5653+ struct evms_notify_bbr32 bbr_parms32;
5654+ struct evms_notify_bbr bbr_parms;
5655+ struct evms_plugin_ioctl_pkt *parms =
5656+ (struct evms_plugin_ioctl_pkt *)arg;
5657+ void *old_ptr = NULL;
5658+ int rc;
5659+
5660+ if (copy_from_user(&bbr_parms32,
5661+ (struct evms_notify_bbr32 *)parms->feature_ioctl_data,
5662+ sizeof(struct evms_notify_bbr32)))
5663+ return -EFAULT;
5664+
5665+ memcpy(&bbr_parms, &bbr_parms32, sizeof(struct evms_notify_bbr32));
5666+ bbr_parms.buffer = (void *)uvirt_to_kernel(bbr_parms32.buffer);
5667+ bbr_parms.rw = bbr_parms32.rw;
5668+ old_ptr = parms->feature_ioctl_data;
5669+ parms->feature_ioctl_data = &bbr_parms;
5670+
5671+ set_fs(KERNEL_DS);
5672+ rc = sys_ioctl(fd, cmd, arg);
5673+ set_fs(old_fs);
5674+
5675+ parms->feature_ioctl_data = old_ptr;
5676+
5677+ if (!rc) {
5678+ bbr_parms32.nr_sect = bbr_parms.nr_sect;
5679+ rc = copy_to_user((struct evms_notify_bbr32 *)parms->feature_ioctl_data,
5680+ &bbr_parms32,
5681+ sizeof(struct evms_notify_bbr32));
5682+ }
5683+
5684+ return rc;
5685+}
5686+
5687+static int evms_md_plugin_ioctl(unsigned int fd,
5688+ unsigned int cmd,
5689+ unsigned long arg)
5690+{
5691+ mm_segment_t old_fs = get_fs();
5692+ void *old_ptr = NULL;
5693+ void *old_md_ptr = NULL;
5694+ struct evms_md_ioctl32 md_parms32;
5695+ struct evms_md_ioctl md_parms;
5696+ struct evms_md_array_info32 md_array_parms32;
5697+ struct evms_md_array_info md_array_parms;
5698+ struct raid5_ioctl_init_io32 r5_init_io_parms32;
5699+ struct raid5_ioctl_init_io r5_init_io_parms;
5700+ struct evms_plugin_ioctl_pkt *parms =
5701+ (struct evms_plugin_ioctl_pkt *)arg;
5702+ int rc;
5703+
5704+ if (copy_from_user(&md_parms32,
5705+ (struct evms_md_ioctl*)parms->feature_ioctl_data,
5706+ sizeof(struct evms_md_ioctl32)))
5707+ return -EFAULT;
5708+
5709+ md_parms.mddev_idx = md_parms32.mddev_idx;
5710+ md_parms.cmd = md_parms32.cmd;
5711+ md_parms.arg = (void *)uvirt_to_kernel(md_parms32.arg);
5712+ old_ptr = parms->feature_ioctl_data;
5713+ parms->feature_ioctl_data = &md_parms;
5714+
5715+ if (parms->feature_command == EVMS_MD_GET_ARRAY_INFO) {
5716+ if (copy_from_user(&md_array_parms32,
5717+ (struct evms_md_array_info32*)md_parms.arg,
5718+ sizeof(struct evms_md_array_info32)))
5719+ return -EFAULT;
5720+
5721+ md_array_parms.state = md_array_parms32.state;
5722+ md_array_parms.sb =
5723+ (void *)uvirt_to_kernel(md_array_parms32.sb);
5724+ old_md_ptr = (void *)md_parms.arg;
5725+ md_parms.arg = &md_array_parms;
5726+ } else if (parms->feature_command == EVMS_MD_PERS_IOCTL_CMD) {
5727+ if (md_parms.cmd == EVMS_MD_RAID5_INIT_IO) {
5728+ if (copy_from_user(&r5_init_io_parms32,
5729+ (struct raid5_ioctl_init_io32*)md_parms.arg,
5730+ sizeof(struct raid5_ioctl_init_io32)))
5731+ return -EFAULT;
5732+
5733+ r5_init_io_parms.rw = r5_init_io_parms32.rw;
5734+ r5_init_io_parms.lsn = r5_init_io_parms32.lsn;
5735+ r5_init_io_parms.nr_sects = r5_init_io_parms32.nr_sects;
5736+ r5_init_io_parms.data =
5737+ (void *)uvirt_to_kernel(r5_init_io_parms32.data);
5738+ old_md_ptr = (void *)md_parms.arg;
5739+ md_parms.arg = &r5_init_io_parms;
5740+ }
5741+ }
5742+
5743+ set_fs(KERNEL_DS);
5744+ rc = sys_ioctl(fd, cmd, arg);
5745+ set_fs(old_fs);
5746+
5747+ parms->feature_ioctl_data = old_ptr;
5748+ md_parms.arg = old_md_ptr;
5749+
5750+ if (!rc) {
5751+ if (parms->feature_command == EVMS_MD_GET_ARRAY_INFO) {
5752+ md_array_parms32.state = md_array_parms.state;
5753+ rc = copy_to_user((struct evms_md_array_info32 *)md_parms.arg,
5754+ &md_array_parms32,
5755+ sizeof(struct evms_md_array_info32));
5756+ }
5757+ if (!rc) {
5758+ md_parms32.mddev_idx = md_parms.mddev_idx;
5759+ rc = copy_to_user((struct evms_md_ioctl*)parms->feature_ioctl_data,
5760+ &md_parms32,
5761+ sizeof(struct evms_md_ioctl32));
5762+ }
5763+ }
5764+
5765+ return rc;
5766+}
5767+
5768+static int evms_plugin_ioctl(unsigned int fd,
5769+ unsigned int cmd,
5770+ unsigned long arg)
5771+{
5772+ mm_segment_t old_fs = get_fs();
5773+ struct evms_plugin_ioctl32 parms32;
5774+ struct evms_plugin_ioctl_pkt parms;
5775+ unsigned int kcmd;
5776+ void *karg;
5777+ int rc;
5778+
5779+ if (copy_from_user(&parms32, (struct evms_plugin_ioctl32 *)arg,
5780+ sizeof(struct evms_plugin_ioctl32)))
5781+ return -EFAULT;
5782+
5783+ parms.feature_id = parms32.feature_id;
5784+ parms.feature_command = parms32.feature_command;
5785+ parms.status = parms32.status;
5786+ parms.feature_ioctl_data =
5787+ (void *)uvirt_to_kernel(parms32.feature_ioctl_data);
5788+
5789+ kcmd = EVMS_PLUGIN_IOCTL;
5790+ karg = &parms;
5791+
5792+ switch (parms.feature_id) {
5793+ case EVMS_MD_PLUGIN_ID:
5794+ rc = evms_md_plugin_ioctl(fd, kcmd, (unsigned long)karg);
5795+ break;
5796+ case EVMS_BBR_PLUGIN_ID:
5797+ rc = evms_bbr_plugin_ioctl(fd, kcmd, (unsigned long)karg);
5798+ break;
5799+ default:
5800+ set_fs(KERNEL_DS);
5801+ rc = sys_ioctl(fd, kcmd, (unsigned long)karg);
5802+ set_fs(old_fs);
5803+ }
5804+
5805+ if (!rc) {
5806+ parms32.status = parms.status;
5807+ rc = copy_to_user((struct evms_plugin_ioctl32 *)arg, &parms32,
5808+ sizeof(struct evms_plugin_ioctl32));
5809+ }
5810+
5811+ return rc;
5812+}
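+
+/* A minimal sketch (not part of this patch, compiled out) of how the
+ * 32-bit conversion handlers above would plausibly be wired up on
+ * PPC64/SPARC64, using the register_ioctl32_conversion() extern
+ * declared earlier. The function name evms_register_ioctl32() is
+ * hypothetical.
+ */
+#if 0
+static void evms_register_ioctl32(void)
+{
+ register_ioctl32_conversion(EVMS_SECTOR_IO_32, evms_sector_io);
+ register_ioctl32_conversion(EVMS_REDISCOVER_VOLUMES_32, evms_rediscover);
+ register_ioctl32_conversion(EVMS_COMPUTE_CSUM_32, evms_compute_csum);
+ register_ioctl32_conversion(EVMS_PLUGIN_IOCTL_32, evms_plugin_ioctl);
+}
+#endif
+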
5813+#endif
5814+
5815+/**********************************************************/
5816+/* START -- exported functions/Common Services */
5817+/**********************************************************/
5818+
5819+/*
5820+ * Function: evms_cs_get_version
5821+ * Description: This function returns the current EVMS version
5822+ */
5823+void
5824+evms_cs_get_version(int *major, int *minor)
5825+{
5826+ *major = EVMS_MAJOR_VERSION;
5827+ *minor = EVMS_MINOR_VERSION;
5828+}
5829+
5830+EXPORT_SYMBOL(evms_cs_get_version);
5831+
5832+int
5833+evms_cs_check_version(struct evms_version *required,
5834+ struct evms_version *actual)
5835+{
5836+ if (required->major != actual->major)
5837+ return -EINVAL;
5838+ else if (required->minor > actual->minor)
5839+ return -EINVAL;
5840+ else if (required->minor == actual->minor)
5841+ if (required->patchlevel > actual->patchlevel)
5842+ return -EINVAL;
5843+ return 0;
5844+}
5845+
5846+EXPORT_SYMBOL(evms_cs_check_version);
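+
+/* Worked example of the check above (comment only): a plugin that
+ * requires services 5.0.0 loads against actual services 5.1.2 (a newer
+ * minor is compatible), but fails against 4.9.9 (major mismatch) and
+ * against actual 5.0.0 when it requires 5.0.1 (patchlevel too old at
+ * an equal minor).
+ */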
5847+
5848+int
5849+evms_cs_allocate_logical_node(struct evms_logical_node **pp)
5850+{
5851+ *pp = kmalloc(sizeof (struct evms_logical_node), GFP_KERNEL);
5852+ if (*pp) {
5853+ memset(*pp, 0, sizeof (struct evms_logical_node));
5854+ atomic_inc(&evms_logical_nodes);
5855+ return 0;
5856+ }
5857+ return -ENOMEM;
5858+}
5859+
5860+EXPORT_SYMBOL(evms_cs_allocate_logical_node);
5861+
5862+void
5863+evms_cs_deallocate_volume_info(struct evms_logical_node *p)
5864+{
5865+ if (p->iflags & EVMS_FEATURE_BOTTOM) {
5866+ evms_cs_remove_item_from_list(&evms_global_feature_node_list,
5867+ p);
5868+ kfree(p->volume_info);
5869+ p->volume_info = NULL;
5870+ p->iflags &= ~EVMS_FEATURE_BOTTOM;
5871+ }
5872+}
5873+
5874+EXPORT_SYMBOL(evms_cs_deallocate_volume_info);
5875+
5876+void
5877+evms_cs_deallocate_logical_node(struct evms_logical_node *p)
5878+{
5879+ if (p->next) {
5880+ LOG_SERIOUS
5881+ ("Deallocating object whose NEXT ptr is not null!!\n");
5882+ }
5883+ evms_cs_deallocate_volume_info(p);
5884+ if (p->feature_header) {
5885+ kfree(p->feature_header);
5886+ p->feature_header = NULL;
5887+ }
5888+ kfree(p);
5889+ atomic_dec(&evms_logical_nodes);
5890+}
5891+
5892+EXPORT_SYMBOL(evms_cs_deallocate_logical_node);
5893+
5894+/*
5895+ * Function: evms_cs_register_plugin
5896+ * Description: This function is exported so that all plugins can register with EVMS
5897+ */
5898+int
5899+evms_cs_register_plugin(struct evms_plugin_header *plugin)
5900+{
5901+ int rc = 0;
5902+ struct evms_registered_plugin *reg_record, **pp;
5903+ struct evms_version *ver;
5904+
5905+ ver = &plugin->required_services_version;
5906+
5907+ LOG_EXTRA
5908+ ("registering plugin (plugin.id=%d.%d.%d, plugin.ver=%d.%d.%d, req.svc.ver=%d.%d.%d)\n",
5909+ GetPluginOEM(plugin->id), GetPluginType(plugin->id),
5910+ GetPluginID(plugin->id), plugin->version.major,
5911+ plugin->version.minor, plugin->version.patchlevel, ver->major,
5912+ ver->minor, ver->patchlevel);
5913+
5914+ /* check common services requirements */
5915+ rc = evms_cs_check_version(ver, &evms_svc_version);
5916+ if (rc) {
5917+ LOG_SERIOUS
5918+ ("plugin failed to load: common services (vers:%d,%d,%d) incompatibility!\n",
5919+ EVMS_COMMON_SERVICES_MAJOR, EVMS_COMMON_SERVICES_MINOR,
5920+ EVMS_COMMON_SERVICES_PATCHLEVEL);
5921+ }
5922+ if (!rc) {
5923+ /* ensure a plugin with this feature id is
5924+ * not already loaded.
5925+ */
5926+ for (pp = &registered_plugin_head; *pp; pp = &(*pp)->next) {
5927+ if ((*pp)->plugin->id == plugin->id) {
5928+ rc = -EBUSY;
5929+ LOG_ERROR
5930+ ("error(%d) attempting to load another plugin with id(%x).\n",
5931+ rc, plugin->id);
5932+ }
5933+ }
5934+ }
5935+ if (!rc) {
5936+ /* ensure the plugin has provided functions for
5937+ * the mandatory entry points.
5938+ */
5939+ if (!plugin->fops->discover) {
5940+ rc = -EINVAL;
5941+ } else if (!plugin->fops->init_io) {
5942+ rc = -EINVAL;
5943+ } else if (!plugin->fops->ioctl) {
5944+ rc = -EINVAL;
5945+ } else if (!plugin->fops->read) {
5946+ rc = -EINVAL;
5947+ } else if (!plugin->fops->write) {
5948+ rc = -EINVAL;
5949+ } else if (!plugin->fops->delete) {
5950+ rc = -EINVAL;
5951+ }
5952+ }
5953+ if (!rc) {
5954+ /* allocate a new plugin registration record */
5955+ reg_record =
5956+ kmalloc(sizeof (struct evms_registered_plugin), GFP_KERNEL);
5957+ if (!reg_record) {
5958+ rc = -ENOMEM;
5959+ }
5960+ }
5961+ if (!rc) {
5962+ memset(reg_record, 0, sizeof (struct evms_registered_plugin));
5963+ /* store ptr to plugin header in new registration record */
5964+ reg_record->plugin = plugin;
5965+
5966+ /* terminate the record */
5967+ reg_record->next = NULL;
5968+
5969+ /* find end of the plugin registration list */
5970+ for (pp = &registered_plugin_head; *pp; pp = &(*pp)->next) ;
5971+ /* add registration record to list */
5972+ *pp = reg_record;
5973+
5974+ /* increment the usage count */
5975+ MOD_INC_USE_COUNT;
5976+ }
5977+
5978+ return (rc);
5979+}
5980+
5981+EXPORT_SYMBOL(evms_cs_register_plugin);
5982+
5983+/*
5984+ * Function: evms_cs_unregister_plugin
5985+ * Description: This function is exported so that all plugins can
5986+ * unregister with EVMS
5987+ */
5988+int
5989+evms_cs_unregister_plugin(struct evms_plugin_header *plugin)
5990+{
5991+ int rc = 0, found = FALSE;
5992+ struct evms_registered_plugin **pp;
5993+ struct evms_version *ver;
5994+
5995+ ver = &plugin->required_services_version;
5996+
5997+ LOG_EXTRA
5998+ ("unregistering plugin (plugin.id=%d.%d.%d, plugin.ver=%d.%d.%d, req.svc.ver=%d.%d.%d)\n",
5999+ GetPluginOEM(plugin->id), GetPluginType(plugin->id),
6000+ GetPluginID(plugin->id), plugin->version.major,
6001+ plugin->version.minor, plugin->version.patchlevel, ver->major,
6002+ ver->minor, ver->patchlevel);
6003+ /* ensure a plugin with this feature id is
6004+ * currently loaded.
6005+ */
6006+ for (pp = &registered_plugin_head; *pp; pp = &(*pp)->next) {
6007+ if ((*pp)->plugin->id == plugin->id) {
6008+ found = TRUE;
6009+ break;
6010+ }
6011+ }
6012+ if (!found) {
6013+ rc = -ENOPKG;
6014+ LOG_ERROR
6015+ ("error(%d) attempt to unload a non-loaded plugin with id(%x).\n",
6016+ rc, plugin->id);
6017+ }
6018+ /* actually unload the plugin now */
6019+ if (!rc) {
6020+ struct evms_registered_plugin *tmp = *pp;
6021+
6022+ /* remove the plugin record from our
6023+ * internal plugin list
6024+ */
6025+ *pp = (*pp)->next;
6026+ /* deallocate the plugin registration record
6027+ */
6028+ kfree(tmp);
6029+
6030+ /* decrement the usage count */
6031+ MOD_DEC_USE_COUNT;
6032+ }
6033+ return (rc);
6034+}
6035+
6036+EXPORT_SYMBOL(evms_cs_unregister_plugin);
6037+
6038+/* function: evms_cs_add_logical_node_to_list
6039+ *
6040+ * This function adds a new logical node to the end of a
6041+ * node list.
6042+ *
6043+ * NOTE: This function is only expected to be called at
6044+ * discovery time, which is single-threaded by nature,
6045+ * and therefore doesn't need to be made SMP safe.
6046+ */
6047+int
6048+evms_cs_add_logical_node_to_list(struct evms_logical_node **list_head,
6049+ struct evms_logical_node *node)
6050+{
6051+ int rc = 0;
6052+ struct evms_logical_node **pp = NULL;
6053+
6054+ /* check to make sure node is not already on a list */
6055+ if (node->next)
6056+ rc = 1;
6057+ else
6058+ /* check to make sure node being added is not already in the list */
6059+ for (pp = list_head; *pp; pp = &(*pp)->next)
6060+ if (*pp == node) {
6061+ rc = 2;
6062+ break;
6063+ }
6064+
6065+ /* add node to the end of the list */
6066+ if (!rc)
6067+ *pp = node;
6068+
6069+ return (rc);
6070+}
6071+
6072+EXPORT_SYMBOL(evms_cs_add_logical_node_to_list);
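+
+/* Usage sketch (illustrative only, compiled out): a discovery routine
+ * collecting nodes onto a private list; a non-zero return means the
+ * node was already on some list. example_collect() is hypothetical.
+ */
+#if 0
+static void example_collect(struct evms_logical_node **my_list,
+ struct evms_logical_node *node)
+{
+ if (evms_cs_add_logical_node_to_list(my_list, node))
+ LOG_DEBUG("node %p was already on a list.\n", node);
+}
+#endif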
6073+
6074+/* function: evms_cs_remove_logical_node_from_list
6075+ *
6076+ * This function removes a logical node from a node list.
6077+ *
6078+ * NOTE: This function is only expected to be called at
6079+ * discovery time, which is single-threaded by nature,
6080+ * and therefore doesn't need to be made SMP safe.
6081+ */
6082+int
6083+evms_cs_remove_logical_node_from_list(struct evms_logical_node **list_head,
6084+ struct evms_logical_node *node)
6085+{
6086+ /* remove this node from the list */
6087+ int rc = 1; /* assume failure until target node is found */
6088+ struct evms_logical_node **pp;
6089+ for (pp = list_head; *pp; pp = &(*pp)->next)
6090+ if (*pp == node) {
6091+ *pp = (*pp)->next;
6092+ node->next = NULL;
6093+ rc = 0;
6094+ break;
6095+ }
6096+ return (rc);
6097+}
6098+
6099+EXPORT_SYMBOL(evms_cs_remove_logical_node_from_list);
6100+
6101+int
6102+evms_cs_kernel_ioctl(struct evms_logical_node *node, unsigned int cmd,
6103+ unsigned long arg)
6104+{
6105+ int rc = 0;
6106+ struct inode tmp_inode;
6107+ mm_segment_t fs;
6108+
6109+ lock_kernel();
6110+ fs = get_fs();
6111+ set_fs(get_ds());
6112+ rc = IOCTL(node, &tmp_inode, NULL, cmd, arg);
6113+ set_fs(fs);
6114+ unlock_kernel();
6115+
6116+ return (rc);
6117+
6118+}
6119+
6120+EXPORT_SYMBOL(evms_cs_kernel_ioctl);
6121+
6122+/*
6123+ * function: evms_cs_size_in_vsectors
6124+ *
6125+ * In EVMS a V(irtual)Sector is 512 bytes in size.
6126+ * This function computes the number of VSECTORs a specified
6127+ * item size would require.
6128+ *
6129+ * NOTE: This function has been coded to work with 64 bit values.
6130+ */
6131+unsigned long
6132+evms_cs_size_in_vsectors(long long item_size)
6133+{
6134+ long long sectors;
6135+
6136+ sectors = item_size >> EVMS_VSECTOR_SIZE_SHIFT;
6137+ if (item_size & (EVMS_VSECTOR_SIZE - 1))
6138+ sectors++;
6139+
6140+ return (sectors);
6141+}
6142+
6143+EXPORT_SYMBOL(evms_cs_size_in_vsectors);
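+
+/* Worked example (comment only): a 1536-byte item needs exactly 3
+ * vsectors (1536 >> 9 == 3, no remainder), while a 1537-byte item
+ * rounds up to 4.
+ */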
6144+
6145+/*
6146+ * function: evms_cs_log2
6147+ *
6148+ * this function computes the power of 2 of the specified
6149+ * value. If the value is 0, a -1 is returned. If the value
6150+ * is NOT a power of 2, a -2 is returned. Otherwise the power
6151+ * of 2 is returned.
6152+ */
6153+int
6154+evms_cs_log2(long long value)
6155+{
6156+ int result = -1;
6157+ long long tmp;
6158+
6159+ if (value) {
6160+ tmp = value;
6161+ result++;
6162+ while (!(tmp & 1)) {
6163+ result++;
6164+ tmp >>= 1;
6165+ }
6166+ if (tmp != 1) {
6167+ result = -2;
6168+ }
6169+ }
6170+ return (result);
6171+}
6172+
6173+EXPORT_SYMBOL(evms_cs_log2);
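+
+/* Worked examples (comment only): evms_cs_log2(4096) == 12,
+ * evms_cs_log2(0) == -1, and evms_cs_log2(12) == -2 because 12 is
+ * not a power of 2.
+ */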
6174+
6175+/*
6176+ * Functions:
6177+ *
6178+ * build_crc_table()
6179+ * calculate_crc()
6180+ *
6181+ *
6182+ * Description: The functions in this module provide a means of calculating
6183+ * the 32 bit CRC for a block of data. build_crc_table must
6184+ * be called to initialize this module. calculate_crc must
6185+ * NOT be used until after build_crc_table has been called.
6186+ * Once build_crc_table has been called, calculate_crc can
6187+ * be used to calculate the crc of the data residing in a
6188+ * user specified buffer.
6189+ *
6190+ */
6191+
6192+#define CRC_POLYNOMIAL 0xEDB88320L
6193+
6194+static u32 crc_table[256];
6195+static u32 crc_table_built = FALSE;
6196+
6197+/*********************************************************************/
6198+/* */
6199+/* Function Name: build_crc_table */
6200+/* */
6201+/* Descriptive Name: This module implements the crc function using */
6202+/* a table driven method. The required table */
6203+/* must be setup before the calculate_crc */
6204+/* function can be used. This table only needs */
6205+/* to be set up once. This function sets up the */
6206+/* crc table needed by calculate_crc. */
6207+/* */
6208+/* Input: None */
6209+/* */
6210+/* Output: None */
6211+/* */
6212+/* Error Handling: N/A */
6213+/* */
6214+/* Side Effects: The internal crc table is initialized. */
6215+/* */
6216+/* Notes: None. */
6217+/* */
6218+/*********************************************************************/
6219+static void
6220+build_crc_table(void)
6221+{
6222+ u32 i, j, crc;
6223+
6224+ for (i = 0; i <= 255; i++) {
6225+ crc = i;
6226+ for (j = 8; j > 0; j--) {
6227+ if (crc & 1)
6228+ crc = (crc >> 1) ^ CRC_POLYNOMIAL;
6229+ else
6230+ crc >>= 1;
6231+ }
6232+ crc_table[i] = crc;
6233+ }
6234+ crc_table_built = TRUE;
6235+}
6236+
6237+/*********************************************************************/
6238+/* */
6239+/* Function Name: calculate_crc */
6240+/* */
6241+/* Descriptive Name: This function calculates the crc value for */
6242+/* the data in the buffer specified by Buffer. */
6243+/* */
6244+/* Input: u32 crc : This is the starting crc. If you are */
6245+/* starting a new crc calculation, then */
6246+/* this should be set to 0xFFFFFFFF. If */
6247+/* you are continuing a crc calculation */
6248+/* (i.e. all of the data did not fit in */
6249+/* the buffer so you could not calculate */
6250+/* the crc in a single operation), then */
6251+/* this is the crc output by the last */
6252+/* calculate_crc call. */
6253+/* */
6254+/* Output: The crc for the data in the buffer, based upon the value*/
6255+/* of the input parameter crc. */
6256+/* */
6257+/* Error Handling: None. */
6258+/* */
6259+/* Side Effects: None. */
6260+/* */
6261+/* Notes: None. */
6262+/* */
6263+/*********************************************************************/
6264+u32
6265+evms_cs_calculate_crc(u32 crc, void *buffer, u32 buffersize)
6266+{
6267+ unsigned char *current_byte;
6268+ u32 temp1, temp2, i;
6269+
6270+ current_byte = (unsigned char *) buffer;
6271+ /* Make sure the crc table is available */
6272+ if (crc_table_built == FALSE)
6273+ build_crc_table();
6274+ /* Process each byte in the buffer. */
6275+ for (i = 0; i < buffersize; i++) {
6276+ temp1 = (crc >> 8) & 0x00FFFFFF;
6277+ temp2 = crc_table[(crc ^ (u32) *current_byte) & (u32) 0xff];
6280+ current_byte++;
6281+ crc = temp1 ^ temp2;
6282+ }
6283+ return (crc);
6284+}
6285+
6286+EXPORT_SYMBOL(evms_cs_calculate_crc);
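+
+/* Usage sketch (illustrative only, compiled out): start a new crc
+ * with 0xFFFFFFFF and chain calls when the data arrives in pieces.
+ * example_crc() is a hypothetical caller.
+ */
+#if 0
+static u32 example_crc(void *buf1, u32 len1, void *buf2, u32 len2)
+{
+ u32 crc = 0xFFFFFFFF; /* starting value for a new calculation */
+
+ crc = evms_cs_calculate_crc(crc, buf1, len1);
+ return evms_cs_calculate_crc(crc, buf2, len2);
+}
+#endif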
6287+
6288+#define EVMS_ORIGINAL_CALLBACK_FLAG (1<<0)
6289+typedef struct io_notify_s {
6290+ unsigned int flags;
6291+ void *private;
6292+ struct buffer_head *bh;
6293+ u64 rsector;
6294+ kdev_t rdev;
6295+ void *b_private;
6296+ void (*callback_function) (struct evms_logical_node * node,
6297+ struct buffer_head * bh,
6298+ int uptodate, int *redrive);
6299+ struct io_notify_s *next;
6300+} io_notify_t;
6301+
6302+struct evms_pool_mgmt *
6303+evms_cs_create_pool(int objsize,
6304+ u8 * pool_name,
6305+ void (*ctor) (void *, kmem_cache_t *, unsigned long),
6306+ void (*dtor) (void *, kmem_cache_t *, unsigned long))
6307+{
6308+ struct evms_pool_mgmt *pool;
6309+
6310+ /* create the pool management structure */
6311+ pool = kmalloc(sizeof (struct evms_pool_mgmt), GFP_KERNEL);
6312+ if (!pool) {
6313+ LOG_CRITICAL("Cannot create %s pool mgmt structure",
6314+ pool_name);
6315+ return NULL;
6316+ }
6317+ /* initialize various field in pool mgmt structure */
6318+ memset(pool, 0, sizeof (struct evms_pool_mgmt));
6319+ pool->member_size = objsize;
6320+ pool->name = pool_name;
6321+ pool->waiters = (atomic_t) ATOMIC_INIT(0);
6322+ init_waitqueue_head(&pool->wait_queue);
6323+ /* go create the pool */
6324+ pool->cachep = kmem_cache_create(pool->name,
6325+ pool->member_size,
6326+ 0, SLAB_HWCACHE_ALIGN, ctor, dtor);
6327+ if (!pool->cachep)
6328+ panic("Cannot create %s SLAB cache", pool->name);
6329+ return (pool);
6330+}
6331+
6332+EXPORT_SYMBOL(evms_cs_create_pool);
6333+
6334+void *
6335+evms_cs_allocate_from_pool(struct evms_pool_mgmt *pool, int blockable)
6336+{
6337+ void *objp;
6338+
6339+ while (1) {
6340+ objp = kmem_cache_alloc(pool->cachep, SLAB_NOIO);
6341+ if (objp || !blockable) {
6342+ return (objp);
6343+ } else {
6344+ /* block and wait for an object to
6345+ * be returned to the pool
6346+ */
6347+ atomic_inc(&pool->waiters);
6348+ wait_event(pool->wait_queue,
6349+ (!atomic_read(&pool->waiters)));
6350+ }
6351+ }
6352+ return (objp);
6353+}
6354+
6355+EXPORT_SYMBOL(evms_cs_allocate_from_pool);
6356+
6357+void
6358+evms_cs_deallocate_to_pool(struct evms_pool_mgmt *pool, void *objp)
6359+{
6360+ kmem_cache_free(pool->cachep, objp);
6361+ atomic_set(&pool->waiters, 0);
6362+ if (waitqueue_active(&pool->wait_queue)) {
6363+ wake_up(&pool->wait_queue);
6364+ }
6365+}
6366+
6367+EXPORT_SYMBOL(evms_cs_deallocate_to_pool);
6368+
6369+void
6370+evms_cs_destroy_pool(struct evms_pool_mgmt *pool)
6371+{
6372+ kmem_cache_destroy(pool->cachep);
6373+ kfree(pool);
6374+}
6375+
6376+EXPORT_SYMBOL(evms_cs_destroy_pool);
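+
+/* Pool lifecycle sketch (illustrative only, compiled out): create a
+ * pool for a private per-I/O structure, allocate with blocking
+ * allowed, free, and destroy. "struct my_io" and the pool name are
+ * hypothetical.
+ */
+#if 0
+struct my_io { int dummy; };
+
+static void example_pool_usage(void)
+{
+ struct evms_pool_mgmt *pool;
+ struct my_io *io;
+
+ pool = evms_cs_create_pool(sizeof(struct my_io), "EVMS_my_io",
+ NULL, NULL);
+ if (!pool)
+ return;
+ io = evms_cs_allocate_from_pool(pool, EVMS_BLOCKABLE);
+ if (io)
+ evms_cs_deallocate_to_pool(pool, io);
+ evms_cs_destroy_pool(pool);
+}
+#endif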
6377+
6378+/*
6379+ * function: evms_end_io
6380+ *
6381+ * This is a support function for
6382+ * evms_cs_register_for_end_io_notification.
6383+ * This function is called during I/O completion on any buffer
6384+ * head that was registered by a plugin. Control is passed here
6385+ * and this routine will, thru the use of the I/O notify entry
6386+ * stored in the b_private field of the buffer head, restore
6387+ * the b_rsector value the buffer head had at the time of
6388+ * registration and pass control to the registered callback
6389+ * address, with pointers to the buffer head and an optional
6390+ * plugin private data. Upon completion of the callback,
6391+ * control is returned back here. The io notify list entry
6392+ * is deleted. This process repeats until this routine
6393+ * detects that all registered plugins have been called back
6394+ * and the buffer head's original end_io function has been
6395+ * called. At this point the DONE flag is set, and we terminate
6396+ * the callback loop and exit.
6397+ *
6398+ * Plugins may desire to break or interrupt the callback
6399+ * sequence or chain. This may be useful to redrive I/O or
6400+ * to wait for other buffer heads to complete before
6401+ * allowing the original buffer head callback to occur.
6402+ * To interrupt the callback "chain", a registered
6403+ * plugin's callback must return with the DONE flag set.
6404+ *
6405+ * NOTE: If a plugin set the DONE flag, and wishes to redrive
6406+ * a buffer head, the plugin MUST reregister the buffer head
6407+ * to receive another callback on this buffer head. Also, the
6408+ * plugin MUST ensure that the original buffer head end_io
6409+ * function get called at some point, either by reregistering
6410+ * this buffer head and receiving another callback, or by
6411+ * means of buffer head aggregation triggered by the callbacks
6412+ * of other buffer heads.
6413+ *
6414+ */
6415+static void
6416+evms_end_io(struct buffer_head *bh, int uptodate)
6417+{
6418+ io_notify_t *entry;
6419+ int done;
6420+
6421+ done = FALSE;
6422+ while (!done) {
6423+ /* retrieve the io_notify_entry ptr from
6424+ * the b_private field in the buffer head.
6425+ */
6426+ entry = (io_notify_t *) bh->b_private;
6427+
6428+ /* restore the b_private value to
6429+ * the previous b_private value (which
6430+ * should be a previous io_notify_entry
6431+ * or the original b_private pointer).
6432+ */
6433+ bh->b_private = entry->b_private;
6434+
6435+ /* check for original callback for this bh */
6436+ if (entry->flags & EVMS_ORIGINAL_CALLBACK_FLAG) {
6437+ /* this is the original for bh */
6438+
6439+ /* turn off flag marking this as the original */
6440+ entry->flags &= ~EVMS_ORIGINAL_CALLBACK_FLAG;
6441+
6442+ /* decrement volume's requests_in_progress var */
6443+ atomic_dec(&evms_logical_volumes[MINOR(bh->b_rdev)].
6444+ requests_in_progress);
6445+
6446+ /* restore b_end_io to original value */
6447+ bh->b_end_io = (void *) entry->callback_function;
6448+ if (bh->b_end_io) {
6449+ /* invoke original callback function
6450+ * if it exists.
6451+ */
6452+ bh->b_end_io(bh, uptodate);
6453+ }
6454+ done = TRUE;
6455+ } else {
6456+ /* this is a plugin callback */
6457+
6458+ /* restore the rsector value to the
6459+ * value at the time of callback
6460+ * registration.
6461+ */
6462+ bh->b_rsector = entry->rsector;
6463+ bh->b_rdev = entry->rdev;
6464+ /* invoke plugin callback function */
6465+ entry->callback_function(entry->private, bh, uptodate,
6466+ &done);
6467+ }
6468+ /* free the io notify entry */
6469+ evms_cs_deallocate_to_pool(evms_io_notify_pool, entry);
6470+ }
6471+}
6472+
6473+/*
6474+ * function: evms_cs_register_for_end_io_notification
6475+ *
6476+ * This function is an evms common service.
6477+ * This routine allows a (plugin) function to register to
6478+ * participate in the io completion notification process.
6479+ * This is useful for plugins which alter data after it
6480+ * has been read from the disk (e.g. encryption or
6481+ * compression).
6482+ *
6483+ * This routine also records the rsector value at the time
6484+ * of registration, so that it can be restored to that value
6485+ * prior to the callback to a plugin, thus allowing that
6486+ * plugin to work with the value it had seen during the
6487+ * initiating I/O request.
6488+ *
6489+ * This routine also records a private data pointer at the
6490+ * time of registration, and is returned to the plugin
6491+ * at callback time. This private data pointer was designed
6492+ * to contain context/callback/buffer_head specific data, and
6493+ * frees the plugin from having to store and find associated
6494+ * data at the time of the callback. This field is not used
6495+ * by this function and is optional (NULL if unused). It is
6496+ * recorded and returned as a convenience for the plugins.
6497+ *
6498+ * DANGER!!! - WILL ROBINSON - DANGER!!!
6499+ * This routine uses the b_private field in the
6500+ * buffer_head structure. If any lower level driver uses this
6501+ * field and does NOT restore it, the I/O callback will fail!!
6502+ *
6503+ * Any plugin writers requiring a field for private storage
6504+ * should instead use the private field parameter in this
6505+ * function to store their private data.
6506+ *
6507+ */
6508+
6509+int
6510+evms_cs_register_for_end_io_notification(void *private,
6511+ struct buffer_head *bh,
6512+ void *callback_function)
6513+{
6514+ int rc = 0, done;
6515+ io_notify_t *new_entry;
6516+
6517+ done = FALSE;
6518+ while (!done) {
6519+ /* allocate a notify entry */
6520+ new_entry =
6521+ evms_cs_allocate_from_pool(evms_io_notify_pool,
6522+ EVMS_BLOCKABLE);
6523+ if (!new_entry) {
6524+ schedule();
6525+ continue;
6526+ }
6527+
6528+ /* initialize notify entry */
6529+ new_entry->private = private;
6530+ new_entry->bh = bh;
6531+ new_entry->rsector = bh->b_rsector;
6532+ new_entry->rdev = bh->b_rdev;
6533+ new_entry->b_private = bh->b_private;
6534+ new_entry->flags = 0;
6535+
6536+ /* is this the first callback for this bh? */
6537+ if (bh->b_end_io != evms_end_io) {
6538+ /* yes, first callback */
6539+ new_entry->flags |= EVMS_ORIGINAL_CALLBACK_FLAG;
6540+ new_entry->callback_function = (void *) bh->b_end_io;
6541+
6542+ /* increment volume's requests_in_progress var */
6543+ atomic_inc(&evms_logical_volumes[MINOR(bh->b_rdev)].
6544+ requests_in_progress);
6545+
6546+ /* set b_end_io so we get control */
6547+ bh->b_end_io = evms_end_io;
6548+ } else {
6549+ /* no, not first callback */
6550+ new_entry->callback_function = callback_function;
6551+ done = TRUE;
6552+ }
6553+ /* set b_private to aid in quick lookup */
6554+ bh->b_private = new_entry;
6555+ }
6556+ return (rc);
6557+}
6558+
6559+EXPORT_SYMBOL(evms_cs_register_for_end_io_notification);
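+
+/* Registration sketch (illustrative only, compiled out): a plugin that
+ * post-processes read data hooks completion like this. my_callback and
+ * my_context are hypothetical; note the callback's first argument is
+ * the private pointer passed at registration time.
+ */
+#if 0
+static void my_callback(void *private, struct buffer_head *bh,
+ int uptodate, int *done)
+{
+ /* e.g. decrypt bh->b_data here; leaving *done untouched lets
+ * the callback chain continue to the original b_end_io.
+ */
+}
+
+static void example_hook(void *my_context, struct buffer_head *bh)
+{
+ evms_cs_register_for_end_io_notification(my_context, bh, my_callback);
+ /* ...then submit bh down the stack as usual... */
+}
+#endif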
6560+
6561+/* function description: evms_cs_lookup_item_in_list
6562+ *
6563+ * this function searches for the specified item in the
6564+ * specified node list. it returns the address of the
6565+ * evms_list_node containing the specified item.
6566+ */
6567+struct evms_list_node **
6568+evms_cs_lookup_item_in_list(struct evms_list_node **node_list, void *item)
6569+{
6570+ struct evms_list_node **list_node;
6571+
6572+ list_node = node_list;
6573+ while (*list_node) {
6574+ if ((*list_node)->item == item)
6575+ break;
6576+ list_node = &(*list_node)->next;
6577+ }
6578+ return (list_node);
6579+}
6580+
6581+EXPORT_SYMBOL(evms_cs_lookup_item_in_list);
6582+
6583+/* function description: evms_cs_add_item_to_list
6584+ *
6585+ * this function adds an item to an item list. the node for
6586+ * the new item is added to the end of the list. the list is
6587+ * traversed to find the end; while the traversal occurs, the
6588+ * list is checked for the presence of the specified item.
6589+ *
6590+ * RC == 0 is returned for:
6591+ * a successful add of a new item
6592+ *
6593+ * RC == 1 is returned when:
6594+ * the item is already on the list
6595+ *
6596+ * RC < 0 is returned for an error attempting to add the item.
6597+ */
6604+int
6605+evms_cs_add_item_to_list(struct evms_list_node **list, void *item)
6606+{
6607+ int rc = 0;
6608+ struct evms_list_node **list_node, *new_node;
6609+
6610+ list_node = evms_cs_lookup_item_in_list(list, item);
6611+ if (*list_node == NULL) {
6612+ new_node = kmalloc(sizeof (struct evms_list_node), GFP_NOIO);
6613+ if (new_node) {
6614+ memset(new_node, 0, sizeof (struct evms_list_node));
6615+ new_node->item = item;
6616+ *list_node = new_node;
6617+ } else {
6618+ rc = -ENOMEM;
6619+ }
6620+ } else {
6621+ rc = 1;
6622+ LOG_DEBUG
6623+ ("warning: attempt to add duplicate item(%p) to list(%p).\n",
6624+ item, list);
6625+ }
6626+ return (rc);
6627+}
6628+
6629+EXPORT_SYMBOL(evms_cs_add_item_to_list);
6630+
6631+/* function description: evms_cs_remove_item_from_list
6632+ *
6633+ * this function removes a specified item from the
6634+ * specified list. if the specified item is not
6635+ * found in the list, an error is returned.
6636+ */
6637+int
6638+evms_cs_remove_item_from_list(struct evms_list_node **list, void *item)
6639+{
6640+ int rc = 0;
6641+ struct evms_list_node **list_node;
6642+
6643+ /* check to see if item is in the list */
6644+ list_node = evms_cs_lookup_item_in_list(list, item);
6645+
6646+ /* was the node found in the list? */
6647+ if (*list_node) {
6648+ /* yes, it was found */
6649+ struct evms_list_node *tmp_node;
6650+
6651+ /* save ptr to node being removed */
6652+ tmp_node = *list_node;
6653+ /* remove it from the global list */
6654+ *list_node = tmp_node->next;
6655+ /* delete removed node */
6656+ kfree(tmp_node);
6657+ } else {
6658+ /* no, it was not found */
6659+ rc = -1;
6660+ LOG_ERROR
6661+ ("error(%d): attempt to remove nonexistant node(%p) from list(%p).\n",
6662+ rc, item, list);
6663+ }
6664+ return (rc);
6665+}
6666+
6667+EXPORT_SYMBOL(evms_cs_remove_item_from_list);
6668+
6669+/* function description: evms_cs_register_device
6670+ *
6671+ * this function adds a device to the EVMS global device list.
6672+ *
6673+ * RC == 0 is returned for:
6674+ * a successful add of a new device
6675+ *
6676+ * RC == 1 is returned when:
6677+ * the device is already on the list
6678+ *
6679+ * RC < 0 is returned for an error attempting to add the device.
6680+ */
6681+int
6682+evms_cs_register_device(struct evms_logical_node *device)
6683+{
6684+ return (evms_cs_add_item_to_list(&evms_global_device_list, device));
6685+}
6686+
6687+EXPORT_SYMBOL(evms_cs_register_device);
6688+
6689+/* function description: evms_cs_unregister_device
6690+ *
6691+ * this function removes a device from the EVMS global device list.
6692+ *
6693+ * RC == 0 is returned for:
6694+ * a successful removal of the specified device
6695+ *
6696+ * RC < 0 is returned for an error attempting to remove the device.
6697+ * -ENODATA is returned if specified device is not found.
6698+ */
6699+int
6700+evms_cs_unregister_device(struct evms_logical_node *device)
6701+{
6702+ return (evms_cs_remove_item_from_list(&evms_global_device_list,
6703+ device));
6704+}
6705+
6706+EXPORT_SYMBOL(evms_cs_unregister_device);
6707+
6708+static struct evms_list_node *find_first_next_list_node = NULL;
6709+int
6710+evms_cs_find_next_device(struct evms_logical_node *in_device,
6711+ struct evms_logical_node **out_device)
6712+{
6713+ int rc = 0;
6714+ struct evms_list_node **list_node;
6715+
6716+ if (in_device == NULL)
6717+ find_first_next_list_node = evms_global_device_list;
6718+ else {
6719+ list_node =
6720+ evms_cs_lookup_item_in_list(&evms_global_device_list,
6721+ in_device);
6722+ find_first_next_list_node = *list_node;
6723+ if (find_first_next_list_node == NULL)
6724+ rc = -ENODATA;
6725+ else
6726+ find_first_next_list_node =
6727+ find_first_next_list_node->next;
6728+ }
6729+
6730+ if (find_first_next_list_node == NULL)
6731+ *out_device = NULL;
6732+ else
6733+ *out_device = (struct evms_logical_node *)
6734+ find_first_next_list_node->item;
6735+
6736+ return (rc);
6737+}
6738+
6739+EXPORT_SYMBOL(evms_cs_find_next_device);
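+
+/* Iteration sketch (illustrative only, compiled out): walking the
+ * global device list, starting the scan with NULL and stopping when
+ * no device is returned. example_walk_devices() is hypothetical.
+ */
+#if 0
+static void example_walk_devices(void)
+{
+ struct evms_logical_node *dev = NULL;
+
+ evms_cs_find_next_device(NULL, &dev);
+ while (dev) {
+ /* inspect dev here */
+ if (evms_cs_find_next_device(dev, &dev))
+ break;
+ }
+}
+#endif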
6740+
6741+void
6742+evms_cs_signal_event(int eventid)
6743+{
6744+ int rc;
6745+ struct evms_list_node **list_node;
6746+
6747+ /* signal PID(s) of specified event */
6748+ list_node = &evms_global_notify_list;
6749+ while (*list_node) {
6750+ struct evms_event *event;
6751+
6752+ event = (*list_node)->item;
6753+ if (event->eventid == eventid) {
6754+ struct task_struct *tsk;
6755+
6756+ tsk = find_task_by_pid(event->pid);
6757+ if (tsk) {
6758+ struct siginfo siginfo;
6759+
6760+ siginfo.si_signo = event->signo;
6761+ siginfo.si_errno = 0;
6762+ siginfo.si_code = 0;
6763+ rc = send_sig_info(event->signo, &siginfo, tsk);
6764+ } else {
6765+ /* TODO:
6766+ * unregister this stale
6767+ * notification record
6768+ */
6769+ }
6770+ }
6771+ list_node = &(*list_node)->next;
6772+ }
6773+}
6774+
6775+EXPORT_SYMBOL(evms_cs_signal_event);
6776+
6777+static inline void
6778+evms_flush_signals(void)
6779+{
6780+ spin_lock(&current->sigmask_lock);
6781+ flush_signals(current);
6782+ spin_unlock(&current->sigmask_lock);
6783+}
6784+
6785+static inline void
6786+evms_init_signals(void)
6787+{
6788+ current->exit_signal = SIGCHLD;
6789+ siginitsetinv(&current->blocked, sigmask(SIGKILL));
6790+}
6791+
6792+static int
6793+evms_thread(void *arg)
6794+{
6795+ struct evms_thread *thread = arg;
6796+ lock_kernel();
6797+
6798+ /*
6799+ * Detach thread
6800+ */
6801+
6802+ daemonize();
6803+
6804+ snprintf(current->comm, sizeof(current->comm), "%s", thread->name);
6805+ evms_init_signals();
6806+ evms_flush_signals();
6807+ thread->tsk = current;
6808+
6809+ current->policy = SCHED_OTHER;
6810+#ifdef O1_SCHEDULER
6811+ set_user_nice(current, -20);
6812+#else
6813+ current->nice = -20;
6814+#endif
6815+ unlock_kernel();
6816+
6817+ complete(thread->event);
6818+ while (thread->run) {
6819+ void (*run) (void *data);
6820+ DECLARE_WAITQUEUE(wait, current);
6821+
6822+ add_wait_queue(&thread->wqueue, &wait);
6823+#ifdef O1_SCHEDULER
6824+ set_current_state(TASK_INTERRUPTIBLE);
6825+#else
6826+ set_task_state(current, TASK_INTERRUPTIBLE);
6827+#endif
6828+ if (!test_bit(EVMS_THREAD_WAKEUP, &thread->flags)) {
6829+ schedule();
6830+ }
6831+#ifdef O1_SCHEDULER
6832+ set_current_state(TASK_RUNNING);
6833+#else
6834+ current->state = TASK_RUNNING;
6835+#endif
6836+ remove_wait_queue(&thread->wqueue, &wait);
6837+ clear_bit(EVMS_THREAD_WAKEUP, &thread->flags);
6838+
6839+ run = thread->run;
6840+ if (run) {
6841+ run(thread->data);
6842+ run_task_queue(&tq_disk);
6843+ }
6844+ if (signal_pending(current)) {
6845+ evms_flush_signals();
6846+ }
6847+ }
6848+ complete(thread->event);
6849+ return 0;
6850+}
6851+
6852+struct evms_thread *
6853+evms_cs_register_thread(void (*run) (void *), void *data, const u8 * name)
6854+{
6855+ struct evms_thread *thread;
6856+ int ret;
6857+ struct completion event;
6858+
6859+ thread = kmalloc(sizeof (struct evms_thread), GFP_KERNEL);
6860+ if (!thread) {
6861+ return NULL;
6862+ }
6863+ memset(thread, 0, sizeof (struct evms_thread));
6864+ init_waitqueue_head(&thread->wqueue);
6865+
6866+ init_completion(&event);
6867+ thread->event = &event;
6868+ thread->run = run;
6869+ thread->data = data;
6870+ thread->name = name;
6871+ ret = kernel_thread(evms_thread, thread, 0);
6872+ if (ret < 0) {
6873+ kfree(thread);
6874+ return NULL;
6875+ }
6876+ wait_for_completion(&event);
6877+ return thread;
6878+}
6879+
6880+EXPORT_SYMBOL(evms_cs_register_thread);
6881+
6882+void
6883+evms_cs_unregister_thread(struct evms_thread *thread)
6884+{
6885+ struct completion event;
6886+
6887+ init_completion(&event);
6888+
6889+ thread->event = &event;
6890+ thread->run = NULL;
6891+ thread->name = NULL;
6892+ evms_cs_interrupt_thread(thread);
6893+ wait_for_completion(&event);
6894+ kfree(thread);
6895+}
6896+
6897+EXPORT_SYMBOL(evms_cs_unregister_thread);
6898+
6899+void
6900+evms_cs_wakeup_thread(struct evms_thread *thread)
6901+{
6902+ set_bit(EVMS_THREAD_WAKEUP, &thread->flags);
6903+ wake_up(&thread->wqueue);
6904+}
6905+
6906+EXPORT_SYMBOL(evms_cs_wakeup_thread);
6907+
6908+void
6909+evms_cs_interrupt_thread(struct evms_thread *thread)
6910+{
6911+ if (!thread->tsk) {
6912+ LOG_ERROR("error: attempted to interrupt an invalid thread!\n");
6913+ return;
6914+ }
6915+ send_sig(SIGKILL, thread->tsk, 1);
6916+}
6917+
6918+EXPORT_SYMBOL(evms_cs_interrupt_thread);
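+
+/* Thread lifecycle sketch (illustrative only, compiled out): register
+ * a worker daemon, wake it when work is queued, and unregister it on
+ * unload. my_worker and the thread name are hypothetical.
+ */
+#if 0
+static void my_worker(void *data)
+{
+ /* drain a work queue here; called each time the thread is woken */
+}
+
+static void example_thread_usage(void)
+{
+ struct evms_thread *t;
+
+ t = evms_cs_register_thread(my_worker, NULL, "my_evmsd");
+ if (t) {
+ evms_cs_wakeup_thread(t);
+ evms_cs_unregister_thread(t);
+ }
+}
+#endif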
6919+
6920+struct proc_dir_entry *
6921+evms_cs_get_evms_proc_dir(void)
6922+{
6923+#ifdef CONFIG_PROC_FS
6924+ if (!evms_proc_dir) {
6925+ evms_proc_dir = create_proc_entry("evms", S_IFDIR, &proc_root);
6926+ }
6927+#endif
6928+ return (evms_proc_dir);
6929+}
6930+
6931+EXPORT_SYMBOL(evms_cs_get_evms_proc_dir);
6932+
6933+int
6934+evms_cs_volume_request_in_progress(kdev_t dev,
6935+ int operation, int *current_count)
6936+{
6937+ int rc = 0;
6938+ struct evms_logical_volume *volume;
6939+
6940+ volume = &evms_logical_volumes[MINOR(dev)];
6941+ if (volume->node) {
6942+ if (operation > 0) {
6943+ atomic_inc(&volume->requests_in_progress);
6944+ } else if (operation < 0) {
6945+ atomic_dec(&volume->requests_in_progress);
6946+ }
6947+ if (current_count) {
6948+ *current_count =
6949+ atomic_read(&volume->requests_in_progress);
6950+ }
6951+ } else {
6952+ rc = -ENODEV;
6953+ }
6954+ return (rc);
6955+}
6956+
6957+EXPORT_SYMBOL(evms_cs_volume_request_in_progress);
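+
+/* Usage sketch (illustrative only, compiled out): bump the in-flight
+ * count around a privately built request so a concurrent quiesce
+ * waits for it (cf. evms_quiesce_volume below). example_private_io()
+ * is hypothetical.
+ */
+#if 0
+static void example_private_io(kdev_t dev)
+{
+ evms_cs_volume_request_in_progress(dev, +1, NULL);
+ /* ...issue and wait for the private I/O... */
+ evms_cs_volume_request_in_progress(dev, -1, NULL);
+}
+#endif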
6958+
6959+void
6960+evms_cs_invalidate_volume(struct evms_logical_node *node)
6961+{
6962+ int i;
6963+ for (i = 1; i < MAX_EVMS_VOLUMES; i++) {
6964+ if (evms_logical_volumes[i].node && node->name) {
6965+ if (!strcmp(evms_logical_volumes[i].node->name, node->name)) {
6969+ LOG_DETAILS
6970+ ("Invalidating EVMS device %s minor %d\n",
6971+ node->name, i);
6972+ invalidate_device(MKDEV(EVMS_MAJOR, i), 0);
6973+ break;
6974+ }
6975+ }
6976+ }
6977+}
6978+
6979+EXPORT_SYMBOL(evms_cs_invalidate_volume);
6980+
6981+static int
6982+is_open(int minor)
6983+{
6984+ return atomic_read(&evms_logical_volumes[minor].opens);
6985+}
6986+
6987+/**********************************************************/
6988+/* END -- exported functions/Common Services */
6989+/**********************************************************/
6990+
6991+/**********************************************************/
6992+/* START -- Proc FS Support functions */
6993+/**********************************************************/
6994+
6995+#ifdef CONFIG_PROC_FS
6996+static int
6997+evms_info_read_proc(char *page,
6998+ char **start, off_t off, int count, int *eof, void *data)
6999+{
7000+ int sz = 0;
7001+ char *info_level_text = NULL;
7002+
7003+ PROCPRINT("Enterprise Volume Management System: Info\n");
7004+ switch (evms_info_level) {
7005+ case EVMS_INFO_CRITICAL:
7006+ info_level_text = "critical";
7007+ break;
7008+ case EVMS_INFO_SERIOUS:
7009+ info_level_text = "serious";
7010+ break;
7011+ case EVMS_INFO_ERROR:
7012+ info_level_text = "error";
7013+ break;
7014+ case EVMS_INFO_WARNING:
7015+ info_level_text = "warning";
7016+ break;
7017+ case EVMS_INFO_DEFAULT:
7018+ info_level_text = "default";
7019+ break;
7020+ case EVMS_INFO_DETAILS:
7021+ info_level_text = "details";
7022+ break;
7023+ case EVMS_INFO_DEBUG:
7024+ info_level_text = "debug";
7025+ break;
7026+ case EVMS_INFO_EXTRA:
7027+ info_level_text = "extra";
7028+ break;
7029+ case EVMS_INFO_ENTRY_EXIT:
7030+ info_level_text = "entry exit";
7031+ break;
7032+ case EVMS_INFO_EVERYTHING:
7033+ info_level_text = "everything";
7034+ break;
7035+ default:
7036+ info_level_text = "unknown";
7037+ break;
7038+ }
7039+ PROCPRINT("EVMS info level: %d (%s).\n",
7040+ evms_info_level, info_level_text);
7041+
7042+ PROCPRINT("EVMS kernel version: %d.%d.%d\n",
7043+ EVMS_MAJOR_VERSION,
7044+ EVMS_MINOR_VERSION, EVMS_PATCHLEVEL_VERSION);
7045+
7046+ PROCPRINT("EVMS IOCTL interface version: %d.%d.%d\n",
7047+ EVMS_IOCTL_INTERFACE_MAJOR,
7048+ EVMS_IOCTL_INTERFACE_MINOR, EVMS_IOCTL_INTERFACE_PATCHLEVEL);
7049+
7050+ PROCPRINT("EVMS Common Services version: %d.%d.%d\n",
7051+ EVMS_COMMON_SERVICES_MAJOR,
7052+ EVMS_COMMON_SERVICES_MINOR, EVMS_COMMON_SERVICES_PATCHLEVEL);
7053+
7054+ *eof = 1;
7055+
7056+out:
7057+ *start = page + off;
7058+ sz -= off;
7059+ if (sz < 0)
7060+ sz = 0;
7061+ return sz > count ? count : sz;
7062+}
7063+
7064+static int
7065+evms_plugins_read_proc(char *page,
7066+ char **start, off_t off, int count, int *eof, void *data)
7067+{
7068+ int sz = 0;
7069+ struct evms_registered_plugin *rp = NULL;
7070+
7071+ PROCPRINT("Enterprise Volume Management System: Plugins\n");
7072+ /* 0 1 1 2 2 3 3 4 4 5 5 6 6 7 */
7073+ /* 1 5 0 5 0 5 0 5 0 5 0 5 0 5 0 */
7074+ PROCPRINT(" ---------Plugin---------- required services\n");
7075+ PROCPRINT(" ----id---- version version\n\n");
7076+ for (rp = registered_plugin_head; rp; rp = rp->next) {
7077+ PROCPRINT(" %x.%x.%x\t %d.%d.%d\t%d.%d.%d\n",
7078+ GetPluginOEM(rp->plugin->id),
7079+ GetPluginType(rp->plugin->id),
7080+ GetPluginID(rp->plugin->id),
7081+ rp->plugin->version.major,
7082+ rp->plugin->version.minor,
7083+ rp->plugin->version.patchlevel,
7084+ rp->plugin->required_services_version.major,
7085+ rp->plugin->required_services_version.minor,
7086+ rp->plugin->required_services_version.patchlevel);
7087+ }
7088+
7089+out:
7090+ *start = page + off;
7091+ sz -= off;
7092+ if (sz < 0)
7093+ sz = 0;
7094+ return sz > count ? count : sz;
7095+}
7096+
7097+static int
7098+evms_volumes_read_proc(char *page,
7099+ char **start, off_t off, int count, int *eof, void *data)
7100+{
7101+ int sz = 0, j;
7102+
7103+ PROCPRINT("Enterprise Volume Management System: Volumes\n");
7104+ PROCPRINT("major minor #blocks type flags name\n\n");
7105+ for (j = 1; j < MAX_EVMS_VOLUMES; j++) {
7106+ struct evms_logical_volume *volume;
7107+
7108+ volume = &evms_logical_volumes[j];
7109+ if (volume->node) {
7110+ PROCPRINT("%5d %7d %16Ld %s %s %s %s%s\n",
7111+ EVMS_MAJOR, j,
7112+ (long long)volume->node->total_vsectors >> 1,
7113+ (volume->flags & EVMS_VOLUME_FLAG) ? "evms " : "compat",
7114+ (volume->flags & EVMS_VOLUME_READ_ONLY) ? "ro" : "rw",
7115+ (volume->flags & EVMS_VOLUME_PARTIAL) ? "p " : " ",
7120+ EVMS_DEV_NODE_PATH, volume->name);
7121+ }
7122+ }
7123+out:
7124+ *start = page + off;
7125+ sz -= off;
7126+ if (sz < 0)
7127+ sz = 0;
7128+ return sz > count ? count : sz;
7129+
7130+}
7131+#endif
7132+
7133+/**********************************************************/
7134+/* END -- Proc FS Support functions */
7135+/**********************************************************/
7136+
7137+/**********************************************************/
7138+/* START -- FOPS functions definitions */
7139+/**********************************************************/
7140+
7141+/************************************************/
7142+/* START -- IOCTL commands -- EVMS specific */
7143+/************************************************/
7144+
7145+static int
7146+evms_ioctl_cmd_get_ioctl_version(void *arg)
7147+{
7148+ int rc = 0;
7149+ struct evms_version ver;
7150+
7151+ ver.major = EVMS_IOCTL_INTERFACE_MAJOR;
7152+ ver.minor = EVMS_IOCTL_INTERFACE_MINOR;
7153+ ver.patchlevel = EVMS_IOCTL_INTERFACE_PATCHLEVEL;
7154+
7155+ /* copy info to userspace */
7156+ if (copy_to_user(arg, &ver, sizeof (ver)))
7157+ rc = -EFAULT;
7158+
7159+ return (rc);
7160+}
7161+
7162+static int
7163+evms_ioctl_cmd_get_version(void *arg)
7164+{
7165+ int rc = 0;
7166+ struct evms_version ver;
7167+
7168+ ver.major = EVMS_MAJOR_VERSION;
7169+ ver.minor = EVMS_MINOR_VERSION;
7170+ ver.patchlevel = EVMS_PATCHLEVEL_VERSION;
7171+
7172+ /* copy info to userspace */
7173+ if (copy_to_user(arg, &ver, sizeof (ver)))
7174+ rc = -EFAULT;
7175+
7176+ return (rc);
7177+}
7178+
7179+static int
7180+evms_ioctl_cmd_get_info_level(void *arg)
7181+{
7182+ int rc = 0;
7183+
7184+ /* copy info to userspace */
7185+ if (copy_to_user(arg, &evms_info_level, sizeof (evms_info_level)))
7186+ rc = -EFAULT;
7187+
7188+ return (rc);
7189+}
7190+
7191+static int
7192+evms_ioctl_cmd_set_info_level(void *arg)
7193+{
7194+ int temp, rc = 0;
7195+
7196+ /* copy info from userspace */
7197+ if (copy_from_user(&temp, arg, sizeof (temp)))
7198+ rc = -EFAULT;
7199+ else
7200+ evms_info_level = temp;
7201+
7202+ return (rc);
7203+}
7204+
7205+/* function: evms_quiesce_volume
7206+ *
7207+ * this function performs the actual quiesce operation on
7208+ * a volume in kernel memory.
7209+ *
7210+ * when quiescing, all new I/Os to a volume are stopped,
7211+ * causing the calling thread to block. this thread then
7212+ * waits until all I/Os in progress are completed, before
7213+ * returning control to the caller.
7214+ *
7215+ * when unquiescing, all new I/Os are allowed to proceed
7216+ * unencumbered, and all threads waiting (blocked) on this
7217+ * volume are woken up and allowed to proceed.
7218+ *
7219+ */
7220+static int
7221+evms_quiesce_volume(struct evms_logical_volume *volume,
7222+ struct inode *inode,
7223+ struct file *file, struct evms_quiesce_vol_pkt *qv)
7224+{
7225+ int rc;
7226+
7227+ LOG_DEBUG("%squiescing %s.\n",
7228+ ((qv->command) ? "" : "un"), volume->name);
7229+
7230+#ifdef VFS_PATCH_PRESENT
7231+ if (qv->do_vfs) {
7232+ /* VFS function call to sync and lock the filesystem */
7233+ fsync_dev_lockfs(MKDEV(EVMS_MAJOR, qv->minor));
7234+ volume->vfs_quiesced = TRUE;
7235+ }
7236+#endif
7237+ volume->quiesced = qv->command;
7238+
7239+ /* Command specified was "quiesce". */
7240+ if (qv->command) {
7241+ /* After setting the volume to
7242+ * a quiesced state, there could
7243+ * be threads (on SMP systems)
7244+ * that are executing in the
7245+ * function, evms_handle_request,
7246+ * between the "wait_event" and the
7247+ * "atomic_inc" lines. We need to
7248+ * provide a "delay" sufficient
7249+ * to allow those threads to
7250+ * reach the atomic_inc's
7251+ * before executing the while loop
7252+ * below. The "schedule" call should
7253+ * provide this.
7254+ */
7255+ schedule();
7256+ /* wait for outstanding requests
7257+ * to complete
7258+ */
7259+ while (atomic_read(&volume->requests_in_progress) > 0)
7260+ schedule();
7261+ }
7262+ /* send this command down the stack so lower */
7263+ /* layers can know about this */
7264+ rc = IOCTL(volume->node, inode, file,
7265+ EVMS_QUIESCE_VOLUME, (unsigned long) qv);
7266+ if (!rc) {
7267+ /* Command specified was "unquiesce". */
7268+ if (!qv->command) {
7269+ /* "wakeup" any I/O requests waiting on
7270+ * this volume.
7271+ */
7272+ if (waitqueue_active(&volume->wait_queue))
7273+ wake_up(&volume->wait_queue);
7274+#ifdef VFS_PATCH_PRESENT
7275+ if (volume->vfs_quiesced) {
7276+ /* VFS function call to unlock the filesystem */
7277+ unlockfs(MKDEV(EVMS_MAJOR, qv->minor));
7278+ volume->vfs_quiesced = FALSE;
7279+ }
7280+#endif
7281+ }
7282+ } else {
7283+ LOG_ERROR("error(%d) %squiescing %s.\n",
7284+ rc, ((qv->command) ? "" : "un"), volume->name);
7285+ }
7286+ return (rc);
7287+}
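+
+/* Userspace-side sketch (hypothetical, compiled out): quiescing a
+ * volume through the ioctl interface that lands in the function
+ * above. Field names follow struct evms_quiesce_vol_pkt; evms_fd and
+ * target_minor are illustrative.
+ */
+#if 0
+static void example_quiesce(int evms_fd, int target_minor)
+{
+ struct evms_quiesce_vol_pkt qv;
+
+ memset(&qv, 0, sizeof(qv));
+ qv.command = EVMS_QUIESCE;
+ qv.do_vfs = EVMS_VFS_DO_NOTHING;
+ qv.minor = target_minor;
+ ioctl(evms_fd, EVMS_QUIESCE_VOLUME, &qv);
+}
+#endif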
7288+
7289+/* function: evms_delete_volume
7290+ *
7291+ * this function performs the actual delete operation on
7292+ * a volume to purge it from kernel memory. all structures
7293+ * and memory consumed by this volume will be freed, as well
7294+ * as clearing or unregistering any system services or
7295+ * global data arrays.
7296+ *
7297+ * NOTE: this function will return -EBUSY on attempts to
7298+ * delete mounted volumes.
7299+ *
7300+ */
7301+static int
7302+evms_delete_volume(struct evms_logical_volume *volume,
7303+ struct evms_delete_vol_pkt *dv)
7304+{
7305+ int rc = 0;
7306+
7307+ /* if this is a "permanent" delete */
7308+ /* check to make sure volume is not mounted */
7309+ if (dv->command) {
7310+ if (is_open(dv->minor)) {
7311+ rc = -EBUSY;
7312+ } else {
7313+ // invalidate the device since it is not coming back
7314+ // this is required in case we are re-using the minor number
7315+ invalidate_device(MKDEV(EVMS_MAJOR, dv->minor), 1);
7316+ }
7317+ }
7318+
7319+ /* invoke the delete ioctl at the top of the feature stack */
7320+ if (!rc) {
7321+ LOG_DETAILS("deleting '%s'.\n", volume->name);
7322+ rc = DELETE(volume->node);
7323+ }
7324+
7325+ /* the volume has been deleted, do any clean up work
7326+ * required.
7327+ */
7328+ if (!rc) {
7329+ devfs_unregister(volume->devfs_handle);
7330+ if (dv->command) {
7331+ /* if "permanent" delete, free the name
7332+ * and NULL the name field.
7333+ */
7334+ kfree(volume->name);
7335+ volume->name = NULL;
7336+ volume->flags = 0;
7337+ } else {
7338+ /* if "soft" delete, leave the name so
7339+ * we can use it to reassign the same
7340+ * minor to this volume after a
7341+ * rediscovery.
7342+ */
7343+ volume->flags = EVMS_VOLUME_SOFT_DELETED;
7344+ }
7345+ volume->node = NULL;
7346+ set_device_ro(MKDEV(EVMS_MAJOR, dv->minor), 0);
7347+ blk_size[EVMS_MAJOR][dv->minor] = 0;
7348+ blksize_size[EVMS_MAJOR][dv->minor] = 0;
7349+ hardsect_size[EVMS_MAJOR][dv->minor] = 0;
7350+ evms_volumes--;
7351+ } else {
7352+ LOG_ERROR("error(%d) %s deleting %s.\n",
7353+ rc, ((dv->command) ? "hard" : "soft"), volume->name);
7354+ }
7355+ return (rc);
7356+}
7357+
7358+/* function: evms_user_delete_volume
7359+ *
7360+ * this function, depending on the parameters, performs
7361+ * a "soft" or a "hard" delete. for a "soft" delete, a
7362+ * quiesce & delete request is queued up, to be executed
7363+ * at the beginning of the next rediscovery. for a
7364+ * "hard" delete, the target volume is quiesced and then
7365+ * deleted. if there are any errors attempting to delete
7366+ * the target, then the target is unquiesced. if an
7367+ * associative volume is specified it is quiesced before
7368+ * the target volume is quiesced, and is unquiesced
7369+ * after the attempt to delete the target volume.
7370+ *
7371+ */
7372+static int
7373+evms_user_delete_volume(struct evms_logical_volume *lvt,
7374+ struct inode *inode,
7375+ struct file *file, struct evms_delete_vol_pkt *dv)
7376+{
7377+ int rc = 0;
7378+
7379+ if (!dv->command) {
7380+ /* "soft delete" requested */
7381+ lvt->flags |= (EVMS_REQUESTED_QUIESCE | EVMS_REQUESTED_DELETE);
7382+ if (dv->do_vfs) {
7383+ lvt->flags |= EVMS_REQUESTED_VFS_QUIESCE;
7384+ }
7385+ } else {
7386+ /* "hard delete" requested */
7387+ int qa = FALSE;
7388+ struct evms_quiesce_vol_pkt qv;
7389+ struct evms_logical_volume *lva = NULL;
7390+
7391+ if (dv->associative_minor) {
7392+ /* associative volume specified
7393+ *
7394+ * quiesce it
7395+ */
7396+ lva = &evms_logical_volumes[dv->associative_minor];
7397+ /* quiesce associative volume */
7398+ qv.command = EVMS_QUIESCE;
7399+ qv.do_vfs = EVMS_VFS_DO_NOTHING;
7400+ qv.minor = dv->associative_minor;
7401+ rc = evms_quiesce_volume(lva, inode, file, &qv);
7402+ qa = (rc) ? FALSE : TRUE;
7403+ }
7404+ if (!rc) {
7405+ /* quiesce target volume */
7406+ qv.command = EVMS_QUIESCE;
7407+ qv.do_vfs = EVMS_VFS_DO_NOTHING;
7408+ qv.minor = dv->minor;
7409+ rc = evms_quiesce_volume(lvt, inode, file, &qv);
7410+ }
7411+ if (!rc) {
7412+ /* delete the target volume */
7413+ rc = evms_delete_volume(lvt, dv);
7414+ if (rc) {
7415+				/* got an error deleting...
7416+ *
7417+ * unquiesce the target
7418+ */
7419+ qv.command = EVMS_UNQUIESCE;
7420+ qv.do_vfs = EVMS_VFS_DO_NOTHING;
7421+ qv.minor = dv->minor;
7422+ evms_quiesce_volume(lvt, inode, file, &qv);
7423+ }
7424+ }
7425+ if (dv->associative_minor) {
7426+ /* associative volume specified
7427+ *
7428+ * unquiesce it
7429+ */
7430+ if (qa) {
7431+ /* only unquiesce associative
7432+ * if we successfully quiesced
7433+ * it previously.
7434+ */
7435+ qv.command = EVMS_UNQUIESCE;
7436+ qv.do_vfs = EVMS_VFS_DO_NOTHING;
7437+ qv.minor = dv->associative_minor;
7438+ evms_quiesce_volume(lva, inode, file, &qv);
7439+ }
7440+ }
7441+ }
7442+ return (rc);
7443+}
7444+
7445+/* function: evms_ioctl_cmd_delete_volume
7446+ *
7447+ * this function copies user data to/from the kernel and
7448+ * validates user parameters. after validation, control
7449+ * is passed to the worker routine evms_user_delete_volume.
7450+ *
7451+ */
7452+static int
7453+evms_ioctl_cmd_delete_volume(struct inode *inode,
7454+ struct file *file, unsigned long arg)
7455+{
7456+ int rc = 0;
7457+ struct evms_delete_vol_pkt tmp, *user_parms;
7458+ struct evms_logical_volume *volume = NULL;
7459+
7460+ user_parms = (struct evms_delete_vol_pkt *) arg;
7461+ /* copy user's parameters to kernel space */
7462+ if (copy_from_user(&tmp, user_parms, sizeof (tmp)))
7463+ rc = -EFAULT;
7464+
7465+ /* check to make sure associative minor is in use */
7466+ if (!rc) {
7467+ if (tmp.associative_minor) {
7468+ volume = &evms_logical_volumes[tmp.associative_minor];
7469+ if (volume->node == NULL)
7470+ rc = -ENXIO;
7471+ }
7472+ }
7473+ /* check to make sure target minor is in use */
7474+ if (!rc) {
7475+ volume = &evms_logical_volumes[tmp.minor];
7476+ if (volume->node == NULL)
7477+ rc = -ENXIO;
7478+ else
7479+ rc = evms_user_delete_volume(volume, inode, file, &tmp);
7480+ }
7481+ /* copy the status value back to the user */
7482+ tmp.status = rc;
7483+ if (copy_to_user(user_parms, &tmp, sizeof (tmp)))
7484+ rc = -EFAULT;
7485+
7486+ return (rc);
7487+}
7488+
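+/* A minimal user-space sketch of driving EVMS_DELETE_VOLUME; the
+ * control-node path, descriptor handling, and error handling are
+ * assumptions, not part of this driver:
+ *
+ *	struct evms_delete_vol_pkt dv = { 0 };
+ *	int fd = open("/dev/evms/.control", O_RDONLY);	// assumed path
+ *	dv.command = EVMS_HARD_DELETE;	// EVMS_SOFT_DELETE defers instead
+ *	dv.minor = target_minor;	// minor of the volume to delete
+ *	dv.associative_minor = 0;	// no associative volume
+ *	if (ioctl(fd, EVMS_DELETE_VOLUME, &dv) < 0 || dv.status)
+ *		;	// dv.status is -EBUSY if the volume is still open
+ */
+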
7489+/* function: evms_full_rediscover_prep
7490+ *
7491+ * this function helps to prevent problems when evms is
7492+ * configured with the base built in statically and some
7493+ * plugins built as modules.
7494+ *
7495+ * in these cases, when the initial discovery is done,
7496+ * only the statically built modules are available for
7497+ * volume construction. as a result, some volumes that
7498+ * require the plugins built as modules (which haven't
7499+ * been loaded yet) in order to be fully reconstructed
7500+ * may come up as compatibility volumes or partial volumes.
7501+ *
7502+ * when parts of evms are built as modules, a user-space
7503+ * rediscover utility is used to perform a secondary
7504+ * rediscover, after all the plugins built as modules
7505+ * have been loaded, to construct all the volumes
7506+ * requiring these plugins.
7507+ *
7508+ * however, since some of the volumes requiring the plugins
7509+ * built as modules may have already been exported as
7510+ * compatibility or partial volumes, we need to purge these
7511+ * volumes from the kernel's memory, so that they can be
7512+ * rediscovered and claimed by the appropriate plugins, and
7513+ * reconstructed into the correct volumes.
7514+ *
7515+ * this function purges all compatibility volumes that are
7516+ * not in use (mounted) and all partial volumes, prior to
7517+ * doing the secondary rediscover, thus allowing volumes to
7518+ * be rediscovered correctly.
7519+ *
7520+ * NOTE: again, this is only required in cases when a
7521+ * combination of plugins are built statically and as
7522+ * modules.
7523+ *
7524+ */
7525+static void
7526+evms_full_rediscover_prep(struct inode *inode, struct file *file)
7527+{
7528+ int rc = 0, i;
7529+
7530+ LOG_DETAILS("%s: started.\n", __FUNCTION__);
7531+ /* check for acceptable volumes to be deleted */
7532+ for (i = 1; i < MAX_EVMS_VOLUMES; i++) {
7533+ struct evms_logical_volume *volume = NULL;
7534+ struct evms_delete_vol_pkt dv;
7535+ int volume_open, doit;
7536+
7537+ volume = &evms_logical_volumes[i];
7538+ if (!volume->node)
7539+ continue;
7540+ volume_open = is_open(i);
7541+ /* only proceed on volumes that are:
7542+ * partial volumes
7543+ * OR
7544+ * unopened compatibility volumes
7545+ */
7546+ doit = FALSE;
7547+ if (volume->flags & EVMS_VOLUME_PARTIAL) {
7548+ /* do all partial volumes
7549+ */
7550+ doit = TRUE;
7551+ } else if (!(volume->flags & EVMS_VOLUME_FLAG)) {
7552+ /* check all compatibility volumes
7553+ */
7554+ if (!volume_open && !is_swap_partition(MKDEV(EVMS_MAJOR, i))) {
7555+ /* only do unopened volumes
7556+ */
7557+ doit = TRUE;
7558+ }
7559+ }
7560+ if (doit == FALSE) {
7561+ continue;
7562+ }
7563+ /* delete the volume from memory.
7564+ * do a 'soft' delete if volume
7565+ * is mounted, and 'hard' delete
7566+ * if it is not.
7567+ *
7568+ * NOTE: the delete operation will
7569+ * clear the bits in the flags field.
7570+ */
7571+ dv.command = (volume_open) ?
7572+ EVMS_SOFT_DELETE : EVMS_HARD_DELETE;
7573+ dv.minor = i;
7574+ dv.associative_minor = 0;
7575+ dv.status = 0;
7576+ rc = evms_user_delete_volume(volume, inode, file, &dv);
7577+ }
7578+ LOG_DETAILS("%s: completed.\n", __FUNCTION__);
7579+}
7580+
7581+static int
7582+evms_ioctl_cmd_rediscover_volumes(struct inode *inode,
7583+ struct file *file,
7584+ unsigned int cmd, unsigned long arg)
7585+{
7586+ int rc, i;
7587+ struct evms_rediscover_pkt tmp, *user_parms;
7588+ u64 *array_ptr = NULL;
7589+ ulong array_size = 0;
7590+ struct evms_logical_volume *volume = NULL;
7591+
7592+ rc = tmp.drive_count = 0;
7593+ user_parms = (struct evms_rediscover_pkt *) arg;
7594+ /* copy user's parameters to kernel space */
7595+ if (copy_from_user(&tmp, user_parms, sizeof (tmp)))
7596+ rc = -EFAULT;
7597+
7598+	if (!rc && tmp.drive_count == REDISCOVER_ALL_DEVICES) {
7599+ evms_full_rediscover_prep(inode, file);
7600+ }
7601+ /* quiesce all queued volumes */
7602+ for (i = 1; i < MAX_EVMS_VOLUMES; i++) {
7603+ struct evms_quiesce_vol_pkt qv;
7604+
7605+ volume = &evms_logical_volumes[i];
7606+ if (!volume->node) {
7607+ continue;
7608+ }
7609+ if (!(volume->flags & EVMS_REQUESTED_QUIESCE)) {
7610+ continue;
7611+ }
7612+ qv.command = EVMS_QUIESCE;
7613+ qv.minor = i;
7614+		qv.do_vfs = (volume->flags & EVMS_REQUESTED_VFS_QUIESCE) ?
7615+		    EVMS_VFS_DO : EVMS_VFS_DO_NOTHING;
+		qv.status = 0;
7616+ rc = evms_quiesce_volume(volume, inode, file, &qv);
7617+ }
7618+ /* "soft" delete all queued volumes */
7619+ for (i = 1; i < MAX_EVMS_VOLUMES; i++) {
7620+ struct evms_delete_vol_pkt dv;
7621+
7622+ volume = &evms_logical_volumes[i];
7623+ if (!volume->node) {
7624+ continue;
7625+ }
7626+ if (!(volume->flags & EVMS_REQUESTED_DELETE)) {
7627+ continue;
7628+ }
7629+ dv.command = EVMS_SOFT_DELETE;
7630+ dv.minor = i;
7631+ dv.associative_minor = 0;
7632+ dv.status = 0;
7633+ rc = evms_delete_volume(volume, &dv);
7634+ }
7635+
7636+	if (!rc && tmp.drive_count && (tmp.drive_count != REDISCOVER_ALL_DEVICES)) {
7637+ if (!rc) {
7638+ /* create space for userspace drive array */
7639+ array_size =
7640+ sizeof (*tmp.drive_array) * tmp.drive_count;
7641+ array_ptr = tmp.drive_array;
7642+ tmp.drive_array = kmalloc(array_size, GFP_KERNEL);
7643+ if (!tmp.drive_array) {
7644+ rc = -ENOMEM;
7645+ }
7646+ }
7647+ if (!rc)
7648+ /* copy rediscover drive array to kernel space */
7649+ if (copy_from_user
7650+ (tmp.drive_array, array_ptr, array_size))
7651+ rc = -EFAULT;
7652+ }
7653+
7654+ if (!rc) {
7655+		/* forward declaration; the function is defined later in this file */
+		static int evms_discover_volumes(struct evms_rediscover_pkt *);
7656+ /* perform the rediscovery operation */
7657+ rc = evms_discover_volumes(&tmp);
7658+ }
7659+
7660+	/* clean up after the operation: free the kernel copy
7661+	 * of the drive array only if it was actually allocated */
7662+	if (array_size && tmp.drive_array)
+		kfree(tmp.drive_array);
7663+
7664+ /* set return code and copy info to userspace */
7665+ tmp.status = rc;
7666+ if (copy_to_user(&user_parms->status, &tmp.status, sizeof (tmp.status)))
7667+ rc = -EFAULT;
7668+
7669+ return (rc);
7670+}
7671+
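+/* A minimal sketch of the two ways user space can drive this ioctl;
+ * fd and the handle array are assumptions:
+ *
+ *	struct evms_rediscover_pkt rd = { 0 };
+ *	rd.drive_count = REDISCOVER_ALL_DEVICES;	// full rediscovery
+ *	ioctl(fd, EVMS_REDISCOVER_VOLUMES, &rd);	// rd.status holds the result
+ *
+ *	// or target specific disks by handle (from EVMS_GET_LOGICAL_DISK):
+ *	rd.drive_count = n;
+ *	rd.drive_array = handles;	// array of n u64 device handles
+ *	ioctl(fd, EVMS_REDISCOVER_VOLUMES, &rd);
+ */
+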
7672+static struct evms_list_node *user_disk_ptr;
7673+static int
7674+evms_ioctl_cmd_get_logical_disk(void *arg)
7675+{
7676+ int rc = 0;
7677+ struct evms_user_disk_pkt tmp, *user_parms;
7678+
7679+ user_parms = (struct evms_user_disk_pkt *) arg;
7680+ /* copy user's parameters to kernel space */
7681+ if (copy_from_user
7682+ (&tmp.command, &user_parms->command, sizeof (tmp.command)))
7683+ rc = -EFAULT;
7684+
7685+ if (!rc) {
7686+ if (tmp.command == EVMS_FIRST_DISK)
7687+ user_disk_ptr = evms_global_device_list;
7688+ else /* tmp.command == EVMS_NEXT_DISK */
7689+ user_disk_ptr = user_disk_ptr->next;
7690+
7691+ if (user_disk_ptr == NULL)
7692+ tmp.status = EVMS_DISK_INVALID;
7693+ else {
7694+ tmp.status = EVMS_DISK_VALID;
7695+ tmp.disk_handle =
7696+ NODE_TO_DEV_HANDLE(user_disk_ptr->item);
7697+ }
7698+ /* copy info to userspace */
7699+ if (copy_to_user(user_parms, &tmp, sizeof (tmp)))
7700+ rc = -EFAULT;
7701+ }
7702+ return (rc);
7703+}
7704+
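+/* A minimal sketch of the enumeration loop this ioctl supports; fd is
+ * an assumed descriptor on the EVMS control node:
+ *
+ *	struct evms_user_disk_pkt dp;
+ *	dp.command = EVMS_FIRST_DISK;
+ *	while (!ioctl(fd, EVMS_GET_LOGICAL_DISK, &dp) &&
+ *	       dp.status == EVMS_DISK_VALID) {
+ *		// dp.disk_handle identifies this disk in later calls
+ *		dp.command = EVMS_NEXT_DISK;
+ *	}
+ */
+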
7705+static int
7706+evms_ioctl_cmd_get_logical_disk_info(void *arg)
7707+{
7708+ int rc = 0;
7709+ struct evms_user_disk_info_pkt tmp, *user_parms;
7710+ struct evms_list_node *p;
7711+ struct evms_logical_node *disk_node = NULL;
7712+
7713+ user_parms = (struct evms_user_disk_info_pkt *) arg;
7714+ /* copy user's parameters to kernel space */
7715+ if (copy_from_user
7716+ (&tmp.disk_handle, &user_parms->disk_handle,
7717+ sizeof (tmp.disk_handle)))
7718+ rc = -EFAULT;
7719+
7720+ /* check handle for validity */
7721+ if (!rc) {
7722+ rc = -EINVAL;
7723+ disk_node = DEV_HANDLE_TO_NODE(tmp.disk_handle);
7724+ for (p = evms_global_device_list; p; p = p->next)
7725+ if (p->item == disk_node) {
7726+ rc = 0;
7727+ user_disk_ptr = p;
7728+ break;
7729+ }
7730+ }
7731+
7732+ /* populate kernel copy of user's structure with appropriate info */
7733+ if (!rc) {
7734+ struct hd_geometry geo;
7735+ struct evms_logical_node *node =
7736+ (struct evms_logical_node *) user_disk_ptr->item;
7737+ tmp.flags = node->flags;
7738+ strcpy(tmp.disk_name, EVMS_DEV_NODE_PATH);
7739+ strcat(tmp.disk_name, node->name);
7740+ rc = evms_cs_kernel_ioctl(node, EVMS_UPDATE_DEVICE_INFO,
7741+ (ulong) NULL);
7742+ if (!rc) {
7743+ tmp.total_sectors = node->total_vsectors;
7744+ tmp.hardsect_size = node->hardsector_size;
7745+ tmp.block_size = node->block_size;
7746+ rc = evms_cs_kernel_ioctl(node, HDIO_GETGEO,
7747+ (unsigned long) &geo);
7748+ }
7749+ if (!rc) {
7750+ tmp.geo_sectors = geo.sectors;
7751+ tmp.geo_heads = geo.heads;
7752+ tmp.geo_cylinders = geo.cylinders;
7753+ }
7754+ }
7755+
7756+ /* set return code and copy info to userspace */
7757+ tmp.status = rc;
7758+ if (copy_to_user(user_parms, &tmp, sizeof (tmp)))
7759+ rc = -EFAULT;
7760+
7761+ return (rc);
7762+}
7763+
7764+static int
7765+evms_ioctl_cmd_sector_io(void *arg)
7766+{
7767+ int rc;
7768+#define MAX_IO_SIZE 128	/* vsectors; 128 * 512 bytes == 64KB */
7769+ u64 io_size, max_io_size = MAX_IO_SIZE;
7770+#undef MAX_IO_SIZE
7771+ struct evms_sector_io_pkt tmp, *user_parms;
7772+ struct evms_logical_node *disk_node = NULL;
7773+ struct evms_list_node *list_node;
7774+ unsigned char *io_buffer;
7775+
7776+ rc = 0;
7777+ list_node = NULL;
7778+ io_buffer = NULL;
7779+
7780+ user_parms = (struct evms_sector_io_pkt *) arg;
7781+ /* copy user's parameters to kernel space */
7782+ if (copy_from_user(&tmp, user_parms, sizeof (tmp)))
7783+ rc = -EFAULT;
7784+
7785+ /* check handle for validity */
7786+ if (!rc) {
7787+ rc = -EINVAL;
7788+ disk_node = DEV_HANDLE_TO_NODE(tmp.disk_handle);
7789+ for (list_node = evms_global_device_list; list_node;
7790+ list_node = list_node->next)
7791+ if (list_node->item == disk_node) {
7792+ rc = 0;
7793+ break;
7794+ }
7795+ }
7796+ if (!rc) {
7797+ int done;
7798+		/* allocate an I/O buffer up to 64KB in size */
7799+ if (tmp.sector_count < max_io_size)
7800+ max_io_size = tmp.sector_count;
7801+ do {
7802+ done = TRUE;
7803+			/* allocate a buffer large enough to hold max_io_size sectors */
7804+ io_buffer =
7805+ kmalloc(max_io_size << EVMS_VSECTOR_SIZE_SHIFT,
7806+ GFP_KERNEL);
7807+ if (!io_buffer) {
7808+ max_io_size >>= 1;
7809+ if (!max_io_size) {
7810+ rc = -ENOMEM;
7811+ } else {
7812+ done = FALSE;
7813+ }
7814+ }
7815+ } while (!done);
7816+ }
7817+ /* perform io with specified disk */
7818+ if (!rc) {
7819+ u64 io_sector_offset, io_remaining;
7820+ u64 io_bytes;
7821+ u_char *user_buffer_ptr;
7822+
7823+ io_remaining = tmp.sector_count;
7824+ io_sector_offset = 0;
7825+ user_buffer_ptr = tmp.buffer_address;
7826+ while (io_remaining) {
7827+ /* compute the io_size for this pass */
7828+ io_size = (io_remaining >= max_io_size) ?
7829+ max_io_size : io_remaining;
7830+
7831+ io_bytes = io_size << EVMS_VSECTOR_SIZE_SHIFT;
7832+			/* for writes, copy this pass's data from user space */
7833+			if (tmp.io_flag == EVMS_SECTOR_IO_WRITE) {
7834+				/* copy io_bytes from user data buffer */
7835+ if (copy_from_user(io_buffer,
7836+ user_buffer_ptr, io_bytes))
7837+ rc = -EFAULT;
7838+ }
7839+ if (rc)
7840+ break;
7841+
7842+			/* perform the I/O, io_size sectors per pass */
7843+ rc = INIT_IO(disk_node,
7844+ tmp.io_flag,
7845+ io_sector_offset + tmp.starting_sector,
7846+ io_size, io_buffer);
7847+
7848+ if (rc)
7849+ break;
7850+
7851+ if (tmp.io_flag != EVMS_SECTOR_IO_WRITE) {
7852+				/* copy io_bytes to user data buffer */
7853+ if (copy_to_user(user_buffer_ptr,
7854+ io_buffer, io_bytes))
7855+ rc = -EFAULT;
7856+ }
7857+ if (rc)
7858+ break;
7859+
7860+ user_buffer_ptr += io_bytes;
7861+ tmp.buffer_address += io_bytes;
7862+ io_sector_offset += io_size;
7863+ io_remaining -= io_size;
7864+ }
7865+ }
7866+
7867+	/* if the io_buffer was allocated, free it */
7868+ if (io_buffer)
7869+ kfree(io_buffer);
7870+
7871+ /* copy the status value back to the user */
7872+ tmp.status = rc;
7873+ if (copy_to_user(user_parms, &tmp, sizeof (tmp)))
7874+ rc = -EFAULT;
7875+
7876+ return (rc);
7877+}
7878+
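+/* A minimal sketch of a raw sector read through this ioctl; fd, the
+ * disk handle, and buf are assumptions:
+ *
+ *	struct evms_sector_io_pkt io = { 0 };
+ *	io.disk_handle = handle;	// from EVMS_GET_LOGICAL_DISK
+ *	io.io_flag = 0;			// anything but EVMS_SECTOR_IO_WRITE reads
+ *	io.starting_sector = 0;
+ *	io.sector_count = 1;
+ *	io.buffer_address = buf;	// user buffer, sector_count vsectors big
+ *	ioctl(fd, EVMS_SECTOR_IO, &io);	// io.status holds the result
+ */
+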
7879+static int user_minor;
7880+static int
7881+evms_ioctl_cmd_get_minor(void *arg)
7882+{
7883+ int rc = 0;
7884+ struct evms_user_minor_pkt tmp, *user_parms;
7885+
7886+ user_parms = (struct evms_user_minor_pkt *) arg;
7887+ /* copy user's parameters to kernel space */
7888+ if (copy_from_user
7889+ (&tmp.command, &user_parms->command, sizeof (tmp.command)))
7890+ rc = -EFAULT;
7891+
7892+ if (!rc) {
7893+ if (tmp.command == EVMS_FIRST_VOLUME)
7894+ user_minor = 1;
7895+ else /* tmp.command == EVMS_NEXT_VOLUME */
7896+ user_minor++;
7897+
7898+ tmp.status = EVMS_VOLUME_INVALID;
7899+ for (; user_minor < MAX_EVMS_VOLUMES; user_minor++) {
7900+ struct evms_logical_volume *lv;
7901+
7902+ lv = &evms_logical_volumes[user_minor];
7903+ /* see if any corrupt volumes have been
7904+ * unmounted. If so, clean up the
7905+ * evms_logical_volumes array entry, and
7906+ * don't report the volume to the user.
7907+ */
7908+ if (lv->flags & EVMS_VOLUME_CORRUPT) {
7909+ if (!is_open(user_minor)) {
7910+ /* clear logical volume structure
7911+ * for this volume so it may be
7912+ * reused.
7913+ */
7914+ LOG_WARNING
7915+ ("ioctl_get_minor: found unmounted %s volume(%u,%u,%s).\n",
7916+ ((lv->
7917+ flags & EVMS_VOLUME_SOFT_DELETED)
7918+ ? "'soft deleted'" : ""),
7919+ EVMS_MAJOR, user_minor, lv->name);
7920+ LOG_WARNING
7921+ (" releasing minor(%d) used by volume(%s)!\n",
7922+ user_minor, lv->name);
7923+ kfree(lv->name);
7924+ lv->name = NULL;
7925+ lv->flags = 0;
7926+ }
7927+ }
7928+ if (lv->node || (lv->flags & EVMS_VOLUME_CORRUPT)) {
7929+ tmp.status = EVMS_VOLUME_VALID;
7930+ tmp.minor = user_minor;
7931+ break;
7932+ }
7933+ }
7934+
7935+ /* copy info to userspace */
7936+ if (copy_to_user(user_parms, &tmp, sizeof (tmp)))
7937+ rc = -EFAULT;
7938+ }
7939+ return (rc);
7940+}
7941+
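+/* A minimal sketch of walking the volume minors via this ioctl; fd is
+ * an assumed control-node descriptor:
+ *
+ *	struct evms_user_minor_pkt mp;
+ *	mp.command = EVMS_FIRST_VOLUME;
+ *	while (!ioctl(fd, EVMS_GET_MINOR, &mp) &&
+ *	       mp.status == EVMS_VOLUME_VALID) {
+ *		// mp.minor is a live volume (or a still-open corrupt one)
+ *		mp.command = EVMS_NEXT_VOLUME;
+ *	}
+ */
+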
7942+static int
7943+evms_ioctl_cmd_get_volume_data(void *arg)
7944+{
7945+ int rc = 0;
7946+ struct evms_volume_data_pkt tmp, *user_parms;
7947+ struct evms_logical_volume *volume = NULL;
7948+ struct evms_logical_node *node = NULL;
7949+
7950+ user_parms = (struct evms_volume_data_pkt *) arg;
7951+ /* copy user's parameters to kernel space */
7952+ if (copy_from_user(&tmp, user_parms, sizeof (tmp)))
7953+ rc = -EFAULT;
7954+
7955+ if (!rc) {
7956+ volume = &evms_logical_volumes[tmp.minor];
7957+ node = volume->node;
7958+ if (node == NULL)
7959+ rc = -ENODEV;
7960+ }
7961+ if (!rc) {
7962+ tmp.flags = volume->flags;
7963+ strcpy(tmp.volume_name, EVMS_DEV_NODE_PATH);
7964+ strcat(tmp.volume_name, volume->name);
7965+ }
7966+
7967+ /* copy return code and info to userspace */
7968+ tmp.status = rc;
7969+ if (copy_to_user(user_parms, &tmp, sizeof (tmp)))
7970+ rc = -EFAULT;
7971+ return (rc);
7972+}
7973+
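+/* A minimal sketch of fetching a volume's name and flags once its
+ * minor is known; fd and some_minor are assumptions:
+ *
+ *	struct evms_volume_data_pkt vd = { 0 };
+ *	vd.minor = some_minor;		// e.g. found via EVMS_GET_MINOR
+ *	ioctl(fd, EVMS_GET_VOLUME_DATA, &vd);
+ *	// on success vd.volume_name holds EVMS_DEV_NODE_PATH plus the
+ *	// volume name, and vd.flags mirrors the kernel volume flags
+ */
+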
7974+static struct evms_registered_plugin *ioctl_reg_record;
7975+static int
7976+evms_ioctl_cmd_get_plugin(void *arg)
7977+{
7978+ int rc = 0;
7979+ struct evms_kernel_plugin_pkt tmp, *user_parms;
7980+
7981+ user_parms = (struct evms_kernel_plugin_pkt *) arg;
7982+ /* copy user's parameters to kernel space */
7983+ if (copy_from_user
7984+ (&tmp.command, &user_parms->command, sizeof (tmp.command)))
7985+ rc = -EFAULT;
7986+
7987+ if (!rc) {
7988+ /* if the command is not 0, then verify
7989+ * that ioctl_reg_record is pointing to
7990+ * current and valid plugin header.
7991+ */
7992+ if (tmp.command) { /* tmp.command == EVMS_NEXT_PLUGIN */
7993+ struct evms_registered_plugin *tmp_reg_record;
7994+ tmp_reg_record = registered_plugin_head;
7995+ /* search the current plugin list */
7996+ while (tmp_reg_record) {
7997+ if (tmp_reg_record == ioctl_reg_record)
7998+ break;
7999+ tmp_reg_record = tmp_reg_record->next;
8000+ }
8001+ /* if the ioctl_reg_record is not in the
8002+ * current list, then start at the beginning.
8003+ */
8004+ if (!tmp_reg_record)
8005+ tmp.command = EVMS_FIRST_PLUGIN;
8006+ }
8007+
8008+ if (tmp.command == EVMS_FIRST_PLUGIN)
8009+ /* start at beginning of plugin list */
8010+ ioctl_reg_record = registered_plugin_head;
8011+ else /* tmp.command == EVMS_NEXT_PLUGIN */
8012+ /* continue from current position in list */
8013+ ioctl_reg_record = ioctl_reg_record->next;
8014+
8015+ tmp.status = EVMS_PLUGIN_INVALID;
8016+ tmp.id = 0;
8017+ if (ioctl_reg_record) {
8018+ tmp.id = ioctl_reg_record->plugin->id;
8019+ tmp.version = ioctl_reg_record->plugin->version;
8020+ tmp.status = EVMS_PLUGIN_VALID;
8021+ }
8022+
8023+ /* copy info to userspace */
8024+ if (copy_to_user(user_parms, &tmp, sizeof (tmp)))
8025+ rc = -EFAULT;
8026+ }
8027+ return (rc);
8028+}
8029+
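+/* A minimal sketch of enumerating the registered kernel plugins; fd
+ * is an assumed control-node descriptor:
+ *
+ *	struct evms_kernel_plugin_pkt kp;
+ *	kp.command = EVMS_FIRST_PLUGIN;
+ *	while (!ioctl(fd, EVMS_GET_PLUGIN, &kp) &&
+ *	       kp.status == EVMS_PLUGIN_VALID) {
+ *		// kp.id and kp.version describe one registered plugin
+ *		kp.command = EVMS_NEXT_PLUGIN;
+ *	}
+ */
+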
8030+static int
8031+evms_ioctl_cmd_plugin_ioctl(struct inode *inode,
8032+ struct file *file,
8033+ unsigned int cmd, unsigned long arg)
8034+{
8035+ int rc = 0, found = FALSE;
8036+ struct evms_plugin_ioctl_pkt tmp, *user_parms;
8037+ struct evms_registered_plugin *p;
8038+
8039+ user_parms = (struct evms_plugin_ioctl_pkt *) arg;
8040+ /* copy user's parameters to kernel space */
8041+ if (copy_from_user(&tmp, user_parms, sizeof (tmp)))
8042+ rc = -EFAULT;
8043+
8044+ if (!rc) {
8045+ /* search for the specified plugin */
8046+ for (p = registered_plugin_head; p; p = p->next)
8047+ /* check for the specified feature id */
8048+ if (p->plugin->id == tmp.feature_id) {
8049+ found = TRUE;
8050+				/* check that the entry point is implemented */
8051+ if (p->plugin->fops->direct_ioctl)
8052+ rc = DIRECT_IOCTL(p, inode, file, cmd,
8053+ arg);
8054+ else
8055+ rc = -ENOSYS;
8056+ break;
8057+ }
8058+ /* was the specified plugin found? */
8059+ if (found == FALSE)
8060+ rc = -ENOPKG;
8061+
8062+ /* copy the status value back to the user */
8063+ tmp.status = rc;
8064+ if (copy_to_user(user_parms, &tmp, sizeof (tmp)))
8065+ rc = -EFAULT;
8066+ }
8067+ return (rc);
8068+}
8069+
8070+#define MAX_BUFFER_SIZE 65536
8071+static int
8072+evms_ioctl_cmd_kernel_partial_csum(void *arg)
8073+{
8074+ int rc = 0;
8075+ u64 compute_size = MAX_BUFFER_SIZE;
8076+ struct evms_compute_csum_pkt tmp, *user_parms;
8077+ unsigned char *buffer = NULL;
8078+
8079+ user_parms = (struct evms_compute_csum_pkt *) arg;
8080+ /* copy user's parameters to kernel space */
8081+ if (copy_from_user(&tmp, user_parms, sizeof (tmp)))
8082+ rc = -EFAULT;
8083+
8084+ if (!rc) {
8085+		/* allocate an I/O buffer up to 64KB in size */
8086+ if (tmp.buffer_size < MAX_BUFFER_SIZE)
8087+ compute_size = tmp.buffer_size;
8088+
8089+		/* allocate a buffer large enough to hold compute_size bytes */
8090+ buffer = kmalloc(compute_size, GFP_KERNEL);
8091+ if (!buffer) {
8092+ rc = -ENOMEM;
8093+ }
8094+ }
8095+	/* compute the checksum over the user buffer */
8096+ if (!rc) {
8097+ u64 remaining_bytes;
8098+ u_char *user_buffer_ptr;
8099+ unsigned int insum = tmp.insum;
8100+
8101+ remaining_bytes = tmp.buffer_size;
8102+ user_buffer_ptr = tmp.buffer_address;
8103+ while (remaining_bytes) {
8104+ /* compute the compute_size for this pass */
8105+ compute_size = (remaining_bytes >= MAX_BUFFER_SIZE) ?
8106+ MAX_BUFFER_SIZE : remaining_bytes;
8107+
8108+ /* copy into kernel from user data buffer */
8109+ if (copy_from_user(buffer, user_buffer_ptr,
8110+ compute_size))
8111+ rc = -EFAULT;
8112+ if (rc)
8113+ break;
8114+			/* compute the checksum for this pass; only
8115+			 * compute_size bytes are in the kernel buffer */
8116+			tmp.outsum = csum_partial(buffer, compute_size, insum);
8117+ /* set up for another possible pass */
8118+ insum = tmp.outsum;
8119+ /* update loop progress variables */
8120+ user_buffer_ptr += compute_size;
8121+ tmp.buffer_address += compute_size;
8122+ remaining_bytes -= compute_size;
8123+ }
8124+ }
8125+
8126+	/* if the buffer was allocated, free it */
8127+ if (buffer)
8128+ kfree(buffer);
8129+
8130+ /* copy the status value back to the user */
8131+ tmp.status = rc;
8132+ if (copy_to_user(user_parms, &tmp, sizeof (tmp)))
8133+ rc = -EFAULT;
8134+
8135+ return (rc);
8136+}
8137+
8138+#undef MAX_BUFFER_SIZE
8139+
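+/* A minimal sketch of checksumming a region in two pieces; chaining
+ * outsum back into insum is the point. fd, part1/part2 and len1/len2
+ * are assumptions:
+ *
+ *	struct evms_compute_csum_pkt cs = { 0 };
+ *	cs.buffer_address = part1;  cs.buffer_size = len1;  cs.insum = 0;
+ *	ioctl(fd, EVMS_COMPUTE_CSUM, &cs);
+ *	cs.buffer_address = part2;  cs.buffer_size = len2;
+ *	cs.insum = cs.outsum;		// chain the partial checksum
+ *	ioctl(fd, EVMS_COMPUTE_CSUM, &cs);
+ *	// cs.outsum now covers part1 followed by part2
+ */
+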
8140+static int
8141+evms_ioctl_cmd_get_bmap(struct inode *inode,
8142+ struct file *file, unsigned int cmd, unsigned long arg)
8143+{
8144+ int rc = 0;
8145+ struct evms_get_bmap_pkt tmp, *user_parms;
8146+
8147+ user_parms = (struct evms_get_bmap_pkt *) arg;
8148+ /* copy user's parameters to kernel space */
8149+ if (copy_from_user(&tmp, user_parms, sizeof (tmp)))
8150+ rc = -EFAULT;
8151+
8152+ /* pass the ioctl down the volume stack */
8153+ if (!rc) {
8154+ struct evms_logical_volume *volume;
8155+
8156+ volume = &evms_logical_volumes[MINOR(inode->i_rdev)];
8157+ rc = IOCTL(volume->node, inode, file, cmd,
8158+ (unsigned long) &tmp);
8159+ }
8160+ /* copy the status value back to the user */
8161+ tmp.status = rc;
8162+ if (copy_to_user(user_parms, &tmp, sizeof (tmp)))
8163+ rc = -EFAULT;
8164+
8165+ return (rc);
8166+}
8167+
8168+static int
8169+evms_ioctl_cmd_process_notify_event(unsigned long arg)
8170+{
8171+ int rc = 0, found = FALSE;
8172+ struct evms_notify_pkt tmp, *user_parms;
8173+ struct evms_list_node **list_node = NULL;
8174+ struct evms_event *event = NULL;
8175+
8176+ user_parms = (struct evms_notify_pkt *) arg;
8177+ /* copy user's parameters to kernel space */
8178+ if (copy_from_user(&tmp, user_parms, sizeof (tmp)))
8179+ rc = -EFAULT;
8180+
8181+ /* check to see if PID has already been registered
8182+ * for this event.
8183+ */
8184+ if (!rc) {
8185+ list_node = &evms_global_notify_list;
8186+ while (*list_node) {
8187+ event = (*list_node)->item;
8188+ if ((event->pid == tmp.eventry.pid) &&
8189+ (event->eventid == tmp.eventry.eventid)) {
8190+ found = TRUE;
8191+ break;
8192+ }
8193+ list_node = &(*list_node)->next;
8194+ }
8195+ }
8196+	if (!rc && tmp.command) {	/* tmp.command == EVMS_REGISTER_EVENT */
8197+ /* registration code */
8198+ if (found) {
8199+ rc = -EBUSY;
8200+ LOG_ERROR
8201+			    ("error(%d) pid(%d) already registered to receive signal(%d) on event(%d).\n",
8202+ rc, tmp.eventry.pid, tmp.eventry.signo,
8203+ tmp.eventry.eventid);
8204+ } else {
8205+ /* register this pid/event type */
8206+ event = kmalloc(sizeof (struct evms_event), GFP_KERNEL);
8207+ if (!event) {
8208+ rc = -ENOMEM;
8209+ LOG_ERROR
8210+ ("error(%d) allocating event structure.\n",
8211+ rc);
8212+ } else {
8213+ memset(event, 0, sizeof (struct evms_event));
8214+ event->pid = tmp.eventry.pid;
8215+ event->eventid = tmp.eventry.eventid;
8216+ event->signo = tmp.eventry.signo;
8217+ rc = evms_cs_add_item_to_list
8218+ (&evms_global_notify_list, event);
8219+ }
8220+ }
8221+	} else if (!rc) {	/* tmp.command == EVMS_UNREGISTER_EVENT */
8222+ /* unregistration code */
8223+ if (!found) {
8224+ rc = -ENODATA;
8225+ LOG_ERROR
8226+ ("error(%d) attempting to unregister a non-registered pid(%d) on event(%d).\n",
8227+ rc, tmp.eventry.pid, tmp.eventry.eventid);
8228+ } else {
8229+ event = (*list_node)->item;
8230+ rc = evms_cs_remove_item_from_list
8231+ (&evms_global_notify_list, event);
8232+ if (!rc) {
8233+ kfree(event);
8234+ }
8235+ }
8236+ }
8237+ /* copy the status value back to the user */
8238+ tmp.status = rc;
8239+ if (copy_to_user(user_parms, &tmp, sizeof (tmp)))
8240+ rc = -EFAULT;
8241+
8242+ return (rc);
8243+}
8244+
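+/* A minimal sketch of registering for an event signal; fd is assumed
+ * and the available event ids are defined in the EVMS headers:
+ *
+ *	struct evms_notify_pkt np = { 0 };
+ *	np.command = EVMS_REGISTER_EVENT;	// EVMS_UNREGISTER_EVENT undoes this
+ *	np.eventry.pid = getpid();		// who receives the signal
+ *	np.eventry.signo = SIGUSR1;		// which signal to deliver
+ *	np.eventry.eventid = event_id;		// which event to watch
+ *	ioctl(fd, EVMS_PROCESS_NOTIFY_EVENT, &np);	// np.status holds result
+ */
+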
8245+static int
8246+evms_ioctl_cmd_check_mount_status(struct inode *inode, struct file *file,
8247+ ulong arg)
8248+{
8249+ int rc = 0;
8250+ struct evms_mount_status_pkt tmp, *user_parms;
8251+
8252+ user_parms = (struct evms_mount_status_pkt *) arg;
8253+ /* copy user's parameters to kernel space */
8254+ if (copy_from_user(&tmp, user_parms, sizeof (tmp)))
8255+ rc = -EFAULT;
8256+
8257+ if (!rc) {
8258+ tmp.mounted =
8259+ (is_mounted(MKDEV(EVMS_MAJOR, tmp.minor))) ? TRUE : FALSE;
8260+ }
8261+
8262+ /* copy the status value back to the user */
8263+ tmp.status = rc;
8264+ if (copy_to_user(user_parms, &tmp, sizeof (tmp)))
8265+ rc = -EFAULT;
8266+
8267+ return (rc);
8268+}
8269+
8270+static int
8271+evms_ioctl_cmd_check_open_status(struct inode *inode, struct file *file,
8272+ ulong arg)
8273+{
8274+ int rc = 0;
8275+ struct evms_open_status_pkt tmp, *user_parms;
8276+
8277+ user_parms = (struct evms_open_status_pkt *) arg;
8278+ /* copy user's parameters to kernel space */
8279+ if (copy_from_user(&tmp, user_parms, sizeof (tmp)))
8280+ rc = -EFAULT;
8281+
8282+ if (!rc) {
8283+ tmp.opens = is_open(tmp.minor);
8284+ }
8285+
8286+ /* copy the status value back to the user */
8287+ tmp.status = rc;
8288+ if (copy_to_user(user_parms, &tmp, sizeof (tmp)))
8289+ rc = -EFAULT;
8290+
8291+ return (rc);
8292+}
8293+
8294+/************************************************/
8295+/* END -- IOCTL commands -- EVMS specific */
8296+/************************************************/
8297+
8298+/************************************************/
8299+/* START -- IOCTL commands -- Volume specific */
8300+/************************************************/
8301+
8302+/************************************************/
8303+/* END -- IOCTL commands -- Volume specific */
8304+/************************************************/
8305+
8306+/************************************************/
8307+/* START -- IOCTL main */
8308+/************************************************/
8309+
8310+/*
8311+ * Function: evms_ioctl
8312+ *
8313+ * This function is the main ioctl entry point for all of evms.
8314+ */
8315+
8316+static int
8317+evms_ioctl(struct inode *inode,
8318+ struct file *file, unsigned int cmd, unsigned long arg)
8319+{
8320+ unsigned long minor = 0;
8321+ int rc = 0;
8322+ struct evms_logical_node *node = NULL;
8323+
8324+ /* check user access */
8325+ if (!capable(CAP_SYS_ADMIN))
8326+ rc = -EACCES;
8327+
8328+ if (!inode)
8329+ rc = -EINVAL;
8330+
8331+ if (!rc) {
8332+ /* get the minor */
8333+ minor = MINOR(inode->i_rdev);
8334+ LOG_EXTRA
8335+ ("ioctl: minor(%lu), dir(%d), size(%d), type(%d), nr(%d)\n",
8336+ minor, (cmd >> _IOC_DIRSHIFT) & _IOC_DIRMASK,
8337+ (cmd >> _IOC_SIZESHIFT) & _IOC_SIZEMASK,
8338+ (cmd >> _IOC_TYPESHIFT) & _IOC_TYPEMASK,
8339+ (cmd >> _IOC_NRSHIFT) & _IOC_NRMASK);
8340+
8341+		/* ensure this minor points to a valid volume */
8342+ if (minor) {
8343+ node = evms_logical_volumes[minor].node;
8344+ if (node == NULL)
8345+ rc = -ENXIO;
8346+ }
8347+ }
8348+
8349+ /* process the IOCTL commands */
8350+ if (!rc) {
8351+ if (!minor) {
8352+ /* process all EVMS specific commands */
8353+ switch (cmd) {
8354+ case EVMS_GET_IOCTL_VERSION:
8355+ rc = evms_ioctl_cmd_get_ioctl_version((void *)
8356+ arg);
8357+ break;
8358+ case EVMS_GET_VERSION:
8359+ rc = evms_ioctl_cmd_get_version((void *) arg);
8360+ break;
8361+ case EVMS_GET_INFO_LEVEL:
8362+ rc = evms_ioctl_cmd_get_info_level((void *)
8363+ arg);
8364+ break;
8365+ case EVMS_SET_INFO_LEVEL:
8366+ rc = evms_ioctl_cmd_set_info_level((void *)
8367+ arg);
8368+ break;
8369+ case EVMS_REDISCOVER_VOLUMES:
8370+ rc = evms_ioctl_cmd_rediscover_volumes(inode,
8371+ file,
8372+ cmd,
8373+ arg);
8374+ break;
8375+ case EVMS_GET_LOGICAL_DISK:
8376+ rc = evms_ioctl_cmd_get_logical_disk((void *)
8377+ arg);
8378+ break;
8379+ case EVMS_GET_LOGICAL_DISK_INFO:
8380+ rc = evms_ioctl_cmd_get_logical_disk_info((void
8381+ *)
8382+ arg);
8383+ break;
8384+ case EVMS_SECTOR_IO:
8385+ rc = evms_ioctl_cmd_sector_io((void *) arg);
8386+ break;
8387+ case EVMS_GET_MINOR:
8388+ rc = evms_ioctl_cmd_get_minor((void *) arg);
8389+ break;
8390+ case EVMS_GET_VOLUME_DATA:
8391+ rc = evms_ioctl_cmd_get_volume_data((void *)
8392+ arg);
8393+ break;
8394+ case EVMS_DELETE_VOLUME:
8395+ rc = evms_ioctl_cmd_delete_volume(inode, file,
8396+ arg);
8397+ break;
8398+ case EVMS_GET_PLUGIN:
8399+ rc = evms_ioctl_cmd_get_plugin((void *) arg);
8400+ break;
8401+ case EVMS_PLUGIN_IOCTL:
8402+ rc = evms_ioctl_cmd_plugin_ioctl(inode, file,
8403+ cmd, arg);
8404+ break;
8405+ case EVMS_COMPUTE_CSUM:
8406+ rc = evms_ioctl_cmd_kernel_partial_csum((void *)
8407+ arg);
8408+ break;
8409+ case EVMS_PROCESS_NOTIFY_EVENT:
8410+ rc = evms_ioctl_cmd_process_notify_event(arg);
8411+ break;
8412+ case EVMS_CHECK_MOUNT_STATUS:
8413+ rc = evms_ioctl_cmd_check_mount_status(inode,
8414+ file,
8415+ arg);
8416+ break;
8417+ case EVMS_CHECK_OPEN_STATUS:
8418+ rc = evms_ioctl_cmd_check_open_status(inode,
8419+ file,
8420+ arg);
8421+ break;
8422+ default:
8423+ rc = -EINVAL;
8424+ break;
8425+ }
8426+ } else {
8427+ /* process Volume specific commands */
8428+ switch (cmd) {
8429+ /* pick up standard blk ioctls */
8430+ case BLKFLSBUF:
8431+ case BLKROSET:
8432+ case BLKROGET:
8433+ case BLKRASET:
8434+ case BLKRAGET:
8435+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,4,10)
8436+ case BLKBSZGET:
8437+ case BLKBSZSET:
8438+#endif
8439+ case BLKSSZGET:
8440+ rc = blk_ioctl(inode->i_rdev, cmd, arg);
8441+ break;
8442+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,4,10)
8443+ case BLKGETSIZE:
8444+ {
8445+ /* casting size down to 32-bits until
8446+ * kernel allows return of 64-bit size
8447+ * values.
8448+ */
8449+ long size = node->total_vsectors;
8450+ if (copy_to_user
8451+ ((long *) arg, &size,
8452+ sizeof (long)))
8453+ rc = -EFAULT;
8454+ }
8455+ break;
8456+ case BLKGETSIZE64:
8457+ {
8458+ u64 size_in_bytes =
8459+ node->
8460+ total_vsectors <<
8461+ EVMS_VSECTOR_SIZE_SHIFT;
8462+ if (copy_to_user
8463+ ((u64 *) arg, &size_in_bytes,
8464+ sizeof (u64)))
8465+ rc = -EFAULT;
8466+ }
8467+ break;
8468+#endif
8469+ case EVMS_GET_IOCTL_VERSION:
8470+ rc = evms_ioctl_cmd_get_ioctl_version((void *)
8471+ arg);
8472+ break;
8473+ case EVMS_GET_BMAP:
8474+ rc = evms_ioctl_cmd_get_bmap(inode, file, cmd,
8475+ arg);
8476+ break;
8477+ case EVMS_GET_VOL_STRIPE_INFO:
8478+ {
8479+ struct evms_vol_stripe_info_pkt info;
8480+
8481+ info.size =
8482+ PAGE_SIZE >>
8483+ EVMS_VSECTOR_SIZE_SHIFT;
8484+ info.width = 1;
8485+ if (copy_to_user
8486+ ((struct evms_vol_stripe_info_pkt *)
8487+ arg, &info, sizeof (info)))
8488+ rc = -EFAULT;
8489+ }
8490+ break;
8491+
8492+ default:
8493+ rc = IOCTL(node, inode, file, cmd, arg);
8494+ break;
8495+ }
8496+ }
8497+ }
8498+ return rc;
8499+}
8500+
8501+/************************************************/
8502+/* END -- IOCTL main */
8503+/************************************************/
8504+
8505+/************************************************/
8506+/* START -- CHECK MEDIA CHANGE */
8507+/************************************************/
8508+
8509+static int
8510+evms_check_media_change(kdev_t dev)
8511+{
8512+ int rc = 0;
8513+ struct evms_logical_volume *volume = NULL;
8514+
8515+ /* check user access */
8516+ if (!capable(CAP_SYS_ADMIN))
8517+ rc = -EACCES;
8518+ if (!rc) {
8519+ int minor;
8520+ /* get the minor */
8521+ minor = MINOR(dev);
8522+		/* ensure this minor points to a valid volume */
8523+ volume = &evms_logical_volumes[minor];
8524+ if (volume->node == NULL) {
8525+ rc = -ENXIO;
8526+ }
8527+ }
8528+ if (!rc) {
8529+ if (volume->flags & EVMS_DEVICE_REMOVABLE) {
8530+ /* check for media change */
8531+ rc = evms_cs_kernel_ioctl(volume->node,
8532+ EVMS_CHECK_MEDIA_CHANGE,
8533+ (unsigned long) NULL);
8534+ if (rc < 0) {
8535+ LOG_ERROR
8536+ ("error(%d) doing EVMS_CHECK_MEDIA_CHANGE ioctl on '%s'.\n",
8537+ rc, volume->name);
8538+ }
8539+ }
8540+ }
8541+ return (rc);
8542+}
8543+
8544+/************************************************/
8545+/* END -- CHECK MEDIA CHANGE */
8546+/************************************************/
8547+
8548+static int
8549+evms_check_for_device_changes(struct inode *inode, struct file *file)
8550+{
8551+ int rc = 0, something_changed = 0, i;
8552+ struct evms_rediscover_pkt kernel_rd_pckt = { 0, 0, NULL };
8553+ struct evms_list_node *disk_list = NULL, *lnode, *next_lnode;
8554+ struct evms_logical_node *disk, *new_device_list = NULL;
8555+ struct evms_logical_volume *volume = NULL;
8556+
8557+ /* check for new devices
8558+ *
8559+ * put all new devices on the disk list so they
8560+ * will be included in the rediscovery process.
8561+ */
+	/* forward declaration; the function is defined later in this file */
8562+	static void evms_discover_logical_disks(struct evms_logical_node **);
8563+ evms_discover_logical_disks(&new_device_list);
8564+ if (new_device_list) {
8565+ LOG_DETAILS("%s: new devices detected.\n", __FUNCTION__);
8566+ something_changed++;
8567+ /* put these new nodes on the disk list */
8568+ while (new_device_list) {
8569+ disk = new_device_list;
8570+ rc = evms_cs_remove_logical_node_from_list
8571+ (&new_device_list, disk);
8572+ if (rc) {
8573+ LOG_ERROR
8574+ ("%s: error(%d) removing device(%s) from list.\n",
8575+ __FUNCTION__, rc, disk->name);
8576+ }
8577+ rc = evms_cs_add_item_to_list(&disk_list, disk);
8578+ if (rc) {
8579+ LOG_ERROR
8580+				    ("%s: error(%d) adding device(%s) to list.\n",
8581+ __FUNCTION__, rc, disk->name);
8582+ }
8583+ }
8584+ }
8585+
8586+ /* check all devices for changed removable media
8587+ *
8588+ * scan the global device list and issue check
8589+ * media change on each removable media device.
8590+ * put all removable devices that indicate a
8591+ * media change on the disk list.
8592+ *
8593+ * also scan for devices that have been unplugged
8594+ * or contain corrupt volumes.
8595+ */
8596+ for (lnode = evms_global_device_list; lnode; lnode = lnode->next) {
8597+ int add_to_list = FALSE;
8598+ disk = (struct evms_logical_node *) lnode->item;
8599+ /* only really check removable media devices */
8600+ if (disk->flags & EVMS_DEVICE_REMOVABLE) {
8601+ /* check for media change */
8602+ rc = evms_cs_kernel_ioctl(disk,
8603+ EVMS_CHECK_MEDIA_CHANGE,
8604+ (unsigned long) NULL);
8605+ if (rc < 0) {
8606+ LOG_ERROR
8607+ ("%s: error(%d) doing EVMS_CHECK_MEDIA_CHANGE ioctl on '%s'.\n",
8608+ __FUNCTION__, rc, disk->name);
8609+ } else if (rc == 1) {
8610+ add_to_list = TRUE;
8611+ }
8612+ }
8613+		/* check for devices that were present
8614+		 * before but are now gone (unplugged
8615+		 * device or unloaded driver).
8616+		 */
8617+		rc = IOCTL(disk, inode, file,
8618+			   EVMS_CHECK_DEVICE_STATUS, (ulong) NULL);
8619+		if (rc) {
8620+			LOG_ERROR
8621+			    ("error(%d) doing EVMS_CHECK_DEVICE_STATUS ioctl on '%s'.\n",
8622+			     rc, disk->name);
8623+ }
8624+ if (disk->flags & EVMS_DEVICE_UNAVAILABLE) {
8625+ add_to_list = TRUE;
8626+ }
8627+ if (add_to_list) {
8628+ something_changed++;
8629+ rc = evms_cs_add_item_to_list(&disk_list, disk);
8630+ }
8631+ }
8632+ /* log a statement that we detected changed media.
8633+ */
8634+ if (disk_list) {
8635+ LOG_DETAILS("%s: media change detected.\n", __FUNCTION__);
8636+ }
8637+
8638+ /* check for volumes with removed removable media.
8639+ * mark the volumes that reside on changed media.
8640+ */
8641+ for (i = 1; i < MAX_EVMS_VOLUMES; i++) {
8642+ volume = &evms_logical_volumes[i];
8643+ if (!volume->node)
8644+ continue;
8645+ if (!(volume->flags & EVMS_DEVICE_REMOVABLE))
8646+ continue;
8647+ if (evms_check_media_change(MKDEV(EVMS_MAJOR, i)) <= 0)
8648+ continue;
8649+ /* remember which volumes have changed media */
8650+ volume->flags |= EVMS_MEDIA_CHANGED;
8651+ something_changed++;
8652+ }
8653+
8654+ /* check for removed devices */
8655+ for (i = 1; i < MAX_EVMS_VOLUMES; i++) {
8656+ int status;
8657+ volume = &evms_logical_volumes[i];
8658+ if (!volume->node)
8659+ continue;
8660+ /* check for device status */
8661+ status = 0;
8662+ rc = IOCTL(volume->node, inode, file,
8663+ EVMS_CHECK_DEVICE_STATUS, (ulong) & status);
8664+ if (rc) {
8665+ LOG_ERROR
8666+ ("error(%d) doing EVMS_CHECK_DEVICE_STATUS ioctl on '%s'.\n",
8667+ rc, volume->name);
8668+ continue;
8669+ }
8670+ if (!(status & EVMS_DEVICE_UNAVAILABLE)) {
8671+ continue;
8672+ }
8673+		/* remember which volumes have unplugged devices */
8674+ volume->flags |= EVMS_DEVICE_UNPLUGGED;
8675+ something_changed++;
8676+ }
8677+
8678+ /* do we have some work to do? */
8679+ if (something_changed) {
8680+ /* check for volumes to be deleted */
8681+ for (i = 1; i < MAX_EVMS_VOLUMES; i++) {
8682+ struct evms_quiesce_vol_pkt qv;
8683+
8684+ volume = &evms_logical_volumes[i];
8685+ if (!volume->node)
8686+ continue;
8687+ /* only proceed on volumes with:
8688+ * changed media,
8689+ * hot-unplugged devices,
8690+ * & partial volumes
8691+ */
8692+ if (!(volume->flags &
8693+ (EVMS_MEDIA_CHANGED |
8694+ EVMS_VOLUME_PARTIAL | EVMS_DEVICE_UNPLUGGED)))
8695+ continue;
8696+			/* gather the disks needing to be
8697+ * rediscovered to rebuild this
8698+ * volume.
8699+ *
8700+ * this will locate other disks that
8701+ * the volume resides on that don't
8702+ * indicate media change.
8703+ */
8704+ rc = evms_cs_kernel_ioctl(volume->node,
8705+ EVMS_GET_DISK_LIST,
8706+ (unsigned long) &disk_list);
8707+ if (rc) {
8708+ LOG_ERROR
8709+ ("%s: error(%d) retrieving underlying disk list for '%s', skipping ...\n",
8710+ __FUNCTION__, rc, volume->name);
8711+ continue;
8712+ }
8713+ /* quiesce all the changed volumes
8714+ * prior to being deleted.
8715+ */
8716+			qv.command = EVMS_QUIESCE;
8717+			qv.minor = i;
8718+			qv.status = 0;	// reset status
8719+			qv.do_vfs = EVMS_VFS_DO_NOTHING;
8720+ rc = evms_quiesce_volume(volume, inode, file, &qv);
8721+ if (rc) {
8722+ LOG_ERROR
8723+ ("%s: error(%d) attempting to quiesce '%s%s'.\n",
8724+ __FUNCTION__, rc, EVMS_DEV_NODE_PATH,
8725+ volume->name);
8726+ }
8727+ }
8728+
8729+ /* we need to revalidate all the changed
8730+ * media. this is accomplished by issuing
8731+ * the revalidate disk ioctl to each device
8732+ * with changed media. the device manager
8733+ * remembers which devices indicated
8734+ * media changed (set by check media
8735+ * changed ioctl issued earlier), and will
8736+ * only issue the revalidate disk ioctl to
8737+ * those disks one time.
8738+ *
8739+ * NOTE:
8740+ * this needs to be done BEFORE deleting
8741+ * the volumes because deleting the
8742+ * last segment on disk will cause the
8743+		 * associated disk node to be freed, and we
8744+ * will not be able to issue the
8745+ * revalidate disk ioctl after that.
8746+ */
8747+ for (lnode = disk_list; lnode; lnode = lnode->next) {
8748+ disk = (struct evms_logical_node *) lnode->item;
8749+ /* only really do removable media devices */
8750+ if (disk->flags & EVMS_MEDIA_CHANGED) {
8751+				/* go revalidate the changed media */
8752+ rc = evms_cs_kernel_ioctl(disk,
8753+ EVMS_REVALIDATE_DISK,
8754+ (unsigned long) NULL);
8755+ if (rc) {
8756+ LOG_ERROR
8757+ ("%s: error(%d) attempting to revalidate '%s%s'.\n",
8758+ __FUNCTION__, rc,
8759+					     EVMS_DEV_NODE_PATH, disk->name);
8760+ }
8761+ }
8762+ }
8763+
8764+ /* delete all the affected volumes */
8765+ for (i = 1; i < MAX_EVMS_VOLUMES; i++) {
8766+ struct evms_delete_vol_pkt dv;
8767+
8768+ volume = &evms_logical_volumes[i];
8769+ if (!volume->node)
8770+ continue;
8771+ /* only proceed on volumes with:
8772+ * changed media,
8773+ * hot-unplugged devices,
8774+ * & partial volumes
8775+ */
8776+ if (!(volume->flags &
8777+ (EVMS_MEDIA_CHANGED |
8778+ EVMS_VOLUME_PARTIAL | EVMS_DEVICE_UNPLUGGED)))
8779+ continue;
8780+ /* only delete quiesced volumes */
8781+ if (!volume->quiesced)
8782+ continue;
8783+ /* delete the volume from memory.
8784+ * do a 'soft' delete if volume
8785+ * is mounted, and 'hard' delete
8786+ * if it is not.
8787+ *
8788+ * NOTE: the delete operation will
8789+ * clear the bits in the flags field.
8790+ */
8791+			dv.command = is_open(i) ?
+			    EVMS_SOFT_DELETE : EVMS_HARD_DELETE;
8792+ dv.minor = i;
8793+ dv.status = 0;
8794+ rc = evms_delete_volume(volume, &dv);
8795+ }
8796+
8797+ /* at this point all devices indicating
8798+ * media change that had volumes on them
8799+ * should be gone. however, we could still
8800+ * have devices indicating media change
8801+ * that had no volumes on them in the disk
8802+ * list. we need to delete these devices
8803+ * from kernel memory and the global device
8804+ * list.
8805+ */
8806+ for (lnode = evms_global_device_list; lnode; lnode = next_lnode) {
8807+ next_lnode = lnode->next;
8808+
8809+ disk = (struct evms_logical_node *) lnode->item;
8810+ if (disk->flags & EVMS_MEDIA_CHANGED) {
8811+ rc = DELETE(disk);
8812+ }
8813+ }
8814+
8815+ /* all the devices that indicated media
8816+ * change should be gone, both from kernel
8817+ * memory and global device list. we now
8818+ * need to remove any references to these
8819+ * devices from the disk list.
8820+ *
8821+ * when removable media is installed, it
8822+ * will get detected in the device manager's
8823+ * rediscovery as a new device and added to
8824+ * the discover list.
8825+ */
8826+ for (lnode = disk_list; lnode; lnode = next_lnode) {
8827+ struct evms_list_node *glnode;
8828+ int lnode_still_there;
8829+
8830+ next_lnode = lnode->next;
8831+
8832+ lnode_still_there = FALSE;
8833+ for (glnode = evms_global_device_list;
8834+ glnode; glnode = glnode->next) {
8835+ if (glnode->item == lnode->item) {
8836+ lnode_still_there = TRUE;
8837+ break;
8838+ }
8839+ }
8840+ if (lnode_still_there == FALSE) {
8841+ rc = evms_cs_remove_item_from_list(&disk_list,
8842+ lnode->item);
8843+ if (rc) {
8844+ LOG_ERROR
8845+ ("%s: error(%d) attempting to remove item(%p) from disk_list(%p).\n",
8846+ __FUNCTION__, rc, lnode->item,
8847+ &disk_list);
8848+ }
8849+ }
8850+ }
8851+
8852+ /* build the in-kernel rediscover packet */
8853+
8854+ /* allocate the space for the drive_array in
8855+ * the struct evms_rediscover_pkt packet. to do this
8856+ * we need to count the number of disk nodes,
8857+ * then allocate the necessary space.
8858+ */
8859+ /* count the disk nodes */
8860+ for (lnode = disk_list; lnode; lnode = lnode->next)
8861+ kernel_rd_pckt.drive_count++;
8862+ /* allocate the space */
8863+ if (kernel_rd_pckt.drive_count) {
8864+ kernel_rd_pckt.drive_array =
8865+ kmalloc(kernel_rd_pckt.drive_count *
8866+ sizeof (u64), GFP_KERNEL);
8867+ if (!kernel_rd_pckt.drive_array) {
8868+ rc = -ENOMEM;
8869+ LOG_ERROR
8870+ ("%s: error(%d) allocating rediscover drive array.\n",
8871+ __FUNCTION__, rc);
8872+ }
8873+ }
8874+ /* populate the drive array
8875+ *
8876+ * this also frees the disk_list which is useful
8877+ * if we had an error allocating the drive array.
8878+ */
8879+		for (i = 0, lnode = disk_list; lnode; lnode = next_lnode, i++) {
8880+			next_lnode = lnode->next;
8881+
8882+			/* remove this disk from the disk list;
8883+			 * on failure just skip the node */
8884+			disk = (struct evms_logical_node *) lnode->item;
8885+			if (evms_cs_remove_item_from_list(&disk_list, disk))
8886+				continue;
8887+			/* add this disk to the rediscover packet,
8888+			 * unless the drive array allocation failed
8889+			 * above (rc still holds -ENOMEM in that case).
8890+			 */
8891+			if (!rc) {
8892+				kernel_rd_pckt.drive_array[i] =
8893+				    NODE_TO_DEV_HANDLE(disk);
8894+			}
8895+		}
8893+ /* perform the rediscovery operation */
8894+ if (!rc) {
+			/* forward declaration; defined later in this file */
8895+			static int evms_discover_volumes(struct
8896+							 evms_rediscover_pkt *);
8897+ rc = evms_discover_volumes(&kernel_rd_pckt);
8898+ if (kernel_rd_pckt.drive_count) {
8899+ kfree(kernel_rd_pckt.drive_array);
8900+ }
8901+ }
8902+ LOG_DETAILS("%s: rediscover completed.\n", __FUNCTION__);
8903+ }
8904+
8905+ return (rc);
8906+}
8907+
8908+/************************************************/
8909+/* START -- REVALIDATE DISK */
8910+/************************************************/
8911+
8912+static int
8913+evms_revalidate_disk(kdev_t dev)
8914+{
8915+ int rc = 0;
8916+ struct evms_logical_volume *volume = NULL;
8917+
8918+ /* check user access */
8919+ if (!capable(CAP_SYS_ADMIN))
8920+ rc = -EACCES;
8921+ if (!rc) {
8922+ int minor;
8923+ /* get the minor */
8924+ minor = MINOR(dev);
8925+		/* ensure this minor points to a valid volume */
8926+ volume = &evms_logical_volumes[minor];
8927+ if (volume->node == NULL) {
8928+ rc = -ENXIO;
8929+ }
8930+ }
8931+ if (!rc) {
8932+		/* go revalidate the changed media */
8933+ rc = evms_cs_kernel_ioctl(volume->node,
8934+ EVMS_REVALIDATE_DISK,
8935+ (unsigned long) NULL);
8936+ }
8937+ return (rc);
8938+}
8939+
8940+/************************************************/
8941+/* END -- REVALIDATE DISK */
8942+/************************************************/
8943+
8944+/************************************************/
8945+/* START -- OPEN */
8946+/************************************************/
8947+
8948+static int
8949+evms_open(struct inode *inode, struct file *file)
8950+{
8951+ int rc = 0, minor = 0;
8952+ struct evms_logical_volume *volume = NULL;
8953+
8954+ /* check user access */
8955+ if (!capable(CAP_SYS_ADMIN))
8956+ rc = -EACCES;
8957+ if (!rc) {
8958+ if (!inode)
8959+ rc = -EINVAL;
8960+ }
8961+	if (!rc)
+		rc = evms_check_for_device_changes(inode, file);
8962+ if (!rc) {
8963+ /* get the minor */
8964+ minor = MINOR(inode->i_rdev);
8965+ if (minor) {
8966+			/* ensure this minor points to a valid volume */
8967+ volume = &evms_logical_volumes[minor];
8968+ if (volume->node == NULL) {
8969+ rc = -ENXIO;
8970+ }
8971+ }
8972+ }
8973+ /* go "open" the volume */
8974+ if (!rc && minor) {
8975+ atomic_inc(&volume->opens);
8976+ rc = IOCTL(volume->node, inode, file,
8977+ EVMS_OPEN_VOLUME, (unsigned long) NULL);
8978+ if (rc) {
8979+ LOG_ERROR
8980+ ("error(%d) doing EVMS_OPEN_VOLUME ioctl to '%s'.\n",
8981+ rc, volume->name);
8982+ atomic_dec(&volume->opens);
8983+ }
8984+ }
8985+ return (rc);
8986+}
8987+
8988+/************************************************/
8989+/* END -- OPEN */
8990+/************************************************/
8991+
8992+/************************************************/
8993+/* START -- RELEASE */
8994+/************************************************/
8995+
8996+static int
8997+evms_release(struct inode *inode, struct file *file)
8998+{
8999+ int rc = 0, minor = 0;
9000+ struct evms_logical_volume *volume = NULL;
9001+
9002+ if (!inode)
9003+ rc = -EINVAL;
9004+ if (!rc) {
9005+ /* get the minor */
9006+ minor = MINOR(inode->i_rdev);
9007+ if (minor) {
9008+			/* ensure this minor points to a valid volume */
9009+ volume = &evms_logical_volumes[minor];
9010+ if (volume->node == NULL) {
9011+ rc = -ENXIO;
9012+ }
9013+ }
9014+ }
9015+ /* go "close" the volume */
9016+ if (!rc && minor) {
9017+ rc = IOCTL(volume->node, inode, file,
9018+ EVMS_CLOSE_VOLUME, (unsigned long) NULL);
9019+ if (rc) {
9020+ LOG_ERROR
9021+ ("error(%d) doing EVMS_CLOSE_VOLUME ioctl to '%s'.\n",
9022+ rc, volume->name);
9023+ } else {
9024+ atomic_dec(&volume->opens);
9025+ }
9026+ }
9027+ return (rc);
9028+}
9029+
9030+/************************************************/
9031+/* END -- RELEASE */
9032+/************************************************/
9033+
9034+static struct block_device_operations evms_fops = {
9035+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,4,14)
9036+ owner:THIS_MODULE,
9037+#endif
9038+ open:evms_open,
9039+ release:evms_release,
9040+ ioctl:evms_ioctl,
9041+ check_media_change:evms_check_media_change,
9042+ revalidate:evms_revalidate_disk
9043+};
9044+
9045+/**********************************************************/
9046+/* END -- FOPS functions definitions */
9047+/**********************************************************/
9048+
9049+/**********************************************************/
9050+/* START -- RUNTIME support functions */
9051+/**********************************************************/
9052+
9053+static void
9054+evms_do_request_fn(request_queue_t * q)
9055+{
9056+ LOG_WARNING("This function should not be called.\n");
9057+}
9058+
9059+#ifdef CONFIG_SMP
9060+static request_queue_t *
9061+evms_find_queue(kdev_t dev)
9062+{
9063+ request_queue_t *rq = NULL;
9064+ struct evms_logical_volume *volume;
9065+
9066+ volume = &evms_logical_volumes[MINOR(dev)];
9067+ if (volume->node)
9068+ rq = &volume->request_queue;
9069+ return (rq);
9070+}
9071+#endif
9072+
9073+/*
9074+ * Function: evms_make_request_fn
9075+ *
9076+ */
9077+static int
9078+evms_make_request_fn(request_queue_t * q, int rw, struct buffer_head *bh)
9079+{
9080+ struct evms_logical_volume *volume;
9081+
9082+ volume = &evms_logical_volumes[MINOR(bh->b_rdev)];
9083+ wait_event(volume->wait_queue, (!volume->quiesced));
9084+ if (volume->node) {
9085+ switch (rw) {
9086+ case READ:
9087+ case READA:
9088+ atomic_inc(&volume->requests_in_progress);
9089+ R_IO(volume->node, bh);
9090+ atomic_dec(&volume->requests_in_progress);
9091+ return 0;
9092+ case WRITE:
9093+ atomic_inc(&volume->requests_in_progress);
9094+ W_IO(volume->node, bh);
9095+ atomic_dec(&volume->requests_in_progress);
9096+ return 0;
9097+ default:
9098+ buffer_IO_error(bh);
9099+ return 0;
9100+ }
9101+ } else {
9102+ LOG_ERROR("request for unknown logical volume [minor(%d)].\n",
9103+ MINOR(bh->b_rdev));
9104+ buffer_IO_error(bh);
9105+ }
9106+ return 0;
9107+}
9108+
9109+/**********************************************************/
9110+/* END -- RUNTIME support functions */
9111+/**********************************************************/
9112+
9113+/**********************************************************/
9114+/* START -- INIT/DISCOVERY support functions */
9115+/**********************************************************/
9116+
9117+#ifdef LOCAL_DEBUG
9118+static void
9119+display_discover_list(struct evms_logical_node *discover_list, char *text)
9120+{
9121+ struct evms_logical_node *node;
9122+
9123+ LOG_DETAILS("discover list:(%s)\n", text);
9124+ for (node = discover_list; node; node = node->next) {
9125+ LOG_DETAILS("\nnode info:\n");
9126+ LOG_DETAILS("node.....................(0x%p)\n", node);
9127+ LOG_DETAILS("name.....................(%s)\n", node->name);
9128+ LOG_DETAILS("plugin id................(0x%x)\n",
9129+ node->plugin->id);
9130+ LOG_DETAILS("size.....................("PFU64")\n",
9131+ node->total_vsectors);
9132+ LOG_DETAILS("flags....................(0x%x)\n", node->flags);
9133+ LOG_DETAILS("iflags...................(0x%x)\n", node->iflags);
9134+ LOG_DETAILS("sector size..............(%d)\n",
9135+ node->hardsector_size);
9136+ LOG_DETAILS("block size...............(%d)\n",
9137+ node->block_size);
9138+ LOG_DETAILS("sys id...................(0x%x)\n",
9139+ node->system_id);
9140+
9141+ if (node->feature_header) {
9142+ struct evms_feature_header *fh;
9143+
9144+ fh = node->feature_header;
9145+ LOG_DETAILS("\nfeature header:\n");
9146+ LOG_DETAILS("signature................(0x%x)\n",
9147+ fh->signature);
9148+ LOG_DETAILS("crc......................(0x%x)\n",
9149+ fh->crc);
9150+ LOG_DETAILS("feature header version...(%d.%d.%d)\n",
9151+ fh->version.major, fh->version.minor,
9152+ fh->version.patchlevel);
9153+ LOG_DETAILS("engine version...........(%d.%d.%d)\n",
9154+ fh->engine_version.major,
9155+ fh->engine_version.minor,
9156+ fh->engine_version.patchlevel);
9157+ LOG_DETAILS("flags....................(0x%x)\n",
9158+ fh->flags);
9159+ LOG_DETAILS("feature id...............(0x%x)\n",
9160+ fh->feature_id);
9161+ LOG_DETAILS("sequence#................("PFU64")\n",
9162+ fh->sequence_number);
9163+ LOG_DETAILS("alignment padding........("PFU64")\n",
9164+ fh->alignment_padding);
9165+ LOG_DETAILS("feature data1 lsn........("PFU64")\n",
9166+ fh->feature_data1_start_lsn);
9167+ LOG_DETAILS("feature data1 size.......("PFU64")\n",
9168+ fh->feature_data1_size);
9169+ LOG_DETAILS("feature data2 lsn........("PFU64")\n",
9170+ fh->feature_data2_start_lsn);
9171+ LOG_DETAILS("feature data2 size.......("PFU64")\n",
9172+ fh->feature_data2_size);
9173+ LOG_DETAILS("volume sn................("PFU64")\n",
9174+ fh->volume_serial_number);
9175+ LOG_DETAILS("volume minor#............(%d)\n",
9176+ fh->volume_system_id);
9177+ LOG_DETAILS("object depth.............(%d)\n",
9178+ fh->object_depth);
9179+ LOG_DETAILS("object name..............(%s)\n",
9180+ fh->object_name);
9181+ LOG_DETAILS("volume name..............(%s)\n",
9182+ fh->volume_name);
9183+ }
9184+
9185+ if (node->volume_info) {
9186+ struct evms_volume_info *vi;
9187+
9188+ vi = node->volume_info;
9189+ LOG_DETAILS("\nvolume info:\n");
9190+ LOG_DETAILS("volume name..............(%s)\n",
9191+ vi->volume_name);
9192+ LOG_DETAILS("volume sn................("PFU64")\n",
9193+ vi->volume_sn);
9194+ LOG_DETAILS("volume minor#............(%d)\n",
9195+ vi->volume_minor);
9196+ }
9197+ }
9198+ if (discover_list) {
9199+ LOG_DETAILS("\n");
9200+ }
9201+}
9202+#endif
9203+
9204+/*
9205+ * Function: evms_discover_logical_disks
9206+ * Description: Construct the logical disk list by calling all registered device managers.
9207+ */
9208+static void
9209+evms_discover_logical_disks(struct evms_logical_node **disk_list)
9210+{
9211+ struct evms_registered_plugin *p;
9212+ LOG_EXTRA("discovering logical disks...\n");
9213+ for (p = registered_plugin_head; p; p = p->next) {
9214+ if (GetPluginType(p->plugin->id) == EVMS_DEVICE_MANAGER) {
9215+ DISCOVER(p, disk_list);
9216+ }
9217+ }
9218+}
9219+
9220+/*
9221+ * Function: evms_discover_logical_partitions
9222+ * Description: Construct the logical partition list by calling all registered partition managers.
9223+ */
9224+static void
9225+evms_discover_logical_partitions(struct evms_logical_node **discover_list)
9226+{
9227+ int rc, done;
9228+
9229+ struct evms_registered_plugin *p;
9230+ LOG_EXTRA("discovering logical partitions...\n");
9231+ do {
9232+ done = TRUE;
9233+ for (p = registered_plugin_head; p; p = p->next) {
9234+ if (GetPluginType(p->plugin->id) ==
9235+ EVMS_SEGMENT_MANAGER) {
9236+ rc = DISCOVER(p, discover_list);
9237+ /* RC > 0 means the plugin
9238+ * added something to the
9239+ * discover list. This also
9240+ * means we must loop thru
9241+ * these plugins another time.
9242+ * RC == 0 means nothing was
9243+ * added to the discover list
9244+ * by this plugin.
9245+ * RC < 0 means the plugin
9246+ * encountered some error and
9247+ * nothing was added to the list.
9248+ * NOTE: If a plugin has both
9249+ * added something new to the
9250+ * discover list and encountered
9251+ * an error, RC > 0 must be
9252+ * returned.
9253+ */
9254+ if (rc > 0)
9255+ done = FALSE;
9256+ }
9257+ }
9258+ } while (done == FALSE);
9259+
9260+ /* send the end of discovery signal to each
9261+ * partition manager plugin.
9262+ */
9263+ for (p = registered_plugin_head; p; p = p->next)
9264+ if (GetPluginType(p->plugin->id) == EVMS_SEGMENT_MANAGER)
9265+ if (p->plugin->fops->end_discover)
9266+ rc = END_DISCOVER(p, discover_list);
9267+}
9268+
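+/* A skeletal sketch of the return convention a segment manager's
+ * discover() must follow, per the comment above; the body is
+ * hypothetical:
+ *
+ *	static int example_seg_discover(struct evms_logical_node **list)
+ *	{
+ *		int added = 0;
+ *		// walk *list, consume nodes this plugin recognizes, and
+ *		// add each newly built child node back to *list ...
+ *		return added;	// >0: run the loop again, 0: nothing
+ *				// added, <0: error and nothing added
+ *	}
+ */
+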
9269+/*
9270+ * Function: evms_discover_volume_groups
9271+ * Description: Find volume groups within the logical partitions list
9272+ */
9273+static void
9274+evms_discover_volume_groups(struct evms_logical_node **discover_list)
9275+{
9276+ int rc, done;
9277+
9278+ struct evms_registered_plugin *p;
9279+ LOG_EXTRA("discovering logical volume groups...\n");
9280+ do {
9281+ done = TRUE;
9282+ for (p = registered_plugin_head; p; p = p->next) {
9283+ if (GetPluginType(p->plugin->id) == EVMS_REGION_MANAGER) {
9284+ rc = DISCOVER(p, discover_list);
9285+ /* RC > 0 means the plugin
9286+ * added something to the
9287+ * discover list. This also
9288+				 * means we must loop through
9289+ * these plugins another time.
9290+ * RC == 0 means nothing was
9291+ * added to the discover list
9292+ * by this plugin.
9293+ * RC < 0 means the plugin
9294+ * encountered some error and
9295+ * nothing was added to the list.
9296+ * NOTE: If a plugin has both
9297+ * added something new to the
9298+ * discover list and encountered
9299+ * an error, RC > 0 must be
9300+ * returned.
9301+ */
9302+ if (rc > 0)
9303+ done = FALSE;
9304+ }
9305+ }
9306+ } while (done == FALSE);
9307+
9308+ /* send the end of discovery signal to each volume
9309+ * group plugin.
9310+ */
9311+ for (p = registered_plugin_head; p; p = p->next)
9312+ if (GetPluginType(p->plugin->id) == EVMS_REGION_MANAGER)
9313+ if (p->plugin->fops->end_discover)
9314+ rc = END_DISCOVER(p, discover_list);
9315+}
9316+
9317+/*
9318+ *
9319+ * convert all the feature header fields into cpu native format
9320+ * from the on-disk Little Endian format. From this point forward
9321+ * all plugins can deal with feature headers natively.
9322+ */
9323+void
9324+le_feature_header_to_cpu(struct evms_feature_header *fh)
9325+{
9326+ fh->signature = le32_to_cpup(&fh->signature);
9327+ fh->crc = le32_to_cpup(&fh->crc);
9328+ fh->version.major = le32_to_cpup(&fh->version.major);
9329+ fh->version.minor = le32_to_cpup(&fh->version.minor);
9330+ fh->version.patchlevel = le32_to_cpup(&fh->version.patchlevel);
9331+ fh->engine_version.major = le32_to_cpup(&fh->engine_version.major);
9332+ fh->engine_version.minor = le32_to_cpup(&fh->engine_version.minor);
9333+ fh->engine_version.patchlevel =
9334+ le32_to_cpup(&fh->engine_version.patchlevel);
9335+ fh->flags = le32_to_cpup(&fh->flags);
9336+ fh->feature_id = le32_to_cpup(&fh->feature_id);
9337+ fh->sequence_number = le64_to_cpup(&fh->sequence_number);
9338+ fh->alignment_padding = le64_to_cpup(&fh->alignment_padding);
9339+ fh->feature_data1_start_lsn =
9340+ le64_to_cpup(&fh->feature_data1_start_lsn);
9341+ fh->feature_data1_size = le64_to_cpup(&fh->feature_data1_size);
9342+ fh->feature_data2_start_lsn =
9343+ le64_to_cpup(&fh->feature_data2_start_lsn);
9344+ fh->feature_data2_size = le64_to_cpup(&fh->feature_data2_size);
9345+ fh->volume_serial_number = le64_to_cpup(&fh->volume_serial_number);
9346+ fh->volume_system_id = le32_to_cpup(&fh->volume_system_id);
9347+ fh->object_depth = le32_to_cpup(&fh->object_depth);
9348+}
9349+
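+/* Editorial sketch (not part of the original patch): writing a feature
+ * header back to disk would require the inverse conversion. A minimal
+ * sketch follows, assuming the same field layout; the function name is
+ * hypothetical and only a few representative fields are shown.
+ */
+#if 0
+static void
+cpu_feature_header_to_le(struct evms_feature_header *fh)
+{
+	fh->signature = cpu_to_le32(fh->signature);
+	fh->crc = cpu_to_le32(fh->crc);
+	fh->flags = cpu_to_le32(fh->flags);
+	fh->sequence_number = cpu_to_le64(fh->sequence_number);
+	/* ... remaining u32/u64 fields, mirroring
+	 * le_feature_header_to_cpu() above ...
+	 */
+}
+#endif
+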
9350+static int
9351+edef_load_feature_header(struct evms_logical_node *node)
9352+{
9353+ int i, rc = 0, rc_array[2] = { 0, 0 };
9354+ unsigned long size_in_bytes;
9355+ u64 size_in_sectors, starting_sector = 0;
9356+ struct evms_feature_header *fh = NULL, *fh1 = NULL, *fh2 = NULL;
9357+ char *location_name = NULL;
9358+ struct evms_version version = {
9359+ EVMS_FEATURE_HEADER_MAJOR,
9360+ EVMS_FEATURE_HEADER_MINOR,
9361+ EVMS_FEATURE_HEADER_PATCHLEVEL
9362+ };
9363+
9364+ if (!node->feature_header) {
9365+ size_in_sectors = evms_cs_size_in_vsectors(sizeof (*fh));
9366+ size_in_bytes = size_in_sectors << EVMS_VSECTOR_SIZE_SHIFT;
9367+ fh1 = kmalloc(size_in_bytes, GFP_KERNEL);
9368+ if (fh1) {
9369+ fh2 = kmalloc(size_in_bytes, GFP_KERNEL);
9370+ if (!fh2) {
9371+ kfree(fh1);
9372+ rc = -ENOMEM;
9373+ }
9374+ } else {
9375+ rc = -ENOMEM;
9376+ }
9377+
9378+ for (i = 0; i < 2; i++) {
9379+ if (i == 0) {
9380+ starting_sector =
9381+ node->total_vsectors - size_in_sectors;
9382+ fh = fh1;
9383+ location_name = evms_primary_string;
9384+ } else {
9385+ starting_sector--;
9386+ fh = fh2;
9387+ location_name = evms_secondary_string;
9388+ }
9389+ /* read header into buffer */
9390+ rc = INIT_IO(node,
9391+ 0, starting_sector, size_in_sectors, fh);
9392+ if (rc) {
9393+ LOG_ERROR
9394+ ("error(%d) probing for %s feature header(at "PFU64") on '%s'.\n",
9395+ rc, location_name, starting_sector,
9396+ node->name);
9397+ rc_array[i] = rc;
9398+ continue;
9399+ }
9400+ /* validate header signature */
9401+ if (cpu_to_le32(fh->signature) !=
9402+ EVMS_FEATURE_HEADER_SIGNATURE) {
9403+ rc = -ENODATA;
9404+ rc_array[i] = rc;
9405+ continue;
9406+ }
9407+ /* validate header CRC */
9408+ if (fh->crc != EVMS_MAGIC_CRC) {
9409+ u32 org_crc, final_crc;
9410+ org_crc = cpu_to_le32(fh->crc);
9411+ fh->crc = 0;
9412+ final_crc =
9413+ evms_cs_calculate_crc(EVMS_INITIAL_CRC, fh,
9414+ sizeof (*fh));
9415+ if (final_crc != org_crc) {
9416+ LOG_ERROR
9417+ ("CRC mismatch error [stored(%x), computed(%x)] in %s feature header(at "PFU64") on '%s'.\n",
9418+ org_crc, final_crc, location_name,
9419+ starting_sector, node->name);
9420+ rc = -EINVAL;
9421+ rc_array[i] = rc;
9422+ continue;
9423+ }
9424+ } else {
9425+ LOG_WARNING
9426+ ("CRC disabled in %s feature header(at "PFU64") on '%s'.\n",
9427+ location_name, starting_sector,
9428+ node->name);
9429+ }
9430+ /* convert the feature header from the
9431+ * on-disk format (Little Endian) to
9432+ * native cpu format.
9433+ */
9434+ le_feature_header_to_cpu(fh);
9435+ /* verify the system data version */
9436+ rc = evms_cs_check_version(&version, &fh->version);
9437+ if (rc) {
9438+ LOG_ERROR
9439+ ("error: obsolete version(%d,%d,%d) in %s feature header on '%s'.\n",
9440+ fh->version.major, fh->version.minor,
9441+ fh->version.patchlevel, location_name,
9442+ node->name);
9443+ rc_array[i] = rc;
9444+ }
9445+ }
9446+
9447+ /* getting same return code for both copies? */
9448+ if (rc_array[0] == rc_array[1]) {
9449+ rc = rc_array[0];
9450+ /* if no errors on both copies,
9451+ * check the sequence numbers.
9452+ * use the highest sequence number.
9453+ */
9454+ if (!rc) {
9455+ /* compare sequence numbers */
9456+ if (fh1->sequence_number ==
9457+ fh2->sequence_number) {
9458+ fh = fh1;
9459+ } else {
9460+ LOG_WARNING
9461+ ("%s feature header sequence number("PFU64") mismatches %s feature header sequence number("PFU64") on '%s'!\n",
9462+ evms_primary_string,
9463+ fh1->sequence_number,
9464+ evms_secondary_string,
9465+ fh2->sequence_number, node->name);
9466+ if (fh1->sequence_number >
9467+ fh2->sequence_number) {
9468+ fh = fh1;
9469+ location_name =
9470+ evms_primary_string;
9471+ /* indicate bad sequence number of secondary */
9472+ rc_array[1] = -1;
9473+ } else {
9474+ fh = fh2;
9475+ location_name =
9476+ evms_secondary_string;
9477+ /* indicate bad sequence number of primary */
9478+ rc_array[0] = -1;
9479+ }
9480+ }
9481+ }
9482+ /* getting different return codes for each copy */
9483+ } else
9484+ /* either primary or secondary copy is
9485+ * valid, so use the valid copy.
9486+ */
9487+ if ((rc_array[0] == 0) || (rc_array[1] == 0)) {
9488+ char *warn_name = NULL;
9489+
9490+ /* indicate success */
9491+ rc = 0;
9492+ /* set variables based on which copy is valid */
9493+ if (rc_array[0] == 0) {
9494+				/* use primary (rear) copy if it's good */
9495+ fh = fh1;
9496+ location_name = evms_primary_string;
9497+ warn_name = evms_secondary_string;
9498+ } else {
9499+				/* use secondary (front) copy if it's good */
9500+ fh = fh2;
9501+ location_name = evms_secondary_string;
9502+ warn_name = evms_primary_string;
9503+ }
9504+ /* warn the user about the invalid copy */
9505+ LOG_WARNING
9506+ ("warning: error(%d) probing/verifying the %s feature header on '%s'.\n",
9507+ rc_array[0] + rc_array[1], warn_name, node->name);
9508+ } else
9509+ /* both copies had a different error,
9510+ * and one was a fatal error, so
9511+ * indicate fatal error.
9512+ */
9513+ if ((rc_array[0] == -EINVAL) || (rc_array[1] == -EINVAL)) {
9514+ rc = -EINVAL;
9515+ }
9516+
9517+ /* on error, set fh to NULL */
9518+ if (rc)
9519+ fh = NULL;
9520+
9521+ /* deallocate metadata buffers appropriately */
9522+ if (fh != fh1)
9523+ kfree(fh1);
9524+ if (fh != fh2)
9525+ kfree(fh2);
9526+
9527+ /* save validated feature header pointer */
9528+ if (!rc) {
9529+ node->feature_header = fh;
9530+ if (rc_array[0] != rc_array[1]) {
9531+ LOG_DETAILS
9532+ ("using %s feature header on '%s'.\n",
9533+ location_name, node->name);
9534+ }
9535+ }
9536+
9537+ /* if no signature found, adjust return code */
9538+ if (rc == -ENODATA) {
9539+ rc = 0;
9540+ LOG_DEBUG("no feature header found on '%s'.\n",
9541+ node->name);
9542+ }
9543+ }
9544+ return (rc);
9545+}
9546+
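+/* Editorial sketch (not in the original patch): the CRC validation in
+ * edef_load_feature_header() distilled into a helper. It assumes the
+ * same evms_cs_calculate_crc()/EVMS_INITIAL_CRC services used above,
+ * and that the stored CRC was computed with the crc field zeroed. The
+ * helper name is hypothetical; note it clobbers fh->crc, just as the
+ * caller above does.
+ */
+#if 0
+static int
+feature_header_crc_ok(struct evms_feature_header *fh)
+{
+	u32 org_crc = le32_to_cpup(&fh->crc);
+
+	fh->crc = 0;
+	return (evms_cs_calculate_crc(EVMS_INITIAL_CRC,
+				      fh, sizeof (*fh)) == org_crc);
+}
+#endif
+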
9547+static int
9548+edef_find_first_features(struct evms_logical_node **discover_list)
9549+{
9550+ int rc;
9551+ struct evms_logical_node *node, *tmp_list_head;
9552+
9553+ tmp_list_head = *discover_list;
9554+ *discover_list = NULL;
9555+
9556+ while (tmp_list_head) {
9557+ struct evms_list_node **evms_node;
9558+
9559+ node = tmp_list_head;
9560+ rc = evms_cs_remove_logical_node_from_list(&tmp_list_head,
9561+ node);
9562+ if (rc)
9563+ BUG();
9564+
9565+		/* check for duplicate pointers by
9566+		 * searching for the node in the global list
9567+		 */
9568+ evms_node =
9569+ evms_cs_lookup_item_in_list(&evms_global_feature_node_list,
9570+ node);
9571+ /* already present? */
9572+ if (*evms_node) {
9573+ /* yes, already present */
9574+			rc = -ENODATA;	/* don't process this node further */
9575+ LOG_DETAILS("deleting duplicate reference to '%s'.\n",
9576+ node->name);
9577+ /* forget this node */
9578+ node = NULL;
9579+ } else {
9580+ /* load the feature header if present */
9581+ rc = edef_load_feature_header(node);
9582+			/* Does this node have a feature header?
9583+			 * It won't if there was no header to load
9584+			 * OR
9585+			 * a fatal error occurred while attempting to read it.
9586+			 */
9587+ if (node->feature_header) {
9588+ /* check for object flag */
9589+ if (node->feature_header->flags &
9590+ EVMS_VOLUME_DATA_OBJECT) {
9591+ LOG_DEFAULT
9592+ ("object detected, deleting '%s'.\n",
9593+ node->name);
9594+ rc = -EINVAL;
9595+ } else
9596+ /* check for stop-data flag */
9597+ if (node->feature_header->flags &
9598+ EVMS_VOLUME_DATA_STOP) {
9599+ LOG_DEFAULT
9600+ ("stop data detected, deleting '%s'.\n",
9601+ node->name);
9602+ rc = -EINVAL;
9603+ } else {
9604+ /* we have a valid feature header.
9605+ * initialize appropriate node fields
9606+ * to indicate this.
9607+ */
9608+ node->flags |= EVMS_VOLUME_FLAG;
9609+ node->iflags |= EVMS_FEATURE_BOTTOM;
9610+ node->volume_info =
9611+ kmalloc(sizeof
9612+ (struct evms_volume_info),
9613+ GFP_KERNEL);
9614+ if (node->volume_info) {
9615+ /* set up volume
9616+ * info struct
9617+ */
9618+ memset(node->volume_info, 0,
9619+ sizeof
9620+ (struct
9621+ evms_volume_info));
9622+ node->volume_info->volume_sn =
9623+ node->feature_header->
9624+ volume_serial_number;
9625+ node->volume_info->
9626+ volume_minor =
9627+ node->feature_header->
9628+ volume_system_id;
9629+ strcpy(node->volume_info->
9630+ volume_name,
9631+ node->feature_header->
9632+ volume_name);
9633+ /* register(add) node to
9634+ * the global list.
9635+ */
9636+ rc = evms_cs_add_item_to_list
9637+ (&evms_global_feature_node_list,
9638+ node);
9639+ } else {
9640+ rc = -ENOMEM;
9641+ }
9642+ }
9643+ }
9644+ }
9645+ /* if any errors, delete the node */
9646+ if (rc) {
9647+ if (node) {
9648+ DELETE(node);
9649+ }
9650+ } else
9651+ /* on successful processing of this node
9652+ * place it back on the discover list.
9653+ */
9654+ evms_cs_add_logical_node_to_list(discover_list, node);
9655+ }
9656+ return (0);
9657+}
9658+
9659+/* These defines describe the node types that can be isolated. */
9660+#define ISOLATE_ASSOCIATIVE_FEATURES 0
9661+#define ISOLATE_COMPATIBILITY_VOLUMES 1
9662+#define ISOLATE_EVMS_VOLUMES 2
9663+#define ISOLATE_EVMS_VOLUME_SERIAL_NUMBER 3
9664+#define ISOLATE_EVMS_NODES_BY_FEATURE_AND_DEPTH 4
9665+static int
9666+edef_isolate_nodes_by_type(unsigned int type,
9667+ struct evms_logical_node **src_list,
9668+ struct evms_logical_node **trg_list,
9669+ u32 compare32, u64 compare64)
9670+{
9671+ struct evms_logical_node *node, *next_node;
9672+ int rc = 0, found_node;
9673+ struct evms_feature_header *fh = NULL;
9674+
9675+ for (node = *src_list; node; node = next_node) {
9676+ next_node = node->next;
9677+
9678+ if (node->feature_header)
9679+ fh = node->feature_header;
9680+ found_node = FALSE;
9681+ switch (type) {
9682+ case ISOLATE_ASSOCIATIVE_FEATURES:
9683+ if (fh) {
9684+ if (GetPluginType(fh->feature_id) ==
9685+ EVMS_ASSOCIATIVE_FEATURE)
9686+ found_node = TRUE;
9687+ }
9688+ break;
9689+ case ISOLATE_COMPATIBILITY_VOLUMES:
9690+ if (!(node->flags & EVMS_VOLUME_FLAG))
9691+ found_node = TRUE;
9692+ break;
9693+ case ISOLATE_EVMS_VOLUMES:
9694+ if (node->flags & EVMS_VOLUME_FLAG)
9695+ found_node = TRUE;
9696+ break;
9697+ /* EVMS volumes with same serial # */
9698+ case ISOLATE_EVMS_VOLUME_SERIAL_NUMBER:
9699+ if (node->volume_info->volume_sn == compare64)
9700+ found_node = TRUE;
9701+ break;
9702+ case ISOLATE_EVMS_NODES_BY_FEATURE_AND_DEPTH:
9703+ if (fh)
9704+ if (fh->object_depth == compare64)
9705+ if (fh->feature_id == compare32)
9706+ found_node = TRUE;
9707+ break;
9708+ }
9709+ if (found_node == TRUE) {
9710+ rc = evms_cs_remove_logical_node_from_list(src_list,
9711+ node);
9712+ if (rc)
9713+ break;
9714+ rc = evms_cs_add_logical_node_to_list(trg_list, node);
9715+ if (rc)
9716+ break;
9717+ }
9718+ }
9719+ return (rc);
9720+}
9721+
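+/* Editorial usage sketch (illustrative fragment, not compiled): how
+ * the isolation types above split a discovery list. The list variable
+ * names are hypothetical.
+ */
+#if 0
+	struct evms_logical_node *compat_list = NULL, *evms_list = NULL;
+
+	/* move nodes without EVMS_VOLUME_FLAG onto compat_list */
+	edef_isolate_nodes_by_type(ISOLATE_COMPATIBILITY_VOLUMES,
+				   &discover_list, &compat_list, 0, 0);
+	/* move nodes with EVMS_VOLUME_FLAG onto evms_list */
+	edef_isolate_nodes_by_type(ISOLATE_EVMS_VOLUMES,
+				   &discover_list, &evms_list, 0, 0);
+#endif
+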
9722+static int
9723+edef_apply_feature(struct evms_logical_node *node,
9724+ struct evms_logical_node **volume_node_list)
9725+{
9726+ struct evms_registered_plugin *p;
9727+ int rc = -1;
9728+
9729+ for (p = registered_plugin_head; p; p = p->next) {
9730+ if (p->plugin->id == node->feature_header->feature_id) {
9731+ rc = DISCOVER(p, volume_node_list);
9732+ break;
9733+ }
9734+ }
9735+ return (rc);
9736+}
9737+
9738+static int
9739+edef_get_feature_plugin_header(u32 id, struct evms_plugin_header **header)
9740+{
9741+ int rc = -ENOPKG;
9742+ struct evms_registered_plugin *p;
9743+
9744+ for (p = registered_plugin_head; p; p = p->next) {
9745+ if (p->plugin->id == id) {
9746+ *header = p->plugin;
9747+ rc = 0;
9748+ break;
9749+ }
9750+ }
9751+ if (rc) {
9752+ LOG_SERIOUS("no plugin loaded for feature id(0x%x)\n", id);
9753+ }
9754+ return (rc);
9755+}
9756+
9757+typedef struct evms_volume_build_info_s {
9758+ int node_count;
9759+ int feature_header_count;
9760+ int feature_count;
9761+ int associative_feature_count;
9762+ u64 max_depth;
9763+ struct evms_plugin_header *plugin;
9764+ struct evms_logical_node *feature_node_list;
9765+} evms_volume_build_info_t;
9766+
9767+/*
9768+ * edef_evaluate_volume_node_list:
9769+ * does:
9770+ * 1) puts all nodes from the feature list back on the volume list
9771+ * 2) loads the node's feature headers
9772+ * 3) counts the node list's entries
9773+ * 4) builds the feature node list
9774+ * 5) counts the feature headers for associative features
9775+ * 6) sets feature count to >1 if >1 features are to be processed
9776+ */
9777+static int
9778+edef_evaluate_volume_node_list(struct evms_logical_node **volume_node_list,
9779+ evms_volume_build_info_t * vbi,
9780+ int volume_complete)
9781+{
9782+ int rc;
9783+ struct evms_logical_node *node;
9784+
9785+ vbi->node_count =
9786+ vbi->feature_count =
9787+ vbi->associative_feature_count = vbi->max_depth = 0;
9788+ vbi->plugin = NULL;
9789+
9790+ /* put all feature nodes back on the volume list */
9791+ rc = edef_isolate_nodes_by_type(ISOLATE_EVMS_VOLUMES,
9792+ &vbi->feature_node_list,
9793+ volume_node_list, 0, 0);
9794+ if (rc)
9795+ return (rc);
9796+
9797+ /* load all the feature headers */
9798+ if (!volume_complete) {
9799+ for (node = *volume_node_list; node; node = node->next) {
9800+ rc = edef_load_feature_header(node);
9801+ if (rc)
9802+ return (rc);
9803+ }
9804+ }
9805+
9806+ /* find the 1st max depth object:
9807+ * record the depth
9808+ * record the plugin
9809+ */
9810+ for (node = *volume_node_list; node; node = node->next) {
9811+ struct evms_plugin_header *plugin;
9812+ struct evms_feature_header *fh = node->feature_header;
9813+
9814+ /* count the nodes */
9815+ vbi->node_count++;
9816+
9817+ /* no feature header found, continue to next node */
9818+ if (!fh)
9819+ continue;
9820+
9821+ /* check the depth */
9822+ if (fh->object_depth > vbi->max_depth) {
9823+ /* record new max depth */
9824+ vbi->max_depth = fh->object_depth;
9825+ /* find the plugin header for this feature id */
9826+ rc = edef_get_feature_plugin_header(fh->feature_id,
9827+ &plugin);
9828+ if (rc)
9829+ return (rc);
9830+ /* check for >1 plugins */
9831+ if (vbi->plugin != plugin) {
9832+ vbi->feature_count++;
9833+ vbi->plugin = plugin;
9834+ }
9835+ }
9836+ /* check for "associative" feature indicator */
9837+ if (GetPluginType(vbi->plugin->id) == EVMS_ASSOCIATIVE_FEATURE)
9838+ vbi->associative_feature_count++;
9839+ }
9840+ /* build a list of max depth nodes for this feature */
9841+ if (vbi->max_depth) {
9842+ rc = edef_isolate_nodes_by_type
9843+ (ISOLATE_EVMS_NODES_BY_FEATURE_AND_DEPTH, volume_node_list,
9844+ &vbi->feature_node_list, vbi->plugin->id, vbi->max_depth);
9845+ if (rc)
9846+ return (rc);
9847+ if (!vbi->plugin)
9848+ return (-ENODATA);
9849+ if (!vbi->feature_node_list)
9850+ return (-ENODATA);
9851+ }
9852+
9853+ return (rc);
9854+}
9855+
9856+/* function: edef_check_feature_conditions
9857+ *
9858+ * This routine verifies the state of the volume based on the feature
9859+ * headers and nodes in the current discovery list. All detected
9860+ * errors are considered fatal.
9861+ */
9862+static int
9863+edef_check_feature_conditions(evms_volume_build_info_t * vbi)
9864+{
9865+ int rc = 0;
9866+
9867+ if (vbi->associative_feature_count) {
9868+ if (vbi->node_count > 1) {
9869+ rc = -EVMS_VOLUME_FATAL_ERROR;
9870+ LOG_ERROR
9871+ ("associative ERROR: > 1 nodes(%d) remaining to be processed!\n",
9872+ vbi->node_count);
9873+ } else if (vbi->max_depth != 1) {
9874+ rc = -EVMS_VOLUME_FATAL_ERROR;
9875+ LOG_ERROR
9876+ ("associative ERROR: associative feature found at node depth("PFU64") != 1!\n",
9877+ vbi->max_depth);
9878+ } else
9879+ rc = -EVMS_ASSOCIATIVE_FEATURE;
9880+ }
9881+ if (!rc) {
9882+ if (!vbi->max_depth) {
9883+ if (vbi->node_count > 1) {
9884+ rc = -EVMS_VOLUME_FATAL_ERROR;
9885+ LOG_ERROR
9886+ ("max depth ERROR: > 1 nodes(%d) remaining to be processed!\n",
9887+ vbi->node_count);
9888+ }
9889+ } else if (vbi->max_depth == 1) {
9890+ if (vbi->feature_count > 1) {
9891+ rc = -EVMS_VOLUME_FATAL_ERROR;
9892+ LOG_ERROR
9893+ ("max depth 1 ERROR: > 1 features remaining to be processed!\n");
9894+ }
9895+ }
9896+ }
9897+ return (rc);
9898+}
9899+
9900+/* function: edef_apply_features
9901+ *
9902+ * This routine applies none, one, or more features to an EVMS
9903+ * volume. The system data structure is first verified and then
9904+ * features are applied and verified recursively until the
9905+ * entire volume has been constructed. Fatal errors result in
9906+ * all nodes in the volume discovery list being deleted.
9907+ */
9908+static int
9909+edef_apply_features(struct evms_logical_node **volume_node_list)
9910+{
9911+ int rc = 1, done, top_feature_applying;
9912+ evms_volume_build_info_t vbi;
9913+
9914+ vbi.feature_node_list = NULL;
9915+ rc = edef_evaluate_volume_node_list(volume_node_list, &vbi, FALSE);
9916+
9917+ /* ensure we don't go into the next loop
9918+ * without having a target plugin to
9919+ * pass control to.
9920+ */
9921+ if (!rc) {
9922+ if (!vbi.plugin) {
9923+ rc = -ENODATA;
9924+ }
9925+ }
9926+
9927+ /* this loop should ONLY get used when
9928+ * there are features to process.
9929+ */
9930+ done = (rc) ? TRUE : FALSE;
9931+ while (!done) {
9932+ rc = edef_check_feature_conditions(&vbi);
9933+ if (rc)
9934+ break;
9935+ top_feature_applying = (vbi.max_depth == 1) ? TRUE : FALSE;
9936+ rc = vbi.plugin->fops->discover(&vbi.feature_node_list);
9937+ if (!rc) {
9938+ rc = edef_evaluate_volume_node_list(volume_node_list,
9939+ &vbi,
9940+ top_feature_applying);
9941+ if (top_feature_applying == TRUE) {
9942+ if (vbi.node_count > 1) {
9943+ rc = -EVMS_VOLUME_FATAL_ERROR;
9944+ LOG_ERROR
9945+ ("ERROR: detected > 1 node at volume completion!\n");
9946+ }
9947+ done = TRUE;
9948+ } else {
9949+ if (!vbi.plugin) {
9950+ rc = -EVMS_VOLUME_FATAL_ERROR;
9951+ LOG_ERROR
9952+ ("ERROR: depth("PFU64"): expected another feature!\n",
9953+ vbi.max_depth);
9954+ done = TRUE;
9955+ }
9956+ }
9957+ } else { /* rc != 0 */
9958+ rc = -EVMS_VOLUME_FATAL_ERROR;
9959+ done = TRUE;
9960+ }
9961+ }
9962+ if (rc)
9963+ /* put all feature nodes back on the volume list */
9964+ if (edef_isolate_nodes_by_type(ISOLATE_EVMS_VOLUMES,
9965+ &vbi.feature_node_list,
9966+ volume_node_list, 0, 0))
9967+ BUG();
9968+ return (rc);
9969+}
9970+
9971+static int
9972+edef_delete_node(struct evms_logical_node **node_list,
9973+ struct evms_logical_node *node, int return_code,
9974+ char *log_text)
9975+{
9976+ int rc;
9977+
9978+ rc = evms_cs_remove_logical_node_from_list(node_list, node);
9979+ if (!rc) {
9980+ LOG_ERROR("%s error(%d): deleting volume(%s), node(%s)\n",
9981+ log_text, return_code,
9982+ node->volume_info->volume_name, node->name);
9983+ rc = DELETE(node);
9984+ if (rc) {
9985+ LOG_ERROR("error(%d) while deleting node(%s)\n",
9986+ rc, node->name);
9987+ }
9988+ } else {
9989+ LOG_WARNING
9990+ ("%s error(%d): node gone, assumed deleted by plugin.\n",
9991+ log_text, return_code);
9992+ /* plugin must have cleaned up the node.
9993+ * So just reset the return code and leave.
9994+ */
9995+ rc = 0;
9996+ }
9997+
9998+ return (rc);
9999+}
10000+
10001+static int
10002+edef_process_evms_volumes(struct evms_logical_node **discover_list,
10003+ struct evms_logical_node **associative_feature_list)
10004+{
10005+ int rc = 0;
10006+ struct evms_logical_node *node, *evms_volumes_list, *volume_node_list;
10007+ u64 volume_sn;
10008+
10009+ /* put all EVMS volumes on their own list */
10010+ evms_volumes_list = NULL;
10011+ rc = edef_isolate_nodes_by_type(ISOLATE_EVMS_VOLUMES,
10012+ discover_list,
10013+ &evms_volumes_list, 0, 0);
10014+
10015+ /* apply features to each EVMS volume */
10016+ /* one volume at a time on each pass */
10017+ while (evms_volumes_list) {
10018+ node = evms_volumes_list;
10019+ /* put all nodes for one EVMS volume on separate list */
10020+ volume_node_list = NULL;
10021+ volume_sn = node->volume_info->volume_sn;
10022+ rc = edef_isolate_nodes_by_type
10023+ (ISOLATE_EVMS_VOLUME_SERIAL_NUMBER, &evms_volumes_list,
10024+ &volume_node_list, 0, volume_sn);
10025+ if (rc)
10026+ break;
10027+ /* go apply all the volume features now */
10028+ rc = edef_apply_features(&volume_node_list);
10029+ switch (rc) {
10030+ case 0: /* SUCCESS */
10031+ /* remove volume just processed */
10032+ node = volume_node_list;
10033+ rc = evms_cs_remove_logical_node_from_list
10034+ (&volume_node_list, node);
10035+ if (rc)
10036+ break;
10037+ /* put volume on global list */
10038+ rc = evms_cs_add_logical_node_to_list(discover_list,
10039+ node);
10040+ break;
10041+ case -EVMS_ASSOCIATIVE_FEATURE:
10042+ /* put all "associative" features on their own list */
10043+ rc = edef_isolate_nodes_by_type
10044+ (ISOLATE_ASSOCIATIVE_FEATURES, &volume_node_list,
10045+ associative_feature_list, 0, 0);
10046+ break;
10047+ default: /* FATAL ERROR */
10048+ /* delete each node remaining in the list */
10049+ if (volume_node_list) {
10050+ LOG_ERROR
10051+ ("encountered fatal error building volume '%s'\n",
10052+ volume_node_list->volume_info->
10053+ volume_name);
10054+ }
10055+ while (volume_node_list) {
10056+ node = volume_node_list;
10057+ edef_delete_node(&volume_node_list,
10058+ node, rc, "EVMS feature");
10059+ }
10060+ rc = 0;
10061+ break;
10062+ }
10063+ if (rc)
10064+ break;
10065+ }
10066+ return (rc);
10067+}
10068+
10069+static int
10070+edef_process_associative_volumes(struct evms_logical_node
10071+ **associative_feature_list,
10072+ struct evms_logical_node **discover_list)
10073+{
10074+ int rc = 0;
10075+ struct evms_logical_node *node;
10076+
10077+ while (*associative_feature_list) {
10078+ node = *associative_feature_list;
10079+ /* remove this node from associative feature list */
10080+ rc = evms_cs_remove_logical_node_from_list
10081+ (associative_feature_list, node);
10082+ if (rc)
10083+ break;
10084+ /* put volume on global list */
10085+ rc = evms_cs_add_logical_node_to_list(discover_list, node);
10086+ if (rc)
10087+ break;
10088+ rc = edef_load_feature_header(node);
10089+ if (rc)
10090+ break;
10091+ rc = edef_apply_feature(node, discover_list);
10092+ if (rc)
10093+ edef_delete_node(discover_list, node, rc,
10094+ "Associative feature");
10095+ }
10096+ return (rc);
10097+}
10098+
10099+static int
10100+edef_check_for_incomplete_volumes(struct evms_logical_node **discover_list)
10101+{
10102+ int rc = 0;
10103+ struct evms_logical_node *next_node, *node;
10104+
10105+ /* check to see if any incomplete volumes are left around */
10106+ /* if so, delete them. */
10107+ /* complete volumes should not have feature_headers */
10108+ /* hanging off them, if we find any, we know the volume */
10109+ /* is incomplete. */
10110+
10111+ for (node = *discover_list; node; node = next_node) {
10112+ next_node = node->next;
10113+
10114+ if (node->feature_header) {
10115+ edef_delete_node(discover_list, node, rc,
10116+ "Unexpected feature header");
10117+ }
10118+ }
10119+ return (rc);
10120+}
10121+
10122+/*
10123+ * Function: evms_discover_evms_features
10124+ * Description: Find features for nodes on the logical partitions list
10125+ */
10126+static int
10127+evms_discover_evms_features(struct evms_logical_node **discover_list)
10128+{
10129+ struct evms_logical_node *associative_feature_list;
10130+ int rc = 0;
10131+
10132+ LOG_EXTRA("discovering evms volume features...\n");
10133+
10134+ /* initialize "associative" features list */
10135+ associative_feature_list = NULL;
10136+
10137+ /* find the bottom features */
10138+ rc = edef_find_first_features(discover_list);
10139+#ifdef LOCAL_DEBUG
10140+ display_discover_list(*discover_list, "after 1st features hdr");
10141+#endif
10142+ if (!rc)
10143+ /* process EVMS volumes here */
10144+ rc = edef_process_evms_volumes(discover_list,
10145+ &associative_feature_list);
10146+#ifdef LOCAL_DEBUG
10147+ display_discover_list(*discover_list, "after evms volumes");
10148+#endif
10149+ if (!rc)
10150+ /* process "associative" features here */
10151+ rc = edef_process_associative_volumes(&associative_feature_list,
10152+ discover_list);
10153+#ifdef LOCAL_DEBUG
10154+ display_discover_list(*discover_list, "after associatives");
10155+#endif
10156+ if (!rc)
10157+ /* check for incomplete volumes */
10158+ rc = edef_check_for_incomplete_volumes(discover_list);
10159+
10160+ return (rc);
10161+}
10162+
10163+/*
10164+ * function: eelv_assign_volume_minor
10165+ *
10166+ * This is a support function for evms_export_logical_volumes.
10167+ * This routine assigns a specific minor number to a volume. It
10168+ * also performs the remaining steps to make this volume visible
10169+ * and usable to the kernel.
10170+ *
10171+ */
10172+static void
10173+eelv_assign_volume_minor(struct evms_logical_node *node, int minor)
10174+{
10175+ struct evms_logical_volume *volume;
10176+
10177+ /* initialize the logical_node entry in the volume array */
10178+ volume = &evms_logical_volumes[minor];
10179+ volume->node = node;
10180+ volume->name =
10181+ kmalloc(strlen(EVMS_GET_NODE_NAME(node)) + 1, GFP_KERNEL);
10182+ if (!volume->name)
10183+ BUG();
10184+ strcpy(volume->name, EVMS_GET_NODE_NAME(node));
10185+
10186+ /* copy flags from top level node into volume structure */
10187+ volume->flags = node->flags;
10188+
10189+ /* check for read-only volume */
10190+ if (volume->flags & EVMS_VOLUME_READ_ONLY) {
10191+ set_device_ro(MKDEV(EVMS_MAJOR, minor), 1);
10192+ }
10193+
10194+ /* adjust volume size based on hardsector size */
10195+ node->total_vsectors &=
10196+ ~((node->hardsector_size >> EVMS_VSECTOR_SIZE_SHIFT) - 1);
10197+
10198+ /* initialize the global device arrays */
10199+ blksize_size[EVMS_MAJOR][minor] = node->block_size;
10200+ hardsect_size[EVMS_MAJOR][minor] = node->hardsector_size;
10201+ blk_size[EVMS_MAJOR][minor] = (int) (node->total_vsectors >> 1);
10202+
10203+ /* register this volume with devfs */
10204+ volume->devfs_handle =
10205+ devfs_register(evms_dir_devfs_handle,
10206+ volume->name,
10207+ DEVFS_FL_DEFAULT,
10208+ EVMS_MAJOR, minor,
10209+ S_IFBLK | S_IRUGO | S_IWUGO, &evms_fops, NULL);
10210+
10211+ evms_volumes++;
10212+
10213+ LOG_DEFAULT("Exporting EVMS Volume(%u,%u) from \"%s%s\".\n",
10214+ EVMS_MAJOR, minor, EVMS_DEV_NODE_PATH, volume->name);
10215+}
10216+
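+/* Worked example (editorial; assumes 512-byte vsectors, i.e.
+ * EVMS_VSECTOR_SIZE_SHIFT == 9): with a 2048-byte hardsector,
+ * 2048 >> 9 == 4, so the alignment above rounds total_vsectors down
+ * to a multiple of 4; a 1003-vsector node exports 1000 vsectors, and
+ * blk_size reports 500 (1KB units, total_vsectors >> 1).
+ */
+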
10217+/*
10218+ * function: eelv_check_for_duplicity
10219+ *
10220+ * This is a support function for evms_export_logical_volumes.
10221+ * This routine compares each node on the discover list against
10222+ * the currently exported volumes. If the node pointer or the
10223+ * node name matches an exported volume, the node is a duplicate
10224+ * and is then deleted.
10225+ *
10226+ */
10227+static void
10228+eelv_check_for_duplicity(struct evms_logical_node **discover_list)
10229+{
10230+ struct evms_logical_node *next_node, *node;
10231+ struct evms_logical_volume *lv;
10232+ int i, is_dup;
10233+
10234+ for (node = *discover_list; node; node = next_node) {
10235+ next_node = node->next;
10236+
10237+ is_dup = FALSE;
10238+ for (i = 1; i < MAX_EVMS_VOLUMES; i++) {
10239+ lv = &evms_logical_volumes[i];
10240+ /* only check exported volumes */
10241+ if (lv->node) {
10242+ char *type_ptr = NULL;
10243+
10244+ /* check for duplicate pointer */
10245+ if (node == lv->node) {
10246+ is_dup = TRUE;
10247+ type_ptr = "pointer";
10248+ /* check for duplicate node */
10249+ } else if (!strcmp(node->name, lv->node->name)) {
10250+ is_dup = TRUE;
10251+ type_ptr = "node";
10252+ }
10253+ if (is_dup == TRUE) {
10254+ evms_cs_remove_logical_node_from_list
10255+ (discover_list, node);
10256+ LOG_DETAILS
10257+ ("deleting duplicate %s to EVMS volume(%u,%u,%s)...\n",
10258+ type_ptr, EVMS_MAJOR, i,
10259+ EVMS_GET_NODE_NAME(node));
10260+ /* forget duplicate */
10261+ break;
10262+ }
10263+ }
10264+ }
10265+ }
10266+}
10267+
10268+/*
10269+ * function: eelv_reassign_soft_deleted_volume_minors
10270+ *
10271+ * This is a support function for evms_export_logical_volumes.
10272+ * This routine reassigns minor numbers to rediscovered "soft"
10273+ * deleted volumes.
10274+ *
10275+ */
10276+static void
10277+eelv_reassign_soft_deleted_volume_minors(struct evms_logical_node
10278+ **discover_list)
10279+{
10280+ struct evms_logical_node *next_node, *node;
10281+ struct evms_logical_volume *lv;
10282+ int i, node_removed;
10283+
10284+ for (node = *discover_list; node; node = next_node) {
10285+ next_node = node->next;
10286+
10287+ node_removed = FALSE;
10288+ for (i = 1; i < MAX_EVMS_VOLUMES; i++) {
10289+ lv = &evms_logical_volumes[i];
10290+ /* only check soft deleted volumes:
10291+ * they have a non-NULL name.
10292+ */
10293+ if (lv->flags & EVMS_VOLUME_SOFT_DELETED) {
10294+ if (!strcmp(EVMS_GET_NODE_NAME(node), lv->name)) {
10295+ /* reassign requested minor */
10296+ evms_cs_remove_logical_node_from_list
10297+ (discover_list, node);
10298+ node_removed = TRUE;
10299+ LOG_DEFAULT("Re");
10300+ /* free the previously used name */
10301+ kfree(lv->name);
10302+ lv->name = NULL;
10303+ /* clear the EVMS_VOLUME_SOFT_DELETED flag */
10304+ lv->flags = 0;
10305+ eelv_assign_volume_minor(node, i);
10306+ break;
10307+ }
10308+ }
10309+ }
10310+ }
10311+}
10312+
10313+/*
10314+ * function: eelv_assign_evms_volume_minors
10315+ *
10316+ * This is a support function for evms_export_logical_volumes.
10317+ * This routine assigns minor numbers to new evms volumes. If
10318+ * the specified minor is already in use, the requested minor
10319+ * is set to 0, and the next available minor is assigned, along
10320+ * with any remaining volumes, at the end of evms_export_logical_volumes.
10321+ *
10322+ */
10323+static void
10324+eelv_assign_evms_volume_minors(struct evms_logical_node **discover_list)
10325+{
10326+ struct evms_logical_node *next_node, *node, *lv_node;
10327+ unsigned int requested_minor, node_removed;
10328+
10329+ for (node = *discover_list; node; node = next_node) {
10330+ next_node = node->next;
10331+
10332+ node_removed = FALSE;
10333+ /* only process evms volumes */
10334+ if (node->flags & EVMS_VOLUME_FLAG) {
10335+ requested_minor = node->volume_info->volume_minor;
10336+ /* is there a requested minor? */
10337+ if (requested_minor) {
10338+ int lv_flags = 0;
10339+
10340+ /* check range of requested minor */
10341+ if (requested_minor >= MAX_EVMS_VOLUMES)
10342+ lv_node = node;
10343+ else {
10344+ struct evms_logical_volume *lv;
10345+ lv = &evms_logical_volumes
10346+ [requested_minor];
10347+ lv_node = lv->node;
10348+ lv_flags = lv->flags;
10349+ }
10350+ if ((!lv_node)
10351+ && (!(lv_flags & EVMS_VOLUME_SOFT_DELETED))) {
10352+ /* assign requested minor */
10353+ evms_cs_remove_logical_node_from_list
10354+ (discover_list, node);
10355+ node_removed = TRUE;
10356+ eelv_assign_volume_minor(node,
10357+ requested_minor);
10358+ } else {
10359+ LOG_WARNING
10360+ ("EVMS volume(%s) requesting invalid/in-use minor(%d), assigning next available!\n",
10361+ node->volume_info->volume_name,
10362+ requested_minor);
10363+ /*
10364+ * requested minor is already
10365+ * in use, defer assignment
10366+ * until later.
10367+ */
10368+ node->volume_info->volume_minor = 0;
10369+ }
10370+ }
10371+ }
10372+ }
10373+}
10374+
10375+/*
10376+ * function: eelv_assign_remaining_evms_volume_minors
10377+ *
10378+ * This is a support function for evms_export_logical_volumes.
10379+ * This routine assigns minor numbers to new evms volumes that
10380+ * have no/conflicting minor assignments. This function will
10381+ * search from high(255) minor values down, for the first available
10382+ * minor. Searching high to low minimizes the possibility of
10383+ * conflicting evms volumes causing "compatibility" minor
10384+ * assignments to shift from expected assignments.
10385+ *
10386+ */
10387+static void
10388+eelv_assign_remaining_evms_volume_minors(struct evms_logical_node
10389+ **discover_list)
10390+{
10391+ struct evms_logical_node *next_node, *node;
10392+ int requested_minor, node_removed;
10393+
10394+ for (node = *discover_list; node; node = next_node) {
10395+ next_node = node->next;
10396+
10397+ node_removed = FALSE;
10398+ /* only process evms volumes */
10399+ /* all remaining evms volumes should now
10400+ * have a minor value of 0, meaning they
10401+ * had no minor assignment, or their minor
10402+ * assignment conflicted with an existing
10403+ * minor assignment.
10404+ */
10405+ if (node->flags & EVMS_VOLUME_FLAG) {
10406+ evms_cs_remove_logical_node_from_list(discover_list,
10407+ node);
10408+ node_removed = TRUE;
10409+ /* find next available minor number */
10410+ for (requested_minor = 255;
10411+ (evms_logical_volumes[requested_minor].node ||
10412+ evms_logical_volumes[requested_minor].name) &&
10413+ requested_minor; requested_minor--) ;
10414+ /* check range of assigned minor */
10415+ if (!requested_minor) {
10416+ LOG_CRITICAL
10417+ ("no more minor numbers available for evms volumes!!!!\n");
10418+ DELETE(node);
10419+ } else
10420+ /* assign requested minor */
10421+ eelv_assign_volume_minor(node, requested_minor);
10422+ }
10423+ }
10424+}
10425+
10426+/*
10427+ * function: eelv_assign_remaining_volume_minors
10428+ *
10429+ * This is a support function for evms_export_logical_volumes.
10430+ * This routine assigns minor numbers to all remaining unassigned
10431+ * volumes. Minor numbers are assigned on an availability
10432+ * basis. The first free minor number is used in the assignment.
10433+ *
10434+ */
10435+static void
10436+eelv_assign_remaining_volume_minors(struct evms_logical_node **discover_list)
10437+{
10438+ struct evms_logical_node *node;
10439+ int minor;
10440+
10441+ while (*discover_list) {
10442+ node = *discover_list;
10443+ evms_cs_remove_logical_node_from_list(discover_list, node);
10444+
10445+ /* find next available minor number */
10446+ for (minor = 1;
10447+ (evms_logical_volumes[minor].node ||
10448+ evms_logical_volumes[minor].name) &&
10449+ minor < MAX_EVMS_VOLUMES; minor++) ;
10450+
10451+ if (minor >= MAX_EVMS_VOLUMES) {
10452+ LOG_CRITICAL
10453+ ("no more minor numbers available for compatibility volumes!!!!\n");
10454+ DELETE(node);
10455+ } else
10456+ /* assign minor */
10457+ eelv_assign_volume_minor(node, minor);
10458+ }
10459+}
10460+
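+/* Editorial sketch (not in the original patch): the first-free minor
+ * scan above expressed as a standalone helper. An entry is free when
+ * it has neither a node nor a (soft-deleted) name; the helper name is
+ * hypothetical.
+ */
+#if 0
+static int
+find_free_minor(void)
+{
+	int minor;
+
+	for (minor = 1; minor < MAX_EVMS_VOLUMES; minor++)
+		if (!evms_logical_volumes[minor].node &&
+		    !evms_logical_volumes[minor].name)
+			return minor;
+	return -1;	/* no free minors available */
+}
+#endif
+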
10461+/*
10462+ * function: eelv_check_for_unreassign_soft_deleted_volume
10463+ *
10464+ * This is a support function for evms_export_logical_volumes.
10465+ * This routine reports any "soft deleted" volumes that were not
10466+ * found after a rediscovery.
10467+ */
10468+static void
10469+eelv_check_for_unreassign_soft_deleted_volume(void)
10470+{
10471+ struct evms_logical_volume *lv;
10472+ int i;
10473+
10474+ for (i = 1; i < MAX_EVMS_VOLUMES; i++) {
10475+ lv = &evms_logical_volumes[i];
10476+ /* only check soft deleted volumes:
10477+ * they have a NULL node ptr &
10478+ * they have a non-NULL name.
10479+ */
10480+ if (lv->flags & EVMS_VOLUME_SOFT_DELETED) {
10481+ if (is_open(i))
10482+ lv->flags |= EVMS_VOLUME_CORRUPT;
10483+ LOG_ERROR
10484+ ("error: rediscovery failed to find %smounted 'soft deleted' volume(%u,%u,%s)...\n",
10485+ ((lv->flags & EVMS_VOLUME_CORRUPT) ? "" : "un"),
10486+ EVMS_MAJOR, i, lv->name);
10487+ if (lv->flags & EVMS_VOLUME_CORRUPT) {
10488+ LOG_ERROR
10489+ (" flagging volume(%u,%u,%s) as CORRUPT!\n",
10490+ EVMS_MAJOR, i, lv->name);
10491+ } else {
10492+ LOG_ERROR
10493+ (" releasing minor(%d) used by volume(%s)!\n",
10494+ i, lv->name);
10495+ /* clear logical volume structure
10496+ * for this volume so it may be
10497+ * reused.
10498+ */
10499+ kfree(lv->name);
10500+ lv->name = NULL;
10501+ lv->flags = 0;
10502+ }
10503+ }
10504+ }
10505+}
10506+
10507+static void
10508+eelv_unquiesce_volumes(void)
10509+{
10510+ int i;
10511+
10512+ /* check each volume array entry */
10513+ for (i = 1; i < MAX_EVMS_VOLUMES; i++) {
10514+ struct evms_logical_volume *volume;
10515+
10516+ volume = &evms_logical_volumes[i];
10517+ /* is this volume "quiesced" ? */
10518+ if (volume->quiesced) {
10519+ int rc = 1;
10520+ if (volume->node) {
10521+ /* "unquiesce" it */
10522+ struct inode inode;
10523+ struct evms_quiesce_vol_pkt qv;
10524+
10525+ qv.command = qv.status = 0;
10526+ qv.do_vfs = 0;
10527+ qv.minor = i;
10528+ rc = evms_quiesce_volume(volume, &inode, NULL,
10529+ &qv);
10530+ }
10531+ /* Wake up any waiters */
10532+ if (rc) {
10533+ /* clear the flag */
10534+ volume->quiesced = 0;
10535+ /* wake up the waiters */
10536+ if (waitqueue_active(&volume->wait_queue))
10537+ wake_up(&volume->wait_queue);
10538+#ifdef VFS_PATCH_PRESENT
10539+ /* unquiesce VFS if quiesced */
10540+ if (volume->vfs_quiesced) {
10541+ /* VFS function call to unlock the filesystem */
10542+ unlockfs(MKDEV(EVMS_MAJOR, i));
10543+ volume->vfs_quiesced = FALSE;
10544+ }
10545+#endif
10546+ }
10547+ }
10548+ }
10549+}
10550+
10551+/*
10552+ * Function: evms_export_logical_volumes
10553+ *
10554+ * This function is called from evms_discover_volumes. It
10555+ * checks for duplicate volumes, assigns minor values to evms
10556+ * volumes, and assigns minor values to the remaining volumes.
10557+ * In addition to assigning minor values to each volume, this
10558+ * function also completes the final steps necessary to allow
10559+ * the volumes to be used by the operating system.
10560+ */
10561+static void
10562+evms_export_logical_volumes(struct evms_logical_node **discover_list)
10563+{
10564+ LOG_EXTRA("exporting EVMS logical volumes...\n");
10565+
10566+ eelv_check_for_duplicity(discover_list);
10567+
10568+ eelv_reassign_soft_deleted_volume_minors(discover_list);
10569+
10570+ eelv_assign_evms_volume_minors(discover_list);
10571+
10572+ eelv_assign_remaining_evms_volume_minors(discover_list);
10573+
10574+ eelv_assign_remaining_volume_minors(discover_list);
10575+
10576+ eelv_check_for_unreassign_soft_deleted_volume();
10577+
10578+ /* "unquiesce" any "quiesced" volumes */
10579+ eelv_unquiesce_volumes();
10580+}
10581+
10582+static int
10583+edv_populate_discover_list(struct evms_list_node *src_list,
10584+ struct evms_logical_node **trg_list,
10585+ struct evms_rediscover_pkt *discover_parms)
10586+{
10587+ int rc = 0, i, move_node, use_all_disks = FALSE;
10588+ struct evms_list_node *src_node;
10589+ struct evms_logical_node *disk_node = NULL;
10590+
10591+ /* if no discover parameters are specified */
10592+ /* copy ALL the disk nodes into the */
10593+ /* discovery list. */
10594+ if ((discover_parms == NULL) ||
10595+ (discover_parms->drive_count == REDISCOVER_ALL_DEVICES))
10596+ use_all_disks = TRUE;
10597+
10598+ /* copy the disk nodes specified in the */
10599+ /* discover_parms over to a discover list */
10600+ src_node = src_list;
10601+ while (src_node) {
10602+ move_node = use_all_disks;
10603+ if (move_node == FALSE)
10604+ /* check the rediscovery array */
10605+ for (i = 0; i < discover_parms->drive_count; i++) {
10606+ disk_node =
10607+ DEV_HANDLE_TO_NODE(discover_parms->
10608+ drive_array[i]);
10609+ if (disk_node == src_node->item) {
10610+ move_node = TRUE;
10611+ break;
10612+ }
10613+ }
10614+ /* check to see if we want this node */
10615+ if (move_node == TRUE)
10616+ evms_cs_add_logical_node_to_list(trg_list,
10617+ (struct
10618+ evms_logical_node *)
10619+ src_node->item);
10620+ /* advance to next struct evms_list_node */
10621+ src_node = src_node->next;
10622+ }
10623+ return (rc);
10624+}
10625+
10626+static int
10627+evms_discover_volumes(struct evms_rediscover_pkt *discover_parms)
10628+{
10629+ int rc = 0;
10630+ struct evms_logical_node *discover_list = NULL;
10631+
10632+ evms_discover_logical_disks(&discover_list);
10633+ if (evms_global_device_list) {
10634+ /* move the appropriate disk nodes, based on */
10635+ /* on the discover parameters, onto the */
10636+ /* discover list for the partition managers */
10637+ /* to process */
10638+ edv_populate_discover_list(evms_global_device_list,
10639+ &discover_list, discover_parms);
10640+ }
10641+ if (discover_list) {
10642+#ifdef LOCAL_DEBUG
10643+ display_discover_list(discover_list, "after dev mgrs");
10644+#endif
10645+ evms_discover_logical_partitions(&discover_list);
10646+ }
10647+ if (discover_list) {
10648+#ifdef LOCAL_DEBUG
10649+ display_discover_list(discover_list, "after seg mgrs");
10650+#endif
10651+ evms_discover_volume_groups(&discover_list);
10652+ }
10653+ if (discover_list) {
10654+#ifdef LOCAL_DEBUG
10655+ display_discover_list(discover_list, "after reg mgrs");
10656+#endif
10657+ evms_discover_evms_features(&discover_list);
10658+ }
10659+ if (discover_list) {
10660+#ifdef LOCAL_DEBUG
10661+ display_discover_list(discover_list, "after features");
10662+#endif
10663+ evms_export_logical_volumes(&discover_list);
10664+ evms_cs_signal_event(EVMS_EVENT_END_OF_DISCOVERY);
10665+ }
10666+ return (rc);
10667+}
10668+
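+/* Discovery pipeline summary: device managers build the disk list,
+ * segment managers discover partitions, region managers assemble
+ * volume groups, EVMS feature plugins stack volume features, and the
+ * surviving nodes are exported as minor-numbered volumes before
+ * EVMS_EVENT_END_OF_DISCOVERY is signalled.
+ */
+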
10669+/* function: evms_notify_reboot
10670+ *
10671+ * this function gets called at shutdown time and is used
10672+ * to remove any evms controlled volumes from memory, thus
10673+ * allowing any plugins needing to flush internal caches
10674+ * to do so.
10675+ */
10676+int
10677+evms_notify_reboot(struct notifier_block *this, unsigned long code, void *x)
10678+{
10679+ int i;
10680+ struct evms_logical_volume *volume;
10681+
10682+ switch (code) {
10683+ case SYS_DOWN:
10684+ case SYS_HALT:
10685+ case SYS_POWER_OFF:
10686+ LOG_DEFAULT("stopping all evms controlled volumes.\n");
10687+
10688+ /* quiesce all volumes */
10689+ for (i = 1; i < MAX_EVMS_VOLUMES; i++) {
10690+ struct evms_quiesce_vol_pkt qv;
10691+ struct inode inode;
10692+
10693+ volume = &evms_logical_volumes[i];
10694+ if (!volume->node)
10695+ continue;
10696+ qv.command = 1; // quiesce
10697+ qv.minor = i; //
10698+ qv.status = 0; // reset status
10699+ qv.do_vfs = 0;
10700+ evms_quiesce_volume(volume, &inode, NULL, &qv);
10701+ }
10702+ /* delete all volumes
10703+ *
10704+		 * to ensure this works under
10705+		 * most circumstances, a "soft"
10706+		 * delete will be done. This will
10707+ * handle the strange case of a
10708+ * volume still being mounted.
10709+ */
10710+ for (i = 1; i < MAX_EVMS_VOLUMES; i++) {
10711+ struct evms_delete_vol_pkt dv;
10712+
10713+ volume = &evms_logical_volumes[i];
10714+ if (!volume->node)
10715+ continue;
10716+ /* only delete quiesced volumes */
10717+ if (!volume->quiesced)
10718+ continue;
10719+ /* delete the volume from memory.
10720+ * do a 'soft' delete if volume
10721+ * is mounted, and 'hard' delete
10722+ * if it is not.
10723+ */
10724+ dv.command = is_open(i);
10725+ dv.minor = i;
10726+ dv.status = 0;
10727+ evms_delete_volume(volume, &dv);
10728+ }
10729+ }
10730+ return NOTIFY_DONE;
10731+}
10732+
10733+static struct notifier_block evms_notifier = {
10734+ .notifier_call = evms_notify_reboot,
10735+ .next = NULL,
10736+ .priority = INT_MAX, /* before any real devices */
10737+};
10738+
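+/* Editorial note: the INT_MAX priority places this notifier ahead of
+ * device drivers on the reboot chain, so volumes are quiesced and
+ * deleted before the underlying devices shut down. The notifier is
+ * registered via register_reboot_notifier() in evms_init_module().
+ */
+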
10739+/*
10740+ * Function: find_root_fs_dev
10741+ * If "root=/dev/evms/???" was specified on the kernel command line, and devfs
10742+ * is not enabled, we need to determine the appropriate minor number for the
10743+ * specified volume for the root fs.
10744+ */
10745+static void
10746+find_root_fs_dev(void)
10747+{
10748+#ifndef MODULE
10749+ char root_name[64] = { 0 };
10750+ char *name;
10751+ int i;
10752+
10753+#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,4,18)
10754+ strncpy(root_name, root_device_name, 63);
10755+#else
10756+ get_root_device_name(root_name);
10757+#endif
10758+
10759+ if (!strncmp(root_name, EVMS_DIR_NAME "/", strlen(EVMS_DIR_NAME) + 1)) {
10760+ name = &root_name[strlen(EVMS_DIR_NAME) + 1];
10761+
10762+ for (i = 1; i < MAX_EVMS_VOLUMES; i++) {
10763+ if (evms_logical_volumes[i].name &&
10764+ !strncmp(name, evms_logical_volumes[i].name,
10765+ strlen(evms_logical_volumes[i].name))) {
10766+ ROOT_DEV = MKDEV(EVMS_MAJOR, i);
10767+ return;
10768+ }
10769+ }
10770+ }
10771+#endif
10772+}
10773+
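+/* Example (editorial; the volume name is hypothetical): booting with
+ * "root=/dev/evms/lvol1" yields a root_name of "evms/lvol1"; the
+ * EVMS_DIR_NAME prefix is stripped and "lvol1" is matched against
+ * evms_logical_volumes[i].name to select ROOT_DEV.
+ */
+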
10774+/*
10775+ * Function: io_notify_cache_ctor
10776+ * this function initializes the entries
10777+ * in our private io_notify pool.
10778+ */
10779+static void
10780+io_notify_cache_ctor(void *foo, kmem_cache_t * cachep, unsigned long flags)
10781+{
10782+ if ((flags & (SLAB_CTOR_VERIFY | SLAB_CTOR_CONSTRUCTOR)) ==
10783+ SLAB_CTOR_CONSTRUCTOR) {
10784+ io_notify_t *io_notify = (io_notify_t *) foo;
10785+ memset(io_notify, 0, sizeof (*io_notify));
10786+ }
10787+}
10788+
10789+/*
10790+ * Function: bh_cache_ctor
10791+ * this function initializes the b_wait field in the buffer heads
10792+ * in our private buffer head pool.
10793+ */
10794+static void
10795+bh_cache_ctor(void *foo, kmem_cache_t * cachep, unsigned long flags)
10796+{
10797+ if ((flags & (SLAB_CTOR_VERIFY | SLAB_CTOR_CONSTRUCTOR)) ==
10798+ SLAB_CTOR_CONSTRUCTOR) {
10799+ struct buffer_head *bh = (struct buffer_head *) foo;
10800+ memset(bh, 0, sizeof (*bh));
10801+ init_waitqueue_head(&bh->b_wait);
10802+ }
10803+}
10804+
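+/* Editorial sketch: how a constructor such as bh_cache_ctor() is
+ * typically wired to a 2.4 slab cache. The EVMS core actually goes
+ * through evms_cs_create_pool() (see evms_init_module() below); the
+ * cache name here is hypothetical.
+ */
+#if 0
+	kmem_cache_t *cache;
+
+	cache = kmem_cache_create("example-bh-cache",
+				  sizeof (struct buffer_head), 0,
+				  SLAB_HWCACHE_ALIGN,
+				  bh_cache_ctor, NULL);
+#endif
+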
10805+/*
10806+ * Function: evms_init_module
10807+ * This function runs once at system initialization.
10808+ */
10809+static int __init
10810+evms_init_module(void)
10811+{
10812+ int rc = 0, i;
10813+ int *evms_blocksizes;
10814+
10815+ LOG_DEFAULT("EVMS v%d.%d.%d initializing .... info level(%d).\n",
10816+ EVMS_MAJOR_VERSION,
10817+ EVMS_MINOR_VERSION,
10818+ EVMS_PATCHLEVEL_VERSION, evms_info_level);
10819+
10820+ /* initialize memory management counters */
10821+ evms_allocs = (atomic_t) ATOMIC_INIT(0);
10822+ evms_logical_nodes = (atomic_t) ATOMIC_INIT(0);
10823+
10824+ /* initialize the io_notify_entry pool */
10825+ if (!rc)
10826+ evms_io_notify_pool = evms_cs_create_pool(sizeof (io_notify_t),
10827+ "EVMS IO Notify",
10828+ io_notify_cache_ctor,
10829+ NULL);
10830+
10831+ /* initialize the "public" buffer_head pool */
10832+ if (!rc)
10833+ evms_bh_pool = evms_cs_create_pool(sizeof (struct buffer_head),
10834+ "EVMS BH",
10835+ bh_cache_ctor, NULL);
10836+
10837+ /* allocate the logical volume array */
10838+ if (!rc)
10839+ evms_logical_volumes =
10840+ kmalloc(sizeof (struct evms_logical_volume) *
10841+ MAX_EVMS_VOLUMES, GFP_KERNEL);
10842+ if (!evms_logical_volumes) {
10843+ rc = -ENOMEM;
10844+ }
10845+
10846+ /* initialize the logical volume array entries */
10847+ if (!rc) {
10848+ memset(evms_logical_volumes, 0,
10849+ sizeof (struct evms_logical_volume) * MAX_EVMS_VOLUMES);
10850+ for (i = 1; i < MAX_EVMS_VOLUMES; i++) {
10851+ struct evms_logical_volume *volume;
10852+
10853+ volume = &evms_logical_volumes[i];
10854+ init_waitqueue_head(&volume->wait_queue);
10855+ volume->requests_in_progress =
10856+ (atomic_t) ATOMIC_INIT(0);
10857+#ifdef CONFIG_SMP
10858+ blk_init_queue(&volume->request_queue,
10859+ evms_do_request_fn);
10860+ blk_queue_make_request(&volume->request_queue,
10861+ evms_make_request_fn);
10862+#endif
10863+ }
10864+ }
10865+
10866+ /* allocate EVMS' blk_size array */
10867+ if (!rc) {
10868+ evms_blocksizes = kmalloc(MAX_EVMS_VOLUMES *
10869+ sizeof (int), GFP_KERNEL);
10870+ if (!evms_blocksizes) {
10871+ rc = -ENOMEM;
10872+ LOG_CRITICAL
10873+ ("can't allocate memory for EVMS blk_size\n");
10874+ } else {
10875+ memset(evms_blocksizes, 0,
10876+ MAX_EVMS_VOLUMES * sizeof (int));
10877+ blk_size[EVMS_MAJOR] = evms_blocksizes;
10878+ }
10879+ }
10880+
10881+ /* allocate EVMS' blksize_size array */
10882+ if (!rc) {
10883+ evms_blocksizes = kmalloc(MAX_EVMS_VOLUMES *
10884+ sizeof (int), GFP_KERNEL);
10885+ if (!evms_blocksizes) {
10886+ rc = -ENOMEM;
10887+ LOG_CRITICAL
10888+ ("can't allocate memory for EVMS blksize_size\n");
10889+ } else {
10890+ memset(evms_blocksizes, 0,
10891+ MAX_EVMS_VOLUMES * sizeof (int));
10892+ blksize_size[EVMS_MAJOR] = evms_blocksizes;
10893+ }
10894+ }
10895+
10896+ /* allocate EVMS' hardsect_size array */
10897+ if (!rc) {
10898+ evms_blocksizes = kmalloc(MAX_EVMS_VOLUMES *
10899+ sizeof (int), GFP_KERNEL);
10900+ if (!evms_blocksizes) {
10901+ rc = -ENOMEM;
10902+ LOG_CRITICAL
10903+ ("can't allocate memory for EVMS hardsect_size\n");
10904+ } else {
10905+ memset(evms_blocksizes, 0,
10906+ MAX_EVMS_VOLUMES * sizeof (int));
10907+ hardsect_size[EVMS_MAJOR] = evms_blocksizes;
10908+ }
10909+ }
10910+
10911+ /* Register the block device */
10912+ if (!rc) {
10913+ rc = devfs_register_blkdev(EVMS_MAJOR, EVMS_DIR_NAME,
10914+ &evms_fops);
10915+ if (rc) {
10916+ LOG_CRITICAL
10917+ ("error calling devfs_register_blkdev() err=%u\n",
10918+ rc);
10919+ rc = -EINVAL;
10920+ }
10921+ }
10922+
10923+ /* Register with devfs */
10924+ if (!rc) {
10925+ evms_dir_devfs_handle = devfs_mk_dir(NULL, EVMS_DIR_NAME, NULL);
10926+ // A NULL return cannot be fatal.
10927+ // Devfs just might not be running
10928+ if (!evms_dir_devfs_handle) {
10929+ LOG_EXTRA
10930+ ("NULL return from devfs_mk_dir() for \"%s\"\n",
10931+ EVMS_DIR_NAME);
10932+ LOG_EXTRA("Is devfs enabled?\n");
10933+ } else {
10934+ evms_blk_devfs_handle =
10935+ devfs_register(evms_dir_devfs_handle, EVMS_DEV_NAME,
10936+ DEVFS_FL_DEFAULT, EVMS_MAJOR, 0,
10937+ S_IFBLK | S_IRUGO | S_IWUGO,
10938+ &evms_fops, NULL);
10939+ if (!evms_blk_devfs_handle) {
10940+ LOG_DETAILS
10941+ ("NULL return from devfs_register() for \"%s\"\n",
10942+ EVMS_DEV_NAME);
10943+ }
10944+ }
10945+ }
10946+
10947+ if (!rc) {
10948+ read_ahead[EVMS_MAJOR] = 4096;
10949+#ifdef CONFIG_SMP
10950+ blk_dev[EVMS_MAJOR].queue = evms_find_queue;
10951+#else
10952+ blk_init_queue(BLK_DEFAULT_QUEUE(EVMS_MAJOR),
10953+ evms_do_request_fn);
10954+ blk_queue_make_request(BLK_DEFAULT_QUEUE(EVMS_MAJOR),
10955+ evms_make_request_fn);
10956+#endif
10957+#ifdef CONFIG_PROC_FS
10958+ evms_cs_get_evms_proc_dir();
10959+ if (evms_proc_dir) {
10960+ create_proc_read_entry("info", 0, evms_proc_dir,
10961+ evms_info_read_proc, NULL);
10962+ create_proc_read_entry("plugins", 0, evms_proc_dir,
10963+ evms_plugins_read_proc, NULL);
10964+ create_proc_read_entry("volumes", 0, evms_proc_dir,
10965+ evms_volumes_read_proc, NULL);
10966+ }
10967+ evms_table_header = register_sysctl_table(dev_dir_table, 1);
10968+#endif
10969+ /* Register for reboot notification */
10970+ register_reboot_notifier(&evms_notifier);
10971+
10972+#if defined(CONFIG_PPC64) || defined(CONFIG_SPARC64)
10973+ /* Register evms 32bit ioctl handlers */
10974+ lock_kernel();
10975+ register_ioctl32_conversion(EVMS_GET_INFO_LEVEL,NULL);
10976+ register_ioctl32_conversion(EVMS_SET_INFO_LEVEL,NULL);
10977+ register_ioctl32_conversion(EVMS_REDISCOVER_VOLUMES_32,
10978+ evms_rediscover);
10979+ register_ioctl32_conversion(EVMS_DELETE_VOLUME,NULL);
10980+ register_ioctl32_conversion(EVMS_PLUGIN_IOCTL_32,
10981+ evms_plugin_ioctl);
10982+ register_ioctl32_conversion(EVMS_PROCESS_NOTIFY_EVENT,NULL);
10983+ register_ioctl32_conversion(EVMS_GET_LOGICAL_DISK,NULL);
10984+ register_ioctl32_conversion(EVMS_GET_LOGICAL_DISK_INFO,NULL);
10985+ register_ioctl32_conversion(EVMS_SECTOR_IO_32, evms_sector_io);
10986+ register_ioctl32_conversion(EVMS_GET_MINOR,NULL);
10987+ register_ioctl32_conversion(EVMS_GET_VOLUME_DATA,NULL);
10988+ register_ioctl32_conversion(EVMS_GET_PLUGIN,NULL);
10989+ register_ioctl32_conversion(EVMS_COMPUTE_CSUM_32,
10990+ evms_compute_csum);
10991+ register_ioctl32_conversion(EVMS_GET_BMAP,NULL);
10992+ register_ioctl32_conversion(EVMS_GET_IOCTL_VERSION,NULL);
10993+ register_ioctl32_conversion(EVMS_GET_VERSION,NULL);
10994+ register_ioctl32_conversion(EVMS_UPDATE_DEVICE_INFO,NULL);
10995+ register_ioctl32_conversion(EVMS_CHECK_MOUNT_STATUS,NULL);
10996+ register_ioctl32_conversion(EVMS_GET_VOL_STRIPE_INFO,NULL);
10997+ unlock_kernel();
10998+#endif
10999+
11000+ }
11001+
11002+ return rc;
11003+}
11004+
11005+/*
11006+ * Function: evms_exit_module
11007+ * This function runs once when the EVMS core module is unloaded.
11008+ */
11009+static void __exit
11010+evms_exit_module(void)
11011+{
11012+ LOG_DEFAULT("EVMS v%d.%d.%d unloading ....\n",
11013+ EVMS_MAJOR_VERSION,
11014+ EVMS_MINOR_VERSION, EVMS_PATCHLEVEL_VERSION);
11015+
11016+#if defined(CONFIG_PPC64) || defined(CONFIG_SPARC64)
11017+ /* Un-Register evms 32bit ioctl handlers */
11018+ lock_kernel();
11019+ unregister_ioctl32_conversion(EVMS_GET_INFO_LEVEL);
11020+ unregister_ioctl32_conversion(EVMS_SET_INFO_LEVEL);
11021+ unregister_ioctl32_conversion(EVMS_REDISCOVER_VOLUMES_32);
11022+ unregister_ioctl32_conversion(EVMS_DELETE_VOLUME);
11023+ unregister_ioctl32_conversion(EVMS_PLUGIN_IOCTL_32);
11024+ unregister_ioctl32_conversion(EVMS_PROCESS_NOTIFY_EVENT);
11025+ unregister_ioctl32_conversion(EVMS_GET_LOGICAL_DISK);
11026+ unregister_ioctl32_conversion(EVMS_GET_LOGICAL_DISK_INFO);
11027+ unregister_ioctl32_conversion(EVMS_SECTOR_IO_32);
11028+ unregister_ioctl32_conversion(EVMS_GET_MINOR);
11029+ unregister_ioctl32_conversion(EVMS_GET_VOLUME_DATA);
11030+ unregister_ioctl32_conversion(EVMS_GET_PLUGIN);
11031+ unregister_ioctl32_conversion(EVMS_COMPUTE_CSUM_32);
11032+ unregister_ioctl32_conversion(EVMS_GET_BMAP);
11033+ unregister_ioctl32_conversion(EVMS_GET_IOCTL_VERSION);
11034+ unregister_ioctl32_conversion(EVMS_GET_VERSION);
11035+ unregister_ioctl32_conversion(EVMS_UPDATE_DEVICE_INFO);
11036+ unregister_ioctl32_conversion(EVMS_CHECK_MOUNT_STATUS);
11037+ unregister_ioctl32_conversion(EVMS_GET_VOL_STRIPE_INFO);
11038+ unlock_kernel();
11039+#endif
11040+
11041+ /* unregister with devfs
11042+ */
11043+ devfs_unregister(evms_dir_devfs_handle);
11044+ /* clean up the queue for the block device
11045+ */
11046+ blk_cleanup_queue(blk_get_queue(MKDEV(EVMS_MAJOR, 0)));
11047+ /* unregister block device
11048+ */
11049+ devfs_unregister_blkdev(EVMS_MAJOR, EVMS_DIR_NAME);
11050+ /* deallocate device arrays
11051+ */
11052+ kfree(blk_size[EVMS_MAJOR]);
11053+ blk_size[EVMS_MAJOR] = NULL;
11054+ kfree(blksize_size[EVMS_MAJOR]);
11055+ blksize_size[EVMS_MAJOR] = NULL;
11056+ kfree(hardsect_size[EVMS_MAJOR]);
11057+ hardsect_size[EVMS_MAJOR] = NULL;
11058+ read_ahead[EVMS_MAJOR] = 0;
11059+ /* deallocate logical volumes array
11060+ */
11061+ kfree(evms_logical_volumes);
11062+ /* destroy buffer head pool
11063+ */
11064+ evms_cs_destroy_pool(evms_bh_pool);
11065+ /* destroy io notify pool
11066+ */
11067+ evms_cs_destroy_pool(evms_io_notify_pool);
11068+#ifdef CONFIG_PROC_FS
11069+ if (evms_proc_dir) {
11070+ remove_proc_entry("volumes", evms_proc_dir);
11071+ remove_proc_entry("plugins", evms_proc_dir);
11072+ remove_proc_entry("info", evms_proc_dir);
11073+ remove_proc_entry("evms", NULL);
11074+ }
11075+ unregister_sysctl_table(evms_table_header);
11076+#endif
11077+}
11078+
11079+/*
11080+ * Function: evms_init_discover
11081+ * If EVMS is statically built into the kernel, this function will be called
11082+ * to perform an initial volume discovery.
11083+ */
11084+int __init
11085+evms_init_discover(void)
11086+{
11087+ /* go find volumes */
11088+ evms_discover_volumes(NULL);
11089+
11090+ /* Check if the root fs is on EVMS */
11091+ if (MAJOR(ROOT_DEV) == EVMS_MAJOR) {
11092+ find_root_fs_dev();
11093+ }
11094+
11095+ return 0;
11096+}
11097+
11098+/*
11099+ * a placeholder for cluster enablement
11100+ */
11101+void
11102+evms_cluster_init(int nodeid, int clusterid)
11103+{
11104+ /* dummy */
11105+ return;
11106+}
11107+
11108+EXPORT_SYMBOL(evms_cluster_init);
11109+
11110+/*
11111+ * a placeholder for cluster enablement
11112+ */
11113+int
11114+evms_cluster_shutdown(void)
11115+{
11116+ /* dummy */
11117+ return -1;
11118+}
11119+
11120+EXPORT_SYMBOL(evms_cluster_shutdown);
11121+
11122+static int __init
11123+evms_boot_info_level(char *str)
11124+{
11125+	int level = (int) simple_strtoul(str, NULL, 10);
11126+	if (level) {
11127+		evms_info_level = level;
11128+ }
11129+ return 1;
11130+}
11131+
11132+__setup("evms_info_level=", evms_boot_info_level);
11133+module_init(evms_init_module);
11134+module_exit(evms_exit_module);
11135+__initcall(evms_init_discover);
11136+#ifdef MODULE_LICENSE
11137+MODULE_LICENSE("GPL");
11138+#endif
11139+
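The evms_info_level= hook registered above via __setup() receives whatever text follows the matched prefix on the kernel command line. A rough user-space sketch of that prefix-dispatch pattern (the table and names below are illustrative, not the kernel's __setup machinery):

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    /* User-space analogue of __setup(): match a "key=" prefix and hand
     * the remainder of the token to the registered handler. */
    static int info_level;

    static int set_info_level(const char *val)
    {
        int level = (int)strtoul(val, NULL, 10);
        if (level)
            info_level = level;
        return 1;
    }

    struct setup_entry { const char *prefix; int (*fn)(const char *); };

    static const struct setup_entry setup_table[] = {
        { "evms_info_level=", set_info_level },
    };

    static void parse_token(const char *tok)
    {
        size_t i, n;
        for (i = 0; i < sizeof(setup_table) / sizeof(setup_table[0]); i++) {
            n = strlen(setup_table[i].prefix);
            if (!strncmp(tok, setup_table[i].prefix, n)) {
                setup_table[i].fn(tok + n);
                return;
            }
        }
    }

    int main(void)
    {
        parse_token("evms_info_level=5");
        printf("info_level=%d\n", info_level);
        return 0;
    }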
11140+/**********************************************************/
11141+/* END -- INIT/DISCOVERY support functions */
11142+/**********************************************************/
11143diff -Naur linux-2002-09-30/drivers/evms/evms_bbr.c evms-2002-09-30/drivers/evms/evms_bbr.c
11144--- linux-2002-09-30/drivers/evms/evms_bbr.c Wed Dec 31 18:00:00 1969
11145+++ evms-2002-09-30/drivers/evms/evms_bbr.c Wed Sep 25 15:04:22 2002
11146@@ -0,0 +1,1817 @@
11147+/* -*- linux-c -*- */
11148+/*
11149+ * Copyright (c) International Business Machines Corp., 2000
11150+ *
11151+ * This program is free software; you can redistribute it and/or modify
11152+ * it under the terms of the GNU General Public License as published by
11153+ * the Free Software Foundation; either version 2 of the License, or
11154+ * (at your option) any later version.
11155+ *
11156+ * This program is distributed in the hope that it will be useful,
11157+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
11158+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
11159+ * the GNU General Public License for more details.
11160+ *
11161+ * You should have received a copy of the GNU General Public License
11162+ * along with this program; if not, write to the Free Software
11163+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
11164+ */
11165+/* linux/drivers/evms/evms_bbr.c
11166+ *
11167+ * EVMS - Bad Block Relocation (BBR) Feature Plugin
11168+ *
11169+ * The BBR feature remaps I/O write failures to safe replacement locations
11170+ * on disk. Note that most disk drives have BBR built into them; this means
11171+ * that our software BBR is only activated once all of a drive's hardware
11172+ * BBR replacement sectors have been used.
11173+ */
11174+
11175+#define LOG_PREFIX "bbr: "
11176+
11177+#include <linux/config.h>
11178+#include <linux/kernel.h>
11179+#include <linux/module.h>
11180+#include <linux/mempool.h>
11181+#include <asm/uaccess.h>
11182+
11183+#include <linux/evms/evms.h>
11184+#include <linux/evms/evms_bbr_k.h>
11185+
11186+/* API prototypes. */
11187+static int bbr_discover(struct evms_logical_node ** discover_list);
11188+static int bbr_delete(struct evms_logical_node * node);
11189+static void bbr_read(struct evms_logical_node * node, struct buffer_head * bh);
11190+static void bbr_write(struct evms_logical_node * node, struct buffer_head * bh);
11191+static int bbr_ioctl(struct evms_logical_node * bbr_node,
11192+ struct inode * inode,
11193+ struct file * file,
11194+ unsigned int cmd,
11195+ unsigned long arg);
11196+static int bbr_direct_ioctl(struct inode * inode,
11197+ struct file * file,
11198+ unsigned int cmd,
11199+ unsigned long arg);
11200+static int bbr_init_io(struct evms_logical_node * bbr_node,
11201+ int io_flag,
11202+ u64 startLSN,
11203+ u64 nr_sects,
11204+ void * bufptr);
11205+
11206+/* Other function prototypes. */
11207+static int bbr_create_pools(void);
11208+static void bbr_destroy_pools(void);
11209+static u32 bbr_table_to_remap_list(struct bbr_private * bbr_id);
11210+static void bbr_io_handler(void * void_data);
11211+static void bbr_free_private(struct bbr_private * bbr_id);
11212+static inline void bbr_list_add(struct bbr_private * bbr_id);
11213+
11214+/* List of all BBR nodes. */
11215+static struct bbr_private * bbr_instances = NULL;
11216+
11217+/* Data pertaining to the I/O thread. */
11218+static struct evms_thread * bbr_io_thread = NULL;
11219+static spinlock_t bbr_io_list_lock = SPIN_LOCK_UNLOCKED;
11220+static struct list_head bbr_io_list = LIST_HEAD_INIT(bbr_io_list);
11221+
11222+/* Global pools for bbr_io_buf's and bbr_remap's. */
11223+kmem_cache_t * bbr_io_buf_slab;
11224+mempool_t * bbr_io_buf_pool;
11225+kmem_cache_t * bbr_remap_slab;
11226+mempool_t * bbr_remap_pool;
11227+
11228+/* Plugin function table and header. */
11229+static struct evms_plugin_fops function_table = {
11230+ .discover = bbr_discover,
11231+ .delete = bbr_delete,
11232+ .read = bbr_read,
11233+ .write = bbr_write,
11234+ .init_io = bbr_init_io,
11235+ .ioctl = bbr_ioctl,
11236+ .direct_ioctl = bbr_direct_ioctl
11237+};
11238+
11239+static struct evms_plugin_header plugin_header = {
11240+ .id = SetPluginID(IBM_OEM_ID,
11241+ EVMS_FEATURE,
11242+ EVMS_BBR_FEATURE_ID),
11243+ .version = {
11244+ .major = EVMS_BBR_VERSION_MAJOR,
11245+ .minor = EVMS_BBR_VERSION_MINOR,
11246+ .patchlevel = EVMS_BBR_VERSION_PATCHLEVEL
11247+ },
11248+ .required_services_version = {
11249+ .major = EVMS_BBR_COMMON_SERVICES_MAJOR,
11250+ .minor = EVMS_BBR_COMMON_SERVICES_MINOR,
11251+ .patchlevel = EVMS_BBR_COMMON_SERVICES_PATCHLEVEL
11252+ },
11253+ .fops = &function_table
11254+};
11255+
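The function_table/plugin_header pair above is the entire contract between the EVMS core and this plugin: the core only ever calls through the fops pointers. A minimal user-space sketch of that ops-table pattern (all names are illustrative; this is not the EVMS API):

    #include <stdio.h>

    struct ops {
        int  (*discover)(void);
        void (*shutdown)(void);
    };

    struct plugin {
        const char *name;
        const struct ops *fops;
    };

    static int my_discover(void) { printf("discover\n"); return 0; }
    static void my_shutdown(void) { printf("shutdown\n"); }

    static const struct ops my_ops = {
        .discover = my_discover,
        .shutdown = my_shutdown,
    };

    static const struct plugin my_plugin = { "demo", &my_ops };

    int main(void)
    {
        /* The core calls through the table and never needs plugin internals. */
        my_plugin.fops->discover();
        my_plugin.fops->shutdown();
        return 0;
    }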
11256+/**
11257+ * le_meta_data_to_cpu
11258+ *
11259+ * Convert bbr meta data from on-disk (LE) format
11260+ * to the native cpu endian format.
11261+ */
11262+void le_meta_data_to_cpu(struct evms_bbr_metadata * md)
11263+{
11264+ md->signature = le32_to_cpup(&md->signature);
11265+ md->crc = le32_to_cpup(&md->crc);
11266+ md->block_size = le32_to_cpup(&md->block_size);
11267+ md->flags = le32_to_cpup(&md->flags);
11268+ md->sequence_number = le64_to_cpup(&md->sequence_number);
11269+ md->start_sect_bbr_table = le64_to_cpup(&md->start_sect_bbr_table);
11270+ md->nr_sects_bbr_table = le64_to_cpup(&md->nr_sects_bbr_table);
11271+ md->start_replacement_sect = le64_to_cpup(&md->start_replacement_sect);
11272+ md->nr_replacement_blks = le64_to_cpup(&md->nr_replacement_blks);
11273+}
11274+
11275+/**
11276+ * le_bbr_table_sector_to_cpu
11277+ *
11278+ * Convert bbr meta data from on-disk (LE) format
11279+ * to the native cpu endian format.
11280+ */
11281+void le_bbr_table_sector_to_cpu(struct evms_bbr_table * p)
11282+{
11283+ int i;
11284+ p->signature = le32_to_cpup(&p->signature);
11285+ p->crc = le32_to_cpup(&p->crc);
11286+ p->sequence_number = le32_to_cpup(&p->sequence_number);
11287+ p->in_use_cnt = le32_to_cpup(&p->in_use_cnt);
11288+ for ( i = 0; i < EVMS_BBR_ENTRIES_PER_SECT; i++ ) {
11289+ p->entries[i].bad_sect =
11290+ le64_to_cpup(&p->entries[i].bad_sect);
11291+ p->entries[i].replacement_sect =
11292+ le64_to_cpup(&p->entries[i].replacement_sect);
11293+ }
11294+}
11295+
11296+/**
11297+ * cpu_bbr_table_sector_to_le
11298+ *
11299+ * Convert bbr meta data from cpu endian format to on-disk (LE) format
11300+ */
11301+void cpu_bbr_table_sector_to_le(struct evms_bbr_table * p,
11302+ struct evms_bbr_table * le)
11303+{
11304+ int i;
11305+ le->signature = cpu_to_le32p(&p->signature);
11306+ le->crc = cpu_to_le32p(&p->crc);
11307+ le->sequence_number = cpu_to_le32p(&p->sequence_number);
11308+ le->in_use_cnt = cpu_to_le32p(&p->in_use_cnt);
11309+ for ( i = 0; i < EVMS_BBR_ENTRIES_PER_SECT; i++ ) {
11310+ le->entries[i].bad_sect =
11311+ cpu_to_le64p(&p->entries[i].bad_sect);
11312+ le->entries[i].replacement_sect =
11313+ cpu_to_le64p(&p->entries[i].replacement_sect);
11314+ }
11315+}
11316+
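le_meta_data_to_cpu() and the two table converters above follow the usual rule for on-disk little-endian metadata: swap every field once at load time, and swap into a scratch copy before writing back. A standalone sketch of a little-endian load that works on any host byte order (the struct is hypothetical; only the convert-on-load idea matters):

    #include <stdint.h>
    #include <stdio.h>

    /* Portable little-endian load, independent of host byte order. */
    static uint32_t le32_load(const void *p)
    {
        const uint8_t *b = p;
        return (uint32_t)b[0] | (uint32_t)b[1] << 8 |
               (uint32_t)b[2] << 16 | (uint32_t)b[3] << 24;
    }

    struct demo_md {
        uint32_t signature;    /* stored little-endian on disk */
    };

    int main(void)
    {
        unsigned char raw[4] = { 0x0E, 0xBB, 0x0E, 0xBB };  /* as read from disk */
        struct demo_md md;

        md.signature = le32_load(raw);  /* native order from here on */
        printf("signature 0x%08X\n", md.signature);  /* prints 0xBB0EBB0E */
        return 0;
    }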
11317+#ifdef EVMS_BBR_DEBUG
11318+static void print_meta_data(struct evms_bbr_metadata * md)
11319+{
11320+ LOG_DEBUG("BBR Metadata Sector:\n"
11321+ " signature 0x%08X\n"
11322+ " crc 0x%08X\n"
11323+ " block_size %u\n"
11324+ " start_sect_bbr_table "PFU64"\n"
11325+ " nr_sects_bbr_table "PFU64"\n"
11326+ " start_replacement_sect "PFU64"\n"
11327+ " nr_replacement_blks "PFU64"\n",
11328+ md->signature, md->crc, md->block_size,
11329+ md->start_sect_bbr_table, md->nr_sects_bbr_table,
11330+ md->start_replacement_sect, md->nr_replacement_blks);
11331+}
11332+
11333+static void print_bbr_table_sector(struct evms_bbr_table * p)
11334+{
11335+ int i;
11336+ LOG_DEBUG("BBR Table Sector:\n"
11337+ " sig 0x%08X\n"
11338+ " crc 0x%08X\n"
11339+ " sequence %u\n"
11340+ " in_use_cnt %u\n"
11341+ " Table Entries:\n",
11342+ p->signature, p->crc, p->sequence_number, p->in_use_cnt);
11343+ for ( i = 0; i < EVMS_BBR_ENTRIES_PER_SECT; i++ ) {
11344+ LOG_DEBUG(" [%d] bad_sect: "PFU64" replacement_sect: "PFU64"\n",
11345+ i, p->entries[i].bad_sect,
11346+ p->entries[i].replacement_sect);
11347+ }
11348+}
11349+
11350+void print_binary_tree(struct bbr_runtime_remap * node)
11351+{
11352+ if (node) {
11353+ LOG_DEFAULT("["PFU64","PFU64"]\n", node->remap.bad_sect,
11354+ node->remap.replacement_sect);
11355+ print_binary_tree(node->left);
11356+ print_binary_tree(node->right);
11357+ }
11358+}
11359+
11360+static void print_remap_list(struct bbr_private * bbr_id)
11361+{
11362+ if (bbr_id->remap_root) {
11363+ LOG_DEFAULT("%s for %s\n", __FUNCTION__, bbr_id->node->name);
11364+ print_binary_tree(bbr_id->remap_root);
11365+ }
11366+}
11367+#endif
11368+
11369+/**
11370+ * validate_bbr_table_sector
11371+ *
11372+ * Check the specified BBR table sector for a valid signature and CRC.
11373+ */
11374+static int validate_bbr_table_sector(struct evms_bbr_table * p)
11375+{
11376+ int rc = 0;
11377+ int org_crc, final_crc;
11378+
11379+ if ( le32_to_cpup(&p->signature) != EVMS_BBR_TABLE_SIGNATURE ) {
11380+ LOG_ERROR("BBR table signature doesn't match!\n");
11381+ LOG_ERROR("Sector has (0x%08X) expected(0x%08X)\n",
11382+ le32_to_cpup(&p->signature),
11383+ EVMS_BBR_TABLE_SIGNATURE);
11384+ rc = -EINVAL;
11385+ } else {
11386+ if (p->crc) {
11387+ org_crc = le32_to_cpup(&p->crc);
11388+ p->crc = 0;
11389+ final_crc = evms_cs_calculate_crc(EVMS_INITIAL_CRC, p,
11390+ sizeof(*p));
11391+ if ( final_crc != org_crc ) {
11392+ LOG_ERROR("CRC failed!\n");
11393+ LOG_ERROR("Sector has (0x%08X) calculated(0x%08X)\n",
11394+ org_crc, final_crc);
11395+ rc = -EINVAL;
11396+ }
11397+ p->crc = cpu_to_le32p(&org_crc);
11398+ } else {
11399+ LOG_ERROR("BBR table sector has no CRC!\n");
11400+ rc = -EINVAL;
11401+ }
11402+ }
11403+ if (rc)
11404+ BBR_DEBUG_PRINT_TABLE_SECTOR(p);
11405+ le_bbr_table_sector_to_cpu(p);
11406+ return rc;
11407+}
11408+
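validate_bbr_table_sector() relies on a common on-disk checksum convention: the stored CRC was computed with the crc field itself zeroed, so the checker zeroes the field, recomputes over the whole sector, compares, and puts the stored value back. A self-contained illustration of that pattern, using a made-up record and a plain bitwise CRC-32 (not the kernel's evms_cs_calculate_crc):

    #include <stdint.h>
    #include <string.h>
    #include <stdio.h>

    /* Toy CRC-32 (reflected, poly 0xEDB88320), enough to show the pattern. */
    static uint32_t crc32_buf(const void *buf, size_t len)
    {
        const uint8_t *p = buf;
        uint32_t crc = 0xFFFFFFFFu;
        size_t i;
        int bit;

        for (i = 0; i < len; i++) {
            crc ^= p[i];
            for (bit = 0; bit < 8; bit++)
                crc = (crc >> 1) ^ (0xEDB88320u & -(crc & 1));
        }
        return ~crc;
    }

    struct sector {            /* hypothetical on-disk record */
        uint32_t signature;
        uint32_t crc;          /* computed with this field zeroed */
        uint8_t  payload[56];
    };

    static int validate(struct sector *s)
    {
        uint32_t stored = s->crc, computed;

        s->crc = 0;                            /* CRC covers the struct... */
        computed = crc32_buf(s, sizeof(*s));   /* ...with crc zeroed */
        s->crc = stored;                       /* restore for later rewrites */
        return computed == stored ? 0 : -1;
    }

    int main(void)
    {
        struct sector s;

        memset(&s, 0, sizeof(s));
        s.signature = 0xBB0EBB0E;
        s.crc = 0;
        s.crc = crc32_buf(&s, sizeof(s));      /* writer side of the convention */
        printf("valid: %s\n", validate(&s) == 0 ? "yes" : "no");
        return 0;
    }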
11409+/**
11410+ * update_invalid_bbr_table_sector
11411+ *
11412+ * If one copy of a BBR table sector is bad, replace it with the valid copy.
11413+ */
11414+void update_invalid_bbr_table_sector(struct evms_logical_node * node,
11415+ struct evms_bbr_table * valid,
11416+ struct evms_bbr_table * invalid,
11417+ u64 lsn)
11418+{
11419+ int rc;
11420+ struct evms_bbr_table * tmp_bbr_table;
11421+
11422+ /* Correct the invalid bbr table sector */
11423+ memcpy(invalid, valid, sizeof(struct evms_bbr_table));
11424+
11425+ /* Allocate memory for I/O */
11426+ tmp_bbr_table = kmalloc(sizeof(struct evms_bbr_table), GFP_KERNEL);
11427+ if (tmp_bbr_table) {
11428+ memset(tmp_bbr_table, 0, sizeof(struct evms_bbr_table));
11429+ cpu_bbr_table_sector_to_le(valid, tmp_bbr_table);
11430+ LOG_WARNING("Correcting BBR table sector "PFU64"\n", lsn);
11431+ rc = INIT_IO(node, 1, lsn, 1, tmp_bbr_table);
11432+ if (rc) {
11433+ LOG_ERROR("Could not correct BBR table sector "PFU64".\n",
11434+ lsn);
11435+ }
11436+ kfree(tmp_bbr_table);
11437+ }
11438+}
11439+
11440+/**
11441+ * validate_bbr_table
11442+ *
11443+ * Validate the entire range of sectors in the BBR table.
11444+ */
11445+static u32 validate_bbr_table(struct evms_bbr_metadata * md,
11446+ struct evms_bbr_table * p)
11447+{
11448+ u32 i, nr_sects;
11449+
11450+ nr_sects = md->nr_sects_bbr_table;
11451+
11452+ for ( i = 0; i < nr_sects; i++, p++ ) {
11453+ if ( validate_bbr_table_sector(p) )
11454+ break;
11455+ }
11456+
11457+ if ( i != nr_sects ) {
11458+ LOG_SERIOUS("Stopped BBR table validation at sector %u.\n", i);
11459+ nr_sects = i;
11460+ }
11461+ LOG_DEBUG("Validated %u BBR table sectors.\n", nr_sects);
11462+ return nr_sects;
11463+}
11464+
11465+/**
11466+ * validate_bbr_tables
11467+ * @node: BBR node to validate.
11468+ * @MD1: Primary metadata sector.
11469+ * @MD2: Secondary metadata sector.
11470+ * @p1: Primary BBR table.
11471+ * @p2: Secondary BBR table.
11472+ *
11473+ * Validate both copies of the BBR table. If one of them is invalid,
11474+ * try to correct the errors using the valid copy.
11475+ */
11476+static u32 validate_bbr_tables(struct evms_logical_node * node,
11477+ struct evms_bbr_metadata * MD1,
11478+ struct evms_bbr_metadata * MD2,
11479+ struct evms_bbr_table * p1,
11480+ struct evms_bbr_table * p2)
11481+{
11482+ u32 i, rc1, rc2, nr_sects;
11483+
11484+ nr_sects = MD1->nr_sects_bbr_table;
11485+ if ( nr_sects != MD2->nr_sects_bbr_table ) {
11486+ nr_sects = (nr_sects < MD2->nr_sects_bbr_table) ?
11487+ nr_sects : MD2->nr_sects_bbr_table;
11488+		LOG_SERIOUS("Sizes of BBR tables don't match. Using %u\n",
11489+ nr_sects);
11490+ }
11491+
11492+ for ( i = 0; i < nr_sects; i++, p1++, p2++ ) {
11493+ rc1 = validate_bbr_table_sector(p1);
11494+ if (rc1) {
11495+ LOG_WARNING("Invalid BBR table sector at "PFU64".\n",
11496+ MD1->start_sect_bbr_table + i);
11497+ }
11498+ rc2 = validate_bbr_table_sector(p2);
11499+ if (rc2) {
11500+ LOG_WARNING("Invalid BBR table sector at "PFU64".\n",
11501+ MD2->start_sect_bbr_table + i);
11502+ }
11503+
11504+ /* Correct BBR table errors. */
11505+ if (rc1 && rc2) {
11506+ /* Cannot fix. */
11507+ break;
11508+ } else if (rc1) {
11509+ update_invalid_bbr_table_sector(node, p2, p1,
11510+ MD1->start_sect_bbr_table + i);
11511+ continue;
11512+ } else if (rc2) {
11513+ update_invalid_bbr_table_sector(node, p1, p2,
11514+ MD2->start_sect_bbr_table + i);
11515+ continue;
11516+ }
11517+
11518+ if ( p1->sequence_number != p2->sequence_number ) {
11519+ LOG_WARNING("Sequence numbers for BBR table index %u don't match.\n", i);
11520+			LOG_WARNING("MD1 sequence_nr=%u, MD2 sequence_nr=%u\n",
11521+ p1->sequence_number, p2->sequence_number);
11522+ if ( p1->sequence_number < p2->sequence_number ) {
11523+ update_invalid_bbr_table_sector(node, p2, p1,
11524+ MD1->start_sect_bbr_table + i);
11525+ } else {
11526+ update_invalid_bbr_table_sector(node, p1, p2,
11527+ MD2->start_sect_bbr_table + i);
11528+ }
11529+ }
11530+ }
11531+ if ( i != nr_sects ) {
11532+ LOG_SERIOUS("Stopped validation at sector %u\n", i);
11533+ nr_sects = i;
11534+ }
11535+ LOG_DEBUG("Validated %u BBR table sectors.\n", nr_sects);
11536+ return nr_sects;
11537+}
11538+
11539+/**
11540+ * validate_meta_data
11541+ *
11542+ * Check the specified BBR metadata sector for a valid signature and CRC.
11543+ */
11544+static int validate_meta_data(struct evms_bbr_metadata * md)
11545+{
11546+ int org_crc, final_crc;
11547+
11548+ BBR_DEBUG_PRINT_META_DATA(md);
11549+
11550+ if ( le32_to_cpup(&md->signature) != EVMS_BBR_SIGNATURE ) {
11551+ LOG_SERIOUS("BBR signature doesn't match!\n");
11552+ LOG_SERIOUS("Found: 0x%08X Expecting: 0x%08X\n",
11553+ le32_to_cpup(&md->signature), EVMS_BBR_SIGNATURE);
11554+ return -EINVAL;
11555+ }
11556+
11557+ if (md->crc) {
11558+ org_crc = le32_to_cpup(&md->crc);
11559+ md->crc = 0;
11560+ final_crc = evms_cs_calculate_crc(EVMS_INITIAL_CRC, md,
11561+ sizeof(*md));
11562+ if ( final_crc != org_crc ) {
11563+ LOG_ERROR("CRC failed!\n");
11564+ LOG_ERROR("Sector has (0x%08X) calculated(0x%08X)\n",
11565+ org_crc, final_crc);
11566+ return -EINVAL;
11567+ }
11568+ md->crc = cpu_to_le32p(&org_crc);
11569+ } else {
11570+ LOG_WARNING("Metadata sector has no CRC!\n");
11571+ }
11572+
11573+ le_meta_data_to_cpu(md);
11574+ return 0;
11575+}
11576+
11577+/**
11578+ * load_meta_data
11579+ * @node: BBR node to read metadata from.
11580+ * @lsn: Sector to read metadata from.
11581+ * @md: Pointer to return metadata structure.
11582+ * @bbr_table: Pointer to return BBR table.
11583+ *
11584+ * Load one copy of the BBR metadata. If the metadata is valid, load the
11585+ * corresponding copy of the BBR table.
11586+ */
11587+static int load_meta_data(struct evms_logical_node * node,
11588+ u64 lsn,
11589+ struct evms_bbr_metadata ** md,
11590+ struct evms_bbr_table ** bbr_table)
11591+{
11592+ int rc;
11593+
11594+ *md = NULL;
11595+ *bbr_table = NULL;
11596+
11597+ if (!lsn) {
11598+ LOG_WARNING("No sector specified for BBR metadata on %s.\n",
11599+ node->name);
11600+ return -ENODATA;
11601+ }
11602+
11603+ /* Allocate a buffer for the metadata sector. */
11604+ *md = kmalloc(sizeof(struct evms_bbr_metadata), GFP_KERNEL);
11605+ if (!*md) {
11606+ LOG_ERROR("kmalloc error creating metadata buffer for %s.\n",
11607+ node->name);
11608+ return -ENOMEM;
11609+ }
11610+
11611+ /* Read the metadata sector. */
11612+ rc = INIT_IO(node, 0, lsn, 1, *md);
11613+ if (rc) {
11614+ LOG_ERROR("init_io error on %s.\n", node->name);
11615+ kfree(*md);
11616+ *md = NULL;
11617+ return rc;
11618+ }
11619+
11620+ /* Validate the metadata sector. */
11621+ rc = validate_meta_data(*md);
11622+ if (rc) {
11623+ LOG_ERROR("Error validating metadata for %s.\n", node->name);
11624+ kfree(*md);
11625+ *md = NULL;
11626+ return rc;
11627+ }
11628+
11629+ /* Allocate a buffer for the BBR table. */
11630+ *bbr_table = kmalloc((*md)->nr_sects_bbr_table <<
11631+ EVMS_VSECTOR_SIZE_SHIFT, GFP_KERNEL);
11632+ if (!*bbr_table) {
11633+ LOG_ERROR("kmalloc error creating BBR table buffer for %s.\n",
11634+ node->name);
11635+ kfree(*md);
11636+ *md = NULL;
11637+ return -ENOMEM;
11638+ }
11639+
11640+ /* Read the BBR table but don't validate here. */
11641+ rc = INIT_IO(node, 0, (*md)->start_sect_bbr_table,
11642+ (*md)->nr_sects_bbr_table, *bbr_table);
11643+ if (rc) {
11644+ LOG_ERROR("init_io error on %s.\n", node->name);
11645+ kfree(*md);
11646+ *md = NULL;
11647+ kfree(*bbr_table);
11648+ *bbr_table = NULL;
11649+ }
11650+
11651+ return rc;
11652+}
11653+
11654+/**
11655+ * load_feature_data
11656+ * @node: BBR node
11657+ * @ID: Return pointer to BBR private data.
11658+ *
11659+ * Load both copies of the BBR metadata and table. If one is invalid, try
11660+ * to correct it using the valid copy. When a valid copy is found, create
11661+ * a private data structure for the specified node.
11662+ */
11663+static int load_feature_data(struct evms_logical_node * node,
11664+ struct bbr_private ** ID)
11665+{
11666+ struct evms_bbr_metadata * md1 = NULL;
11667+ struct evms_bbr_metadata * md2 = NULL;
11668+ struct evms_bbr_table * table1 = NULL;
11669+ struct evms_bbr_table * table2 = NULL;
11670+ u64 lba_table1 = 0, lba_table2 = 0;
11671+ u32 nr_sects = 0;
11672+ int rc = 0, rc1, rc2;
11673+
11674+ *ID = NULL;
11675+
11676+ /* Load metadata 1 */
11677+ rc1 = load_meta_data(node,
11678+ node->feature_header->feature_data1_start_lsn,
11679+ &md1, &table1);
11680+ /* Load metadata 2 */
11681+ rc2 = load_meta_data(node,
11682+ node->feature_header->feature_data2_start_lsn,
11683+ &md2, &table2);
11684+
11685+ if (rc1 && rc2) {
11686+ /* Both copies are bad? Cannot continue. */
11687+ rc = -ENODATA;
11688+ } else if (rc1 || rc2) {
11689+ /* One copy is bad. Use the good copy. */
11690+ if (rc1) {
11691+ lba_table2 = md2->start_sect_bbr_table;
11692+ kfree(table1);
11693+ kfree(md1);
11694+ table1 = table2;
11695+ table2 = NULL;
11696+ md1 = md2;
11697+ md2 = NULL;
11698+ } else {
11699+ lba_table1 = md1->start_sect_bbr_table;
11700+ }
11701+
11702+ nr_sects = validate_bbr_table(md1, table1);
11703+ if ( nr_sects == 0 ) {
11704+ rc = -ENODATA;
11705+ }
11706+ } else {
11707+ lba_table1 = md1->start_sect_bbr_table;
11708+ lba_table2 = md2->start_sect_bbr_table;
11709+ nr_sects = validate_bbr_tables(node, md1, md2, table1, table2);
11710+ if ( nr_sects == 0 ) {
11711+ rc = -ENODATA;
11712+ }
11713+ }
11714+
11715+ if (!rc && nr_sects) {
11716+ *ID = kmalloc(sizeof(struct bbr_private), GFP_KERNEL);
11717+ if (*ID) {
11718+ memset(*ID, 0, sizeof(struct bbr_private));
11719+ (*ID)->source = node;
11720+ (*ID)->blksize_in_sects = md1->block_size >>
11721+ EVMS_VSECTOR_SIZE_SHIFT;
11722+ (*ID)->remap_root = NULL;
11723+ (*ID)->lba_table1 = lba_table1;
11724+ (*ID)->lba_table2 = lba_table2;
11725+ (*ID)->bbr_table = table1;
11726+ (*ID)->nr_sects_bbr_table = nr_sects;
11727+ if ( nr_sects < md1->nr_sects_bbr_table ) {
11728+ LOG_WARNING("Making BBR node read-only\n");
11729+ (*ID)->flag |= EVMS_VOLUME_READ_ONLY;
11730+ }
11731+ (*ID)->nr_replacement_blks = nr_sects *
11732+ EVMS_BBR_ENTRIES_PER_SECT;
11733+ (*ID)->start_replacement_sect = md1->start_replacement_sect;
11734+ (*ID)->in_use_replacement_blks = (atomic_t)ATOMIC_INIT(0);
11735+ (*ID)->bbr_id_lock = SPIN_LOCK_UNLOCKED;
11736+ if ( !bbr_remap_pool || !bbr_io_buf_pool ) {
11737+ rc = bbr_create_pools();
11738+ }
11739+ if (!rc) {
11740+ atomic_set(&(*ID)->in_use_replacement_blks,
11741+ bbr_table_to_remap_list(*ID));
11742+ }
11743+ } else {
11744+ rc = -ENOMEM;
11745+ }
11746+ }
11747+
11748+ if (!rc) {
11749+ if (!bbr_io_thread) {
11750+ const char * name = "evms_bbr_io";
11751+ bbr_io_thread = evms_cs_register_thread(bbr_io_handler,
11752+ NULL, name);
11753+ if (!bbr_io_thread) {
11754+ rc = -EINVAL;
11755+ }
11756+ }
11757+ }
11758+
11759+ /* If error, free table1. */
11760+ if (rc) {
11761+ if (table1) {
11762+ kfree(table1);
11763+ }
11764+ if (*ID) {
11765+ (*ID)->bbr_table = NULL;
11766+ bbr_free_private(*ID);
11767+ (*ID) = NULL;
11768+ }
11769+ }
11770+
11771+ /* Will never use md1, md2 and table2 again */
11772+ if (md1) {
11773+ kfree(md1);
11774+ }
11775+ if (md2) {
11776+ kfree(md2);
11777+ }
11778+ if (table2) {
11779+ kfree(table2);
11780+ }
11781+
11782+ return rc;
11783+}
11784+
11785+/**
11786+ * bbr_binary_tree_insert
11787+ *
11788+ * Insert a node into the binary tree.
11789+ */
11790+void bbr_binary_tree_insert(struct bbr_runtime_remap ** root,
11791+ struct bbr_runtime_remap * newnode)
11792+{
11793+ struct bbr_runtime_remap ** node = root;
11794+ while (node && *node) {
11795+ if ( newnode->remap.bad_sect > (*node)->remap.bad_sect ) {
11796+ node = &((*node)->right);
11797+ } else {
11798+ node = &((*node)->left);
11799+ }
11800+ }
11801+
11802+ newnode->left = newnode->right = NULL;
11803+ *node = newnode;
11804+}
11805+
11806+/**
11807+ * bbr_binary_search
11808+ *
11809+ * Search for a node that contains bad_sect = lsn.
11810+ */
11811+struct bbr_runtime_remap * bbr_binary_search(struct bbr_runtime_remap * root,
11812+ u64 lsn)
11813+{
11814+ struct bbr_runtime_remap * node = root;
11815+ while (node) {
11816+ if (node->remap.bad_sect == lsn) {
11817+ break;
11818+ }
11819+ if ( lsn > node->remap.bad_sect ) {
11820+ node = node->right;
11821+ } else {
11822+ node = node->left;
11823+ }
11824+ }
11825+ return node;
11826+}
11827+
11828+/**
11829+ * bbr_binary_tree_destroy
11830+ *
11831+ * Destroy the binary tree.
11832+ */
11833+void bbr_binary_tree_destroy(struct bbr_runtime_remap * root,
11834+ struct bbr_private * bbr_id)
11835+{
11836+ struct bbr_runtime_remap ** link = NULL;
11837+ struct bbr_runtime_remap * node = root;
11838+
11839+ while (node) {
11840+ if (node->left) {
11841+ link = &(node->left);
11842+ node = node->left;
11843+ continue;
11844+ }
11845+ if (node->right) {
11846+ link = &(node->right);
11847+ node = node->right;
11848+ continue;
11849+ }
11850+
11851+ mempool_free(node, bbr_remap_pool);
11852+ if (node == root) {
11853+ /* If root is deleted, we're done. */
11854+ break;
11855+ }
11856+
11857+ /* Back to root. */
11858+ node = root;
11859+ *link = NULL;
11860+ }
11861+}
11862+
11863+static void bbr_free_remap(struct bbr_private * bbr_id)
11864+{
11865+ unsigned long flags;
11866+ spin_lock_irqsave(&bbr_id->bbr_id_lock, flags);
11867+ bbr_binary_tree_destroy(bbr_id->remap_root, bbr_id);
11868+ bbr_id->remap_root = NULL;
11869+ spin_unlock_irqrestore(&bbr_id->bbr_id_lock, flags);
11870+}
11871+
11872+/**
11873+ * bbr_insert_remap_entry
11874+ *
11875+ * Create a new remap entry and add it to the binary tree for this node.
11876+ */
11877+static int bbr_insert_remap_entry(struct bbr_private * bbr_id,
11878+ struct evms_bbr_table_entry * new_bbr_entry)
11879+{
11880+ struct bbr_runtime_remap * newnode = NULL;
11881+ unsigned long flags;
11882+ int rc;
11883+
11884+ newnode = mempool_alloc(bbr_remap_pool, GFP_NOIO);
11885+ if (!newnode) {
11886+ rc = -ENOMEM;
11887+ LOG_SERIOUS("Could not allocate from remap pool! (rc=%d)\n", rc);
11888+ return rc;
11889+ }
11890+ newnode->remap.bad_sect = new_bbr_entry->bad_sect;
11891+ newnode->remap.replacement_sect = new_bbr_entry->replacement_sect;
11892+ spin_lock_irqsave(&bbr_id->bbr_id_lock, flags);
11893+ bbr_binary_tree_insert(&bbr_id->remap_root, newnode);
11894+ spin_unlock_irqrestore(&bbr_id->bbr_id_lock, flags);
11895+ return 0;
11896+}
11897+
11898+/**
11899+ * bbr_table_to_remap_list
11900+ *
11901+ * The on-disk BBR table is sorted by the replacement sector LBA. In order to
11902+ * improve run-time performance, the in-memory remap list must be sorted by
11903+ * the bad sector LBA. This function is called at discovery time to initialize
11904+ * the remap list. It assumes that at least one copy of the metadata
11905+ * is valid.
11906+ */
11907+static u32 bbr_table_to_remap_list(struct bbr_private * bbr_id)
11908+{
11909+ u32 in_use_blks = 0;
11910+ int i, j;
11911+ struct evms_bbr_table * p;
11912+
11913+
11914+ for ( i = 0, p = bbr_id->bbr_table;
11915+ i < bbr_id->nr_sects_bbr_table;
11916+ i++, p++ ) {
11917+ if (!p->in_use_cnt) {
11918+ break;
11919+ }
11920+ in_use_blks += p->in_use_cnt;
11921+ for ( j = 0; j < p->in_use_cnt; j++ ) {
11922+ bbr_insert_remap_entry(bbr_id, &p->entries[j]);
11923+ }
11924+ }
11925+
11926+ return in_use_blks;
11927+}
11928+
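The comment above captures the key data-structure decision in this plugin: the table on disk is ordered by replacement LBA (so new entries are appended), while lookups on the I/O path are keyed by bad LBA, so discovery re-keys everything into a binary tree. A user-space sketch of the same insert/search, minus locking (types and sector numbers are illustrative):

    #include <stdint.h>
    #include <stdio.h>

    struct remap {             /* mirrors the bad/replacement pair */
        uint64_t bad, repl;
        struct remap *left, *right;
    };

    /* Unbalanced insert keyed on the bad sector, as in bbr_binary_tree_insert(). */
    static void tree_insert(struct remap **root, struct remap *n)
    {
        while (*root)
            root = (n->bad > (*root)->bad) ? &(*root)->right
                                           : &(*root)->left;
        n->left = n->right = NULL;
        *root = n;
    }

    static const struct remap *tree_search(const struct remap *n, uint64_t bad)
    {
        while (n && n->bad != bad)
            n = (bad > n->bad) ? n->right : n->left;
        return n;
    }

    int main(void)
    {
        /* Entries as they might sit on disk, ordered by replacement LBA. */
        static const uint64_t table[][2] = {
            { 9012, 500 }, { 77, 501 }, { 4242, 502 },
        };
        struct remap *root = NULL, nodes[3];
        const struct remap *hit;
        int i;

        for (i = 0; i < 3; i++) {
            nodes[i].bad = table[i][0];
            nodes[i].repl = table[i][1];
            tree_insert(&root, &nodes[i]);
        }
        hit = tree_search(root, 4242);
        if (hit)
            printf("sector 4242 -> %llu\n", (unsigned long long)hit->repl);
        return 0;
    }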
11929+/**
11930+ * bbr_search_remap_entry
11931+ *
11932+ * Search for a remap entry for the specified sector. If found, return a pointer to
11933+ * the table entry. Otherwise, return NULL.
11934+ */
11935+static struct evms_bbr_table_entry * bbr_search_remap_entry(struct bbr_private * bbr_id,
11936+ u64 lsn)
11937+{
11938+ struct bbr_runtime_remap * p;
11939+ unsigned long flags;
11940+
11941+ spin_lock_irqsave(&bbr_id->bbr_id_lock, flags);
11942+ p = bbr_binary_search(bbr_id->remap_root, lsn);
11943+ spin_unlock_irqrestore(&bbr_id->bbr_id_lock, flags);
11944+ if (p) {
11945+ return (&p->remap);
11946+ } else {
11947+ return NULL;
11948+ }
11949+}
11950+
11951+/**
11952+ * bbr_remap
11953+ *
11954+ * If *lsn is in the remap table, return TRUE and modify *lsn;
11955+ * otherwise, return FALSE.
11956+ */
11957+static inline int bbr_remap(struct bbr_private * bbr_id,
11958+ u64 * lsn)
11959+{
11960+ struct evms_bbr_table_entry *e;
11961+
11962+ if ( atomic_read(&bbr_id->in_use_replacement_blks) &&
11963+ ! (bbr_id->flag & BBR_STOP_REMAP) ) {
11964+ e = bbr_search_remap_entry(bbr_id, *lsn);
11965+ if (e) {
11966+ *lsn = e->replacement_sect;
11967+ LOG_EXTRA("%s replacement sector (LSN="PFU64")\n",
11968+ __FUNCTION__, *lsn);
11969+ return TRUE;
11970+ }
11971+ }
11972+ return FALSE;
11973+}
11974+
11975+/**
11976+ * bbr_remap_probe
11977+ *
11978+ * If any of the sectors in the range [lsn, lsn+nr_sects] are in the remap
11979+ * table, return TRUE; otherwise, return FALSE.
11980+ */
11981+static inline int bbr_remap_probe(struct bbr_private * bbr_id,
11982+ u64 lsn, u64 nr_sects)
11983+{
11984+ u64 tmp, cnt;
11985+
11986+ if ( atomic_read(&bbr_id->in_use_replacement_blks) &&
11987+ ! (bbr_id->flag & BBR_STOP_REMAP) ) {
11988+ for ( cnt = 0, tmp = lsn;
11989+ cnt < nr_sects;
11990+ cnt += bbr_id->blksize_in_sects, tmp = lsn + cnt) {
11991+ if ( bbr_remap(bbr_id,&tmp) ) {
11992+ return TRUE;
11993+ }
11994+ }
11995+ }
11996+ return FALSE;
11997+}
11998+
11999+static void *bbr_slab_pool_alloc(int gfp_mask, void * data)
12000+{
12001+ return kmem_cache_alloc(data, gfp_mask);
12002+}
12003+
12004+static void bbr_slab_pool_free(void *ptr, void * data)
12005+{
12006+ kmem_cache_free(data, ptr);
12007+}
12008+
12009+static int bbr_create_pools(void)
12010+{
12011+ /* Create a memory pool for the remap list. */
12012+ if (!bbr_remap_slab) {
12013+ bbr_remap_slab = kmem_cache_create("BBR_Remap_Slab",
12014+ sizeof(struct bbr_runtime_remap),
12015+ 0, SLAB_HWCACHE_ALIGN,
12016+ NULL, NULL);
12017+ if (!bbr_remap_slab) {
12018+ panic("Unable to create BBR remap cache.");
12019+ }
12020+ }
12021+ if (!bbr_remap_pool) {
12022+ bbr_remap_pool = mempool_create(64, bbr_slab_pool_alloc,
12023+ bbr_slab_pool_free,
12024+ bbr_remap_slab);
12025+ if (!bbr_remap_pool) {
12026+ panic("Unable to create BBR remap pool.");
12027+ }
12028+ }
12029+
12030+ /* Create a memory pool for the BBR I/O anchors. */
12031+ if (!bbr_io_buf_slab) {
12032+ bbr_io_buf_slab = kmem_cache_create("BBR_IO_Buf_Slab",
12033+ sizeof(struct bbr_io_buffer),
12034+ 0, SLAB_HWCACHE_ALIGN,
12035+ NULL, NULL);
12036+ if (!bbr_io_buf_slab) {
12037+ panic("Unable to create BBR I/O buffer cache.");
12038+ }
12039+ }
12040+ if (!bbr_io_buf_pool) {
12041+ bbr_io_buf_pool = mempool_create(256, bbr_slab_pool_alloc,
12042+ bbr_slab_pool_free,
12043+ bbr_io_buf_slab);
12044+ if (!bbr_io_buf_pool) {
12045+ panic("Unable to create BBR I/O buffer pool.");
12046+ }
12047+ }
12048+
12049+ return 0;
12050+}
12051+
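bbr_create_pools() pairs each slab cache with a mempool so that allocations on the I/O path can fall back to a preallocated reserve instead of failing under memory pressure. A user-space sketch of that reserve idea (this is not the kernel mempool API; malloc stands in for the slab cache):

    #include <stdlib.h>
    #include <stdio.h>

    struct obj { struct obj *next; char payload[64]; };

    struct pool { struct obj *free; };

    /* Preallocate a reserve so later allocations can always make progress. */
    static int pool_init(struct pool *p, int reserve)
    {
        int i;

        p->free = NULL;
        for (i = 0; i < reserve; i++) {
            struct obj *o = malloc(sizeof(*o));
            if (!o)
                return -1;
            o->next = p->free;
            p->free = o;
        }
        return 0;
    }

    static struct obj *pool_alloc(struct pool *p)
    {
        struct obj *o = malloc(sizeof(*o));    /* try the allocator first */
        if (!o && p->free) {                   /* fall back to the reserve */
            o = p->free;
            p->free = o->next;
        }
        return o;
    }

    static void pool_free(struct pool *p, struct obj *o)
    {
        o->next = p->free;     /* returned objects refill the reserve */
        p->free = o;
    }

    int main(void)
    {
        struct pool p;
        struct obj *o;

        if (pool_init(&p, 4))
            return 1;
        o = pool_alloc(&p);
        printf("got %p\n", (void *)o);
        pool_free(&p, o);
        return 0;
    }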
12052+static void bbr_destroy_pools(void)
12053+{
12054+ if (bbr_io_buf_pool) {
12055+ mempool_destroy(bbr_io_buf_pool);
12056+ bbr_io_buf_pool = NULL;
12057+ }
12058+ if (bbr_io_buf_slab) {
12059+ kmem_cache_destroy(bbr_io_buf_slab);
12060+ bbr_io_buf_slab = NULL;
12061+ }
12062+ if (bbr_remap_pool) {
12063+ mempool_destroy(bbr_remap_pool);
12064+ bbr_remap_pool = NULL;
12065+ }
12066+ if (bbr_remap_slab) {
12067+ kmem_cache_destroy(bbr_remap_slab);
12068+ bbr_remap_slab = NULL;
12069+ }
12070+}
12071+
12072+/**
12073+ * bbr_discover
12074+ *
12075+ * Search through the discover list looking for objects with BBR metadata.
12076+ * Remove each one from the list and replace it with a new BBR node.
12077+ */
12078+static int bbr_discover(struct evms_logical_node ** discover_list)
12079+{
12080+ struct evms_logical_node * node, * next_node;
12081+ struct evms_logical_node * bbr_node = NULL;
12082+ struct bbr_private * bbr_id;
12083+ int bad_blocks, rc = 0;
12084+
12085+ MOD_INC_USE_COUNT;
12086+
12087+ next_node = *discover_list;
12088+ while (next_node) {
12089+ node = next_node;
12090+ next_node = node->next;
12091+
12092+ /* The node must have a BBR feature-header. */
12093+ if ( ! node->feature_header ||
12094+ node->feature_header->feature_id != plugin_header.id ) {
12095+ continue;
12096+ }
12097+
12098+ rc = load_feature_data(node, &bbr_id);
12099+ if (rc) {
12100+ /* Error loading feature data.
12101+ * This node belongs to us, but metadata is invalid,
12102+ * - remove it from the discovery list
12103+ * - delete it
12104+ * - clear error code then continue.
12105+			 * Will consider creating a read-only BBR node in
12106+ * the future.
12107+ */
12108+ LOG_SERIOUS("Error in node (%s) with "PFU64" sectors.\n",
12109+ node->name, node->total_vsectors);
12110+ evms_cs_remove_logical_node_from_list(discover_list,
12111+ node);
12112+ DELETE(node);
12113+ rc = 0;
12114+ continue;
12115+ }
12116+
12117+ rc = evms_cs_allocate_logical_node(&bbr_node);
12118+ if (rc) {
12119+ LOG_SERIOUS("Could not allocate logical node! rc=%d\n", rc);
12120+ bbr_free_private(bbr_id);
12121+ continue;
12122+ }
12123+
12124+ MOD_INC_USE_COUNT;
12125+ bbr_node->volume_info = node->volume_info;
12126+ bbr_node->flags |= node->flags;
12127+ bbr_node->plugin = &plugin_header;
12128+ strcpy(bbr_node->name,
12129+ node->feature_header->object_name);
12130+ bbr_node->hardsector_size = node->hardsector_size;
12131+ bbr_node->total_vsectors = node->total_vsectors - 2 -
12132+ node->feature_header->feature_data1_size -
12133+ node->feature_header->feature_data2_size;
12134+ bbr_node->block_size = node->block_size;
12135+ bbr_node->private = bbr_id;
12136+ bbr_id->node = bbr_node;
12137+
12138+ /* Free the feature header */
12139+ kfree(node->feature_header);
12140+ node->feature_header = NULL;
12141+ evms_cs_remove_logical_node_from_list(discover_list, node);
12142+
12143+ /* If bad blocks exist, give warning */
12144+ bad_blocks = atomic_read(&bbr_id->in_use_replacement_blks);
12145+ if (bad_blocks) {
12146+ BBR_DEBUG_PRINT_REMAP_LIST(bbr_id);
12147+ LOG_WARNING("%s has %d bad blocks.\n",
12148+ bbr_id->source->name, bad_blocks);
12149+ LOG_WARNING("There are "PFU64" total replacement blocks.\n",
12150+ bbr_id->nr_replacement_blks);
12151+ LOG_WARNING("There are "PFU64" remaining replacement blocks.\n",
12152+ bbr_id->nr_replacement_blks -
12153+ bad_blocks);
12154+ }
12155+
12156+ evms_cs_add_logical_node_to_list(discover_list, bbr_node);
12157+ bbr_list_add(bbr_id);
12158+ }
12159+
12160+ MOD_DEC_USE_COUNT;
12161+ return rc;
12162+}
12163+
12164+static inline void bbr_list_add(struct bbr_private * bbr_id)
12165+{
12166+ bbr_id->next = bbr_instances;
12167+ bbr_instances = bbr_id;
12168+}
12169+
12170+static void bbr_list_remove(struct bbr_private * bbr_id)
12171+{
12172+ struct bbr_private ** p;
12173+
12174+ for ( p = &bbr_instances; *p; p = &(*p)->next ) {
12175+ if ( *p == bbr_id ) {
12176+ *p = (*p)->next;
12177+ break;
12178+ }
12179+ }
12180+}
12181+
12182+static struct bbr_private * bbr_find_private(char * object_name)
12183+{
12184+ struct bbr_private * p;
12185+
12186+ for ( p = bbr_instances; p; p = p->next ) {
12187+ if ( ! strncmp(p->node->name, object_name,
12188+ EVMS_VOLUME_NAME_SIZE) ) {
12189+ return p;
12190+ }
12191+ }
12192+ return NULL;
12193+}
12194+
12195+static void bbr_free_private(struct bbr_private * bbr_id)
12196+{
12197+ if (bbr_id->remap_root) {
12198+ bbr_free_remap(bbr_id);
12199+ }
12200+ if (bbr_id->bbr_table) {
12201+ kfree(bbr_id->bbr_table);
12202+ }
12203+ bbr_list_remove(bbr_id);
12204+ kfree(bbr_id);
12205+}
12206+
12207+/**
12208+ * bbr_delete
12209+ *
12210+ * Delete the specified BBR node and the node it is built on. If the last BBR
12211+ * node is deleted, shut down the I/O thread.
12212+ */
12213+static int bbr_delete(struct evms_logical_node * bbr_node)
12214+{
12215+ struct bbr_private * bbr_id;
12216+ int rc;
12217+
12218+ bbr_id = bbr_node->private;
12219+
12220+ rc = DELETE(bbr_id->source);
12221+ if (!rc) {
12222+ /* Now cleanup and go away */
12223+ bbr_free_private(bbr_id);
12224+ evms_cs_deallocate_logical_node(bbr_node);
12225+ if (!bbr_instances) {
12226+ bbr_destroy_pools();
12227+ if (bbr_io_thread) {
12228+ evms_cs_unregister_thread(bbr_io_thread);
12229+ bbr_io_thread = NULL;
12230+ }
12231+ }
12232+ MOD_DEC_USE_COUNT;
12233+ }
12234+ return rc;
12235+}
12236+
12237+static struct bbr_io_buffer * allocate_bbr_io_buf(struct bbr_private * bbr_id,
12238+ struct buffer_head * bh,
12239+ int rw)
12240+{
12241+ struct bbr_io_buffer * bbr_io_buf;
12242+
12243+ bbr_io_buf = mempool_alloc(bbr_io_buf_pool, GFP_NOIO);
12244+ if (bbr_io_buf) {
12245+ memset(bbr_io_buf, 0, sizeof(struct bbr_io_buffer));
12246+ INIT_LIST_HEAD(&bbr_io_buf->bbr_io_list);
12247+ bbr_io_buf->bbr_id = bbr_id;
12248+ bbr_io_buf->bh = bh;
12249+ bbr_io_buf->rw = rw;
12250+ } else {
12251+ LOG_WARNING("Could not allocate from BBR I/O buffer pool!\n");
12252+ }
12253+ return bbr_io_buf;
12254+}
12255+
12256+static void free_bbr_io_buf(struct bbr_io_buffer * bbr_io_buf)
12257+{
12258+ mempool_free(bbr_io_buf, bbr_io_buf_pool);
12259+}
12260+
12261+/**
12262+ * bbr_io_remap_error
12263+ * @bbr_id: Private data for the BBR node.
12264+ * @rw: READ or WRITE.
12265+ * @starting_lsn: Starting sector of request to remap.
12266+ * @count: Number of sectors in the request.
12267+ * @buffer: Data buffer for the request.
12268+ *
12269+ * For the requested range, try to write each sector individually. For each
12270+ * sector that fails, find the next available remap location and write the
12271+ * data to that new location. Then update the table and write both copies
12272+ * of the table to disk. Finally, update the in-memory mapping and do any
12273+ * other necessary bookkeeping.
12274+ */
12275+static int bbr_io_remap_error(struct bbr_private * bbr_id,
12276+ int rw,
12277+ u64 starting_lsn,
12278+ u64 count,
12279+ char * buffer )
12280+{
12281+ struct evms_bbr_table * bbr_table;
12282+ unsigned long table_sector_index;
12283+ unsigned long table_sector_offset;
12284+ unsigned long index;
12285+ u64 lsn, new_lsn;
12286+ int rc;
12287+
12288+ if ( rw == READ ) {
12289+ /* Nothing can be done about read errors. */
12290+ return -EIO;
12291+ }
12292+
12293+ /* For each sector in the request. */
12294+ for ( lsn = 0; lsn < count; lsn++, buffer += EVMS_VSECTOR_SIZE ) {
12295+ rc = INIT_IO(bbr_id->source, rw, starting_lsn + lsn, 1, buffer);
12296+ while (rc) {
12297+ if ( bbr_id->flag & BBR_STOP_REMAP ) {
12298+ /* Can't allow new remaps if the
12299+ * engine told us to stop.
12300+ */
12301+ LOG_ERROR("Object %s: Bad sector ("PFU64"), but remapping is turned off.\n",
12302+ bbr_id->node->name, starting_lsn+lsn);
12303+ return -EIO;
12304+ }
12305+
12306+ /* Find the next available relocation sector. */
12307+ new_lsn = atomic_read(&bbr_id->in_use_replacement_blks);
12308+ if ( new_lsn >= bbr_id->nr_replacement_blks ) {
12309+ /* No more replacement sectors available. */
12310+ return -EIO;
12311+ }
12312+ new_lsn += bbr_id->start_replacement_sect;
12313+
12314+ /* Write the data to its new location. */
12315+ LOG_WARNING("Object %s: Trying to remap bad sector ("PFU64") to sector ("PFU64")\n",
12316+ bbr_id->node->name, starting_lsn + lsn,
12317+ new_lsn);
12318+ rc = INIT_IO(bbr_id->source, rw, new_lsn, 1, buffer);
12319+ if (rc) {
12320+ /* This replacement sector is bad.
12321+ * Try the next one.
12322+ */
12323+ LOG_ERROR("Object %s: Replacement sector ("PFU64") is bad. Skipping.\n",
12324+ bbr_id->node->name, new_lsn);
12325+ atomic_inc(&bbr_id->in_use_replacement_blks);
12326+ continue;
12327+ }
12328+
12329+ /* Add this new entry to the on-disk table. */
12330+ table_sector_index = new_lsn -
12331+ bbr_id->start_replacement_sect;
12332+ table_sector_offset = table_sector_index /
12333+ EVMS_BBR_ENTRIES_PER_SECT;
12334+ index = table_sector_index % EVMS_BBR_ENTRIES_PER_SECT;
12335+
12336+ bbr_table = &bbr_id->bbr_table[table_sector_offset];
12337+ bbr_table->entries[index].bad_sect = starting_lsn + lsn;
12338+ bbr_table->entries[index].replacement_sect = new_lsn;
12339+ bbr_table->in_use_cnt++;
12340+ bbr_table->sequence_number++;
12341+ bbr_table->crc = 0;
12342+ bbr_table->crc = evms_cs_calculate_crc(EVMS_INITIAL_CRC,
12343+ bbr_table,
12344+ sizeof(struct evms_bbr_table));
12345+
12346+ /* Write the table to disk. */
12347+ cpu_bbr_table_sector_to_le(bbr_table, bbr_table);
12348+ if ( bbr_id->lba_table1 ) {
12349+ rc = INIT_IO(bbr_id->source, WRITE,
12350+ bbr_id->lba_table1 +
12351+ table_sector_offset,
12352+ 1, bbr_table);
12353+ }
12354+ if ( bbr_id->lba_table2 ) {
12355+ rc |= INIT_IO(bbr_id->source, WRITE,
12356+ bbr_id->lba_table2 +
12357+ table_sector_offset,
12358+ 1, bbr_table);
12359+ }
12360+ le_bbr_table_sector_to_cpu(bbr_table);
12361+
12362+ if (rc) {
12363+ /* Error writing one of the tables to disk. */
12364+ LOG_ERROR("Object %s: Error updating BBR tables on disk.\n",
12365+ bbr_id->node->name);
12366+ return rc;
12367+ }
12368+
12369+ /* Insert a new entry in the remapping binary-tree. */
12370+ rc = bbr_insert_remap_entry(bbr_id,
12371+ &bbr_table->entries[index]);
12372+ if (rc) {
12373+ LOG_ERROR("Object %s: Error adding new entry to remap tree.\n",
12374+ bbr_id->node->name);
12375+ return rc;
12376+ }
12377+
12378+ atomic_inc(&bbr_id->in_use_replacement_blks);
12379+ }
12380+ }
12381+
12382+ return 0;
12383+}
12384+
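In bbr_io_remap_error() above, replacement blocks are consumed strictly in order, so a new remap's position in the on-disk table follows from simple division, and only one table sector ever needs rewriting per new remap. A worked example of that index arithmetic, assuming 31 entries per table sector purely for illustration (the real value comes from the metadata layout via EVMS_BBR_ENTRIES_PER_SECT):

    #include <stdio.h>

    #define ENTRIES_PER_SECT 31    /* assumed for this illustration only */

    int main(void)
    {
        unsigned long table_sector_index = 70;  /* the 71st replacement block */
        unsigned long sector = table_sector_index / ENTRIES_PER_SECT;
        unsigned long slot   = table_sector_index % ENTRIES_PER_SECT;

        /* 70 / 31 = 2 and 70 % 31 = 8: the new entry lands in table
         * sector 2, slot 8, so only that sector is rewritten on disk. */
        printf("table sector %lu, slot %lu\n", sector, slot);
        return 0;
    }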
12385+/**
12386+ * bbr_io_process_request
12387+ *
12388+ * For each sector in this request, check if the sector has already
12389+ * been remapped. If so, process all previous sectors in the request,
12390+ * followed by the remapped sector. Then reset the starting lsn and
12391+ * count, and keep going with the rest of the request as if it were
12392+ * a whole new request. If any of the INIT_IO's return an error,
12393+ * call the remapper to relocate the bad sector(s).
12394+ */
12395+static int bbr_io_process_request(struct bbr_io_buffer * bbr_io_buf)
12396+{
12397+ struct bbr_private * bbr_id = bbr_io_buf->bbr_id;
12398+ u64 starting_lsn = bbr_io_buf->bh->b_rsector;
12399+ u64 count = bbr_io_buf->bh->b_size >> EVMS_VSECTOR_SIZE_SHIFT;
12400+ u64 lsn, remapped_lsn;
12401+ char * buffer = bbr_io_buf->bh->b_data;
12402+ int rc = 0, rw = bbr_io_buf->rw;
12403+
12404+ /* For each sector in this request, check if this sector has already
12405+ * been remapped. If so, process all previous sectors in this request,
12406+ * followed by the remapped sector. Then reset the starting lsn and
12407+ * count and keep going with the rest of the request as if it were
12408+ * a whole new request.
12409+ */
12410+ for ( lsn = 0; lsn < count && !(bbr_id->flag & BBR_STOP_REMAP); lsn++ ) {
12411+ remapped_lsn = starting_lsn + lsn;
12412+ rc = bbr_remap(bbr_id, &remapped_lsn);
12413+ if (!rc) {
12414+ /* This sector is fine. */
12415+ continue;
12416+ }
12417+
12418+ /* Process all sectors in the request up to this one. */
12419+ if ( lsn > 0 ) {
12420+ rc = INIT_IO(bbr_id->source, rw,
12421+ starting_lsn, lsn, buffer);
12422+ if (rc) {
12423+ /* If this I/O failed, then one of the sectors
12424+ * in this request needs to be relocated.
12425+ */
12426+ rc = bbr_io_remap_error(bbr_id, rw, starting_lsn,
12427+ lsn, buffer);
12428+ if (rc) {
12429+ return rc;
12430+ }
12431+ }
12432+ buffer += (lsn << EVMS_VSECTOR_SIZE_SHIFT);
12433+ }
12434+
12435+ /* Process the remapped sector. */
12436+ rc = INIT_IO(bbr_id->source, rw, remapped_lsn, 1, buffer);
12437+ if (rc) {
12438+			/* BUGBUG - Need more processing if this caused
12439+			 * an error. If this I/O failed, then the existing
12440+ * remap is now bad, and we need to find a new remap.
12441+ * Can't use bbr_io_remap_error(), because the existing
12442+ * map entry needs to be changed, not added again, and
12443+ * the original table entry also needs to be changed.
12444+ */
12445+ return rc;
12446+ }
12447+
12448+ buffer += EVMS_VSECTOR_SIZE;
12449+ starting_lsn += (lsn + 1);
12450+ count -= (lsn + 1);
12451+ lsn = -1;
12452+ }
12453+
12454+ /* Check for any remaining sectors after the last split. This could
12455+ * potentially be the whole request, but that should be a rare case
12456+ * because requests should only be processed by the thread if we know
12457+ * an error occurred or they contained one or more remapped sectors.
12458+ */
12459+ if ( count ) {
12460+ rc = INIT_IO(bbr_id->source, rw, starting_lsn, count, buffer);
12461+ if (rc) {
12462+ /* If this I/O failed, then one of the sectors in this
12463+ * request needs to be relocated.
12464+ */
12465+ rc = bbr_io_remap_error(bbr_id, rw, starting_lsn,
12466+ count, buffer);
12467+ if (rc) {
12468+ return rc;
12469+ }
12470+ }
12471+ }
12472+
12473+ return 0;
12474+}
12475+
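The splitting logic in bbr_io_process_request() is easier to follow on a flat example: scan the request, and at each remapped sector flush the clean prefix as one I/O, issue the remapped sector on its own, then restart the scan on the remainder (the lsn = -1 trick relies on the unsigned increment wrapping back to zero). A user-space sketch with a stubbed remap lookup (all sector numbers invented):

    #include <stdint.h>
    #include <stdio.h>

    /* Stub: pretend only sector 105 is remapped. Illustrative only. */
    static int is_remapped(uint64_t lsn) { return lsn == 105; }

    static void do_io(uint64_t lsn, uint64_t count)
    {
        printf("io: %llu..%llu\n", (unsigned long long)lsn,
               (unsigned long long)(lsn + count - 1));
    }

    int main(void)
    {
        uint64_t start = 100, count = 10, lsn;

        for (lsn = 0; lsn < count; lsn++) {
            if (!is_remapped(start + lsn))
                continue;
            if (lsn > 0)
                do_io(start, lsn);       /* clean prefix as one I/O */
            do_io(9999, 1);              /* remapped sector on its own */
            start += lsn + 1;            /* restart after the split */
            count -= lsn + 1;
            lsn = (uint64_t)-1;          /* loop ++ wraps this back to 0 */
        }
        if (count)
            do_io(start, count);         /* clean tail */
        return 0;
    }

Running this prints io: 100..104, io: 9999..9999, io: 106..109 -- the ten-sector request split into three I/Os around the one remapped sector.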
12476+/**
12477+ * bbr_io_handler
12478+ *
12479+ * This is the handler for the bbr_io_thread. It continuously loops,
12480+ * taking I/O requests off its list and processing them. If nothing
12481+ * is on the list, the thread goes back to sleep until specifically
12482+ * woken up.
12483+ *
12484+ * I/O requests should only be sent to this thread if we know that:
12485+ * a) the request contains at least one remapped sector.
12486+ * or
12487+ * b) the request caused an error on the normal I/O path.
12488+ * This function uses synchronous I/O, so sending a request to this
12489+ * thread that doesn't need special processing will cause severe
12490+ * performance degradation.
12491+ */
12492+static void bbr_io_handler(void * void_data)
12493+{
12494+ struct bbr_io_buffer * bbr_io_buf;
12495+ struct buffer_head * bh;
12496+ unsigned long flags;
12497+ int rc = 0;
12498+
12499+ while (1) {
12500+ /* Process bbr_io_list, one entry at a time. */
12501+ spin_lock_irqsave(&bbr_io_list_lock, flags);
12502+ if (list_empty(&bbr_io_list)) {
12503+ /* No more items on the list. */
12504+ spin_unlock_irqrestore(&bbr_io_list_lock, flags);
12505+ break;
12506+ }
12507+ bbr_io_buf = list_entry(bbr_io_list.next,
12508+ struct bbr_io_buffer, bbr_io_list);
12509+ list_del(&bbr_io_buf->bbr_io_list);
12510+ spin_unlock_irqrestore(&bbr_io_list_lock, flags);
12511+
12512+ rc = bbr_io_process_request(bbr_io_buf);
12513+
12514+ /* Clean up and complete the original I/O. */
12515+ bh = bbr_io_buf->bh;
12516+ if (bh->b_end_io) {
12517+ free_bbr_io_buf(bbr_io_buf);
12518+ evms_cs_volume_request_in_progress(bh->b_rdev, -1, NULL);
12519+ bh->b_end_io(bh, rc ? 0 : 1);
12520+ } else {
12521+ /* A request that originated from bbr_init_io. */
12522+ bbr_io_buf->rc = rc;
12523+ complete(bbr_io_buf->complete);
12524+ }
12525+ }
12526+}
12527+
12528+/**
12529+ * bbr_schedule_io
12530+ *
12531+ * Place the specified bbr_io_buf on the thread's processing list.
12532+ */
12533+static void bbr_schedule_io(struct bbr_io_buffer * bbr_io_buf)
12534+{
12535+ unsigned long flags;
12536+
12537+ spin_lock_irqsave(&bbr_io_list_lock, flags);
12538+ list_add_tail(&bbr_io_buf->bbr_io_list, &bbr_io_list);
12539+ spin_unlock_irqrestore(&bbr_io_list_lock, flags);
12540+ evms_cs_wakeup_thread(bbr_io_thread);
12541+}
12542+
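bbr_schedule_io() is the producer half of a classic locked-list worker pattern: append under a spinlock, then wake the thread, which drains the list one entry at a time and sleeps when it is empty. A pthread sketch of the same shape, with a condition variable standing in for evms_cs_wakeup_thread() and a counter standing in for the list:

    #include <pthread.h>
    #include <stdio.h>

    static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
    static pthread_cond_t wake = PTHREAD_COND_INITIALIZER;
    static int queued, done;

    static void *worker(void *arg)
    {
        (void)arg;
        pthread_mutex_lock(&lock);
        for (;;) {
            while (queued) {             /* drain the list... */
                queued--;
                pthread_mutex_unlock(&lock);
                printf("processing one request\n");
                pthread_mutex_lock(&lock);
            }
            if (done)
                break;
            pthread_cond_wait(&wake, &lock);   /* ...then sleep until woken */
        }
        pthread_mutex_unlock(&lock);
        return NULL;
    }

    static void schedule_io(void)
    {
        pthread_mutex_lock(&lock);
        queued++;                        /* list_add_tail() analogue */
        pthread_mutex_unlock(&lock);
        pthread_cond_signal(&wake);      /* evms_cs_wakeup_thread() analogue */
    }

    int main(void)
    {
        pthread_t t;

        pthread_create(&t, NULL, worker, NULL);
        schedule_io();
        schedule_io();

        pthread_mutex_lock(&lock);
        done = 1;
        pthread_mutex_unlock(&lock);
        pthread_cond_signal(&wake);

        pthread_join(t, NULL);
        return 0;
    }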
12543+/**
12544+ * bbr_read
12545+ *
12546+ * If there are any remapped sectors on this object, send this request over
12547+ * to the thread for processing. Otherwise send it down the stack normally.
12548+ */
12549+static void bbr_read(struct evms_logical_node * bbr_node,
12550+ struct buffer_head * bh )
12551+{
12552+ struct bbr_private * bbr_id = bbr_node->private;
12553+ struct bbr_io_buffer * bbr_io_buf;
12554+
12555+ if ( bh->b_rsector + (bh->b_size >> EVMS_VSECTOR_SIZE_SHIFT) >
12556+ bbr_node->total_vsectors ) {
12557+ /* Request is off the end of the object. */
12558+ bh->b_end_io(bh, 0);
12559+ return;
12560+ }
12561+
12562+ if ( atomic_read(&bbr_id->in_use_replacement_blks) == 0 ||
12563+ bbr_id->flag & BBR_STOP_REMAP ||
12564+ ! bbr_remap_probe(bbr_id, bh->b_rsector,
12565+ bh->b_size >> EVMS_VSECTOR_SIZE_SHIFT) ) {
12566+ /* No existing remaps, this request doesn't contain any
12567+ * remapped sectors, or the engine told us not to remap.
12568+ */
12569+ R_IO(bbr_id->source, bh);
12570+ return;
12571+ }
12572+
12573+ /* This request has at least one remapped sector. */
12574+ bbr_io_buf = allocate_bbr_io_buf(bbr_id, bh, READ);
12575+ if (!bbr_io_buf) {
12576+ /* Can't get memory to track the I/O. */
12577+ bh->b_end_io(bh, 0);
12578+ return;
12579+ }
12580+
12581+ evms_cs_volume_request_in_progress(bbr_io_buf->bh->b_rdev, +1, NULL);
12582+ bbr_schedule_io(bbr_io_buf);
12583+}
12584+
12585+/**
12586+ * bbr_write_callback
12587+ *
12588+ * This is the callback for normal write requests. Check for an error
12589+ * during the I/O, and send to the thread for processing if necessary.
12590+ */
12591+static void bbr_write_callback(struct buffer_head * bh,
12592+ int uptodate)
12593+{
12594+ struct bbr_io_buffer * bbr_io_buf = bh->b_private;
12595+
12596+ bh->b_end_io = bbr_io_buf->org_end_io;
12597+ bh->b_private = bbr_io_buf->org_private;
12598+ bh->b_rsector = bbr_io_buf->org_rsector;
12599+ bh->b_rdev = bbr_io_buf->org_dev;
12600+
12601+ if (!(bbr_io_buf->bbr_id->flag & BBR_STOP_REMAP) &&
12602+ !uptodate) {
12603+ LOG_ERROR("Object %s: Write failure on sector ("PFU64"). Scheduling for retry.\n",
12604+ bbr_io_buf->bbr_id->node->name, (u64)bbr_io_buf->bh->b_rsector);
12605+ bbr_schedule_io(bbr_io_buf);
12606+ } else {
12607+ free_bbr_io_buf(bbr_io_buf);
12608+ evms_cs_volume_request_in_progress(bh->b_rdev, -1, NULL);
12609+ bh->b_end_io(bh, uptodate);
12610+ }
12611+}
12612+
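bbr_write() and bbr_write_callback() use completion interposition: save the buffer head's original end_io and b_private, substitute the plugin's own callback, and have that callback restore the saved values before either retrying or passing the completion through. A reduced sketch of the idiom (struct and names invented for illustration):

    #include <stdio.h>

    struct req {
        void (*end_io)(struct req *, int ok);
        void *private;
    };

    struct hijack {
        void (*org_end_io)(struct req *, int ok);
        void *org_private;
    };

    static void user_end_io(struct req *r, int ok)
    {
        (void)r;
        printf("request finished, ok=%d\n", ok);
    }

    static void my_end_io(struct req *r, int ok)
    {
        struct hijack *h = r->private;

        /* Restore the caller's callback before doing anything else. */
        r->end_io = h->org_end_io;
        r->private = h->org_private;

        if (!ok)
            printf("would schedule a retry here\n");
        else
            r->end_io(r, ok);    /* pass the completion through */
    }

    int main(void)
    {
        struct req r = { user_end_io, NULL };
        struct hijack h = { r.end_io, r.private };

        r.end_io = my_end_io;    /* interpose */
        r.private = &h;

        r.end_io(&r, 1);         /* simulate a successful completion */
        return 0;
    }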
12613+/**
12614+ * bbr_write
12615+ *
12616+ * If there are any remapped sectors on this object, send the request over
12617+ * to the thread for processing. Otherwise, register for callback
12618+ * notification, and send the request down normally.
12619+ */
12620+static void bbr_write(struct evms_logical_node * bbr_node,
12621+ struct buffer_head * bh)
12622+{
12623+ struct bbr_private * bbr_id = bbr_node->private;
12624+ struct bbr_io_buffer * bbr_io_buf;
12625+
12626+ if ( bh->b_rsector + (bh->b_size >> EVMS_VSECTOR_SIZE_SHIFT) >
12627+ bbr_node->total_vsectors ||
12628+ bbr_id->flag & EVMS_VOLUME_READ_ONLY ) {
12629+ /* Request is off the end of the object, or this
12630+ * is a read-only object.
12631+ */
12632+ bh->b_end_io(bh, 0);
12633+ return;
12634+ }
12635+
12636+ bbr_io_buf = allocate_bbr_io_buf(bbr_id, bh, WRITE);
12637+ if (!bbr_io_buf) {
12638+ /* Can't get memory to track the I/O. */
12639+ bh->b_end_io(bh, 0);
12640+ return;
12641+ }
12642+
12643+ evms_cs_volume_request_in_progress(bh->b_rdev, +1, NULL);
12644+
12645+ if ( atomic_read(&bbr_id->in_use_replacement_blks) == 0 ||
12646+ bbr_id->flag & BBR_STOP_REMAP ||
12647+ ! bbr_remap_probe(bbr_id, bh->b_rsector,
12648+ bh->b_size >> EVMS_VSECTOR_SIZE_SHIFT) ) {
12649+ /* No existing remaps, this request contains no remapped
12650+ * sectors, or the engine said to stop remapping.
12651+ */
12652+ bbr_io_buf->org_end_io = bh->b_end_io;
12653+ bbr_io_buf->org_private = bh->b_private;
12654+ bbr_io_buf->org_rsector = bh->b_rsector;
12655+ bbr_io_buf->org_dev = bh->b_rdev;
12656+ bh->b_end_io = bbr_write_callback;
12657+ bh->b_private = bbr_io_buf;
12658+ W_IO(bbr_id->source, bh);
12659+ } else {
12660+ /* This request contains at least one remapped sector. */
12661+ bbr_schedule_io(bbr_io_buf);
12662+ }
12663+}
12664+
12665+/**
12666+ * bbr_init_io_schedule_io
12667+ * @bbr_id: Private data for the BBR node.
12668+ * @rw: READ or WRITE.
12669+ * @lsn: Starting sector for the request.
12670+ * @count: Number of sectors in the request.
12671+ * @buffer: Data buffer for the request.
12672+ *
12673+ * During init_io, failures must still be handled by the I/O thread. Create
12674+ * a bbr_io_buf, and schedule it to be handled by the thread. Then wait until
12675+ * the request is complete.
12676+ */
12677+static int bbr_init_io_schedule_io(struct bbr_private * bbr_id,
12678+ int rw,
12679+ u64 lsn,
12680+ u64 count,
12681+ void * buffer)
12682+{
12683+ struct bbr_io_buffer * bbr_io_buf;
12684+ struct buffer_head bh;
12685+ struct completion complete;
12686+ int rc = 0;
12687+
12688+ if ( rw != WRITE ) {
12689+ /* Nothing can be done about read failures. */
12690+ return -EIO;
12691+ }
12692+
12693+ LOG_ERROR("Object %s: init_io write failure (sector "PFU64": count "PFU64"). Scheduling for retry.\n",
12694+ bbr_id->node->name, lsn, count);
12695+ bbr_io_buf = allocate_bbr_io_buf(bbr_id, &bh, rw);
12696+ if (!bbr_io_buf) {
12697+ return -ENOMEM;
12698+ }
12699+
12700+ memset(&bh, 0, sizeof(struct buffer_head));
12701+ init_waitqueue_head(&bh.b_wait);
12702+ bh.b_rsector = lsn;
12703+ bh.b_size = count << EVMS_VSECTOR_SIZE_SHIFT;
12704+ bh.b_data = buffer;
12705+ bh.b_end_io = NULL;
12706+
12707+ /* Schedule the I/O and wait for it to finish. */
12708+ bbr_io_buf->complete = &complete;
12709+ init_completion(bbr_io_buf->complete);
12710+ bbr_schedule_io(bbr_io_buf);
12711+ wait_for_completion(bbr_io_buf->complete);
12712+
12713+ rc = bbr_io_buf->rc;
12714+ free_bbr_io_buf(bbr_io_buf);
12715+
12716+ return rc;
12717+}
12718+
12719+/**
12720+ * bbr_init_io
12721+ * @bbr_node: BBR node.
12722+ * @rw: READ or WRITE.
12723+ * @start_lsn: Starting sector for the I/O request.
12724+ * @count: Number of sectors in the I/O request.
12725+ * @buffer: Data buffer for the I/O request.
12726+ *
12727+ * Handle synchronous I/O requests.
12728+ */
12729+static int bbr_init_io(struct evms_logical_node * bbr_node,
12730+ int rw,
12731+ u64 start_lsn,
12732+ u64 count,
12733+ void * buffer )
12734+{
12735+ struct bbr_private * bbr_id = bbr_node->private;
12736+	u64 lsn, io_lsn;
12737+ int rc = 0;
12738+
12739+ if ( start_lsn + count > bbr_node->total_vsectors ) {
12740+ /* Request is off the end of the object. */
12741+ return -EINVAL;
12742+ }
12743+
12744+ if ( rw == WRITE && (bbr_id->flag & EVMS_VOLUME_READ_ONLY) ) {
12745+ /* Can't write to a read-only object. */
12746+ return -EINVAL;
12747+ }
12748+
12749+ if ( bbr_id->flag & BBR_STOP_REMAP ||
12750+ atomic_read(&bbr_id->in_use_replacement_blks) == 0 ||
12751+ ! bbr_remap_probe(bbr_id, start_lsn, count) ) {
12752+ /* Normal case (no existing remaps). */
12753+ rc = INIT_IO(bbr_id->source, rw, start_lsn, count, buffer);
12754+ if (rc && ! (bbr_id->flag & BBR_STOP_REMAP) ) {
12755+ /* Init_io error. Send request over to
12756+ * thread for further processing.
12757+ */
12758+ rc = bbr_init_io_schedule_io(bbr_id, rw, start_lsn,
12759+ count, buffer);
12760+ }
12761+ } else {
12762+		/* At least one sector in this request needs to be remapped.
12763+		 * Test and send each one down individually. Remap into a
12764+		 * copy so the loop counter itself is never modified. */
12765+		for ( lsn = start_lsn;
12766+		      lsn < start_lsn + count;
12767+		      lsn++, buffer += EVMS_VSECTOR_SIZE ) {
12768+			io_lsn = lsn; bbr_remap(bbr_id, &io_lsn);
12769+			rc = INIT_IO(bbr_id->source, rw, io_lsn, 1, buffer);
12770+ if (rc) {
12771+ /* Init_io error. Send request
12772+ * to thread for processing.
12773+ */
12774+ rc = bbr_init_io_schedule_io(bbr_id, rw,
12775+							 io_lsn, 1, buffer);
12776+ if (rc) {
12777+ break;
12778+ }
12779+ }
12780+ }
12781+ }
12782+
12783+ return rc;
12784+}
12785+
12786+/**
12787+ * bbr_direct_ioctl_sector_io
12788+ *
12789+ * Process an I/O from the engine on an active BBR object.
12790+ */
12791+static int bbr_direct_ioctl_sector_io(struct bbr_private * bbr_id,
12792+ struct evms_notify_bbr * notify)
12793+{
12794+ char * buffer, * user_buffer;
12795+ u64 lsn;
12796+ int rc = 0;
12797+
12798+ buffer = kmalloc(EVMS_VSECTOR_SIZE, GFP_NOIO);
12799+ if (!buffer) {
12800+ return -ENOMEM;
12801+ }
12802+
12803+ user_buffer = (char*)notify->buffer;
12804+
12805+ for ( lsn = 0;
12806+ lsn < notify->nr_sect;
12807+ lsn++, user_buffer += EVMS_VSECTOR_SIZE ) {
12808+ if ( notify->rw == WRITE ) {
12809+ if ( copy_from_user(buffer, user_buffer,
12810+ EVMS_VSECTOR_SIZE) ) {
12811+ rc = -EFAULT;
12812+ break;
12813+ }
12814+ }
12815+
12816+ rc = bbr_init_io(bbr_id->node, notify->rw,
12817+ notify->start_sect + lsn, 1, buffer);
12818+ if (rc) {
12819+ break;
12820+ }
12821+
12822+ if ( notify->rw == READ ) {
12823+ if ( copy_to_user(user_buffer, buffer,
12824+ EVMS_VSECTOR_SIZE) ) {
12825+ rc = -EFAULT;
12826+ break;
12827+ }
12828+ }
12829+ }
12830+
12831+ kfree(buffer);
12832+ return rc;
12833+}
12834+
12835+/**
12836+ * bbr_direct_ioctl
12837+ * @inode: N/A
12838+ * @file: N/A
12839+ * @cmd: N/A
12840+ * @arg: Pointer to an evms_plugin_ioctl_pkt.
12841+ *
12842+ * BBR-specific ioctls from the engine. Currently handles:
12843+ * BBR_STOP_REMAP_CMD
12844+ * BBR_GET_INFO_CMD
12845+ * BBR_SECTOR_IO_CMD
12846+ */
12847+static int bbr_direct_ioctl(struct inode * inode,
12848+ struct file * file,
12849+ unsigned int cmd,
12850+ unsigned long arg)
12851+{
12852+ int rc = 0;
12853+ struct bbr_private * bbr_id;
12854+ struct evms_plugin_ioctl_pkt pkt, * user_pkt;
12855+ struct evms_notify_bbr notify, * user_notify;
12856+
12857+ MOD_INC_USE_COUNT;
12858+
12859+ user_pkt = (struct evms_plugin_ioctl_pkt *)arg;
12860+ if ( copy_from_user(&pkt, user_pkt, sizeof(pkt)) ) {
12861+ MOD_DEC_USE_COUNT;
12862+ return -EFAULT;
12863+ }
12864+
12865+ if ( pkt.feature_id != plugin_header.id ) {
12866+ MOD_DEC_USE_COUNT;
12867+ return -EINVAL;
12868+ }
12869+
12870+ user_notify = (struct evms_notify_bbr *)pkt.feature_ioctl_data;
12871+ if ( copy_from_user(&notify, user_notify, sizeof(notify)) ) {
12872+ rc = -EFAULT;
12873+ } else {
12874+ bbr_id = bbr_find_private(notify.object_name);
12875+ if (!bbr_id) {
12876+ rc = -ENODEV;
12877+ } else {
12878+
12879+ switch(pkt.feature_command) {
12880+
12881+ case BBR_STOP_REMAP_CMD:
12882+ bbr_id->flag |= BBR_STOP_REMAP;
12883+ /* Fall through. */
12884+
12885+ case BBR_GET_INFO_CMD:
12886+ notify.count = atomic_read(&bbr_id->in_use_replacement_blks);
12887+ if ( copy_to_user(&user_notify->count,
12888+ &notify.count,
12889+ sizeof(user_notify->count))) {
12890+ rc = -EFAULT;
12891+ }
12892+ break;
12893+
12894+ case BBR_SECTOR_IO_CMD:
12895+ rc = bbr_direct_ioctl_sector_io(bbr_id,
12896+ &notify);
12897+ break;
12898+
12899+ default:
12900+ rc = -ENOSYS;
12901+ }
12902+ }
12903+ }
12904+
12905+ pkt.status = rc;
12906+ copy_to_user(user_pkt, &pkt, sizeof(pkt));
12907+ MOD_DEC_USE_COUNT;
12908+ return rc;
12909+}
12910+
12911+/**
12912+ * bbr_ioctl
12913+ * @bbr_node: BBR node.
12914+ * @inode: N/A
12915+ * @file: N/A
12916+ * @cmd: ioctl command to process.
12917+ * @arg: ioctl-specific data pointer.
12918+ *
12919+ * IOCTL handler. Currently BBR handles plugin-specific ioctls, as well as
12920+ * EVMS_GET_BMAP. All others are passed to the child node.
12921+ */
12922+static int bbr_ioctl (struct evms_logical_node * bbr_node,
12923+ struct inode * inode,
12924+ struct file * file,
12925+ unsigned int cmd,
12926+ unsigned long arg)
12927+{
12928+ struct bbr_private * bbr_id = bbr_node->private;
12929+ struct evms_get_bmap_pkt * bmap;
12930+ int rc = 0;
12931+
12932+ switch (cmd) {
12933+ case EVMS_PLUGIN_IOCTL:
12934+ rc = bbr_direct_ioctl(inode, file, cmd, arg);
12935+ break;
12936+
12937+ case EVMS_GET_BMAP:
12938+ bmap = (struct evms_get_bmap_pkt *)arg;
12939+ bbr_remap(bbr_id, &bmap->rsector);
12940+		/* fall through */
12941+
12942+ default:
12943+ rc = IOCTL(bbr_id->source, inode, file, cmd, arg);
12944+ }
12945+ return rc;
12946+}
12947+
12948+static int __init bbr_init(void)
12949+{
12950+ return evms_cs_register_plugin(&plugin_header);
12951+}
12952+
12953+static void __exit bbr_exit(void)
12954+{
12955+ evms_cs_unregister_plugin(&plugin_header);
12956+}
12957+
12958+module_init(bbr_init);
12959+module_exit(bbr_exit);
12960+#ifdef MODULE_LICENSE
12961+MODULE_LICENSE("GPL");
12962+#endif
12963+
12964diff -Naur linux-2002-09-30/drivers/evms/evms_drivelink.c evms-2002-09-30/drivers/evms/evms_drivelink.c
12965--- linux-2002-09-30/drivers/evms/evms_drivelink.c Wed Dec 31 18:00:00 1969
12966+++ evms-2002-09-30/drivers/evms/evms_drivelink.c Fri Sep 13 16:09:55 2002
12967@@ -0,0 +1,1274 @@
12968+/* -*- linux-c -*-
12969+ *
12970+ *
12971+ * Copyright (c) International Business Machines Corp., 2000
12972+ *
12973+ * This program is free software; you can redistribute it and/or modify
12974+ * it under the terms of the GNU General Public License as published by
12975+ * the Free Software Foundation; either version 2 of the License, or
12976+ * (at your option) any later version.
12977+ *
12978+ * This program is distributed in the hope that it will be useful,
12979+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
12980+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
12981+ * the GNU General Public License for more details.
12982+ *
12983+ * You should have received a copy of the GNU General Public License
12984+ * along with this program; if not, write to the Free Software
12985+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
12986+ *
12987+ *
12988+ */
12989+/*
12990+ * linux/drivers/evms/evms_drivelink.c
12991+
12992+ *
12993+ * EVMS Drive Linking Feature.
12994+ *
12995+ * This feature provides the ability to link multiple storage objects
12996+ * together as a single virtual storage object.
12997+ *
12998+ */
12999+
13000+#include <linux/module.h>
13001+#include <linux/kernel.h>
13002+#include <linux/config.h>
13003+#include <linux/genhd.h>
13004+#include <linux/blk.h>
13005+#include <linux/evms/evms.h>
13006+#include <linux/evms/evms_drivelink.h>
13007+#include <asm/uaccess.h>
13008+
13009+#define LOG_PREFIX "drivelink: "
13010+
13011+/* prototypes for mandatory plugin interface functions */
13012+static int drivelink_discover(struct evms_logical_node **);
13013+static int drivelink_delete(struct evms_logical_node *);
13014+static void drivelink_read(struct evms_logical_node *, struct buffer_head *);
13015+static void drivelink_write(struct evms_logical_node *, struct buffer_head *);
13016+static int drivelink_ioctl(struct evms_logical_node *,
13017+ struct inode *,
13018+ struct file *, unsigned int, unsigned long);
13019+static int drivelink_init_io(struct evms_logical_node *,
13020+ int, u64, u64, void *);
13021+
13022+/* plugin function table definition */
13023+static struct evms_plugin_fops fops = {
13024+ .discover = drivelink_discover,
13025+ .delete = drivelink_delete,
13026+ .read = drivelink_read,
13027+ .write = drivelink_write,
13028+ .init_io = drivelink_init_io,
13029+ .ioctl = drivelink_ioctl
13030+};
13031+
13032+/* plugin header definition */
13033+static struct evms_plugin_header plugin_header = {
13034+ .id = SetPluginID(IBM_OEM_ID,
13035+ EVMS_FEATURE,
13036+ EVMS_DRIVELINK_FEATURE_ID),
13037+ .version = {
13038+ .major = 2,
13039+ .minor = 0,
13040+ .patchlevel = 1
13041+ },
13042+ .required_services_version = {
13043+ .major = 0,
13044+ .minor = 5,
13045+ .patchlevel = 0
13046+ },
13047+ .fops = &fops
13048+};
13049+
13050+/********************************************************/
13051+/* Required Plugin Function Table Entry Point: */
13052+/* Discover function & Support routines */
13053+/********************************************************/
13054+
13055+/**
13056+ * le_feature_data_to_cpu:
13057+ * @md: drivelink metadata
13058+ *
13059+ * convert feature data from on-disk (Little Endian) format
13060+ * to the native cpu endian format.
13061+**/
13062+static void
13063+le_feature_data_to_cpu(struct evms_drivelink_metadata *md)
13064+{
13065+ int i;
13066+
13067+ md->signature = le32_to_cpup(&md->signature);
13068+ md->crc = le32_to_cpup(&md->crc);
13069+ md->version.major = le32_to_cpup(&md->version.major);
13070+ md->version.minor = le32_to_cpup(&md->version.minor);
13071+ md->version.patchlevel = le32_to_cpup(&md->version.patchlevel);
13072+ md->flags = le32_to_cpup(&md->flags);
13073+ md->sequence_number = le64_to_cpup(&md->sequence_number);
13074+ md->child_serial_number = le64_to_cpup(&md->child_serial_number);
13075+ md->parent_serial_number = le64_to_cpup(&md->parent_serial_number);
13076+ md->child_count = le64_to_cpup(&md->child_count);
13077+ for (i = 0; i < EVMS_DRIVELINK_MAX_ENTRIES; i++) {
13078+ struct evms_dl_ordering_table_entry *child_entry;
13079+
13080+ child_entry = &md->ordering_table[i];
13081+ child_entry->child_serial_number =
13082+ le64_to_cpup(&child_entry->child_serial_number);
13083+ child_entry->child_vsize =
13084+ le64_to_cpup(&child_entry->child_vsize);
13085+ }
13086+}
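+
+/* Editorial note: le32_to_cpup()/le64_to_cpup() are no-ops on little-endian
+ * CPUs; on a big-endian CPU the on-disk bytes 01 00 00 00 must be
+ * byte-swapped to yield the value 1. Every multi-byte field above gets the
+ * same treatment so the metadata reads identically on either kind of host.
+ */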
13087+
13088+/**
13089+ * load_feature_data: load drivelink feature data from disk
13090+ * @node: storage object
13091+ * @md: ptr to drivelink metadata
13092+ *
13093+ * loads and verifies redundant copies of drivelink metadata. @md is modified
13094+ * and returned to the caller.
13095+ *
13096+ * Return value: 0 on success
13097+ * Otherwise error code
13098+**/
13099+static int
13100+load_feature_data(struct evms_logical_node *node,
13101+ struct evms_drivelink_metadata **md)
13102+{
13103+ int i, rc = 0, rc_array[2] = { 0, 0 }, size_in_bytes;
13104+ u64 real_metadata_size, feature_data_size;
13105+ u64 starting_sector;
13106+ struct evms_drivelink_metadata *cur_md, *md1, *md2 = NULL;
13107+ char *location_name;
13108+
13109+ /* verify the feature metadata size from the */
13110+ /* feature header agrees with the real size */
13111+ /* of the current metadata structure. */
13112+ real_metadata_size = evms_cs_size_in_vsectors(sizeof (**md));
13113+
13114+ /* allocate a buffer large enough to hold all */
13115+ /* sectors containing the feature's metadata */
13116+ size_in_bytes = real_metadata_size * EVMS_VSECTOR_SIZE;
13117+ md1 = kmalloc(size_in_bytes, GFP_KERNEL);
13118+ if (md1) {
13119+ md2 = kmalloc(size_in_bytes, GFP_KERNEL);
13120+ if (!md2) {
13121+ kfree(md1);
13122+ rc = -ENOMEM;
13123+ }
13124+ } else {
13125+ rc = -ENOMEM;
13126+ }
13127+ if (!rc) {
13128+ for (i = 0; i < 2; i++) {
13129+ if (i == 0) {
13130+ starting_sector =
13131+ node->feature_header->
13132+ feature_data1_start_lsn;
13133+ feature_data_size =
13134+ node->feature_header->feature_data1_size;
13135+ cur_md = md1;
13136+ location_name = evms_primary_string;
13137+ } else {
13138+ starting_sector =
13139+ node->feature_header->
13140+ feature_data2_start_lsn;
13141+ feature_data_size =
13142+ node->feature_header->feature_data2_size;
13143+ cur_md = md2;
13144+ location_name = evms_secondary_string;
13145+ }
13146+ /* check that real metadata size matches the */
13147+ /* feature data size */
13148+ if (real_metadata_size != feature_data_size) {
13149+ LOG_ERROR
13150+ ("%s feature data size("PFU64" bytes) doesn't match expected size("PFU64" bytes).\n",
13151+ location_name,
13152+ feature_data_size <<
13153+ EVMS_VSECTOR_SIZE_SHIFT,
13154+ real_metadata_size <<
13155+ EVMS_VSECTOR_SIZE_SHIFT);
13156+ rc = -EINVAL;
13157+ rc_array[i] = rc;
13158+ continue;
13159+ }
13160+ /* load the node's feature data */
13161+ rc = INIT_IO(node,
13162+ 0,
13163+ starting_sector,
13164+ feature_data_size, cur_md);
13165+ if (rc) {
13166+ LOG_ERROR
13167+ ("error(%d) probing for %s feature data at sector("PFU64") on '%s'.\n",
13168+ rc, location_name, starting_sector,
13169+ node->name);
13170+ rc_array[i] = rc;
13171+ continue;
13172+ }
13173+ /* check for valid metadata signature */
13174+ if (le32_to_cpup(&cur_md->signature) !=
13175+ EVMS_DRIVELINK_SIGNATURE) {
13176+ rc = -ENODATA;
13177+ LOG_SERIOUS
13178+ ("error(%d) invalid signature in %s feature data on '%s'\n",
13179+ rc, location_name, node->name);
13180+ rc_array[i] = rc;
13181+ continue;
13182+ }
13183+ /* validate feature data CRC */
13184+ if (cur_md->crc != EVMS_MAGIC_CRC) {
13185+ int org_crc, final_crc;
13186+ org_crc = le32_to_cpup(&cur_md->crc);
13187+ cur_md->crc = 0;
13188+ final_crc =
13189+ evms_cs_calculate_crc(EVMS_INITIAL_CRC,
13190+ cur_md,
13191+ sizeof (*cur_md));
13192+ if (final_crc != org_crc) {
13193+ LOG_ERROR
13194+ ("CRC mismatch error [stored(%x), computed(%x)] in %s feature data on '%s'.\n",
13195+ org_crc, final_crc, location_name,
13196+ node->name);
13197+ rc = -EINVAL;
13198+ rc_array[i] = rc;
13199+ continue;
13200+ }
13201+ } else {
13202+ LOG_WARNING
13203+ ("CRC disabled in %s feature data on '%s'.\n",
13204+ location_name, node->name);
13205+ }
13206+ /* convert feature data from on-disk
13207+ * format (Little Endian) to native
13208+ * cpu endian format.
13209+ */
13210+ le_feature_data_to_cpu(cur_md);
13211+ /* check for valid structure version */
13212+ rc = evms_cs_check_version(&metadata_ver,
13213+ &cur_md->version);
13214+ if (rc) {
13215+ LOG_SERIOUS
13216+ ("error(%d) obsolete version detected: actual(%d,%d,%d), requires(%d,%d,%d) in %s feature data on '%s'\n",
13217+ rc, cur_md->version.major,
13218+ cur_md->version.minor,
13219+ cur_md->version.patchlevel,
13220+ DRIVELINK_METADATA_MAJOR,
13221+ DRIVELINK_METADATA_MINOR,
13222+ DRIVELINK_METADATA_PATCHLEVEL,
13223+ location_name, node->name);
13224+ rc_array[i] = rc;
13225+ }
13226+ }
13227+ /* getting same return code for both copies? */
13228+ if (rc_array[0] == rc_array[1]) {
13229+ rc = rc_array[0];
13230+ /* if no errors on both copies,
13231+ * check the sequence numbers.
13232+ * use the highest sequence number.
13233+ */
13234+ if (!rc) {
13235+ /* compare sequence numbers */
13236+ if (md1->sequence_number ==
13237+ md2->sequence_number) {
13238+ cur_md = md1;
13239+ } else {
13240+ LOG_WARNING
13241+ ("sequence number mismatches between front("PFU64") and rear("PFU64") feature data copies on node(%s)!\n",
13242+ md2->sequence_number,
13243+ md1->sequence_number, node->name);
13244+ if (md1->sequence_number >
13245+ md2->sequence_number)
13246+ cur_md = md1;
13247+ else
13248+ cur_md = md2;
13249+ LOG_WARNING
13250+ ("using %s feature data copy!\n",
13251+ (cur_md ==
13252+ md1) ? evms_primary_string :
13253+ evms_secondary_string);
13254+ }
13255+ }
13256+ /* getting different return codes for each copy */
13257+ } else if (rc_array[0] == 0) {
13258+			/* use 1st (rear) copy if it's good */
13259+ rc = 0;
13260+ cur_md = md1;
13261+ } else if (rc_array[1] == 0) {
13262+			/* use 2nd (front) copy if it's good */
13263+ rc = 0;
13264+ cur_md = md2;
13265+ } else if ((rc_array[0] == -EINVAL) || (rc_array[1] == -EINVAL)) {
13266+ /* fail if either give a fatal error */
13267+ rc = -EINVAL;
13268+ cur_md = NULL;
13269+ }
13270+
13271+ /* deallocate metadata buffers appropriately */
13272+ if (rc || (cur_md == md1))
13273+ kfree(md2);
13274+ if (rc || (cur_md == md2))
13275+ kfree(md1);
13276+
13277+ /* save validated feature header pointer */
13278+ if (!rc)
13279+ *md = cur_md;
13280+ }
13281+ return (rc);
13282+}
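+
+/* Worked example of the copy selection above (editorial, assumed numbers):
+ * if both copies validate but md1->sequence_number is 7 and
+ * md2->sequence_number is 9, the mismatch is logged and md2 (the copy with
+ * the higher sequence number, i.e. the more recent write) is used. If
+ * exactly one copy fails its size, signature, CRC or version check, the
+ * surviving copy is used and rc is forced back to 0.
+ */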
13283+
13284+/**
13285+ * find_parent_node_for_child_node: finds or creates a parent node for this child node
13286+ * @child_node: input, child node
13287+ * @md: input, on-disk metadata
13288+ * @parent_node: output, parent node
13289+ * @dl_private: output, runtime metadata
13290+ * @discover_list: input/output, list of objects being discovered
13291+ *
13292+ * finds or creates a parent node for the specified child node. If the parent node is
13293+ * created, also create and initialize the parent's private data area.
13294+ *
13295+ * Return value: 0 on success
13296+ * Otherwise error code.
13297+**/
13298+static int
13299+find_parent_node_for_child_node(struct evms_logical_node *child_node,
13300+ struct evms_drivelink_metadata *md,
13301+ struct evms_logical_node **parent_node,
13302+ struct runtime_data **dl_private,
13303+ struct evms_logical_node **discover_list)
13304+{
13305+ int rc = 0, parent_found = FALSE;
13306+ struct evms_logical_node *parent = NULL;
13307+ struct runtime_data *rd = NULL;
13308+
13309+ /* find the parent node for this child */
13310+ for (parent = *discover_list; parent; parent = parent->next) {
13311+ /* only parent nodes will have null feature headers */
13312+ if (!parent->feature_header) {
13313+ rd = (struct runtime_data *) parent->private;
13314+ if (rd->parent_sn == md->parent_serial_number) {
13315+ parent_found = TRUE;
13316+ break;
13317+ }
13318+ }
13319+ }
13320+ /* if no parent node found, create it */
13321+ if (parent_found == FALSE) {
13322+ rc = evms_cs_allocate_logical_node(&parent);
13323+ if (!rc) {
13324+ /* transpose info from child to parent */
13325+ parent->flags |= child_node->flags;
13326+ strcpy(parent->name,
13327+ child_node->feature_header->object_name);
13328+ /* copy evms system data to parent */
13329+ parent->volume_info = child_node->volume_info;
13330+ /* initialize the plugin id field */
13331+ parent->plugin = &plugin_header;
13332+ /* allocate parent's instance data */
13333+ parent->private = kmalloc(sizeof(*rd), GFP_KERNEL);
13334+ if (!parent->private)
13335+ rc = -ENOMEM;
13336+ }
13337+ if (!rc) {
13338+ /* initialize some instance data fields */
13339+ rd = (struct runtime_data *) parent->private;
13340+ rd->block_size = 0;
13341+ rd->parent_sn = md->parent_serial_number;
13342+ rd->child_count = md->child_count;
13343+ /* allocate the child table */
13344+ rd->child_table = kmalloc(sizeof(struct runtime_entry) *
13345+ rd->child_count, GFP_KERNEL);
13346+ if (!rd->child_table)
13347+ rc = -ENOMEM;
13348+ }
13349+ if (!rc) {
13350+ memset(rd->child_table, 0,
13351+ sizeof(struct runtime_entry) * rd->child_count);
13352+ /* add the parent node to the discover list */
13353+ rc = evms_cs_add_logical_node_to_list(discover_list,
13354+ parent);
13355+ MOD_INC_USE_COUNT;
13356+ }
13357+ /* if any errors encountered, try to clean up */
13358+ if (rc) {
13359+ LOG_SERIOUS("find_parent_node: rc(%d) from '%s'\n",
13360+ rc, child_node->name);
13361+ if (parent) {
13362+ DELETE(parent);
13363+ parent = NULL;
13364+ rd = NULL;
13365+ }
13366+ }
13367+ }
13368+
13369+ *dl_private = rd;
13370+ *parent_node = parent;
13371+
13372+ return (rc);
13373+}
13374+
13375+/**
13376+ * compute_child_index: compute the index for a specific child node
13377+ * @node: the child node
13378+ * @md: the drivelink on-disk metadata
13379+ *
13380+ * compute and return a 0-based index value of this child node's position
13381+ * in the parent node's ordering table.
13382+ *
13383+ * Return value: -1 on error
13384+ * otherwise the index of the specified child.
13385+**/
13386+static int
13387+compute_child_index(struct evms_logical_node *node,
13388+ struct evms_drivelink_metadata *md)
13389+{
13390+ int i, position = -1;
13391+
13392+ for (i = 0; i < md->child_count; i++) {
13393+ if (md->ordering_table[i].child_serial_number ==
13394+ md->child_serial_number) {
13395+ position = i;
13396+ break;
13397+ }
13398+ }
13399+ if (position == -1) {
13400+ LOG_SERIOUS("%s: child not found from '%s'\n",
13401+ __FUNCTION__, node->name);
13402+ }
13403+ return (position);
13404+}
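+
+/* Worked example (editorial, assumed serial numbers): with an ordering
+ * table holding child serial numbers { 17, 42, 99 } and a
+ * child_serial_number of 42, the loop returns index 1; a serial number
+ * absent from the table yields -1 and the "child not found" message.
+ */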
13405+
13406+/**
13407+ * process_child_nodes: perform the discovery operation on each child node
13408+ * @discover_list: the list of potential child objects
13409+ *
13410+ * search the discovery list for drivelink child nodes. For each node found,
13411+ * perform the discovery operation on it.
13412+ *
13413+ * Return value: 0 on success
13414+ * otherwise error code
13415+**/
13416+static int
13417+process_child_nodes(struct evms_logical_node **discover_list)
13418+{
13419+ int rc = 0, index = -1;
13420+ struct evms_logical_node *node, *next_node, *parent;
13421+ struct evms_drivelink_metadata *md;
13422+ struct runtime_data *rd;
13423+ struct runtime_entry *child_entry = NULL;
13424+
13425+ for (node = *discover_list; node; node = next_node) {
13426+ next_node = node->next;
13427+ if ((!node->feature_header) ||
13428+ (node->feature_header->feature_id != plugin_header.id)) {
13429+ continue;
13430+ }
13431+
13432+ rc = evms_cs_remove_logical_node_from_list(discover_list, node);
13433+ if (rc)
13434+ BUG();
13435+ /* we need to load the feature data to */
13436+ /* find the parent's serial number this */
13437+ /* child node belongs to. */
13438+ md = NULL;
13439+ rc = load_feature_data(node, &md);
13440+ if (!rc) {
13441+ /* find the parent node for this child */
13442+ parent = NULL;
13443+ rc = find_parent_node_for_child_node(node, md,
13444+ &parent, &rd,
13445+ discover_list);
13446+ }
13447+ if (!rc) {
13448+ /* determine position of child in drive link object */
13449+ index = compute_child_index(node, md);
13450+ if (index == -1)
13451+ rc = index;
13452+ }
13453+ if (!rc) {
13454+ /* check for multiple child index requests */
13455+ child_entry =
13456+ (struct runtime_entry *) &rd->child_table[index];
13457+ /* check to see if this child index is
13458+ * already in use.
13459+ */
13460+ if (child_entry->child_node) {
13461+ LOG_SERIOUS
13462+ ("attempt to put '%s' in child index(%d). Already occupied by '%s'.\n",
13463+ node->name, index,
13464+ child_entry->child_node->name);
13465+ rc = -1;
13466+ }
13467+ }
13468+ if (!rc) {
13469+ /* fill in child info in parent */
13470+
13471+ /* check the sector size for this node */
13472+ if (node->hardsector_size > parent->hardsector_size)
13473+ parent->hardsector_size = node->hardsector_size;
13474+ /* check the block size for this node */
13475+ if (node->block_size > parent->block_size)
13476+ parent->block_size = node->block_size;
13477+ /* set the child node */
13478+ child_entry->child_node = node;
13479+ /* set the metadata for this node */
13480+ child_entry->child_metadata = md;
13481+ }
13482+
13483+ /* on error, clean up accordingly */
13484+ if (rc) {
13485+ if (md)
13486+ kfree(md);
13487+ LOG_SERIOUS("%s: rc(%d) from '%s'\n",
13488+ __FUNCTION__, rc, node->name);
13489+ LOG_SERIOUS("deleting child node '%s'.\n", node->name);
13490+ rc = DELETE(node);
13491+ if (rc) {
13492+ LOG_SERIOUS
13493+ ("error(%d) attempting to delete '%s'.\n",
13494+ rc, node->name);
13495+ }
13496+ }
13497+ }
13498+
13499+ /* errors are handled internal to this function */
13500+ /* by deleting the failed node. This will get */
13501+ /* picked up by finalize_parent_nodes as a */
13502+ /* missing child node */
13503+ return (0);
13504+}
13505+
13506+#define TEST_CHILD_PRESENCE 0
13507+#define TEST_CHILD_COUNT 1
13508+#define TEST_CHILD_PARENTS_SERIAL_NUM 2
13509+#define TEST_CHILD_POSITION 3
13510+#define TEST_CHILD_METADATA 4
13511+
13512+/**
13513+ * test_parent_node: verify that a parent is complete
13514+ * @node: specified parent node
13515+ *
13516+ * verify that the parent node has all of its child nodes accounted for.
13517+ *
13518+ * Return value: 0 on success
13519+ * otherwise error code
13520+**/
13521+static int
13522+test_parent_node(struct evms_logical_node *node)
13523+{
13524+ int i, rc = 0;
13525+ struct runtime_data *rd;
13526+ struct runtime_entry *child_entry;
13527+
13528+ rd = (struct runtime_data *) node->private;
13529+ for (i = 0; i < rd->child_count; i++) {
13530+ child_entry = (struct runtime_entry *) &rd->child_table[i];
13531+
13532+		/* ensure each child entry is filled */
13533+ if (!child_entry->child_node) {
13534+ node->flags |=
13535+ EVMS_VOLUME_SET_READ_ONLY | EVMS_VOLUME_PARTIAL;
13536+ LOG_ERROR("%s: missing child(%d).\n", __FUNCTION__, i);
13537+ } else
13538+		/* ensure child count is the same */
13539+ /* in each child's metadata */
13540+ if (child_entry->child_metadata->child_count != rd->child_count) {
13541+ rc = -EVMS_FEATURE_FATAL_ERROR;
13542+ LOG_ERROR("%s: child count wrong for node '%s'\n",
13543+ __FUNCTION__, node->name);
13544+ } else
13545+		/* ensure parent serial number is */
13546+ /* the same in each child's metadata */
13547+ if (child_entry->child_metadata->parent_serial_number !=
13548+ rd->parent_sn) {
13549+ rc = -EVMS_FEATURE_FATAL_ERROR;
13550+ LOG_ERROR
13551+		    ("%s: incorrect [is("PFU64"), should be("PFU64")] parent serial number for node '%s'\n",
13552+ __FUNCTION__,
13553+ child_entry->child_metadata->parent_serial_number,
13554+ rd->parent_sn, node->name);
13555+ } else
13556+		/* ensure each is in the correct entry */
13557+ if (child_entry->child_metadata->ordering_table[i].
13558+ child_serial_number !=
13559+ child_entry->child_metadata->child_serial_number) {
13560+ rc = -EVMS_FEATURE_FATAL_ERROR;
13561+ LOG_ERROR
13562+ ("%s: child reports different index for node '%s'\n",
13563+ __FUNCTION__, node->name);
13564+ } else {
13565+ struct runtime_entry *other_child_entry;
13566+ int j, rc2;
13567+ /* compare the children's metadata */
13568+
13569+ /* look for another present child to
13570+ * compare against.
13571+ */
13572+ other_child_entry = NULL;
13573+ for (j = 0; j < rd->child_count; j++) {
13574+ /* skip comparing to ourselves */
13575+ if (j == i) {
13576+ continue;
13577+ }
13578+				/* is this child present? */
13579+ if (rd->child_table[j].child_node) {
13580+ /* yes, use it */
13581+ other_child_entry = &rd->child_table[j];
13582+ break;
13583+ }
13584+ }
13585+ /* if we can't find another valid
13586+ * child node's metadata to compare
13587+ * against, just skip this test.
13588+ */
13589+ if (!other_child_entry) {
13590+ continue;
13591+ }
13592+ rc2 =
13593+ memcmp(other_child_entry->child_metadata->
13594+ ordering_table,
13595+ child_entry->child_metadata->ordering_table,
13596+ sizeof (child_entry->child_metadata->
13597+ ordering_table));
13598+ if (rc2) {
13599+ rc = -EVMS_FEATURE_FATAL_ERROR;
13600+ LOG_ERROR
13601+ ("%s: mismatching child metadata for nodes '%s' and '%s'\n",
13602+ __FUNCTION__,
13603+				 other_child_entry->child_node->name,
13604+ child_entry->child_node->name);
13605+ }
13606+ }
13607+ /* stop if fatal error encountered */
13608+ if (rc == -EVMS_FEATURE_FATAL_ERROR) {
13609+ break;
13610+ }
13611+ }
13612+ return (rc);
13613+}
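+
+/* Editorial note: a missing child is deliberately non-fatal above; the
+ * parent is marked EVMS_VOLUME_SET_READ_ONLY | EVMS_VOLUME_PARTIAL and
+ * discovery continues. Inconsistent metadata between present children, by
+ * contrast, returns -EVMS_FEATURE_FATAL_ERROR, which causes
+ * finalize_parent_nodes() to delete the whole parent object.
+ */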
13614+
13615+/**
13616+ * perform_final_adjustments: do final tweaks to parent node
13617+ * @node: parent node
13618+ *
13619+ * This function does the following:
13620+ * sets the vsize (in vsectors) field in each child node
13621+ * sets the voffset (in vsectors) field in each child node
13622+ * frees each child node's metadata
13623+ * sets the parent's total size field
13624+**/
13625+static void
13626+perform_final_adjustments(struct evms_logical_node *node)
13627+{
13628+ int i;
13629+ struct runtime_data *rd;
13630+ struct runtime_entry *child_entry = NULL;
13631+ struct evms_drivelink_metadata *ref_data = NULL;
13632+
13633+ rd = (struct runtime_data *) node->private;
13634+ /* find a valid copy of the ordering table.
13635+ * since all the ordering tables are the same
13636+ * we can just pick one to use for all the
13637+ * child computations.
13638+ */
13639+ for (i = 0; i < rd->child_count; i++) {
13640+ child_entry = (struct runtime_entry *) &rd->child_table[i];
13641+ if (child_entry->child_node) {
13642+ ref_data = child_entry->child_metadata;
13643+ break;
13644+ }
13645+ }
13646+ /* if we got this far, there should
13647+ * always be at least one valid child.
13648+ */
13649+ if (!ref_data)
13650+ BUG();
13651+ /* compute the parent's usable size,
13652+ * and construct the table used to
13653+ * remap parent I/Os to child I/Os */
13654+ for (i = 0; i < rd->child_count; i++) {
13655+ child_entry = (struct runtime_entry *) &rd->child_table[i];
13656+ /* set the LBA count for this child node */
13657+ child_entry->vsize = ref_data->ordering_table[i].child_vsize;
13658+ /* set the start LBA value for this child node */
13659+ child_entry->voffset = node->total_vsectors;
13660+ /* keep a running total of size in sectors */
13661+ node->total_vsectors += child_entry->vsize;
13662+ /* free the metadata for this child node */
13663+ if (ref_data != child_entry->child_metadata) {
13664+ kfree(child_entry->child_metadata);
13665+ }
13666+ child_entry->child_metadata = NULL;
13667+ /* free the feature header for this child node */
13668+ if (child_entry->child_node) {
13669+ kfree(child_entry->child_node->feature_header);
13670+ child_entry->child_node->feature_header = NULL;
13671+ }
13672+ }
13673+ /* free the reference data */
13674+ kfree(ref_data);
13675+}
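+
+/* Worked example of the remap table built above (editorial, assumed sizes):
+ * for three children with child_vsize values 100, 200 and 50 vsectors, the
+ * loop assigns voffset 0, 100 and 300 respectively and leaves the parent's
+ * total_vsectors at 350, so each child owns the half-open LBA range
+ * [voffset, voffset + vsize) of the parent object.
+ */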
13676+
13677+/**
13678+ * finalize_parent_nodes: verify and prepare parent nodes
13679+ * @discover_list: list of potential drivelink parent objects
13680+ *
13681+ * verify the completeness of each parent node. If not complete, purge the in-memory
13682+ * structs for this object and all its children. If complete, perform final tweaks
13683+ * to make this node usable.
13684+ *
13685+ * Return value: 0 on success
13686+ * otherwise error code
13687+**/
13688+static int
13689+finalize_parent_nodes(struct evms_logical_node **discover_list)
13690+{
13691+ int rc = 0, rc2;
13692+ struct evms_logical_node *node, *next_node;
13693+
13694+ for (node = *discover_list; node; node = next_node) {
13695+ next_node = node->next;
13696+ /* only check parent nodes */
13697+ if (!node->feature_header) {
13698+			/* validate the children of this parent */
13699+ rc = test_parent_node(node);
13700+ if (!rc) {
13701+ /* compute parent size and
13702+ * child remap table.
13703+ */
13704+ perform_final_adjustments(node);
13705+ } else {
13706+ /* fatal error encountered.
13707+ * cleanup from this node and
13708+ * delete it from memory.
13709+ */
13710+ evms_cs_remove_logical_node_from_list
13711+ (discover_list, node);
13712+ rc2 = DELETE(node);
13713+ if (rc2) {
13714+ LOG_SERIOUS
13715+ ("error(%d) attempting to delete '%s'.\n",
13716+ rc2, node->name);
13717+ }
13718+ }
13719+ }
13720+ }
13721+ return (rc);
13722+}
13723+
13724+/**
13725+ * drivelink_discover: discover drivelinked storage objects
13726+ * @discover_list: the list of objects to inspect
13727+ *
13728+ * perform the drivelink discovery process on the objects in the discovery list
13729+ *
13730+ * Return value: 0 on success
13731+ * otherwise error code
13732+**/
13733+static int
13734+drivelink_discover(struct evms_logical_node **discover_list)
13735+{
13736+ int rc = 0;
13737+
13738+ MOD_INC_USE_COUNT;
13739+ rc = process_child_nodes(discover_list);
13740+ if (!rc)
13741+ rc = finalize_parent_nodes(discover_list);
13742+
13743+ MOD_DEC_USE_COUNT;
13744+ return (rc);
13745+}
13746+
13747+/********************************************************/
13748+/* Required Plugin Function Table Entry Point: */
13749+/* Delete function */
13750+/********************************************************/
13751+
13752+/**
13753+ * drivelink_delete: purges a drivelink object and its children from memory
13754+ * @node: the drivelink object to delete
13755+ *
13756+ * purge the drivelink object, its private data, and all its children from memory.
13757+ *
13758+ * Return value: 0 on success
13759+ * otherwise error code
13760+**/
13761+static int
13762+drivelink_delete(struct evms_logical_node *node)
13763+{
13764+ int i, rc = 0;
13765+ struct runtime_data *rd;
13766+ struct runtime_entry *child_entry;
13767+
13768+ LOG_DETAILS("deleting '%s'.\n", node->name);
13769+
13770+ rd = (struct runtime_data *) node->private;
13771+ if (rd) {
13772+ for (i = 0; i < rd->child_count; i++) {
13773+ child_entry = &rd->child_table[i];
13774+ /* delete the child node */
13775+ if (child_entry->child_node) {
13776+ rc = DELETE(child_entry->child_node);
13777+ if (rc)
13778+ break;
13779+ child_entry->child_node = NULL;
13780+ }
13781+ /* delete the child's metadata */
13782+ if (child_entry->child_metadata) {
13783+ kfree(child_entry->child_metadata);
13784+ child_entry->child_metadata = NULL;
13785+ }
13786+ }
13787+ if (!rc) {
13788+ /* delete the child table */
13789+ if (rd->child_table) {
13790+ kfree(rd->child_table);
13791+ rd->child_table = NULL;
13792+ }
13793+ /* delete the instance data */
13794+ kfree(rd);
13795+ node->private = NULL;
13796+ }
13797+ }
13798+ if (!rc) {
13799+ evms_cs_deallocate_logical_node(node);
13800+ MOD_DEC_USE_COUNT;
13801+ }
13802+
13803+ return (rc);
13804+}
13805+
13806+/**
13807+ * which_child: find the child node targeted by an IO to this drivelink object
13808+ * @parent: parent drivelink object
13809+ * @rsector: relative sector on the parent object
13810+ * @max_io_sects: largest IO size on the child, starting from rsector position
13811+ *
13812+ * This function finds the child node a parent rsector maps to.
13813+ * It then adjusts the rsector value to be child relative and
13814+ * optionally computes the max # of sectors that can be accessed
13815+ * from this starting point on the child.
13816+ *
13817+ * Return value:
13818+ * The child node, the child relative rsector and max io size are
13819+ * returned to the caller. On error, the returned child node will
13820+ * be NULL.
13821+**/
13822+static struct evms_logical_node *
13823+which_child(struct evms_logical_node *parent,
13824+ u64 * rsector, u64 * max_io_sects)
13825+{
13826+ int i;
13827+ struct evms_logical_node *child = NULL;
13828+ struct runtime_data *rd;
13829+ struct runtime_entry *child_entry = NULL;
13830+
13831+ rd = (struct runtime_data *) parent->private;
13832+ for (i = 0; i < rd->child_count; i++) {
13833+ child_entry = (struct runtime_entry *) &rd->child_table[i];
13834+
13835+ if (*rsector >= child_entry->vsize) {
13836+ *rsector -= child_entry->vsize;
13837+ } else {
13838+ /* get the child node */
13839+ child = child_entry->child_node;
13840+ /* compute the sector count if requested */
13841+ if (max_io_sects)
13842+ /* this is only used for INIT I/O
13843+ * to return the largest sector
13844+ * count size for this child based
13845+ * on first sector in the I/O.
13846+ */
13847+ *max_io_sects = child_entry->vsize - *rsector;
13848+ break;
13849+ }
13850+ }
13851+ return (child);
13852+}
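+
+/* Worked example (editorial, reusing the assumed 100/200/50 layout): a
+ * parent *rsector of 150 walks past child 0 (150 - 100 = 50) and resolves
+ * to child 1 with child-relative rsector 50 and, when requested,
+ * max_io_sects = 200 - 50 = 150.
+ */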
13853+
13854+/**
13855+ * drivelink_io_error: log an IO error for drivelink
13856+ * @node: drivelink object
13857+ * @bh: buffer head targeting this object
13858+ *
13859+ * this function was primarily created because the function
13860+ * buffer_IO_error is inline and kgdb doesn't allow breakpoints
13861+ * to be set on inline functions. Since this was an error path
13862+ * and not mainline, I decided to add a trace statement to help
13863+ * report on the failing condition.
13864+**/
13865+static void
13866+drivelink_io_error(struct evms_logical_node *node, int io_flag, struct buffer_head *bh)
13867+{
13868+ LOG_SERIOUS("%s error on '%s' remapping rsector("PFU64").\n",
13869+ (io_flag) ? "WRITE" : "READ",
13870+ node->name, (u64) bh->b_rsector);
13871+
13872+ bh->b_end_io(bh, 0);
13873+}
13874+
13875+/********************************************************/
13876+/* Required Plugin Function Table Entry Point: */
13877+/* Read function & Support routines */
13878+/********************************************************/
13879+
13880+/**
13881+ * drivelink_read: handles IO read operations to drivelink objects
13882+ * @node: drivelink object
13883+ * @bh: buffer head targeting this object
13884+ *
13885+ * handles IO read operations to drivelink objects. internally remaps
13886+ * drivelink relative requests to child relative requests and then routes
13887+ * them to the child for further processing.
13888+**/
13889+static void
13890+drivelink_read(struct evms_logical_node *node, struct buffer_head *bh)
13891+{
13892+ struct evms_logical_node *child;
13893+ u64 io_size, rsector;
13894+
13895+ rsector = bh->b_rsector;
13896+ child = which_child(node, &rsector, &io_size);
13897+ if (child && ((bh->b_size >> EVMS_VSECTOR_SIZE_SHIFT) <= io_size)) {
13898+ bh->b_rsector = rsector;
13899+ R_IO(child, bh);
13900+ } else {
13901+ drivelink_io_error(node, READ, bh);
13902+ }
13903+}
13904+
13905+/********************************************************/
13906+/* Required Plugin Function Table Entry Point: */
13907+/* Write function & Support routines */
13908+/********************************************************/
13909+
13910+/**
13911+ * drivelink_write: handles IO write operations to drivelink objects
13912+ * @node: drivelink object
13913+ * @bh: buffer head targeting this object
13914+ *
13915+ * handles IO write operations to drivelink objects. internally remaps
13916+ * drivelink relative requests to child relative requests and then routes
13917+ * them to the child for further processing.
13918+**/
13919+static void
13920+drivelink_write(struct evms_logical_node *node, struct buffer_head *bh)
13921+{
13922+ struct evms_logical_node *child;
13923+ u64 io_size, rsector;
13924+
13925+ rsector = bh->b_rsector;
13926+ child = which_child(node, &rsector, &io_size);
13927+ if (child && ((bh->b_size >> EVMS_VSECTOR_SIZE_SHIFT) <= io_size)) {
13928+ bh->b_rsector = rsector;
13929+ W_IO(child, bh);
13930+ } else {
13931+ drivelink_io_error(node, WRITE, bh);
13932+ }
13933+}
13934+
13935+/********************************************************/
13936+/* Required Plugin Function Table Entry Point: */
13937+/* Init I/O function */
13938+/********************************************************/
13939+
13940+/**
13941+ * drivelink_init_io: performs synchronous IO to drivelink objects
13942+ * @node: drivelink object
13943+ * @io_flag: read/write flag
13944+ * @sect_nr: starting sector, object relative (512 byte units)
13945+ * @num_sects: count of sectors
13946+ * @buf_addr: buffer address to read from/write to
13947+ *
13948+ * This function must determine which child or children a
13949+ * specified I/O request must be passed to, as well as if,
13950+ * when, and how a request must be broken up.
13951+ *
13952+ * Return value: 0 on success
13953+ * otherwise error code
13954+**/
13955+static int
13956+drivelink_init_io(struct evms_logical_node *node, int io_flag,
13957+ u64 sect_nr,
13958+ u64 num_sects,
13959+ void *buf_addr)
13960+{
13961+ int rc = 0;
13962+
13963+ if (!node)
13964+ rc = -EINVAL;
13965+ else {
13966+ u64 starting_sector, remaining_sectors;
13967+ void *io_buf;
13968+ struct runtime_data *rd;
13969+
13970+ if ((sect_nr + num_sects) > node->total_vsectors) {
13971+ LOG_SERIOUS
13972+ ("attempted out of bound("PFU64") %s on '%s' at sector("PFU64"), count("PFU64").\n",
13973+ node->total_vsectors, (io_flag) ? "WRITE" : "READ",
13974+ node->name, sect_nr, num_sects);
13975+ rc = -EINVAL;
13976+ } else {
13977+ rd = (struct runtime_data *) node->private;
13978+ /* make working copies of input parameters */
13979+ starting_sector = sect_nr;
13980+ remaining_sectors = num_sects;
13981+ io_buf = buf_addr;
13982+ /* loop until all I/O is performed */
13983+ while (remaining_sectors) {
13984+ u64 io_start, io_size;
13985+ struct evms_logical_node *child;
13986+
13987+ /* compute the child relative io_start
13988+ * and max io_size.
13989+ */
13990+ io_start = starting_sector;
13991+ child = which_child(node, &io_start, &io_size);
13992+ /* adjust io_size based on
13993+ * original remaining sectors
13994+ * in this io.
13995+ */
13996+ if (io_size > remaining_sectors)
13997+ io_size = remaining_sectors;
13998+ if (child) {
13999+ rc = INIT_IO(child,
14000+ io_flag,
14001+ io_start, io_size, io_buf);
14002+ } else {
14003+ /* if partial volume, return 0's
14004+ * for missing children.
14005+ */
14006+ if (io_flag == READ) {
14007+ memset(io_buf, 0,
14008+ io_size <<
14009+ EVMS_VSECTOR_SIZE_SHIFT);
14010+ }
14011+ }
14012+ if (!rc) {
14013+ /* adjust working copies */
14014+ starting_sector += io_size;
14015+ remaining_sectors -= io_size;
14016+ io_buf += io_size <<
14017+ EVMS_VSECTOR_SIZE_SHIFT;
14018+ } else
14019+ break;
14020+ }
14021+ }
14022+ }
14023+
14024+ return (rc);
14025+}
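+
+/* Worked example (editorial, same assumed 100/200/50 layout): an INIT_IO
+ * with sect_nr = 90 and num_sects = 20 is split by the loop above into two
+ * child I/Os: 10 sectors at offset 90 on child 0, then 10 sectors at
+ * offset 0 on child 1, with io_buf advanced 10 << EVMS_VSECTOR_SIZE_SHIFT
+ * bytes in between. A READ over a missing child is satisfied with zeroes.
+ */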
14026+
14027+/********************************************************/
14028+/* Required Plugin Function Table Entry Point: */
14029+/* IOCTL function & Support routines */
14030+/********************************************************/
14031+
14032+/**
14033+ * drivelink_ioctl_cmd_plugin_ioctl: drivelink support for the 'plugin ioctl' command
14034+ * @node: drivelink object
14035+ * @inode: VFS supplied parameter
14036+ * @file: VFS supplied parameter
14037+ * @cmd: the specific ioctl command
14038+ * @arg: the specific ioctl arguments
14039+ *
14040+ * this function handles 'plugin ioctl' commands. Currently there are no
14041+ * commands specific to this plugin; however, it must broadcast some commands
14042+ * so lower layers can receive them.
14043+ *
14044+ * Return value: 0 on success
14045+ * otherwise error code
14046+**/
14047+static int
14048+drivelink_ioctl_cmd_plugin_ioctl(struct evms_logical_node *node,
14049+ struct inode *inode, struct file *file,
14050+ unsigned long cmd, unsigned long arg)
14051+{
14052+ int i, rc = 0;
14053+ struct runtime_data *rd;
14054+ struct evms_plugin_ioctl_pkt tmp, *user_parms;
14055+
14056+ user_parms = (struct evms_plugin_ioctl_pkt *) arg;
14057+ /* copy user's parameters to kernel space */
14058+ if (copy_from_user(&tmp, user_parms, sizeof (tmp)))
14059+ rc = -EFAULT;
14060+
14061+ if (!rc) {
14062+ rd = (struct runtime_data *) node->private;
14063+		/* is this cmd targeted at this feature? */
14064+ if (tmp.feature_id == node->plugin->id) {
14065+ switch (tmp.feature_command) {
14066+ default:
14067+ break;
14068+ }
14069+ } else { /* broadcast this cmd to all children */
14070+ for (i = 0; i < rd->child_count; i++) {
14071+ struct evms_logical_node *child_node;
14072+
14073+ child_node = rd->child_table[i].child_node;
14074+ if (child_node) {
14075+ rc = IOCTL(child_node, inode, file,
14076+ cmd, arg);
14077+ if (rc)
14078+ break;
14079+ }
14080+ }
14081+ }
14082+ /* copy info to userspace */
14083+ if (copy_to_user(user_parms, &tmp, sizeof (tmp)))
14084+ rc = -EFAULT;
14085+ }
14086+ return (rc);
14087+}
14088+
14089+/**
14090+ * drivelink_ioctl_cmd_broadcast: broadcast ioctls to this object's children
14091+ * @node: drivelink object
14092+ * @inode: VFS supplied parameter
14093+ * @file: VFS supplied parameter
14094+ * @cmd: the specific ioctl command
14095+ * @arg: the specific ioctl arguments
14096+ *
14097+ * broadcast the specified ioctl command and arguments to all of this object's
14098+ * children. OR (logical operation) the return values from all the children
14099+ * and return the OR'd value to the caller.
14100+ *
14101+ * Return value: 0 on success
14102+ * otherwise error code
14103+**/
14104+static int
14105+drivelink_ioctl_cmd_broadcast(struct evms_logical_node *node,
14106+ struct inode *inode, struct file *file,
14107+ unsigned long cmd, unsigned long arg)
14108+{
14109+ int i, rc = 0;
14110+ struct runtime_data *rd;
14111+
14112+ rd = (struct runtime_data *) node->private;
14113+ /* broadcast this cmd to all children */
14114+ for (i = 0; i < rd->child_count; i++) {
14115+ struct evms_logical_node *child_node;
14116+
14117+ child_node = rd->child_table[i].child_node;
14118+ if (child_node) {
14119+ rc |= IOCTL(child_node, inode, file, cmd, arg);
14120+ }
14121+ }
14122+ return (rc);
14123+}
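+
+/* Editorial note on the design: because the child return codes are OR'd
+ * together, one failing child makes the whole broadcast report failure,
+ * yet the command is still delivered to every present child instead of
+ * stopping at the first error (compare the early-break loop in
+ * drivelink_ioctl_cmd_plugin_ioctl above).
+ */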
14124+
14125+/**
14126+ * drivelink_ioctl: main ioctl entry point and handler
14127+ * @node: drivelink object
14128+ * @inode: VFS supplied parameter
14129+ * @file: VFS supplied parameter
14130+ * @cmd: a specific ioctl command
14131+ * @arg: a specific ioctl argument
14132+ *
14133+ * handles some ioctl commands internally and routes other ioctl commands to
14134+ * the appropriate entry points.
14135+ *
14136+ * Returns: 0 on success
14137+ * otherwise error code
14138+ **/
14139+static int
14140+drivelink_ioctl(struct evms_logical_node *node,
14141+ struct inode *inode,
14142+ struct file *file, unsigned int cmd, unsigned long arg)
14143+{
14144+ int rc = 0;
14145+ struct runtime_data *rd = NULL;
14146+ struct hd_geometry hdgeo;
14147+
14148+ if ((!node) || (!inode))
14149+ rc = -EINVAL;
14150+
14151+ if (!rc) {
14152+ rd = (struct runtime_data *) node->private;
14153+ switch (cmd) {
14154+ case HDIO_GETGEO:
14155+ hdgeo.heads = 255;
14156+ hdgeo.sectors = 63;
14157+ hdgeo.cylinders =
14158+ ((unsigned int) node->total_vsectors) /
14159+ hdgeo.heads / hdgeo.sectors;
14160+ hdgeo.start = 0;
14161+ if (copy_to_user((int *) arg, &hdgeo, sizeof (hdgeo)))
14162+ rc = -EFAULT;
14163+ break;
14164+ case EVMS_QUIESCE_VOLUME:
14165+ case EVMS_GET_DISK_LIST:
14166+ case EVMS_CHECK_MEDIA_CHANGE:
14167+ case EVMS_REVALIDATE_DISK:
14168+ case EVMS_OPEN_VOLUME:
14169+ case EVMS_CLOSE_VOLUME:
14170+ case EVMS_CHECK_DEVICE_STATUS:
14171+ rc = drivelink_ioctl_cmd_broadcast(node, inode, file,
14172+ cmd, arg);
14173+ break;
14174+ case EVMS_PLUGIN_IOCTL:
14175+ rc = drivelink_ioctl_cmd_plugin_ioctl(node, inode, file,
14176+ cmd, arg);
14177+ break;
14178+ case EVMS_GET_BMAP:
14179+ {
14180+ struct evms_get_bmap_pkt *bmap;
14181+ u64 io_start, io_size;
14182+ struct evms_logical_node *child;
14183+
14184+ bmap = (struct evms_get_bmap_pkt *) arg;
14185+ io_start = bmap->rsector;
14186+ child = which_child(node, &io_start, &io_size);
14187+ if (child) {
14188+ if (node->block_size !=
14189+ child->block_size) {
14190+ bmap->status = -EPERM;
14191+ } else {
14192+ bmap->rsector = io_start;
14193+ rc = IOCTL(child,
14194+ inode,
14195+ file, cmd, arg);
14196+ }
14197+ }
14198+ }
14199+ break;
14200+ default:
14201+ rc = -EINVAL;
14202+ break;
14203+ }
14204+ }
14205+ return (rc);
14206+}
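+
+/* Worked example for the HDIO_GETGEO case above (editorial, assumed size):
+ * an object of 1,000,000 total_vsectors reports the fixed 255-head,
+ * 63-sector geometry, so cylinders = 1000000 / 255 / 63 = 62 with integer
+ * division; start is always 0 because the object is presented whole.
+ */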
14207+
14208+/********************************************************/
14209+/* Required Module Entry Point: */
14210+/* drivelink_init */
14211+/********************************************************/
14212+
14213+/**
14214+ * drivelink_init: register this module for use within the EVMS framework
14215+ *
14216+ * Return value: 0 on success
14217+ * otherwise error code.
14218+**/
14219+int __init
14220+drivelink_init(void)
14221+{
14222+ return evms_cs_register_plugin(&plugin_header);
14223+}
14224+
14225+/**
14226+ * drivelink_exit: unregister this module from use within the EVMS framework
14227+ *
14228+ * Return value: 0 on success
14229+ * otherwise error code.
14230+**/
14231+void __exit
14232+drivelink_exit(void)
14233+{
14234+ evms_cs_unregister_plugin(&plugin_header);
14235+}
14236+
14237+module_init(drivelink_init);
14238+module_exit(drivelink_exit);
14239+#ifdef MODULE_LICENSE
14240+MODULE_LICENSE("GPL");
14241+#endif
14242diff -Naur linux-2002-09-30/drivers/evms/evms_ecr.c evms-2002-09-30/drivers/evms/evms_ecr.c
14243--- linux-2002-09-30/drivers/evms/evms_ecr.c Wed Dec 31 18:00:00 1969
14244+++ evms-2002-09-30/drivers/evms/evms_ecr.c Fri Aug 16 16:19:56 2002
14245@@ -0,0 +1,213 @@
14246+/* -*- linux-c -*- */
14247+/*
14248+ *
14249+ * Copyright (c) International Business Machines Corp., 2000
14250+ *
14251+ * This program is free software; you can redistribute it and/or modify
14252+ * it under the terms of the GNU General Public License as published by
14253+ * the Free Software Foundation; either version 2 of the License, or
14254+ * (at your option) any later version.
14255+ *
14256+ * This program is distributed in the hope that it will be useful,
14257+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
14258+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
14259+ * the GNU General Public License for more details.
14260+ *
14261+ * You should have received a copy of the GNU General Public License
14262+ * along with this program; if not, write to the Free Software
14263+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
14264+ */
14265+
14266+/* linux/drivers/evms/evms_ecr.c
14267+ *
14268+ * EVMS - Cluster enablement (ECR) module
14269+ *
14270+ */
14271+
14272+
14273+#include <linux/kernel.h>
14274+#include <linux/module.h>
14275+#include <linux/init.h>
14276+#include <linux/types.h>
14277+#include <linux/evms/evms.h>
14278+#include <linux/evms/evms_ecr.h>
14279+
14280+#define LOG_PREFIX "ecr: "
14281+
14282+
14283+/*
14284+ * ecr_group_join
14285+ */
14286+ecr_group_t ecr_group_join(char *group_name, ecr_table_t *f_table,
14287+ ecr_cred_t * cred, size_t size, ecr_instance_t *instance)
14288+{
14289+ /* dummy */
14290+ return ECR_FAIL;
14291+}
14292+
14293+
14294+
14295+
14296+/*
14297+ * ecr_group_leave
14298+ */
14299+void ecr_group_leave(ecr_group_t group)
14300+{
14301+ /* dummy */
14302+ return;
14303+}
14304+
14305+
14306+
14307+/*
14308+ * ecr_group_send
14309+ */
14310+int ecr_group_send(ecr_group_t group, ecr_nodeid_t node, void *message,
14311+ size_t size, ecr_instance_t *instance,
14312+ void callback(int ret, ecr_instance_t *instance))
14313+{
14314+ /* dummy */
14315+ return ECR_FAIL;
14316+}
14317+
14318+
14319+
14320+/*
14321+ * ecr_group_send_wait
14322+ */
14323+int ecr_group_send_wait(ecr_group_t group, ecr_nodeid_t node, void *message,
14324+ size_t size, int *ret)
14325+{
14326+ /* dummy */
14327+ *ret = ECR_FAIL;
14328+ return ECR_FAIL;
14329+}
14330+
14331+
14332+
14333+/*
14334+ * ecr_group_broadcast
14335+ */
14336+int ecr_group_broadcast(ecr_group_t group, void *message, size_t size,
14337+ ecr_instance_t *instance,
14338+ void callback(u_char ret, ecr_instance_t *instance))
14339+{
14340+ /* dummy */
14341+ return ECR_FAIL;
14342+}
14343+
14344+
14345+
14346+/*
14347+ * ecr_group_broadcast_wait
14348+ */
14349+int ecr_group_broadcast_wait(ecr_group_t group, void *message, size_t size,
14350+ u_char *ret)
14351+{
14352+ /* dummy */
14353+ *ret = ECR_FAIL;
14354+ return ECR_FAIL;
14355+}
14356+
14357+
14358+
14359+/*
14360+ * ecr_group_atomic_execute
14361+ */
14362+int ecr_group_atomic_execute(ecr_group_t group, void *message, size_t size,
14363+ ecr_instance_t *instance,
14364+ void callback(ecr_instance_t *instance))
14365+{
14366+ /* dummy */
14367+ return ECR_FAIL;
14368+}
14369+
14370+
14371+
14372+/*
14373+ * ecr_group_atomic_execute_wait
14374+ */
14375+int ecr_group_atomic_execute_wait(ecr_group_t group, void *message, size_t size)
14376+{
14377+ /* dummy */
14378+ return ECR_FAIL;
14379+}
14380+
14381+
14382+
14383+/*
14384+ * ecr_group_success_response
14385+ */
14386+void ecr_group_success_response(ecr_message_t *handle)
14387+{
14388+ /* dummy */
14389+ return;
14390+}
14391+
14392+
14393+
14394+
14395+/*
14396+ * ecr_group_failure_response
14397+ */
14398+void ecr_group_failure_response(ecr_message_t *handle, int ret)
14399+{
14400+ /* dummy */
14401+ return;
14402+}
14403+
14404+
14405+
14406+/*
14407+ * ecr_lock_create
14408+ */
14409+ecr_lock_t ecr_lock_create(char *lockname)
14410+{
14411+ /* dummy */
14412+ return ECR_FAIL;
14413+}
14414+
14415+/*
14416+ * ecr_lock
14417+ */
14418+int ecr_lock(ecr_lock_t lock, u64 start, u64 length,
14419+ ecr_lock_mode_t mode, u_char flag)
14420+{
14421+ /* dummy */
14422+ return ECR_FAIL;
14423+}
14424+
14425+
14426+
14427+/*
14428+ * ecr_unlock
14429+ */
14430+int ecr_unlock(ecr_lock_t lock, u64 start, u64 length)
14431+{
14432+ /* dummy */
14433+ return ECR_FAIL;
14434+}
14435+
14436+
14437+/********************************************************/
14438+/* Required Module Entry Point: */
14439+/* ecr_init() */
14440+/********************************************************/
14441+
14442+static int __init ecr_init(void)
14443+{
14444+ /* dummy */
14445+ return 0;
14446+}
14447+
14448+static void __exit ecr_exit(void)
14449+{
14450+ return;
14451+}
14452+
14453+module_init(ecr_init);
14454+module_exit(ecr_exit);
14455+#ifdef MODULE_LICENSE
14456+MODULE_LICENSE("GPL");
14457+#endif
14458+
14459diff -Naur linux-2002-09-30/drivers/evms/evms_passthru.c evms-2002-09-30/drivers/evms/evms_passthru.c
14460--- linux-2002-09-30/drivers/evms/evms_passthru.c Wed Dec 31 18:00:00 1969
14461+++ evms-2002-09-30/drivers/evms/evms_passthru.c Fri Sep 13 16:09:55 2002
14462@@ -0,0 +1,298 @@
14463+/* -*- linux-c -*- */
14464+
14465+/*
14466+ *
14467+ *
14468+ * Copyright (c) International Business Machines Corp., 2000
14469+ *
14470+ * This program is free software; you can redistribute it and/or modify
14471+ * it under the terms of the GNU General Public License as published by
14472+ * the Free Software Foundation; either version 2 of the License, or
14473+ * (at your option) any later version.
14474+ *
14475+ * This program is distributed in the hope that it will be useful,
14476+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
14477+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
14478+ * the GNU General Public License for more details.
14479+ *
14480+ * You should have received a copy of the GNU General Public License
14481+ * along with this program; if not, write to the Free Software
14482+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
14483+ *
14484+ *
14485+ */
14486+/*
14487+ * linux/drivers/evms/evms_passthru.c
14488+ *
14489+ * EVMS System Data Manager
14490+ *
14491+ *
14492+ */
14493+
14494+#include <linux/module.h>
14495+#include <linux/kernel.h>
14496+#include <linux/config.h>
14497+#include <linux/genhd.h>
14498+#include <linux/string.h>
14499+#include <linux/blk.h>
14500+#include <linux/init.h>
14501+#include <linux/slab.h>
14502+#include <linux/evms/evms.h>
14503+#include <asm/system.h>
14504+
14505+#define EVMS_PASSTHRU_ID 0
14506+#define LOG_PREFIX "passthru: "
14507+
14508+static int passthru_mgr_discover(struct evms_logical_node **);
14509+static int passthru_mgr_delete(struct evms_logical_node *);
14510+static void passthru_mgr_read(struct evms_logical_node *, struct buffer_head *);
14511+static void passthru_mgr_write(struct evms_logical_node *, struct buffer_head *);
14512+static int passthru_mgr_ioctl(struct evms_logical_node *,
14513+ struct inode *,
14514+ struct file *, unsigned int, unsigned long);
14515+static int passthru_mgr_init_io(struct evms_logical_node *,
14516+ int, u64, u64, void *);
14517+
14518+static struct evms_plugin_fops fops = {
14519+ .discover = passthru_mgr_discover,
14520+ .delete = passthru_mgr_delete,
14521+ .read = passthru_mgr_read,
14522+ .write = passthru_mgr_write,
14523+ .init_io = passthru_mgr_init_io,
14524+ .ioctl = passthru_mgr_ioctl
14525+};
14526+
14527+static struct evms_plugin_header plugin_header = {
14528+ .id = SetPluginID(IBM_OEM_ID,
14529+ EVMS_FEATURE,
14530+ EVMS_PASSTHRU_ID),
14531+ .version = {
14532+ .major = 1,
14533+ .minor = 1,
14534+ .patchlevel = 1
14535+ },
14536+ .required_services_version = {
14537+ .major = 0,
14538+ .minor = 5,
14539+ .patchlevel = 0
14540+ },
14541+ .fops = &fops
14542+};
14543+
14544+/*******************************/
14545+/* discovery support functions */
14546+/*******************************/
14547+
14548+static int
14549+process_passthru_data(struct evms_logical_node **pp)
14550+{
14551+ int rc, size_in_sectors;
14552+ struct evms_logical_node *node, *new_node;
14553+
14554+ node = *pp;
14555+
14556+ size_in_sectors =
14557+ evms_cs_size_in_vsectors(sizeof (struct evms_feature_header));
14558+
14559+ /* allocate "parent" node */
14560+ rc = evms_cs_allocate_logical_node(&new_node);
14561+ if (!rc) {
14562+ /* initialize "parent" node */
14563+ new_node->private = node;
14564+ new_node->flags = node->flags;
14565+ new_node->plugin = &plugin_header;
14566+ new_node->system_id = node->system_id;
14567+ new_node->block_size = node->block_size;
14568+ new_node->hardsector_size = node->hardsector_size;
14569+ new_node->total_vsectors = node->total_vsectors;
14570+ new_node->total_vsectors -=
14571+ (size_in_sectors << 1) +
14572+ node->feature_header->alignment_padding;
14573+ new_node->volume_info = node->volume_info;
14574+ strcpy(new_node->name, node->name);
14575+ if (strlen(node->feature_header->object_name))
14576+ strcat(new_node->name,
14577+ node->feature_header->object_name);
14578+ else
14579+ strcat(new_node->name, "_Passthru");
14580+
14581+ /* return "parent" node to caller */
14582+ *pp = new_node;
14583+
14584+ MOD_INC_USE_COUNT;
14585+
14586+ LOG_DETAILS("feature header found on '%s', created '%s'.\n",
14587+ node->name, new_node->name);
14588+ /* we're done with the passthru feature headers
14589+ * so lets delete them now.
14590+ */
14591+ kfree(node->feature_header);
14592+ node->feature_header = NULL;
14593+ } else {
14594+ /* on any fatal error, delete the node */
14595+ int rc2 = DELETE(node);
14596+ if (rc2) {
14597+ LOG_DEFAULT
14598+ ("error(%d) attempting to delete node(%p,%s).\n",
14599+ rc2, node, node->name);
14600+ }
14601+ }
14602+ return (rc);
14603+}
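+
+/* Worked example (editorial, assumed values): if sizeof(struct
+ * evms_feature_header) rounds up to 1 vsector and alignment_padding is 62,
+ * a 10,000-vsector child yields a passthru node of
+ * 10000 - (1 << 1) - 62 = 9936 usable vsectors; the trimmed space holds
+ * the two redundant feature-header copies plus the padding.
+ */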
14604+
14605+/********** Required Plugin Functions **********/
14606+
14607+/*
14608+ * Function: passthru_mgr_discover
14609+ *
14610+ */
14611+static int
14612+passthru_mgr_discover(struct evms_logical_node **discover_list)
14613+{
14614+ int rc = 0;
14615+ struct evms_logical_node *node, *tmp_list_head;
14616+
14617+ MOD_INC_USE_COUNT;
14618+ tmp_list_head = *discover_list;
14619+ *discover_list = NULL;
14620+
14621+ while (tmp_list_head) {
14622+ node = tmp_list_head;
14623+ rc = evms_cs_remove_logical_node_from_list(&tmp_list_head,
14624+ node);
14625+ if (!rc)
14626+ rc = process_passthru_data(&node);
14627+ if (!rc)
14628+ if (node)
14629+ rc = evms_cs_add_logical_node_to_list
14630+ (discover_list, node);
14631+ }
14632+ MOD_DEC_USE_COUNT;
14633+ return (rc);
14634+}
14635+
14636+/*
14637+ * Function: passthru_mgr_delete
14638+ *
14639+ */
14640+static int
14641+passthru_mgr_delete(struct evms_logical_node *node)
14642+{
14643+ int rc;
14644+ struct evms_logical_node *p;
14645+
14646+ LOG_DETAILS("deleting '%s'.\n", node->name);
14647+
14648+ p = node->private;
14649+ rc = DELETE(p);
14650+ if (!rc) {
14651+ evms_cs_deallocate_logical_node(node);
14652+ MOD_DEC_USE_COUNT;
14653+ }
14654+ return (rc);
14655+}
14656+
14657+/*
14658+ * function: passthru_io_error
14659+ *
14660+ * this function was primarily created because the function
14661+ * buffer_IO_error is inline and kgdb doesn't allow breakpoints
14662+ * to be set on inline functions. Since this was an error path
14663+ * and not mainline, I decided to add a trace statement to help
14664+ * report on the failing condition.
14665+ *
14666+ */
14667+static void
14668+passthru_io_error(struct evms_logical_node *node, int io_flag, struct buffer_head *bh)
14669+{
14670+ LOG_SERIOUS
14671+ ("attempt to %s beyond boundary("PFU64") on (%s), rsector("PFU64").\n",
14672+ (io_flag) ? "WRITE" : "READ", node->total_vsectors - 1,
14673+ node->name, (u64) bh->b_rsector);
14674+
14675+ bh->b_end_io(bh, 0);
14676+}
14677+
14678+/*
14679+ * Function: passthru_mgr_read
14680+ */
14681+static void
14682+passthru_mgr_read(struct evms_logical_node *node, struct buffer_head *bh)
14683+{
14684+ if ((bh->b_rsector + (bh->b_size >> EVMS_VSECTOR_SIZE_SHIFT)) <=
14685+ node->total_vsectors) {
14686+ R_IO(((struct evms_logical_node *) (node->private)), bh);
14687+ } else
14688+ passthru_io_error(node, READ, bh);
14689+}
14690+
14691+/*
14692+ * Function: passthru_mgr_write
14693+ *
14694+ */
14695+static void
14696+passthru_mgr_write(struct evms_logical_node *node, struct buffer_head *bh)
14697+{
14698+ if ((bh->b_rsector + (bh->b_size >> EVMS_VSECTOR_SIZE_SHIFT)) <=
14699+ node->total_vsectors) {
14700+ W_IO(((struct evms_logical_node *) (node->private)), bh);
14701+ } else
14702+ passthru_io_error(node, WRITE, bh);
14703+}
14704+
14705+/*
14706+ * Function: passthru_mgr_ioctl
14707+ *
14708+ */
14709+static int
14710+passthru_mgr_ioctl(struct evms_logical_node *node,
14711+ struct inode *inode,
14712+ struct file *file, unsigned int cmd, unsigned long arg)
14713+{
14714+ int rc;
14715+
14716+ if ((!node) || (!inode))
14717+ rc = -EINVAL;
14718+ else
14719+ rc = IOCTL(((struct evms_logical_node *) (node->private)),
14720+ inode, file, cmd, arg);
14721+ return (rc);
14722+}
14723+
14724+static int
14725+passthru_mgr_init_io(struct evms_logical_node *node, int io_flag, /* 0=read, 1=write */
14726+ u64 sect_nr, /* disk LBA */
14727+ u64 num_sects, /* # of sectors */
14728+ void *buf_addr)
14729+{ /* buffer address */
14730+ int rc;
14731+ if ((sect_nr + num_sects) <= node->total_vsectors) {
14732+ rc = INIT_IO(((struct evms_logical_node *) (node->
14733+ private)),
14734+ io_flag, sect_nr, num_sects, buf_addr);
14735+ } else
14736+ rc = -EINVAL;
14737+ return (rc);
14738+}
14739+
14740+/*
14741+ * Function: passthru_init
14742+ *
14743+ */
14744+int __init
14745+evms_passthru_manager_init(void)
14746+{
14747+ return evms_cs_register_plugin(&plugin_header); /* register with EVMS */
14748+}
14749+
14750+void __exit
14751+evms_passthru_manager_exit(void)
14752+{
14753+ evms_cs_unregister_plugin(&plugin_header);
14754+}
14755+
14756+module_init(evms_passthru_manager_init);
14757+module_exit(evms_passthru_manager_exit);
14758+#ifdef MODULE_LICENSE
14759+MODULE_LICENSE("GPL");
14760+#endif
14761diff -Naur linux-2002-09-30/drivers/evms/gpt_part.c evms-2002-09-30/drivers/evms/gpt_part.c
14762--- linux-2002-09-30/drivers/evms/gpt_part.c Wed Dec 31 18:00:00 1969
14763+++ evms-2002-09-30/drivers/evms/gpt_part.c Fri Sep 13 16:09:55 2002
14764@@ -0,0 +1,1018 @@
14765+/* -*- linux-c -*- */
14766+/*
14767+ *
14768+ *
14769+ * Copyright (c) International Business Machines Corp., 2000
14770+ *
14771+ * This program is free software; you can redistribute it and/or modify
14772+ * it under the terms of the GNU General Public License as published by
14773+ * the Free Software Foundation; either version 2 of the License, or
14774+ * (at your option) any later version.
14775+ *
14776+ * This program is distributed in the hope that it will be useful,
14777+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
14778+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
14779+ * the GNU General Public License for more details.
14780+ *
14781+ * You should have received a copy of the GNU General Public License
14782+ * along with this program; if not, write to the Free Software
14783+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
14784+ *
14785+ *
14786+ */
14787+
14788+/* linux/drivers/evms/gpt_part.c
14789+ *
14790+ * EVMS - EFI GPT segment manager plugin
14791+ *
14792+ * This plugin provides support for the GUID Partition Table format specified
14793+ * by the Extensible Firmware Interface documentation ... version 1.02
14794+ */
14795+
14796+#include <linux/config.h>
14797+#include <linux/module.h>
14798+#include <linux/kernel.h>
14800+#include <linux/string.h>
14801+#include <linux/blk.h>
14802+#include <asm/uaccess.h>
14803+#include <linux/evms/evms.h>
14804+
14805+/* prefix used in logging messages */
14806+#define LOG_PREFIX "gpt_part: "
14807+
14808+/**
14809+ * struct gpt_private - Private data structure for this plugin
14810+ * @source_disk:	object this IO will get remapped to
14811+ * @start_sect:	source object relative starting address in 512-byte units
14812+ * @nr_sects:	partition size in 512-byte units
14813+ * @type:	partition type or filesystem format indicator
14814+ *
14815+ * private copy of just the fields we require to remap IO requests
14816+ * to the underlying object.
14817+ **/
14818+struct gpt_private {
14819+ struct evms_logical_node *source_disk;
14820+ u64 start_sect;
14821+ u64 nr_sects;
14822+ unsigned char type;
14823+};
14824+
14825+#define GPT_DISKMAGIC 0x5452415020494645ULL	// "EFI PART"
14826+#define GPT_PNAME_SIZE 36 // max unicode partition name size
14827+
14828+/**
14829+ * struct guid - GUID structure
14830+ * @time_low: timestamp - low order 32 bits
14831+ * @time_mid: timestamp - mid 16 bits
14832+ * @time_high: timestamp - high 16 bits
14833+ * @clock_seq_high: clock - high order 8 bits
14834+ * @clock_seq_low: clock - low order 8 bits
14835+ * @node:	spatial reference - unique id (e.g. the MAC address of a NIC)
14836+ *
14837+ * GUID structure
14838+ **/
14839+struct guid {
14840+ u32 time_low;
14841+ u16 time_mid;
14842+ u16 time_high;
14843+ u8 clock_seq_high;
14844+ u8 clock_seq_low;
14845+ u8 node[6];
14846+};
14847+
14848+/**
14849+ * struct gpt_partition - GPT partition record definition
14850+ * @type: partition type
14851+ * @part_id: partition record id
14852+ * @start: address of 1st block of partition
14853+ * @end: address of last block of partition
14854+ * @attributes: bit field reserved by EFI spec
14855+ * @name: unicode name of partition
14856+ *
14857+ * GPT partition record definition
14858+ **/
14859+struct gpt_partition {
14860+ struct guid type;
14861+ struct guid part_id;
14862+ u64 start;
14863+ u64 end;
14864+ u64 attributes;
14865+ u16 name[GPT_PNAME_SIZE];
14866+};
14867+
14868+/**
14869+ * struct gpt_header - GPT header
14870+ * @signature: EFI compatible header signature
14871+ * @version: spec revision number
14872+ * @size: size (bytes) of gpt header
14873+ * @crc: crc of gpt header
14874+ * @reserve: reserved by spec ... must be zero
14875+ * @my_lba: lba of gpt header
14876+ * @alternate_lba: lba of 2nd copy of gpt header
14877+ * @start_useable: lba of 1st block of useable area on disk
14878+ * @end_useable: lba of last block of useable area on disk
14879+ * @disk_id: GUID - identifies this disk
14880+ * @ptable_lba: lba of partition table
14881+ * @ptable_count: number of entries in the partition table
14882+ * @ptable_entry_size: size of partition table entry
14883+ * @ptable_crc: crc of partition table
14884+ *
14885+ * GPT header
14886+ **/
14887+struct gpt_header {
14888+ u64 signature;
14889+ u32 version;
14890+ u32 size;
14891+ u32 crc;
14892+ u32 reserve;
14893+ u64 my_lba;
14894+ u64 alternate_lba;
14895+ u64 start_useable;
14896+ u64 end_useable;
14897+ struct guid disk_id;
14898+ u64 ptable_lba;
14899+ u32 ptable_count;
14900+ u32 ptable_entry_size;
14901+ u32 ptable_crc;
14902+};
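
/*
 * Illustrative sketch (not from the original source): the three ptable_*
 * fields above are what get_gpt_partition_table() uses to size its read.
 * The helper below assumes 512-byte vsectors; the plugin itself delegates
 * this rounding to evms_cs_size_in_vsectors().
 */
static inline u64
gpt_ptable_vsectors(struct gpt_header *gh)
{
	/* total table size in bytes, e.g. 128 entries * 128 bytes = 16 KB */
	u64 bytes = (u64) gh->ptable_count * gh->ptable_entry_size;

	/* round up to whole 512-byte vsectors */
	return (bytes + 511) >> 9;
}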
14903+
14904+struct guid EFI_SYSTEM_PARTITION = {
14905+ 0xC12A7328,
14906+ 0xF81F,
14907+ 0x11D2,
14908+ 0xBA,
14909+ 0x4B,
14910+ {0x00, 0xA0, 0xC9, 0x3E, 0xC9, 0x3B}
14911+};
14912+
14913+struct guid BASIC_DATA_PARTITION = {
14914+ 0xEBD0A0A2,
14915+ 0xB9E5,
14916+ 0x4433,
14917+ 0x87,
14918+ 0xC0,
14919+ {0x68, 0xB6, 0xB7, 0x26, 0x99, 0xC7}
14920+};
14921+
14922+struct guid LEGACY_MBR_PARTITION = {
14923+ 0x024DEE41,
14924+ 0x33E7,
14925+ 0x11D3,
14926+ 0x9D,
14927+ 0x69,
14928+ {0x00, 0x08, 0xC7, 0x81, 0xF3, 0x9F}
14929+};
14930+
14931+struct guid GPT_SWAP_PARTITION = {
14932+ 0x0657FD6D,
14933+ 0xA4AB,
14934+ 0x43C4,
14935+ 0x84,
14936+ 0xE5,
14937+ {0x09, 0x33, 0xC8, 0x4B, 0x4F, 0x4F}
14938+};
14939+
14940+struct guid UNUSED_GPT_PARTITION = {
14941+ 0, 0, 0, 0, 0,
14942+ {0x00, 0x00, 0x00, 0x00, 0x00, 0x00}
14943+};
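
/*
 * How these initializers relate to the usual GUID text form: EFI stores
 * time_low, time_mid, and time_high little-endian on disk, while the
 * clock_seq and node fields are plain bytes. The static tables above are
 * kept in CPU byte order (the on-disk values are byte-swapped at compare
 * time, see matching_guids() below), so EFI_SYSTEM_PARTITION corresponds
 * to the canonical string:
 *
 *	C12A7328-F81F-11D2-BA4B-00A0C93EC93B
 *	\______/ \__/ \__/ \__/ \__________/
 *	time_low  mid high  seq  node[0..5]
 */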
14944+
14945+static int exported_nodes; /* total # of exported segments
14946+ * produced during this discovery.
14947+ */
14948+
14949+/* Prototypes */
14950+static int partition_discover(struct evms_logical_node **);
14951+static int partition_delete(struct evms_logical_node *);
14952+static void partition_read(struct evms_logical_node *, struct buffer_head *);
14953+static void partition_write(struct evms_logical_node *, struct buffer_head *);
14954+static int partition_ioctl(struct evms_logical_node *,
14955+ struct inode *,
14956+ struct file *, unsigned int, unsigned long);
14957+static int partition_init_io(struct evms_logical_node *,
14958+ int, u64, u64, void *);
14959+
14960+static struct evms_plugin_fops fops = {
14961+ .discover = partition_discover,
14962+ .delete = partition_delete,
14963+ .read = partition_read,
14964+ .write = partition_write,
14965+ .init_io = partition_init_io,
14966+ .ioctl = partition_ioctl
14967+};
14968+
14969+#define EVMS_GPT_PARTITION_MANAGER_ID 3
14970+
14971+static struct evms_plugin_header plugin_header = {
14972+ .id = SetPluginID(IBM_OEM_ID,
14973+ EVMS_SEGMENT_MANAGER,
14974+ EVMS_GPT_PARTITION_MANAGER_ID),
14975+ .version = {
14976+ .major = 1,
14977+ .minor = 1,
14978+ .patchlevel = 1
14979+ },
14980+ .required_services_version = {
14981+ .major = 0,
14982+ .minor = 5,
14983+ .patchlevel = 0
14984+ },
14985+ .fops = &fops
14986+};
14987+
14988+/***************************************************/
14989+/* List Support - Typedefs, Variables, & Functions */
14990+/***************************************************/
14991+
14992+/* Typedefs */
14993+
14994+struct segment_list_node {
14995+ struct evms_logical_node *segment;
14996+ struct segment_list_node *next;
14997+};
14998+
14999+struct disk_list_node {
15000+ struct evms_logical_node *disk;
15001+ struct segment_list_node *segment_list;
15002+ struct disk_list_node *next;
15003+};
15004+
15005+/* Variables */
15006+
15007+static struct disk_list_node *my_disk_list;
15008+
15009+/* Functions */
15010+
15011+/*
15012+ * Function: Convert a GPT header from disk format to the arch specific
15013+ * format.
15014+ */
15015+static void
15016+disk_gpt_header_to_cpu(struct gpt_header *gh)
15017+{
15018+ gh->signature = le64_to_cpu(gh->signature);
15019+ gh->version = le32_to_cpu(gh->version);
15020+ gh->size = le32_to_cpu(gh->size);
15021+ gh->crc = le32_to_cpu(gh->crc);
15022+ gh->reserve = le32_to_cpu(gh->reserve);
15023+ gh->my_lba = le64_to_cpu(gh->my_lba);
15024+ gh->alternate_lba = le64_to_cpu(gh->alternate_lba);
15025+ gh->start_useable = le64_to_cpu(gh->start_useable);
15026+ gh->end_useable = le64_to_cpu(gh->end_useable);
15027+ gh->disk_id.time_low = le32_to_cpu(gh->disk_id.time_low);
15028+ gh->disk_id.time_mid = le16_to_cpu(gh->disk_id.time_mid);
15029+ gh->disk_id.time_high = le16_to_cpu(gh->disk_id.time_high);
15030+ gh->ptable_lba = le64_to_cpu(gh->ptable_lba);
15031+ gh->ptable_count = le32_to_cpu(gh->ptable_count);
15032+ gh->ptable_entry_size = le32_to_cpu(gh->ptable_entry_size);
15033+ gh->ptable_crc = le32_to_cpu(gh->ptable_crc);
15034+}
15035+
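/*
 * matching_guids() compares a GUID as read from disk (g1, with
 * little-endian multi-byte fields) against one of the static CPU-order
 * GUIDs above (g2), which is why only g1's fields are byte-swapped.
 * The node[] bytes are not compared; the timestamp and clock fields are
 * sufficient to distinguish the partition type GUIDs defined above.
 */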
15036+static int
15037+matching_guids(struct guid *g1, struct guid *g2)
15038+{
15039+ if ((le32_to_cpu(g1->time_low) == g2->time_low) &&
15040+ (le16_to_cpu(g1->time_mid) == g2->time_mid) &&
15041+ (le16_to_cpu(g1->time_high) == g2->time_high) &&
15042+ (g1->clock_seq_high == g2->clock_seq_high) &&
15043+ (g1->clock_seq_low == g2->clock_seq_low)) {
15044+ return 1;
15045+ }
15046+ return 0;
15047+}
15048+static inline int
15049+isa_basic_data_gpt_partition_record(struct gpt_partition *p)
15050+{
15051+ return (matching_guids(&p->type, &BASIC_DATA_PARTITION));
15052+}
15053+static inline int
15054+isa_legacy_mbr_gpt_partition_record(struct gpt_partition *p)
15055+{
15056+ return (matching_guids(&p->type, &LEGACY_MBR_PARTITION));
15057+}
15058+static inline int
15059+isa_esp_gpt_partition_record(struct gpt_partition *p)
15060+{
15061+ return (matching_guids(&p->type, &EFI_SYSTEM_PARTITION));
15062+}
15063+static inline int
15064+isa_gpt_swap_partition_record(struct gpt_partition *p)
15065+{
15066+ return (matching_guids(&p->type, &GPT_SWAP_PARTITION));
15067+}
15068+static inline int
15069+isa_unused_gpt_partition_record(struct gpt_partition *p)
15070+{
15071+ return (matching_guids(&p->type, &UNUSED_GPT_PARTITION));
15072+}
15073+
15074+static struct disk_list_node **
15075+lookup_disk(struct evms_logical_node *disk)
15076+{
15077+ struct disk_list_node **ldln;
15078+
15079+ ldln = &my_disk_list;
15080+ while (*ldln) {
15081+ if ((*ldln)->disk == disk)
15082+ break;
15083+ ldln = &(*ldln)->next;
15084+ }
15085+ return (ldln);
15086+}
15087+
15088+static struct segment_list_node **
15089+lookup_segment(struct disk_list_node *disk, struct evms_logical_node *segment)
15090+{
15091+ struct segment_list_node **lsln;
15092+
15093+ lsln = &disk->segment_list;
15094+ while (*lsln) {
15095+ if ((*lsln)->segment == segment)
15096+ break;
15097+ lsln = &(*lsln)->next;
15098+ }
15099+ return (lsln);
15100+}
15101+
15102+static struct evms_logical_node *
15103+find_segment_on_disk(struct evms_logical_node *disk,
15104+ u64 start_sect, u64 nr_sects)
15105+{
15106+ struct evms_logical_node *rc = NULL;
15107+ struct disk_list_node **ldln;
15108+ struct segment_list_node **lsln;
15109+ struct gpt_private *gpt_prv;
15110+
15111+ ldln = lookup_disk(disk);
15112+ if (*ldln) {
15113+ /* disk found in list */
15114+ /* attempt to find segment */
15115+
15116+ lsln = &(*ldln)->segment_list;
15117+ while (*lsln) {
15118+ gpt_prv = (*lsln)->segment->private;
15119+ if (gpt_prv->start_sect == start_sect)
15120+ if (gpt_prv->nr_sects == nr_sects)
15121+ break;
15122+ lsln = &(*lsln)->next;
15123+ }
15124+ if (*lsln)
15125+ rc = (*lsln)->segment;
15126+ }
15127+ return (rc);
15128+}
15129+
15130+/* function description: add_segment_to_disk
15131+ *
15132+ * this function attempts to add a segment to the segment
15133+ * list of a disk. if the specified disk is not found, it
15134+ * will be added to the global disk list.
15135+ *
15136+ * return values:
15137+ *   0       the segment was added to the disk's segment
15138+ *           list (and the disk to the global list, if it
15139+ *           was not already there).
15140+ *   -1      an equivalent segment was already present in
15141+ *           the disk's segment list; the caller's copy is
15142+ *           a duplicate and can be thrown away.
15143+ *   -ENOMEM a disk or segment list node could not be
15144+ *           allocated.
15145+ */
15145+static int
15146+add_segment_to_disk(struct evms_logical_node *disk,
15147+ struct evms_logical_node *segment)
15148+{
15149+ int rc = 0;
15150+ struct disk_list_node **ldln, *new_disk;
15151+ struct segment_list_node **lsln, *new_segment;
15152+
15153+ ldln = lookup_disk(disk);
15154+ if (*ldln == NULL) {
15155+ /* disk not in list, add disk */
15156+ new_disk = kmalloc(sizeof (*new_disk), GFP_KERNEL);
15157+ if (new_disk) {
15158+ memset(new_disk, 0, sizeof (*new_disk));
15159+ new_disk->disk = disk;
15160+ *ldln = new_disk;
15161+ } else {
15162+ rc = -ENOMEM;
15163+ }
15164+ }
15165+ if (!rc) {
15166+ /* attempt to add segment */
15167+ lsln = lookup_segment(*ldln, segment);
15168+ if (*lsln == NULL) {
15169+ /* segment not in list, add segment */
15170+ new_segment =
15171+ kmalloc(sizeof (*new_segment), GFP_KERNEL);
15172+ if (new_segment) {
15173+ memset(new_segment, 0, sizeof (*new_segment));
15174+ new_segment->segment = segment;
15175+ *lsln = new_segment;
15176+ } else {
15177+ rc = -ENOMEM;
15178+ }
15179+ } else
15180+ rc = -1;
15181+ }
15182+ return (rc);
15183+}
15184+
15185+static int
15186+remove_segment_from_disk(struct evms_logical_node *disk,
15187+ struct evms_logical_node *segment,
15188+ struct evms_logical_node **empty_disk)
15189+{
15190+ int rc = 0;
15191+ struct disk_list_node **ldln, *tmp_disk_node;
15192+ struct segment_list_node **lsln, *tmp_segment_node;
15193+
15194+ *empty_disk = NULL;
15195+ ldln = lookup_disk(disk);
15196+ if (*ldln == NULL) {
15197+ rc = -1;
15198+ } else {
15199+ /* disk found in list */
15200+ /* attempt to add segment */
15201+ lsln = lookup_segment(*ldln, segment);
15202+ if (*lsln == NULL) {
15203+ rc = -2;
15204+ } else {
15205+ tmp_segment_node = *lsln;
15206+ /* remove segment from list */
15207+ *lsln = (*lsln)->next;
15208+ /* free the segment list node */
15209+ kfree(tmp_segment_node);
15210+
15211+ if ((*ldln)->segment_list == NULL) {
15212+ tmp_disk_node = *ldln;
15213+ *empty_disk = tmp_disk_node->disk;
15214+ /* remove disk from list */
15215+ *ldln = (*ldln)->next;
15216+ /* free the disk list node */
15217+ kfree(tmp_disk_node);
15218+ }
15219+ }
15220+ }
15221+ return (rc);
15222+}
15223+
15224+/*
15225+ * Function: add_segment
15226+ */
15227+static int
15228+process_segment(struct evms_logical_node **discover_list,
15229+ struct evms_logical_node *node,
15230+ u64 start_sect,
15231+ u64 nr_sects,
15232+ int type, int part_num, int evms_top_segment)
15233+{
15234+ struct gpt_private *gpt_prv = NULL;
15235+ struct evms_logical_node *segment;
15236+ int rc = 0;
15237+
15238+ segment = find_segment_on_disk(node, start_sect, nr_sects);
15239+ if (segment) {
15240+ LOG_DETAILS("exporting segment '%s'.\n", segment->name);
15241+ } else {
15242+ gpt_prv = kmalloc(sizeof (*gpt_prv), GFP_KERNEL);
15243+ if (gpt_prv) {
15244+ gpt_prv->source_disk = node;
15245+ gpt_prv->start_sect = start_sect;
15246+ gpt_prv->nr_sects = nr_sects;
15247+ gpt_prv->type = type;
15248+ rc = evms_cs_allocate_logical_node(&segment);
15249+ } else {
15250+ rc = -ENOMEM;
15251+ }
15252+ if (!rc) {
15253+ segment->plugin = &plugin_header;
15254+ segment->system_id = (unsigned int) type;
15255+ segment->total_vsectors = nr_sects;
15256+ segment->block_size = node->block_size;
15257+ segment->hardsector_size = node->hardsector_size;
15258+ segment->private = gpt_prv;
15259+ segment->flags = node->flags;
15260+ if (evms_top_segment)
15261+ segment->iflags |= EVMS_TOP_SEGMENT;
15262+ strcpy(segment->name, node->name);
15263+ if (GetPluginType(node->plugin->id) ==
15264+ EVMS_SEGMENT_MANAGER) {
15265+ strcat(segment->name, ".");
15266+ }
15267+ sprintf(segment->name + strlen(segment->name), "%d",
15268+ part_num);
15269+ LOG_DETAILS("creating segment '%s'.\n", segment->name);
15270+ rc = add_segment_to_disk(node, segment);
15271+ if (rc) {
15272+ LOG_ERROR
15273+ ("%s: error(%d) adding segment '%s'!\n",
15274+ __FUNCTION__, rc, segment->name);
15275+ rc = 0;
15276+ } else {
15277+ MOD_INC_USE_COUNT;
15278+ }
15279+ }
15280+ if (rc) {
15281+ if (gpt_prv)
15282+ kfree(gpt_prv);
15283+ if (segment)
15284+ evms_cs_deallocate_logical_node(segment);
15285+ }
15286+ }
15287+ if (!rc) {
15288+ evms_cs_add_logical_node_to_list(discover_list, segment);
15289+ exported_nodes++;
15290+ }
15291+ return rc;
15292+}
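
/*
 * Segment naming in process_segment() follows the parent node: a segment
 * created directly on a disk is named <disk><n> (e.g. "hda" -> "hda1"),
 * while a segment carved out of another segment manager's object gets a
 * dot separator (e.g. "hda1" -> "hda1.1"), since a "." is appended
 * whenever the parent node was itself produced by a segment manager.
 */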
15293+
15294+void
15295+print_mem(void *buffer, int length)
15296+{
15297+ int i, done;
15298+ unsigned char *bufptr;
15299+
15300+ bufptr = (unsigned char *) buffer;
15301+ i = done = 0;
15302+ while (!done) {
15303+ if ((i % 16) == 0)
15304+ printk(KERN_INFO "\n0x%p->", buffer + i);
15305+ printk(KERN_INFO "%02x ", bufptr[i]);
15306+ if (++i >= length)
15307+ done++;
15308+ }
15309+ printk(KERN_INFO "\n");
15310+}
15311+
15312+/*
15313+ * Function: get GPT Partition Table - reads partition table
15314+ * into memory and performs crc check.
15315+ *
15316+ */
15317+static struct gpt_partition *
15318+get_gpt_partition_table(struct evms_logical_node *node, struct gpt_header *gh)
15319+{
15320+ int rc;
15321+ struct gpt_partition *pt;
15322+ u32 sector_count, calculated_crc;
15323+
15324+ sector_count =
15325+ evms_cs_size_in_vsectors(gh->ptable_count * gh->ptable_entry_size);
15326+
15327+ pt = kmalloc(sector_count * EVMS_VSECTOR_SIZE, GFP_KERNEL);
15328+ if (pt) {
15329+
15330+ rc = INIT_IO(node, 0, gh->ptable_lba, sector_count, pt);
15331+ if (!rc) {
15332+
15333+ calculated_crc = evms_cs_calculate_crc(EVMS_INITIAL_CRC,
15334+ pt,
15335+ gh->
15336+ ptable_count *
15337+ gh->
15338+ ptable_entry_size);
15339+
15340+ if (~calculated_crc != gh->ptable_crc) {
15341+ rc = -ENODATA;
15342+ }
15343+
15344+ }
15345+ } else {
15346+ rc = -ENOMEM;
15347+ }
15348+
15349+ if (rc) {
15350+ if (pt)
15351+ kfree(pt);
15352+ pt = NULL;
15353+ }
15354+
15355+ return (pt);
15356+}
15357+
15358+/*
15359+ * Function: Validate GPT Header - runs basic checks to
15360+ * sanity check a gpt header.
15361+ *
15362+ */
15363+static int
15364+isa_valid_gpt_header(struct evms_logical_node *node, u64 lsn,
15365+ struct gpt_header *gh)
15366+{
15367+ u32 crc;
15368+ u32 calculated_crc;
15369+ u64 sector_count;
15370+
15371+ /* signature */
15372+ if (le64_to_cpu(gh->signature) != GPT_DISKMAGIC)
15373+ return 0;
15374+
15375+ /* crc */
15376+ crc = le32_to_cpu(gh->crc);
15377+ gh->crc = 0;
15378+ calculated_crc =
15379+ ~(evms_cs_calculate_crc(EVMS_INITIAL_CRC, gh, le32_to_cpu(gh->size)));
15380+ gh->crc = cpu_to_le32(crc);
15381+
15382+ if (calculated_crc != crc)
15383+ return 0;
15384+
15385+ /* spec says lba reported by header must match actual location on disk */
15386+ if (lsn != le64_to_cpu(gh->my_lba))
15387+ return 0;
15388+
15389+ /* sanity check partition table info found in header */
15390+ if (gh->ptable_count == 0 || gh->ptable_entry_size == 0)
15391+ return 0;
15392+
15393+	sector_count =
15394+	    evms_cs_size_in_vsectors((u64) le32_to_cpu(gh->ptable_count) *
15395+				     le32_to_cpu(gh->ptable_entry_size));
15396+
15397+ if ((le64_to_cpu(gh->ptable_lba) + sector_count - 1) >=
15398+ node->total_vsectors - 1)
15399+ return 0;
15400+
15401+ return 1;
15402+}
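
/*
 * The header CRC check above uses the usual "zero the CRC field,
 * recompute, restore" idiom: the stored CRC was computed with that field
 * zeroed. A minimal sketch of the same idiom, assuming a hypothetical
 * crc32(buf, len) routine (EVMS actually post-inverts the result of
 * evms_cs_calculate_crc(), hence the ~ in the code above):
 */
static int
crc_field_matches(struct gpt_header *gh)
{
	u32 stored = le32_to_cpu(gh->crc);
	u32 computed;

	gh->crc = 0;			/* field must be zero while hashing */
	computed = crc32(gh, le32_to_cpu(gh->size));
	gh->crc = cpu_to_le32(stored);	/* put the on-disk value back */

	return computed == stored;
}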
15403+
15404+/*
15405+ * Function: get GPT Partition Table Header
15406+ *
15407+ */
15408+static struct gpt_header *
15409+get_gpt_header(struct evms_logical_node *node, u64 lsn)
15410+{
15411+ int rc;
15412+ struct gpt_header *gh = NULL;
15413+
15414+ gh = kmalloc(EVMS_VSECTOR_SIZE, GFP_KERNEL);
15415+ if (gh) {
15416+ rc = INIT_IO(node, 0, lsn, 1, gh);
15417+ if (!rc) {
15418+ if (isa_valid_gpt_header(node, lsn, gh)) {
15419+ disk_gpt_header_to_cpu(gh);
15420+ } else {
15421+ rc = -ENODATA;
15422+ }
15423+
15424+ }
15425+ if (rc) {
15426+ kfree(gh);
15427+ gh = NULL;
15428+ }
15429+ }
15430+
15431+ return (gh);
15432+}
15433+
15434+/*
15435+ * Function: Get GPT Information
15436+ *
15437+ */
15438+static int
15439+get_gpt_info(struct evms_logical_node *node,
15440+ struct gpt_header **gh, struct gpt_partition **ptable)
15441+{
15442+ struct gpt_header *gh1 = NULL, *gh2 = NULL;
15443+
15444+ *gh = NULL;
15445+ *ptable = NULL;
15446+
15447+ gh1 = get_gpt_header(node, 1); // offset past protective mbr
15448+
15449+ if (gh1) {
15450+ *gh = gh1;
15451+ gh2 = get_gpt_header(node, gh1->alternate_lba);
15452+ if (gh2)
15453+ kfree(gh2);
15454+ else
15455+ LOG_WARNING
15456+ ("alternate guid partition table header is invalid, using primary copy.\n");
15457+ } else {
15458+ gh2 = get_gpt_header(node, node->total_vsectors - 1);
15459+ if (gh2) {
15460+ *gh = gh2;
15461+ LOG_WARNING
15462+			    ("primary guid partition table header is invalid, using alternate copy.\n");
15463+ } else {
15464+ LOG_DETAILS("no gpt header discovered on node %s\n",
15465+ node->name);
15466+ return 0;
15467+ }
15468+ }
15469+
15470+ *ptable = get_gpt_partition_table(node, *gh);
15471+ if (!*ptable) {
15472+ kfree(*gh);
15473+ *gh = NULL;
15474+ return 0;
15475+ }
15476+
15477+ return 1;
15478+}
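
/*
 * GPT keeps two header copies: the primary at LBA 1 (just past the
 * protective MBR) and an alternate in the disk's last sector.
 * get_gpt_info() prefers the primary and merely warns when the alternate
 * is bad; only if the primary is unreadable does it fall back to the
 * alternate, matching the redundancy scheme of the EFI specification.
 */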
15479+
15480+/*
15481+ * Function: Probe for GPT segments on logical node
15482+ *
15483+ */
15484+static int
15485+probe_for_segments(struct evms_logical_node **discover_list,
15486+ struct evms_logical_node *node)
15487+{
15488+ int rc;
15489+ int nextminor = 1;
15490+ int evms_top_segment;
15491+ u32 i;
15492+	u64 pstart, pend;
15493+ struct gpt_header *gh = NULL;
15494+ struct gpt_partition *ptable = NULL;
15495+ struct gpt_partition *part = NULL;
15496+
15497+ /* no need to inspect our own nodes */
15498+ if (node->plugin->id == plugin_header.id)
15499+ return 0;
15500+
15501+ /* nor nodes marked as EVMS_TOP_SEGMENT */
15502+ if (node->iflags & EVMS_TOP_SEGMENT)
15503+ return 0;
15504+
15505+ /* look for guid partition table & header */
15506+ if (!get_gpt_info(node, &gh, &ptable)) {
15507+ if (gh)
15508+ kfree(gh);
15509+ if (ptable)
15510+ kfree(ptable);
15511+ return 0;
15512+ }
15513+
15514+ /* walk the guid partition table, producing segment storage objects */
15515+ for (i = 0, part = ptable; i < gh->ptable_count; i++, part++) {
15516+
15517+ if (!isa_unused_gpt_partition_record(part)) {
15518+
15519+ pstart = le64_to_cpu(part->start);
15520+ pend = le64_to_cpu(part->end);
15521+
15522+ LOG_DETAILS
15523+			    ("gpt partition start="PFU64" size="PFU64"\n",
15524+ pstart, (pend - pstart + 1));
15525+
15526+ /* stop other seg mgrs from recursive discovery on a gpt system partition */
15527+ if (isa_esp_gpt_partition_record(part))
15528+ evms_top_segment = 1;
15529+ else
15530+ evms_top_segment = 0;
15531+
15532+ rc = process_segment(discover_list,
15533+ node,
15534+ pstart,
15535+ (pend - pstart + 1),
15536+ 0, nextminor, evms_top_segment);
15537+
15538+ if (!rc) {
15539+ ++nextminor;
15540+ }
15541+ }
15542+
15543+ }
15544+
15545+ /* remove node we just consumed */
15546+ evms_cs_remove_logical_node_from_list(discover_list, node);
15547+
15548+ kfree(ptable);
15549+ kfree(gh);
15550+ return 1;
15551+}
15552+
15553+/*
15554+ * Function: partition_discover
15555+ *
15556+ */
15557+static int
15558+partition_discover(struct evms_logical_node **discover_list)
15559+{
15560+ int rc = 0;
15561+ struct evms_logical_node *node, *next_node;
15562+
15563+ MOD_INC_USE_COUNT;
15564+ LOG_ENTRY_EXIT("%s: ENTRY\n", __FUNCTION__);
15565+
15566+ /* initialize global variable */
15567+ exported_nodes = 0;
15568+
15569+ /* examine each node on the discover list */
15570+ next_node = *discover_list;
15571+ while (next_node) {
15572+ node = next_node;
15573+ next_node = node->next;
15574+ probe_for_segments(discover_list, node);
15575+ }
15576+
15577+ LOG_ENTRY_EXIT("%s: EXIT(exported nodes:%d, error code:%d)\n",
15578+ __FUNCTION__, exported_nodes, rc);
15579+ if (exported_nodes)
15580+ rc = exported_nodes;
15581+ MOD_DEC_USE_COUNT;
15582+ return (rc);
15583+}
15584+
15585+/*
15586+ * Function: partition_delete
15587+ *
15588+ */
15589+static int
15590+partition_delete(struct evms_logical_node *segment)
15591+{
15592+ int rc = 0;
15593+ struct gpt_private *gpt_prv;
15594+ struct evms_logical_node *empty_disk = NULL;
15595+
15596+	if (!segment) {
15597+		rc = -ENODEV;
15598+	} else {
15599+		LOG_DETAILS("deleting segment '%s'.\n", segment->name);
15600+
15601+		gpt_prv = segment->private;
15602+ if (gpt_prv) {
15603+ /* remove the segment from the
15604+ * disk's segment list
15605+ */
15606+ rc = remove_segment_from_disk(gpt_prv->source_disk,
15607+ segment, &empty_disk);
15608+ /* free the local instance data */
15609+ kfree(gpt_prv);
15610+ }
15611+ /* free the segment node */
15612+ evms_cs_deallocate_logical_node(segment);
15613+ MOD_DEC_USE_COUNT;
15614+ /* if the last segment on the disk was
15615+ * deleted, delete the disk node too
15616+ */
15617+ if (empty_disk)
15618+ DELETE(empty_disk);
15619+ }
15620+ return (rc);
15621+}
15622+
15623+/*
15624+ * function: partition_io_error
15625+ *
15626+ * this function was primarily created because the function
15627+ * buffer_IO_error is inline and kgdb doesn't allow breakpoints
15628+ * to be set on inline functions. Since this was an error path
15629+ * and not mainline, I decided to add a trace statement to help
15630+ * report on the failing condition.
15631+ *
15632+ */
15633+static void
15634+partition_io_error(struct evms_logical_node *node, int io_flag,
15635+ struct buffer_head *bh)
15636+{
15637+ LOG_SERIOUS
15638+ ("attempt to %s beyond partition boundary("PFU64") on (%s), rsector(%ld).\n",
15639+ (io_flag) ? "WRITE" : "READ", node->total_vsectors - 1, node->name,
15640+ bh->b_rsector);
15641+
15642+ bh->b_end_io(bh, 0);
15643+}
15644+
15645+/*
15646+ * Function: partition_read
15647+ *
15648+ */
15649+static void
15650+partition_read(struct evms_logical_node *partition, struct buffer_head *bh)
15651+{
15652+ struct gpt_private *gpt_prv = partition->private;
15653+
15654+ if ((bh->b_rsector + (bh->b_size >> EVMS_VSECTOR_SIZE_SHIFT)) <=
15655+ partition->total_vsectors) {
15656+ bh->b_rsector += gpt_prv->start_sect;
15657+ R_IO(gpt_prv->source_disk, bh);
15658+ } else
15659+ partition_io_error(partition, READ, bh);
15660+}
15661+
15662+/*
15663+ * Function: partition_write
15664+ *
15665+ */
15666+static void
15667+partition_write(struct evms_logical_node *partition, struct buffer_head *bh)
15668+{
15669+ struct gpt_private *gpt_prv = partition->private;
15670+
15671+ if ((bh->b_rsector + (bh->b_size >> EVMS_VSECTOR_SIZE_SHIFT)) <=
15672+ partition->total_vsectors) {
15673+ bh->b_rsector += gpt_prv->start_sect;
15674+ W_IO(gpt_prv->source_disk, bh);
15675+ } else
15676+ partition_io_error(partition, WRITE, bh);
15677+}
15678+
15679+/*
15680+ * Function: partition_init_io
15681+ *
15682+ */
15683+static int
15684+partition_init_io(struct evms_logical_node *partition, int io_flag, /* 0=read, 1=write */
15685+ u64 sect_nr, /* disk LBA */
15686+ u64 num_sects, /* # of sectors */
15687+ void *buf_addr)
15688+{ /* buffer address */
15689+ int rc;
15690+ struct gpt_private *gpt_prv = partition->private;
15691+
15692+ if ((sect_nr + num_sects) <= partition->total_vsectors) {
15693+ rc = INIT_IO(gpt_prv->source_disk, io_flag,
15694+ sect_nr + gpt_prv->start_sect, num_sects,
15695+ buf_addr);
15696+ } else {
15697+ LOG_SERIOUS
15698+ ("init_io: attempt to %s beyond partition(%s) boundary("PFU64") at sector("PFU64") for count("PFU64").\n",
15699+ (io_flag) ? "WRITE" : "READ", partition->name,
15700+ (gpt_prv->nr_sects - 1), sect_nr, num_sects);
15701+ rc = -EINVAL;
15702+ }
15703+
15704+ return (rc);
15705+}
15706+
15707+/*
15708+ * Function: partition_ioctl
15709+ *
15710+ */
15711+static int
15712+partition_ioctl(struct evms_logical_node *partition,
15713+ struct inode *inode,
15714+ struct file *file, unsigned int cmd, unsigned long arg)
15715+{
15716+ struct gpt_private *gpt_prv;
15717+ struct hd_geometry hd_geo;
15718+ int rc;
15719+
15720+ rc = 0;
15721+ gpt_prv = partition->private;
15722+ if (!inode)
15723+ return -EINVAL;
15724+ switch (cmd) {
15725+ case HDIO_GETGEO:
15726+ {
15727+ rc = IOCTL(gpt_prv->source_disk, inode, file, cmd, arg);
15728+ if (rc)
15729+ break;
15730+ if (copy_from_user
15731+ (&hd_geo, (void *) arg,
15732+ sizeof (struct hd_geometry)))
15733+ rc = -EFAULT;
15734+ if (rc)
15735+ break;
15736+ hd_geo.start = gpt_prv->start_sect;
15737+ if (copy_to_user
15738+ ((void *) arg, &hd_geo,
15739+ sizeof (struct hd_geometry)))
15740+ rc = -EFAULT;
15741+ }
15742+ break;
15743+ case EVMS_GET_BMAP:
15744+ {
15745+ struct evms_get_bmap_pkt *bmap =
15746+ (struct evms_get_bmap_pkt *) arg;
15747+ bmap->rsector += gpt_prv->start_sect;
15748+ /* intentionally fall thru to
15749+ * default ioctl down to device
15750+ * manager.
15751+ */
15752+ }
15753+ default:
15754+ rc = IOCTL(gpt_prv->source_disk, inode, file, cmd, arg);
15755+ }
15756+ return rc;
15757+}
15758+
15759+/*
15760+ * Function: gpt_module_init
15761+ *
15762+ */
15763+static int __init
15764+gpt_module_init(void)
15765+{
15766+ return evms_cs_register_plugin(&plugin_header); /* register with EVMS */
15767+}
15768+
15769+/*
15770+ * Function: gpt module exit
15771+ */
15772+static void __exit
15773+gpt_module_exit(void)
15774+{
15775+ evms_cs_unregister_plugin(&plugin_header);
15776+}
15777+
15778+module_init(gpt_module_init);
15779+module_exit(gpt_module_exit);
15780+#ifdef MODULE_LICENSE
15781+MODULE_LICENSE("GPL");
15782+#endif
15783diff -Naur linux-2002-09-30/drivers/evms/ldev_mgr.c evms-2002-09-30/drivers/evms/ldev_mgr.c
15784--- linux-2002-09-30/drivers/evms/ldev_mgr.c Wed Dec 31 18:00:00 1969
15785+++ evms-2002-09-30/drivers/evms/ldev_mgr.c Fri Sep 13 16:45:06 2002
15786@@ -0,0 +1,1500 @@
15787+/* -*- linux-c -*- */
15788+/*
15789+ *
15790+ * Copyright (c) International Business Machines Corp., 2000
15791+ *
15792+ * This program is free software; you can redistribute it and/or modify
15793+ * it under the terms of the GNU General Public License as published by
15794+ * the Free Software Foundation; either version 2 of the License, or
15795+ * (at your option) any later version.
15796+ *
15797+ * This program is distributed in the hope that it will be useful,
15798+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
15799+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
15800+ * the GNU General Public License for more details.
15801+ *
15802+ * You should have received a copy of the GNU General Public License
15803+ * along with this program; if not, write to the Free Software
15804+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
15805+ */
15806+
15807+/* linux/drivers/evms/ldev_mgr.c
15808+ *
15809+ * EVMS - Local Device (Hard Drive) Manager
15810+ *
15811+ * This plugin walks the gendisk list and creates logical disk structures for each
15812+ * local ide or scsi device.
15813+ *
15814+ */
15815+
15816+#include <linux/config.h>
15817+#include <linux/module.h>
15818+#include <linux/errno.h>
15819+#include <linux/kernel.h>
15820+#include <linux/fs.h>
15821+#include <linux/slab.h>
15822+#include <asm/uaccess.h>
15823+#include <linux/blk.h> /* must be included by all block drivers */
15824+#include <linux/genhd.h>
15825+#include <linux/ide.h>
15826+#include <linux/version.h>
15827+#include "../scsi/scsi.h"
15828+#include "../scsi/sd.h"
15829+#include <linux/init.h>
15830+#include <linux/evms/evms.h>
15831+#include <linux/evms/ldev_mgr.h>
15832+
15833+#define LOG_PREFIX "ldev_mgr: "
15834+
15835+#define EVMS_LOCAL_DEVICE_MANAGER_ID 1
15836+
15837+/**
15838+ * struct ldev_private - private data used by this plugin
15839+ * @major: major device number
15840+ * @minor: minor device number
15841+ * @bdev: block_device record for this device
15842+ * @gd: gendisk entry for this device
15843+ * @media_changed: media changed status field
15844+ *
15845+ * private data maintained for each device by this plugin
15846+ **/
15847+struct ldev_private {
15848+ int major, minor;
15849+ struct block_device *bdev;
15850+ struct gendisk *gd;
15851+ int media_changed;
15852+};
15853+
15854+/* prototypes for mandatory plugin interface functions */
15855+static int discover_disks(struct evms_logical_node **);
15856+static int ldev_mgr_delete(struct evms_logical_node *);
15857+static void ldev_mgr_read(struct evms_logical_node *, struct buffer_head *);
15858+static void ldev_mgr_write(struct evms_logical_node *, struct buffer_head *);
15859+static int ldev_mgr_ioctl(struct evms_logical_node *,
15860+ struct inode *,
15861+ struct file *, unsigned int, unsigned long);
15862+static int ldev_init_io(struct evms_logical_node *,
15863+ int, u64, u64, void *);
15864+static int ldev_mgr_direct_ioctl(struct inode *,
15865+ struct file *, unsigned int, unsigned long);
15866+
15867+/* plugin function table definition */
15868+static struct evms_plugin_fops fops = {
15869+ .discover = discover_disks,
15870+ .delete = ldev_mgr_delete,
15871+ .read = ldev_mgr_read,
15872+ .write = ldev_mgr_write,
15873+ .init_io = ldev_init_io,
15874+ .ioctl = ldev_mgr_ioctl,
15875+ .direct_ioctl = ldev_mgr_direct_ioctl
15876+};
15877+
15878+/* plugin header definition */
15879+static struct evms_plugin_header plugin_header = {
15880+ .id = SetPluginID(IBM_OEM_ID,
15881+ EVMS_DEVICE_MANAGER,
15882+ EVMS_LOCAL_DEVICE_MANAGER_ID),
15883+ .version = {
15884+ .major = 1,
15885+ .minor = 1,
15886+ .patchlevel = 1
15887+ },
15888+ .required_services_version = {
15889+ .major = 0,
15890+ .minor = 5,
15891+ .patchlevel = 0
15892+ },
15893+ .fops = &fops
15894+};
15895+
15896+#define TYPE_NONE 0
15897+#define TYPE_GENERIC 1
15898+#define TYPE_IDE 2
15899+#define TYPE_SCSI 3
15900+
15901+#define INDEX_ALPHA 0
15902+#define INDEX_NUMERIC 1
15903+
15904+/********************************************************/
15905+/* Required Plugin Function Table Entry Point: */
15906+/* Discover function & Support routines */
15907+/********************************************************/
15908+
15909+#define MAX_NAME_BASE_SIZE 10
15910+#define MAX_NAME_MODIFIER_SIZE 4
15911+/**
15912+ * struct blk_device_info - block device info
15913+ * @devnode_name_base:	base name (e.g. hd or sd) for device
15914+ * @null1:	guaranteed end-of-string NULL
15915+ * @devnode_name_modifier:	name suffix (e.g. ag for sdag) for device
15916+ * @null2:	guaranteed end-of-string NULL
15917+ * @devnode_name_index:	numeric device index (e.g. 1 for hda1)
15918+ * @devnode_name_type:	indicates numeric or alpha modifier
15919+ * @device_type:	device type: IDE, SCSI, or GENERIC
15920+ *
15921+ * generic block device naming descriptor structure
15922+ **/
15923+struct blk_device_info {
15924+ char devnode_name_base[MAX_NAME_BASE_SIZE];
15925+ char null1;
15926+ char devnode_name_modifier[MAX_NAME_MODIFIER_SIZE];
15927+ char null2;
15928+ int devnode_name_index;
15929+ int devnode_name_type;
15930+ int device_type;
15931+};
15932+
15933+static struct blk_device_info *blk_dev_info = NULL;
15934+
15935+#define BLK_DEV_INFO(a,b,c,d,e) \
15936+ strncpy(blk_dev_info[a].devnode_name_base, b, MAX_NAME_BASE_SIZE); \
15937+ blk_dev_info[a].null1 = 0; \
15938+ strncpy(blk_dev_info[a].devnode_name_modifier, c, MAX_NAME_MODIFIER_SIZE); \
15939+ blk_dev_info[a].null2 = 0; \
15940+ blk_dev_info[a].devnode_name_index = 0; \
15941+ blk_dev_info[a].device_type = d; \
15942+ blk_dev_info[a].devnode_name_type = e;
15943+
15944+static void
15945+init_blk_dev_info(struct blk_device_info *blk_dev_info)
15946+{
15947+ BLK_DEV_INFO(IDE0_MAJOR, "hd", "a", TYPE_IDE, INDEX_ALPHA);
15948+ BLK_DEV_INFO(IDE1_MAJOR, "hd", "c", TYPE_IDE, INDEX_ALPHA);
15949+ BLK_DEV_INFO(IDE2_MAJOR, "hd", "e", TYPE_IDE, INDEX_ALPHA);
15950+ BLK_DEV_INFO(IDE3_MAJOR, "hd", "g", TYPE_IDE, INDEX_ALPHA);
15951+ BLK_DEV_INFO(IDE4_MAJOR, "hd", "i", TYPE_IDE, INDEX_ALPHA);
15952+ BLK_DEV_INFO(IDE5_MAJOR, "hd", "k", TYPE_IDE, INDEX_ALPHA);
15953+ BLK_DEV_INFO(IDE6_MAJOR, "hd", "m", TYPE_IDE, INDEX_ALPHA);
15954+ BLK_DEV_INFO(IDE7_MAJOR, "hd", "o", TYPE_IDE, INDEX_ALPHA);
15955+ BLK_DEV_INFO(IDE8_MAJOR, "hd", "q", TYPE_IDE, INDEX_ALPHA);
15956+ BLK_DEV_INFO(IDE9_MAJOR, "hd", "s", TYPE_IDE, INDEX_ALPHA);
15957+
15958+ BLK_DEV_INFO(SCSI_DISK0_MAJOR, "sd", "a", TYPE_SCSI, INDEX_ALPHA);
15959+ BLK_DEV_INFO(SCSI_DISK1_MAJOR, "sd", "q", TYPE_SCSI, INDEX_ALPHA);
15960+ BLK_DEV_INFO(SCSI_DISK2_MAJOR, "sd", "ag", TYPE_SCSI, INDEX_ALPHA);
15961+ BLK_DEV_INFO(SCSI_DISK3_MAJOR, "sd", "aw", TYPE_SCSI, INDEX_ALPHA);
15962+ BLK_DEV_INFO(SCSI_DISK4_MAJOR, "sd", "bm", TYPE_SCSI, INDEX_ALPHA);
15963+ BLK_DEV_INFO(SCSI_DISK5_MAJOR, "sd", "cc", TYPE_SCSI, INDEX_ALPHA);
15964+ BLK_DEV_INFO(SCSI_DISK6_MAJOR, "sd", "cs", TYPE_SCSI, INDEX_ALPHA);
15965+ BLK_DEV_INFO(SCSI_DISK7_MAJOR, "sd", "di", TYPE_SCSI, INDEX_ALPHA);
15966+
15967+ BLK_DEV_INFO(XT_DISK_MAJOR, "xd", "a", TYPE_GENERIC, INDEX_ALPHA);
15968+
15969+ BLK_DEV_INFO(CYCLADES_MAJOR, "double", "0", TYPE_GENERIC,
15970+ INDEX_NUMERIC);
15971+
15972+ BLK_DEV_INFO(MFM_ACORN_MAJOR, "mfm", "a", TYPE_GENERIC, INDEX_ALPHA);
15973+
15974+ BLK_DEV_INFO(ACSI_MAJOR, "ad", "a", TYPE_GENERIC, INDEX_ALPHA);
15975+
15976+ BLK_DEV_INFO(PS2ESDI_MAJOR, "ed", "a", TYPE_GENERIC, INDEX_ALPHA);
15977+
15978+ BLK_DEV_INFO(40, "ez", "a", TYPE_GENERIC, INDEX_ALPHA);
15979+ BLK_DEV_INFO(43, "nb", "0", TYPE_GENERIC, INDEX_NUMERIC);
15980+ BLK_DEV_INFO(44, "ftl", "a", TYPE_GENERIC, INDEX_ALPHA);
15981+ BLK_DEV_INFO(45, "pd", "a", TYPE_GENERIC, INDEX_ALPHA);
15982+ BLK_DEV_INFO(47, "pf", "0", TYPE_GENERIC, INDEX_NUMERIC);
15983+
15984+ BLK_DEV_INFO(DAC960_MAJOR + 0, "rd/c0d", "0", TYPE_GENERIC,
15985+ INDEX_NUMERIC);
15986+ BLK_DEV_INFO(DAC960_MAJOR + 1, "rd/c1d", "0", TYPE_GENERIC,
15987+ INDEX_NUMERIC);
15988+ BLK_DEV_INFO(DAC960_MAJOR + 2, "rd/c2d", "0", TYPE_GENERIC,
15989+ INDEX_NUMERIC);
15990+ BLK_DEV_INFO(DAC960_MAJOR + 3, "rd/c3d", "0", TYPE_GENERIC,
15991+ INDEX_NUMERIC);
15992+ BLK_DEV_INFO(DAC960_MAJOR + 4, "rd/c4d", "0", TYPE_GENERIC,
15993+ INDEX_NUMERIC);
15994+ BLK_DEV_INFO(DAC960_MAJOR + 5, "rd/c5d", "0", TYPE_GENERIC,
15995+ INDEX_NUMERIC);
15996+ BLK_DEV_INFO(DAC960_MAJOR + 6, "rd/c6d", "0", TYPE_GENERIC,
15997+ INDEX_NUMERIC);
15998+ BLK_DEV_INFO(DAC960_MAJOR + 7, "rd/c7d", "0", TYPE_GENERIC,
15999+ INDEX_NUMERIC);
16000+
16001+ BLK_DEV_INFO(COMPAQ_SMART2_MAJOR, "ida/c0d", "0", TYPE_GENERIC,
16002+ INDEX_NUMERIC);
16003+ BLK_DEV_INFO(COMPAQ_SMART2_MAJOR1, "ida/c1d", "0", TYPE_GENERIC,
16004+ INDEX_NUMERIC);
16005+ BLK_DEV_INFO(COMPAQ_SMART2_MAJOR2, "ida/c2d", "0", TYPE_GENERIC,
16006+ INDEX_NUMERIC);
16007+ BLK_DEV_INFO(COMPAQ_SMART2_MAJOR3, "ida/c3d", "0", TYPE_GENERIC,
16008+ INDEX_NUMERIC);
16009+ BLK_DEV_INFO(COMPAQ_SMART2_MAJOR4, "ida/c4d", "0", TYPE_GENERIC,
16010+ INDEX_NUMERIC);
16011+ BLK_DEV_INFO(COMPAQ_SMART2_MAJOR5, "ida/c5d", "0", TYPE_GENERIC,
16012+ INDEX_NUMERIC);
16013+ BLK_DEV_INFO(COMPAQ_SMART2_MAJOR6, "ida/c6d", "0", TYPE_GENERIC,
16014+ INDEX_NUMERIC);
16015+ BLK_DEV_INFO(COMPAQ_SMART2_MAJOR7, "ida/c7d", "0", TYPE_GENERIC,
16016+ INDEX_NUMERIC);
16017+
16018+ BLK_DEV_INFO(I2O_MAJOR + 0, "i2o/hd", "a", TYPE_GENERIC, INDEX_ALPHA);
16019+ BLK_DEV_INFO(I2O_MAJOR + 1, "i2o/hd", "q", TYPE_GENERIC, INDEX_ALPHA);
16020+ BLK_DEV_INFO(I2O_MAJOR + 2, "i2o/hd", "ag", TYPE_GENERIC, INDEX_ALPHA);
16021+ BLK_DEV_INFO(I2O_MAJOR + 3, "i2o/hd", "aw", TYPE_GENERIC, INDEX_ALPHA);
16022+ BLK_DEV_INFO(I2O_MAJOR + 4, "i2o/hd", "bm", TYPE_GENERIC, INDEX_ALPHA);
16023+ BLK_DEV_INFO(I2O_MAJOR + 5, "i2o/hd", "cc", TYPE_GENERIC, INDEX_ALPHA);
16024+ BLK_DEV_INFO(I2O_MAJOR + 6, "i2o/hd", "cs", TYPE_GENERIC, INDEX_ALPHA);
16025+ BLK_DEV_INFO(I2O_MAJOR + 7, "i2o/hd", "di", TYPE_GENERIC, INDEX_ALPHA);
16026+
16027+ BLK_DEV_INFO(92, "ppdd", "0", TYPE_GENERIC, INDEX_NUMERIC);
16028+ BLK_DEV_INFO(93, "nftl", "a", TYPE_GENERIC, INDEX_ALPHA);
16029+
16030+ BLK_DEV_INFO(DASD_MAJOR, "dasd", "a", TYPE_GENERIC, INDEX_ALPHA);
16031+ BLK_DEV_INFO(MDISK_MAJOR, "mdisk", "a", TYPE_GENERIC, INDEX_ALPHA);
16032+
16033+ BLK_DEV_INFO(96, "msd", "0", TYPE_GENERIC, INDEX_NUMERIC);
16034+ BLK_DEV_INFO(97, "pktcdvd", "0", TYPE_GENERIC, INDEX_NUMERIC);
16035+
16036+ BLK_DEV_INFO(UBD_MAJOR, "ubd", "0", TYPE_GENERIC, INDEX_NUMERIC);
16037+
16038+ BLK_DEV_INFO(JSFD_MAJOR, "jsfd", "", TYPE_GENERIC, INDEX_NUMERIC);
16039+
16040+ BLK_DEV_INFO(101, "amiraid/ar", "0", TYPE_GENERIC, INDEX_NUMERIC);
16041+
16042+ BLK_DEV_INFO(104, "cciss/c0d", "0", TYPE_GENERIC, INDEX_NUMERIC);
16043+ BLK_DEV_INFO(105, "cciss/c1d", "0", TYPE_GENERIC, INDEX_NUMERIC);
16044+ BLK_DEV_INFO(106, "cciss/c2d", "0", TYPE_GENERIC, INDEX_NUMERIC);
16045+ BLK_DEV_INFO(107, "cciss/c3d", "0", TYPE_GENERIC, INDEX_NUMERIC);
16046+ BLK_DEV_INFO(108, "cciss/c4d", "0", TYPE_GENERIC, INDEX_NUMERIC);
16047+	BLK_DEV_INFO(109, "cciss/c5d", "0", TYPE_GENERIC, INDEX_NUMERIC);
16048+ BLK_DEV_INFO(110, "cciss/c6d", "0", TYPE_GENERIC, INDEX_NUMERIC);
16049+ BLK_DEV_INFO(111, "cciss/c7d", "0", TYPE_GENERIC, INDEX_NUMERIC);
16050+
16051+ BLK_DEV_INFO(RAW_MAJOR, "raw", "0", TYPE_GENERIC, INDEX_NUMERIC);
16052+
16053+ BLK_DEV_INFO(VXVM_MAJOR, "vx/dsk", "0", TYPE_GENERIC, INDEX_NUMERIC);
16054+ BLK_DEV_INFO(VXDMP_MAJOR, "vx/dmp", "0", TYPE_GENERIC, INDEX_NUMERIC);
16055+ BLK_DEV_INFO(LOOP_MAJOR, "loop", "0", TYPE_GENERIC, INDEX_NUMERIC);
16056+}
16057+
16058+static int
16059+is_in_device_list(struct gendisk *gd, int major, int minor)
16060+{
16061+ int found, done, rc;
16062+ struct evms_logical_node *device = NULL;
16063+ struct ldev_private *ldev_prv;
16064+
16065+ done = found = FALSE;
16066+ while (done == FALSE) {
16067+ rc = evms_cs_find_next_device(device, &device);
16068+ if (rc || !device)
16069+ done = TRUE;
16070+ else {
16071+ ldev_prv = device->private;
16072+ if (ldev_prv->gd == gd)
16073+ if (ldev_prv->major == major)
16074+ if (ldev_prv->minor == minor)
16075+ done = found = TRUE;
16076+ }
16077+ }
16078+ return (found);
16079+}
16080+
16081+static void
16082+build_devnode_name(char *name_buf, int major)
16083+{
16084+ char buf[11], *modifier, *buf_ptr;
16085+ int int_mod, done;
16086+ struct blk_device_info *bdi;
16087+
16088+ bdi = &blk_dev_info[major];
16089+
16090+ /* convert the base name modifier to an integer */
16091+ modifier = bdi->devnode_name_modifier;
16092+ int_mod = 0;
16093+ while (*modifier) {
16094+ if (bdi->devnode_name_type == INDEX_ALPHA) {
16095+ int_mod *= 26;
16096+ int_mod += *modifier - 'a';
16097+ } else {
16098+ int_mod *= 10;
16099+ int_mod += *modifier - '0';
16100+ }
16101+ modifier++;
16102+ if (*modifier) {
16103+ int_mod++;
16104+ }
16105+ }
16106+ /* add in device_index_value */
16107+ int_mod += bdi->devnode_name_index;
16108+ bdi->devnode_name_index++;
16109+
16110+ /* convert integer modifier back to ALPHA/NUMERIC chars */
16111+ memset(buf, 0, sizeof (buf));
16112+ /* fill the buffer from the rear to front with the
16113+ * ascii version of the modifier, leaving space for
16114+ * NULL terminator at the end.
16115+ */
16116+ buf_ptr = &buf[sizeof (buf) - 2];
16117+ done = FALSE;
16118+ do {
16119+ if (bdi->devnode_name_type == INDEX_ALPHA) {
16120+ *buf_ptr = (int_mod % 26) + 'a';
16121+ int_mod /= 26;
16122+ } else {
16123+ *buf_ptr = (int_mod % 10) + '0';
16124+ int_mod /= 10;
16125+ }
16126+ if (int_mod) {
16127+ int_mod--;
16128+ } else {
16129+ done = TRUE;
16130+ }
16131+ buf_ptr--;
16132+ } while (!done);
16133+
16134+ /* find beginning of modifier in buffer */
16135+ modifier = buf;
16136+ while (!*modifier)
16137+ modifier++;
16138+
16139+ /* build the final device devnode name */
16140+ sprintf(name_buf, "%s%s", bdi->devnode_name_base, modifier);
16141+}
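
/*
 * The modifier encoding handled above is bijective base-26, like
 * spreadsheet column names: "a" is 0, "z" is 25, "aa" is 26, with the
 * decode loop adding 1 for each remaining character. Worked example for
 * the SCSI table entries:
 *
 *	"a"  -> 0    (sda,  first disk on SCSI_DISK0_MAJOR)
 *	"q"  -> 16   (sdq,  first disk on SCSI_DISK1_MAJOR)
 *	"ag" -> 32   (sdag, first disk on SCSI_DISK2_MAJOR)
 *
 * decode(modifier) + devnode_name_index is then re-encoded by the
 * do/while loop, so the second disk on SCSI_DISK2_MAJOR becomes "sdah".
 */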
16142+
16143+static int
16144+ldev_mgr_lock_device(struct ldev_private *ldev_prv)
16145+{
16146+ int rc;
16147+ struct block_device *bdev;
16148+
16149+ bdev = bdget(MKDEV(ldev_prv->major, ldev_prv->minor));
16150+ if (!bdev)
16151+ return -ENOMEM;
16152+ rc = blkdev_get(bdev, FMODE_READ | FMODE_WRITE, 0, BDEV_RAW);
16153+ if (rc)
16154+ return rc;
16155+ ldev_prv->bdev = bdev;
16156+ return 0;
16157+}
16158+
16159+static void
16160+ldev_mgr_unlock_device(struct ldev_private *ldev_prv)
16161+{
16162+ struct block_device *bdev = ldev_prv->bdev;
16163+ ldev_prv->bdev = NULL;
16164+ if (!bdev) {
16165+ LOG_ERROR("error: NULL bdev field detected!\n");
16166+ BUG();
16167+ }
16168+ blkdev_put(bdev, BDEV_RAW);
16169+}
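
/*
 * "Locking" a device here simply means holding an open reference on its
 * block_device: bdget() pins the bdev and blkdev_get() opens it in raw
 * mode, which keeps the underlying driver and media claimed while EVMS
 * is using the disk. ldev_mgr_unlock_device() releases that reference
 * with blkdev_put() when the corresponding node is deleted.
 */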
16170+
16171+#define DEVICE_KNOWN 1234
16172+#define DEVICE_UNINITIALIZED 1235
16173+#define DEVICE_MEDIA_NOT_PRESENT 1236
16174+static int
16175+create_logical_disk(struct evms_logical_node **disk_list,
16176+ struct gendisk *gd, int device_index)
16177+{
16178+ int rc = 0, major, minor;
16179+ struct evms_logical_node *new_disk = NULL;
16180+ struct ldev_private *ldev_prv = NULL;
16181+ char device_name[EVMS_VOLUME_NAME_SIZE + 1];
16182+
16183+ major = gd->major;
16184+ minor = device_index << gd->minor_shift;
16185+
16186+ /* skip uninitialized devices */
16187+ if (!blk_size[major])
16188+ rc = DEVICE_UNINITIALIZED;
16189+ else if (!blk_size[major][minor])
16190+ rc = DEVICE_UNINITIALIZED;
16191+ if (!rc) {
16192+ /* construct the devnode name for this device */
16193+ build_devnode_name(device_name, major);
16194+
16195+ /* skip devices we already know about */
16196+ if (is_in_device_list(gd, major, minor) == TRUE)
16197+ rc = DEVICE_KNOWN;
16198+ }
16199+ /* allocate the new node */
16200+ if (!rc) {
16201+ rc = evms_cs_allocate_logical_node(&new_disk);
16202+ }
16203+ /* allocate new nodes's instance data */
16204+ if (!rc) {
16205+ ldev_prv = kmalloc(sizeof(struct ldev_private), GFP_KERNEL);
16206+ if (!ldev_prv)
16207+ rc = -ENOMEM;
16208+ }
16209+ /* initialize the new node */
16210+ if (!rc) {
16211+ memset(ldev_prv, 0, sizeof(struct ldev_private));
16212+ new_disk->plugin = &plugin_header;
16213+
16214+ /* initialize the instance data */
16215+ new_disk->private = ldev_prv;
16216+ ldev_prv->gd = gd;
16217+ ldev_prv->major = major;
16218+ ldev_prv->minor = minor;
16219+ rc = ldev_mgr_lock_device(ldev_prv);
16220+ if (rc) {
16221+ LOG_ERROR("error(%d): unable to lock device(%d,%d)!\n",
16222+ rc, major, minor);
16223+ }
16224+ }
16225+ if (!rc) {
16226+ /* determine hardsector size */
16227+ new_disk->hardsector_size = 512;
16228+ if (hardsect_size[major]) {
16229+ new_disk->hardsector_size = hardsect_size[major][minor];
16230+ }
16231+ /* save the block size */
16232+ new_disk->block_size = 1024;
16233+ if (blksize_size[major]) {
16234+ new_disk->block_size = blksize_size[major][minor];
16235+ }
16236+ /* obtain the device size in sectors
16237+ *
16238+ * try 64bit size first, if that fails
16239+ * fall back on the 32bit size.
16240+ */
16241+ /* try 64bit size */
16242+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,4,18)
16243+ rc = evms_cs_kernel_ioctl(new_disk, BLKGETSIZE64,
16244+ (ulong) & new_disk->total_vsectors);
16245+ if (!rc) {
16246+ /* convert bytes to 512 byte sectors */
16247+ new_disk->total_vsectors >>= EVMS_VSECTOR_SIZE_SHIFT;
16248+ } else
16249+#endif
16250+ {
16251+ /* try 32bit size */
16252+ ulong dev_size = 0;
16253+ rc = evms_cs_kernel_ioctl(new_disk, BLKGETSIZE,
16254+ (ulong) & dev_size);
16255+ new_disk->total_vsectors = dev_size;
16256+ }
16257+ if (!rc && !new_disk->total_vsectors) {
16258+ rc = -ENOSPC;
16259+ }
16260+ }
16261+ if (!rc) {
16262+ /* remember removable devices */
16263+ if (gd->flags)
16264+ if (gd->flags[device_index] & GENHD_FL_REMOVABLE)
16265+ new_disk->flags |= EVMS_DEVICE_REMOVABLE;
16266+
16267+ /* save the devnode name for this device */
16268+ strcpy(new_disk->name, device_name);
16269+
16270+ /* register this device with evms */
16271+ evms_cs_register_device(new_disk);
16272+ MOD_INC_USE_COUNT;
16273+
16274+ /* append this record the linked list */
16275+ evms_cs_add_logical_node_to_list(disk_list, new_disk);
16276+ LOG_DETAILS
16277+ ("added logical disk(%s) for physical disk(%u,%u,%s), size("PFU64") in 512 byte units\n",
16278+ new_disk->name, major, minor, new_disk->name,
16279+ new_disk->total_vsectors);
16280+
16281+ }
16282+ /* reset the "benign" error codes for the caller */
16283+ switch (rc) {
16284+ case DEVICE_UNINITIALIZED:
16285+ case DEVICE_KNOWN:
16286+ case DEVICE_MEDIA_NOT_PRESENT:
16287+ rc = 0;
16288+ case 0:
16289+ break;
16290+ default:
16291+ LOG_ERROR
16292+ ("error(%d): creating logical disk for device(%d,%d).\n",
16293+ rc, major, minor);
16294+ if (new_disk) {
16295+ evms_cs_deallocate_logical_node(new_disk);
16296+ }
16297+ if (ldev_prv) {
16298+ kfree(ldev_prv);
16299+ }
16300+ break;
16301+ }
16302+ return (rc);
16303+}
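
/*
 * Size probing in create_logical_disk(): BLKGETSIZE64 (available from
 * 2.4.18 on) reports the device size in bytes, which is then shifted
 * down to 512-byte vsectors, while the older BLKGETSIZE fallback already
 * reports 512-byte sectors in an unsigned long - hence only the 64-bit
 * path performs the conversion.
 */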
16304+
16305+static int
16306+create_logical_generic_disks(struct evms_logical_node **disk_list,
16307+ struct gendisk *gd)
16308+{
16309+ int rc, i;
16310+
16311+ /* This is a generic device */
16312+
16313+ rc = 0;
16314+ LOG_DEBUG("major name = %s\n", gd->major_name);
16315+ LOG_DEBUG("number of real devices = %i\n", gd->nr_real);
16316+ for (i = 0; i < gd->nr_real; i++) {
16317+ LOG_DEBUG("device %d:\n", i);
16318+ rc = create_logical_disk(disk_list, gd, i);
16319+ if (rc)
16320+ break;
16321+ }
16322+ return (rc);
16323+}
16324+
16325+static int
16326+create_logical_ide_disks(struct evms_logical_node **disk_list,
16327+ struct gendisk *gd)
16328+{
16329+ int rc = 0, i;
16330+ ide_hwif_t *ide_hwif;
16331+ ide_drive_t *drive;
16332+
16333+ /* This is an IDE device */
16334+ LOG_DEBUG("found IDE major : %i - searching for disks\n", gd->major);
16335+
16336+ ide_hwif = gd->real_devices; /* IDE internal data */
16337+ for (i = 0; i < MAX_DRIVES; i++) {
16338+ drive = &(ide_hwif->drives[i]);
16339+ if (drive->present && (drive->media == ide_disk)) {
16340+ /* force the name index value on ide drives */
16341+ blk_dev_info[gd->major].devnode_name_index = i;
16342+ rc = create_logical_disk(disk_list, gd, i);
16343+ }
16344+ if (rc)
16345+ break;
16346+ }
16347+ return (rc);
16348+}
16349+
16350+static int
16351+create_logical_scsi_disks(struct evms_logical_node **disk_list,
16352+ struct gendisk *gd)
16353+{
16354+ int rc = 0, i;
16355+ Scsi_Disk *SDisks;
16356+ Scsi_Device *SDev;
16357+
16358+ /* This is an SCSI device */
16359+ LOG_DEBUG("found SCSI major : %i - searching for disks\n", gd->major);
16360+ LOG_DEBUG("scsi: major name = %s\n", gd->major_name);
16361+ LOG_DEBUG("scsi: number of real devices = %i\n", gd->nr_real);
16362+ SDisks = gd->real_devices; /* SCSI internal data */
16363+ for (i = 0; i < gd->nr_real; i++) {
16364+ SDev = SDisks[i].device;
16365+ LOG_DEBUG
16366+ ("scsi: Channel = %i, Id = %i, Lun = %i, Capacity = %i\n",
16367+ SDev->channel, SDev->id, SDev->lun, SDisks[i].capacity);
16368+ rc = create_logical_disk(disk_list, gd, i);
16369+ if (rc)
16370+ break;
16371+ }
16372+ return (rc);
16373+}
16374+
16375+static int
16376+create_logical_disks(struct gendisk *gd, void *p_disk_list)
16377+{
16378+ int rc = 0;
16379+ struct evms_logical_node **disk_list = p_disk_list;
16380+
16381+ /* create logical disks from all IDE & SCSI devices */
16382+ switch (blk_dev_info[gd->major].device_type) {
16383+ case TYPE_IDE:
16384+ rc = create_logical_ide_disks(disk_list, gd);
16385+ break;
16386+ case TYPE_SCSI:
16387+ rc = create_logical_scsi_disks(disk_list, gd);
16388+ break;
16389+ case TYPE_GENERIC:
16390+ rc = create_logical_generic_disks(disk_list, gd);
16391+ break;
16392+ default:
16393+ LOG_DEBUG("unrecognized device major : %i\n", gd->major);
16394+ break;
16395+ }
16396+
16397+ return (rc);
16398+}
16399+
16400+static int
16401+discover_disks(struct evms_logical_node **disk_list)
16402+{
16403+ int rc = 0;
16404+
16405+ MOD_INC_USE_COUNT;
16406+ LOG_ENTRY_EXIT("%s Entry\n", __FUNCTION__);
16407+
16408+ if (blk_dev_info == NULL) {
16409+ /* allocate space for device info array */
16410+ blk_dev_info = kmalloc(sizeof (struct blk_device_info)
16411+ * (MAX_BLKDEV + 1), GFP_KERNEL);
16412+ if (blk_dev_info) {
16413+ /* initialize device info array */
16414+ memset(blk_dev_info, 0,
16415+ sizeof (struct blk_device_info) * (MAX_BLKDEV + 1));
16416+ init_blk_dev_info(blk_dev_info);
16417+ } else {
16418+ rc = -ENOMEM;
16419+ }
16420+ }
16421+ if (!rc)
16422+ /* create logical disks from the raw devices */
16423+ rc = walk_gendisk(create_logical_disks, disk_list);
16424+
16425+ /* free blk_dev_info table and null the ptr to it */
16426+ kfree(blk_dev_info);
16427+ blk_dev_info = NULL;
16428+
16429+ LOG_ENTRY_EXIT("%s Exit\n", __FUNCTION__);
16430+ MOD_DEC_USE_COUNT;
16431+ return (rc);
16432+}
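
/*
 * discover_disks() is driven by walk_gendisk(), which calls
 * create_logical_disks() once per registered gendisk and passes the
 * disk_list pointer through as the opaque cookie - the usual kernel
 * iterator-with-callback pattern. The blk_dev_info naming table is only
 * needed for the duration of the walk, so it is freed again on exit.
 */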
16433+
16434+/********************************************************/
16435+/* Required Plugin Function Table Entry Point: */
16436+/* Delete function */
16437+/********************************************************/
16438+
16439+static int
16440+ldev_mgr_delete(struct evms_logical_node *disk)
16441+{
16442+ struct ldev_private *ldev_prv;
16443+
16444+ /* reset any evms volume related info from
16445+ * the device node, because we can't predict
16446+ * how this node will be used in the future.
16447+ */
16448+
16449+	/* remove the feature header if it's been used
16450+	 */
16451+ if (disk->feature_header) {
16452+ kfree(disk->feature_header);
16453+ disk->feature_header = NULL;
16454+ }
16455+ /* remove the volume_info structure and flag
16456+ * if this has been used directly by an evms
16457+ * feature.
16458+ */
16459+ evms_cs_deallocate_volume_info(disk);
16460+ /* reset the flags field to the appropriate state
16461+ */
16462+ disk->flags &= ~EVMS_VOLUME_FLAG;
16463+
16464+ /* disk nodes only get deleted when:
16465+ * 1) there are no references to the disk node
16466+ * in memory.
16467+ * 2) the device is removable
16468+ * 3) the device reported a media change
16469+ *
16470+ * All three of these conditions must be true
16471+ * before the disk node can be deleted.
16472+ * evms_check_for_device_changes should set
16473+ * and ensure these conditions before issuing
16474+ * deletes.
16475+ *
16476+ * Newly installed removable media will be
16477+	 * picked up in this module's discover code.
16478+	 *
16479+	 * OR disk nodes will be deleted if the
16480+	 * devices they represent go away, for example
16481+	 * in the case of a hot-unplugged device or a
16482+	 * required driver having been unloaded.
16483+	 */
16484+ if (disk->flags & (EVMS_MEDIA_CHANGED | EVMS_DEVICE_UNAVAILABLE)) {
16485+ LOG_DETAILS("deleting '%s'.\n", disk->name);
16486+
16487+ evms_cs_unregister_device(disk);
16488+ MOD_DEC_USE_COUNT;
16489+		ldev_prv = disk->private;
16490+		if (ldev_prv) {
16491+			ldev_mgr_unlock_device(ldev_prv);
16492+			kfree(ldev_prv);
16493+		}
16494+ evms_cs_deallocate_logical_node(disk);
16495+ }
16496+ return 0;
16497+}
16498+
16499+/********************************************************/
16500+/*  Support routine for the read/write paths:           */
16501+/*          IO error reporting                          */
16502+/********************************************************/
16503+
16504+/*
16505+ * function: ldev_mgr_io_error
16506+ *
16507+ * this function was primarily created because the function
16508+ * buffer_IO_error is inline and kgdb doesn't allow breakpoints
16509+ * to be set on inline functions. Since this was an error path
16510+ * and not mainline, I decided to add a trace statement to help
16511+ * report on the failing condition.
16512+ *
16513+ */
16514+static void
16515+ldev_mgr_io_error(struct evms_logical_node *disk, int io_flag, struct buffer_head *bh, int rc)
16516+{
16517+ if (rc == -EOVERFLOW) {
16518+ LOG_SERIOUS
16519+ ("attempt to %s beyond boundary("PFU64") on (%s), rsector(%ld).\n",
16520+ (io_flag) ? "WRITE" : "READ", disk->total_vsectors - 1,
16521+ disk->name, bh->b_rsector);
16522+ } else if (rc == -ENXIO) {
16523+ LOG_SERIOUS("attempt to access a non-existent device(%s).\n",
16524+ disk->name);
16525+ }
16526+ bh->b_end_io(bh, 0);
16527+}
16528+
16529+/********************************************************/
16530+/* Required Plugin Function Table Entry Point: */
16531+/* Read function */
16532+/********************************************************/
16533+
16534+static void
16535+ldev_mgr_read(struct evms_logical_node *disk, struct buffer_head *bh)
16536+{
16537+ int rc = 0;
16538+ request_queue_t *q;
16539+ struct ldev_private *ldev_prv;
16540+
16541+ ldev_prv = disk->private;
16542+ if (bh->b_rsector + (bh->b_size >> EVMS_VSECTOR_SIZE_SHIFT) <=
16543+ disk->total_vsectors) {
16544+ bh->b_rdev = MKDEV(ldev_prv->major, ldev_prv->minor);
16545+ q = blk_get_queue(bh->b_rdev);
16546+ if (q) {
16547+ disk->flags &= ~EVMS_DEVICE_UNAVAILABLE;
16548+ q->make_request_fn(q, READ, bh);
16549+ return;
16550+ } else {
16551+ rc = -ENXIO;
16552+ disk->flags |= EVMS_DEVICE_UNAVAILABLE;
16553+ }
16554+ } else {
16555+ rc = -EOVERFLOW;
16556+ }
16557+ if (rc) {
16558+ ldev_mgr_io_error(disk, READ, bh, rc);
16559+ }
16560+}
16561+
16562+/********************************************************/
16563+/* Required Plugin Function Table Entry Point: */
16564+/* Write function */
16565+/********************************************************/
16566+
16567+static void
16568+ldev_mgr_write(struct evms_logical_node *disk, struct buffer_head *bh)
16569+{
16570+ int rc = 0;
16571+ request_queue_t *q;
16572+ struct ldev_private *ldev_prv;
16573+
16574+ ldev_prv = disk->private;
16575+ if (bh->b_rsector + (bh->b_size >> EVMS_VSECTOR_SIZE_SHIFT) <=
16576+ disk->total_vsectors) {
16577+ bh->b_rdev = MKDEV(ldev_prv->major, ldev_prv->minor);
16578+ q = blk_get_queue(bh->b_rdev);
16579+ if (q) {
16580+ disk->flags &= ~EVMS_DEVICE_UNAVAILABLE;
16581+ q->make_request_fn(q, WRITE, bh);
16582+ return;
16583+ } else {
16584+ rc = -ENXIO;
16585+ disk->flags |= EVMS_DEVICE_UNAVAILABLE;
16586+ }
16587+ } else {
16588+ rc = -EOVERFLOW;
16589+ }
16590+ if (rc) {
16591+ ldev_mgr_io_error(disk, WRITE, bh, rc);
16592+ }
16593+}
16594+
16595+/********************************************************/
16596+/* Required Plugin Function Table Entry Point: */
16597+/* Init_io function & Support routines */
16598+/********************************************************/
16599+
16600+/*
16601+ * function: allocate_bh
16602+ *
16603+ * This function obtains a buffer head from the private
16604+ * buffer head pool (pre-allocated at EVMS initial
16605+ * discovery time).
16606+ *
16607+ * NOTE: All accesses to the buffer head pool are protected
16608+ * by a private spinlock.
16609+ *
16610+ */
16611+static inline struct buffer_head *
16612+allocate_bh(void)
16613+{
16614+ struct buffer_head *bh =
16615+ evms_cs_allocate_from_pool(evms_bh_pool, FALSE);
16616+ if (bh) {
16617+ init_waitqueue_head(&bh->b_wait);
16618+ }
16619+ return (bh);
16620+}
16621+
16622+/*
16623+ * function: deallocate_bh
16624+ *
16625+ * This function returns a buffer head to the private
16626+ * buffer head pool (pre-allocated at EVMS initial
16627+ * discovery time).
16628+ *
16629+ * NOTE: All accesses to the buffer head pool are protected
16630+ * by a private spinlock.
16631+ *
16632+ */
16633+static inline void
16634+deallocate_bh(struct buffer_head *bh)
16635+{
16636+ evms_cs_deallocate_to_pool(evms_bh_pool, bh);
16637+}
16638+
16639+/* this is the buffer head control block structure definition */
16640+typedef struct bh_cb_s {
16641+ int rc;
16642+ atomic_t blks_allocated;
16643+ wait_queue_head_t cb_wait;
16644+} bh_cb_t;
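+/* Typical bh_cb_t lifecycle (a summary of how ldev_init_io and
+ * end_bh_cb_io_sync below actually use it, not additional API):
+ *
+ *	bh_cb_t bh_cb;
+ *	memset(&bh_cb, 0, sizeof(bh_cb_t));
+ *	init_waitqueue_head(&bh_cb.cb_wait);
+ *	// each submitted bh sets b_private = &bh_cb and increments
+ *	// blks_allocated; the completion handler decrements the count
+ *	// and wakes cb_wait when it reaches zero.
+ */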
16645+
16646+/*
16647+ * function: __wait_on_bh_cb
16648+ *
16649+ * This is the worker function for wait_on_bh_cb.
16650+ * It waits for a set of private buffer heads
16651+ * associated with the specified buffer head control block
16652+ * to return from I/O completion. On completion of the
16653+ * last buffer head, the calling function is awakened
16654+ * and continues running.
16655+ *
16658+ */
16659+static void
16660+__wait_on_bh_cb(bh_cb_t * bh_cb)
16661+{
16662+ struct task_struct *tsk = current;
16663+ DECLARE_WAITQUEUE(wait, tsk);
16664+
16665+ add_wait_queue(&bh_cb->cb_wait, &wait);
16666+ do {
16667+ run_task_queue(&tq_disk);
16668+ set_task_state(tsk, TASK_UNINTERRUPTIBLE);
16669+ if (!atomic_read(&bh_cb->blks_allocated))
16670+ break;
16671+ schedule();
16672+ } while (atomic_read(&bh_cb->blks_allocated));
16673+#ifdef O1_SCHEDULER
16674+ set_task_state(tsk, TASK_RUNNING);
16675+#else
16676+ tsk->state = TASK_RUNNING;
16677+#endif
16678+ remove_wait_queue(&bh_cb->cb_wait, &wait);
16679+}
16680+
16681+/*
16682+ * function: wait_on_bh_cb
16683+ *
16684+ * This function waits for a set of private buffer heads
16685+ * associated with the specified buffer head control block
16686+ * to return from I/O completion. On completion of the
16687+ * last buffer head, the calling function is awakened
16688+ * and continues running.
16689+ *
16690+ */
16691+static void
16692+wait_on_bh_cb(bh_cb_t * bh_cb)
16693+{
16694+ if (atomic_read(&bh_cb->blks_allocated))
16695+ __wait_on_bh_cb(bh_cb);
16696+ else
16697+ /* if we ended up with no buffer heads on
16698+		 * this pass, let's wait until a few buffer
16699+ * heads have been freed and try again. This
16700+ * should provide a reasonable delay.
16701+ */
16702+ schedule();
16703+}
16704+
16705+/*
16706+ * function: end_bh_cb_io_sync
16707+ *
16708+ * This is the I/O completion function that is called for
16709+ * each private buffer head obtained from the buffer head
16710+ * pool. Control returns through this routine so we can track
16711+ * all outstanding requests, know when to awaken the caller,
16712+ * and regain control after all I/Os have been performed.
16713+ *
16714+ */
16715+static void
16716+end_bh_cb_io_sync(struct buffer_head *bh, int uptodate)
16717+{
16718+ bh_cb_t *bh_cb = (bh_cb_t *) bh->b_private;
16719+
16720+ /* record that errors occurred */
16721+ if (!uptodate) {
16722+ bh_cb->rc = -EIO;
16723+ }
16724+ mark_buffer_uptodate(bh, uptodate);
16725+ unlock_buffer(bh);
16726+
16727+ deallocate_bh(bh);
16728+ atomic_dec(&bh_cb->blks_allocated);
16729+ if (!atomic_read(&bh_cb->blks_allocated))
16730+ if (waitqueue_active(&bh_cb->cb_wait))
16731+ wake_up(&bh_cb->cb_wait);
16732+}
16733+
16734+/*
16735+ * function: ldev_partial_sector_init_io
16736+ *
16737+ * This function is a support function for ldev_init_io.
16738+ * It handles the case of performing I/O to only part
16739+ * of a non-standard-sized hardsector. This function is not
16740+ * designed to be called directly, but only via ldev_init_io.
16741+ *
16742+ */
16743+static int
16744+ldev_partial_sector_init_io(struct evms_logical_node *node,
16745+ int io_flag,
16746+ bh_cb_t * bh_cb,
16747+ u64 next_lsn,
16748+ u64 sector_lsn,
16749+ u64 io_size,
16750+ void *bufptr, unsigned char **sector_buf)
16751+{
16752+ int rc = 0;
16753+ struct ldev_private *ldev_prv = node->private;
16754+ kdev_t dev = MKDEV(ldev_prv->major, ldev_prv->minor);
16755+ struct buffer_head *bh;
16756+
16757+ if (*sector_buf == NULL) {
16758+ /* allocate buffer for incoming sector */
16759+ *sector_buf = kmalloc(node->hardsector_size, GFP_KERNEL);
16760+ if (!*sector_buf)
16761+ return -ENOMEM;
16762+ }
16763+ /* allocate a buffer head from the pool */
16764+ while ((bh = allocate_bh()) == NULL)
16765+ /* yielding the cpu is playing it
16766+ * safe. it might be wiser to just
16767+ * spin. requires more thought.
16768+ */
16769+ schedule();
16770+
16771+ /* set up the buffer head for this sector */
16772+ bh->b_end_io = end_bh_cb_io_sync;
16773+ bh->b_size = node->hardsector_size;
16774+ bh->b_rdev = dev;
16775+ bh->b_rsector = next_lsn - sector_lsn;
16776+ bh->b_data = *sector_buf;
16777+ bh->b_page = virt_to_page(*sector_buf); /* this isn't handling the case of a block with more than 1 sector, that spans pages */
16778+ bh->b_state = 0;
16779+ set_bit(BH_Dirty, &bh->b_state);
16780+ set_bit(BH_Lock, &bh->b_state);
16781+ set_bit(BH_Req, &bh->b_state);
16782+ set_bit(BH_Mapped, &bh->b_state);
16783+ bh->b_private = (void *) bh_cb;
16784+ atomic_inc(&bh_cb->blks_allocated);
16785+
16786+ /* drive the buffer head down */
16787+ /* to the device */
16788+ generic_make_request(READ, bh);
16789+
16790+ /* wait for all bh's I/O's to end */
16791+ wait_on_bh_cb(bh_cb);
16792+
16793+ /* copy data to/from user */
16794+ if (io_flag != WRITE)
16795+ /* READ */
16796+ memcpy(bufptr,
16797+ *sector_buf + (sector_lsn << EVMS_VSECTOR_SIZE_SHIFT),
16798+ io_size << EVMS_VSECTOR_SIZE_SHIFT);
16799+ else {
16800+ /* WRITE */
16801+ memcpy(*sector_buf + (sector_lsn << EVMS_VSECTOR_SIZE_SHIFT),
16802+ bufptr, io_size << EVMS_VSECTOR_SIZE_SHIFT);
16803+
16804+ /* allocate a buffer head from the pool */
16805+ while ((bh = allocate_bh()) == NULL)
16806+ /* yielding the cpu is playing it
16807+ * safe. it might be wiser to just
16808+ * spin. requires more thought.
16809+ */
16810+ schedule();
16811+
16812+ /* set up the buffer head for this sector */
16813+ bh->b_end_io = end_bh_cb_io_sync;
16814+ bh->b_size = node->hardsector_size;
16815+ bh->b_rdev = dev;
16816+ bh->b_rsector = next_lsn - sector_lsn;
16817+ bh->b_data = *sector_buf;
16818+ bh->b_page = virt_to_page(*sector_buf); /* this isn't handling the case of a block with more than 1 sector, that spans pages */
16819+ bh->b_state = 0;
16820+ set_bit(BH_Dirty, &bh->b_state);
16821+ set_bit(BH_Lock, &bh->b_state);
16822+ set_bit(BH_Req, &bh->b_state);
16823+ set_bit(BH_Mapped, &bh->b_state);
16824+ bh->b_private = (void *) bh_cb;
16825+ atomic_inc(&bh_cb->blks_allocated);
16826+
16827+ /* drive the buffer head down */
16828+ /* to the device */
16829+ generic_make_request(WRITE, bh);
16830+
16831+ /* wait for all bh's I/O's to end */
16832+ wait_on_bh_cb(bh_cb);
16833+ }
16834+ return (rc);
16835+}
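+/* Worked example (hypothetical numbers): with a 2048-byte hardsector
+ * (4 LSNs per hardsector), a WRITE of 2 LSNs starting at LSN 5 yields
+ * sector_lsn = 1 and io_size = 2. The routine reads the whole
+ * hardsector covering LSNs 4-7 into *sector_buf, merges the caller's
+ * 2 LSNs at byte offset 512, and writes the full hardsector back.
+ */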
16836+
16837+/*
16838+ * function: ldev_init_io
16839+ *
16840+ * This function provides support for synchronous I/O
16841+ * operations to the underlying devices. These I/O
16842+ * operations are NOT buffered in any way including the
16843+ * operating system's buffer cache.
16844+ *
16845+ * This function can work with any hardsector size that
16846+ * is a power of 2.
16847+ *
16848+ * node : logical node of the target logical disk
16849+ * io_flag       : 0 = read, 1 = write, 2 = read-ahead
16850+ * starting_lsn : the 0-based (disk relative) logical
16851+ * : (512 byte) sector number (lsn)
16852+ * num_lsns : the total number of lsns in this I/O
16853+ * bufptr : address of the memory to read/write the data
16854+ *
16855+ */
16856+static int
16857+ldev_init_io(struct evms_logical_node *node,
16858+ int io_flag,
16859+ u64 starting_lsn, u64 num_lsns, void *bufptr)
16860+{
16861+ int rc = 0, lsns_per_hardsector, lsns_per_blocksize;
16862+ unchar *sector_buf = NULL, *cur_bufptr;
16863+ u64 next_lsn, remaining_lsns, sector_lsn;
16864+ struct ldev_private *ldev_prv = node->private;
16865+ kdev_t dev = MKDEV(ldev_prv->major, ldev_prv->minor);
16866+ bh_cb_t bh_cb;
16867+
16868+ LOG_EVERYTHING
16869+ ("%s Entry: Disk(%u,%u), ioflag(%u), start_lsn("PFU64"), num_lsns("PFU64"), bufptr(0x%p)\n",
16870+ __FUNCTION__, ldev_prv->major, ldev_prv->minor, io_flag,
16871+ starting_lsn, num_lsns, bufptr);
16872+
16873+ /* check for valid device */
16874+	if (!blk_size[ldev_prv->major] ||
+	    !blk_size[ldev_prv->major][ldev_prv->minor]) {
16875+ node->flags |= EVMS_DEVICE_UNAVAILABLE;
16876+ return (-ENXIO);
16877+ }
16878+ /* check for 0 length request */
16879+ if (num_lsns == 0) {
16880+ LOG_ERROR("%s: error requesting 0 sectors.\n", __FUNCTION__);
16881+ return (-EINVAL);
16882+ }
16883+ /* check for out of bound request */
16884+ if ((starting_lsn + num_lsns) > node->total_vsectors) {
16885+ LOG_ERROR
16886+ ("%s: attempted %s beyond logical disk boundary("PFU64" LSNs), requesting LSN("PFU64"), total LSNs("PFU64").\n",
16887+ __FUNCTION__, (io_flag == WRITE) ? "WRITE" : "READ",
16888+ node->total_vsectors, starting_lsn, num_lsns);
16889+ return (-EINVAL);
16890+ }
16891+ /* check for invalid io_flag value */
16892+ switch (io_flag) {
16893+ case READ: /* read... */
16894+ case WRITE: /* write... */
16895+ case READA: /* reada... */
16896+ break;
16897+ default:
16898+ return (-EINVAL);
16899+ }
16900+
16901+ /* compute some per device info once up-front */
16902+ lsns_per_hardsector = node->hardsector_size / EVMS_VSECTOR_SIZE;
16903+ lsns_per_blocksize = node->block_size / EVMS_VSECTOR_SIZE;
16904+
16905+ /* initialize the buffer head control block */
16906+ memset(&bh_cb, 0, sizeof (bh_cb_t));
16907+ init_waitqueue_head(&bh_cb.cb_wait);
16908+ bh_cb.blks_allocated = (atomic_t)ATOMIC_INIT(0);
16909+
16910+ /* only update the local copy of variables */
16911+ cur_bufptr = bufptr;
16912+ next_lsn = starting_lsn;
16913+ remaining_lsns = num_lsns;
16914+
16915+ /* check for a mid-sector starting offset
16916+ *
16917+ * if found, perform I/O on part of that
16918+ * sector
16919+ */
16920+ sector_lsn = next_lsn & (lsns_per_hardsector - 1);
16921+ if (sector_lsn) {
16922+ u64 io_size;
16923+
16924+ /* determine bytes in IO to this sector */
16925+ io_size = lsns_per_hardsector - sector_lsn;
16926+ if (io_size > remaining_lsns)
16927+ io_size = remaining_lsns;
16928+
16929+ /* perform the partial sector io */
16930+ rc = ldev_partial_sector_init_io(node, io_flag, &bh_cb,
16931+ next_lsn,
16932+ sector_lsn, io_size,
16933+ cur_bufptr, &sector_buf);
16934+
16935+ if (!rc) {
16936+ /* update progress in local variables */
16937+ cur_bufptr += io_size << EVMS_VSECTOR_SIZE_SHIFT;
16938+ next_lsn += io_size;
16939+ remaining_lsns -= io_size;
16940+ }
16941+ }
16942+
16943+ /* continue if no errors found */
16944+ if (!rc) {
16945+ /* perform I/O on all the complete sectors
16946+ * in this request.
16947+ *
16948+ * loop until there are no more complete sectors
16949+ * to process.
16950+ */
16951+ while (remaining_lsns >= lsns_per_hardsector) {
16952+ /* this inner loop attempts to drive as many
16953+ * bytes (in sector size multiples) down to
16954+ * the device as possible using the available
16955+ * buffer heads in the pool.
16956+ */
16957+ while (remaining_lsns >= lsns_per_hardsector) {
16958+ struct buffer_head *bh;
16959+
16960+ /* allocate a buffer head from the pool */
16961+ bh = allocate_bh();
16962+ if (bh == NULL)
16963+ break;
16964+
16965+ /* set up the buffer head for this I/O */
16966+ bh->b_end_io = end_bh_cb_io_sync;
16967+ bh->b_size =
16968+ (remaining_lsns >= lsns_per_blocksize) ?
16969+ node->block_size : node->hardsector_size;
16970+ bh->b_data = cur_bufptr;
16971+ bh->b_rdev = dev;
16972+ bh->b_rsector = next_lsn;
16973+ bh->b_page = virt_to_page(cur_bufptr); /* this isn't handling the case of a block with more than 1 sector, that spans pages */
16974+ bh->b_state = 0;
16975+ set_bit(BH_Dirty, &bh->b_state);
16976+ set_bit(BH_Lock, &bh->b_state);
16977+ set_bit(BH_Req, &bh->b_state);
16978+ set_bit(BH_Mapped, &bh->b_state);
16979+ bh->b_private = (void *) &bh_cb;
16980+ atomic_inc(&bh_cb.blks_allocated);
16981+
16982+ /* drive the buffer head down */
16983+ /* to the device */
16984+ generic_make_request(io_flag, bh);
16985+
16986+ /* update progress in local variables */
16987+ cur_bufptr += bh->b_size;
16988+ next_lsn +=
16989+ bh->b_size >> EVMS_VSECTOR_SIZE_SHIFT;
16990+ remaining_lsns -=
16991+ bh->b_size >> EVMS_VSECTOR_SIZE_SHIFT;
16992+ }
16993+ /* wait for all bh's I/O's to end */
16994+ wait_on_bh_cb(&bh_cb);
16995+ }
16996+ }
16997+
16998+ /* continue if no errors found */
16999+ if (!rc)
17000+ /* check for a mid-sector ending offset
17001+ *
17002+ * if found, perform I/O on part of that
17003+ * sector
17004+ */
17005+ if (remaining_lsns)
17006+ /* perform the partial sector io */
17007+ rc = ldev_partial_sector_init_io(node, io_flag, &bh_cb,
17008+ next_lsn,
17009+ 0, remaining_lsns,
17010+ cur_bufptr,
17011+ &sector_buf);
17012+
17013+ /* free the sector buffer if it was allocated */
17014+ if (sector_buf)
17015+ kfree(sector_buf);
17016+
17017+ /* coalesce return codes */
17018+ rc |= bh_cb.rc;
17019+
17020+ LOG_EVERYTHING("%s Exit: rc(%u)\n", __FUNCTION__, rc);
17021+
17022+ return (rc);
17023+}
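+/* Illustrative usage sketch (not part of this driver): a plugin doing
+ * a synchronous, uncached read of the first metadata sector from a
+ * disk node might look like the following; "node" and "buf" are
+ * hypothetical.
+ *
+ *	u8 buf[EVMS_VSECTOR_SIZE];
+ *	int rc = ldev_init_io(node, READ, 0, 1, buf);
+ *	if (rc)
+ *		LOG_ERROR("metadata read failed, rc(%d).\n", rc);
+ */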
17024+
17025+static int
17026+ldev_mgr_direct_ioctl(struct inode *inode,
17027+ struct file *file, unsigned int cmd, unsigned long arg)
17028+{
17029+ int rc = 0;
17030+ struct ldev_private *ldev_prv;
17031+ struct evms_plugin_ioctl_pkt tmp, *user_parms;
17032+ struct ldev_plugin_ioctl pi_data;
17033+ struct evms_logical_node *disk;
17034+
17035+ MOD_INC_USE_COUNT;
17036+
17037+ user_parms = (struct evms_plugin_ioctl_pkt *) arg;
17038+ /* copy user's parameters to kernel space */
17039+ if (copy_from_user(&tmp, user_parms, sizeof (tmp)))
17040+ rc = -EFAULT;
17041+
17042+ if (!rc) {
17043+		/* validate it's meant for us */
17044+ if (tmp.feature_id != plugin_header.id) {
17045+ rc = -EINVAL;
17046+ }
17047+ }
17048+
17049+ if (!rc) {
17050+ /* copy feature ioctl data to kernel space */
17051+ if (copy_from_user(&pi_data, tmp.feature_ioctl_data,
17052+ sizeof (pi_data))) {
17053+ rc = -EFAULT;
17054+ }
17055+ }
17056+
17057+ if (!rc) {
17058+ /* find the disk node specified by the disk_handle */
17059+ int done = FALSE;
17060+ disk = NULL;
17061+ while (!done) {
17062+ rc = evms_cs_find_next_device(disk,
17063+ &disk);
17064+ if (rc) {
17065+ break;
17066+ }
17067+ if (!disk) {
17068+ rc = -ENODATA;
17069+ break;
17070+ }
17071+ if (disk ==
17072+ DEV_HANDLE_TO_NODE(pi_data.disk_handle)) {
17073+ done = TRUE;
17074+ }
17075+ }
17076+ }
17077+
17078+ if (!rc) {
17079+ /* perform feature command */
17080+ ldev_prv = (struct ldev_private *) disk->private;
17081+ switch (tmp.feature_command) {
17082+ kdev_t save_dev;
17083+ case LDEV_MGR_BROADCAST_IOCTL_CMD:
17084+ save_dev = inode->i_rdev;
17085+ inode->i_rdev =
17086+ MKDEV(ldev_prv->major, ldev_prv->minor);
17087+ rc = ldev_prv->bdev->bd_op->ioctl(inode, file,
17088+ pi_data.cmd,
17089+ pi_data.arg);
17090+ inode->i_rdev = save_dev;
17091+ break;
17092+ default:
17093+ rc = -EINVAL;
17094+ break;
17095+ }
17096+ }
17097+
17098+ /* return status value */
17099+ tmp.status = rc;
17100+	if (copy_to_user((struct evms_plugin_ioctl_pkt *) arg, &tmp, sizeof (tmp)))
+		rc = -EFAULT;
17101+ MOD_DEC_USE_COUNT;
17102+ return rc;
17103+}
17104+
17105+/********************************************************/
17106+/* Required Plugin Function Table Entry Point: */
17107+/* IOCTL function & Support routines */
17108+/********************************************************/
17109+
17110+static int
17111+ldev_mgr_ioctl(struct evms_logical_node *disk,
17112+ struct inode *inode,
17113+ struct file *file, unsigned int cmd, unsigned long arg)
17114+{
17115+ int rc = 0;
17116+	struct ldev_private *ldev_prv;
17117+	kdev_t save_dev;
17118+	struct block_device *save_bdev;
17119+
17120+	if (!inode || !disk)
17121+		return -EINVAL;
+	ldev_prv = disk->private;
17122+
17123+ save_dev = inode->i_rdev;
17124+ inode->i_rdev = MKDEV(ldev_prv->major, ldev_prv->minor);
17125+ save_bdev = inode->i_bdev;
17126+ inode->i_bdev = ldev_prv->bdev;
17127+ /* check device availability */
17128+ if (!blk_get_queue(MKDEV(ldev_prv->major, ldev_prv->minor))) {
17129+ disk->flags |= EVMS_DEVICE_UNAVAILABLE;
17130+ }
17131+ switch (cmd) {
17132+ case EVMS_QUIESCE_VOLUME:
17133+ case EVMS_PLUGIN_IOCTL:
17134+ break;
17135+ case EVMS_GET_BMAP:
17136+ {
17137+ struct evms_get_bmap_pkt *bmap =
17138+ (struct evms_get_bmap_pkt *) arg;
17139+ bmap->dev = MKDEV(ldev_prv->major, ldev_prv->minor);
17140+ bmap->status = 0;
17141+ }
17142+ break;
17143+ case EVMS_OPEN_VOLUME:
17144+ if (disk->flags & EVMS_DEVICE_UNAVAILABLE) {
17145+ rc = -ENXIO;
17146+ } else {
17147+ rc = ldev_prv->bdev->bd_op->open(inode, file);
17148+ }
17149+ break;
17150+ case EVMS_CLOSE_VOLUME:
17151+ if (disk->flags & EVMS_DEVICE_UNAVAILABLE) {
17152+ rc = -ENXIO;
17153+ } else {
17154+ rc = ldev_prv->bdev->bd_op->release(inode, file);
17155+ }
17156+ break;
17157+ case EVMS_CHECK_MEDIA_CHANGE:
17158+ if (disk->flags & EVMS_DEVICE_UNAVAILABLE) {
17159+ rc = -ENXIO;
17160+ } else {
17161+			/* once we detect that the media-changed
17162+			 * flag is 'set', don't send any more ioctls
17163+			 * down to the device until the
17164+			 * media change has been 'reset' by a
17165+			 * revalidate-disk ioctl. While it is still
17166+			 * 'set', just return 1 without actually
17167+			 * performing another ioctl call to the
17168+			 * device.
17169+ */
17170+ if (ldev_prv->media_changed == TRUE) {
17171+ rc = 1;
17172+ break;
17173+ }
17174+ rc = ldev_prv->bdev->bd_op->
17175+ check_media_change(MKDEV
17176+ (ldev_prv->major,
17177+ ldev_prv->minor));
17178+ if (rc == 1) {
17179+ ldev_prv->media_changed = TRUE;
17180+ disk->flags |= EVMS_MEDIA_CHANGED;
17181+ }
17182+ }
17183+ break;
17184+ case EVMS_REVALIDATE_DISK:
17185+ if (disk->flags & EVMS_DEVICE_UNAVAILABLE) {
17186+ rc = -ENXIO;
17187+ } else {
17188+			/* don't actually send this ioctl down
17189+			 * to the device until we know that a
17190+			 * previous check-media-change ioctl
17191+			 * has occurred.
17192+ *
17193+ * when we do actually send the ioctl
17194+ * down, reset the local media_changed
17195+ * flag.
17196+ */
17197+ if (ldev_prv->media_changed == FALSE)
17198+ break;
17199+ rc = ldev_prv->bdev->bd_op->
17200+ revalidate(MKDEV
17201+ (ldev_prv->major, ldev_prv->minor));
17202+ ldev_prv->media_changed = FALSE;
17203+ }
17204+ break;
17205+ case EVMS_GET_DISK_LIST:
17206+ rc = evms_cs_add_item_to_list((struct evms_list_node **) arg,
17207+ disk);
17208+ if (rc > 0)
17209+ rc = 0;
17210+ break;
17211+ case EVMS_CHECK_DEVICE_STATUS:
17212+ if (arg) {
17213+ int *status = (int *) arg;
17214+ *status |= disk->flags;
17215+ }
17216+ break;
17217+ case EVMS_UPDATE_DEVICE_INFO:
17218+ /* determine hardsector size */
17219+ disk->hardsector_size = 512;
17220+ if (hardsect_size[ldev_prv->major]) {
17221+ disk->hardsector_size = hardsect_size[ldev_prv->major][ldev_prv->minor];
17222+ }
17223+ /* save the block size */
17224+ disk->block_size = 1024;
17225+ if (blksize_size[ldev_prv->major]) {
17226+ disk->block_size = blksize_size[ldev_prv->major][ldev_prv->minor];
17227+ }
17228+ /* device size in sectors
17229+ *
17230+ * try 64bit size first, if that fails
17231+ * fall back on the 32bit size.
17232+ */
17233+ /* try 64bit size */
17234+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,4,18)
17235+ rc = evms_cs_kernel_ioctl(disk, BLKGETSIZE64,
17236+ (ulong) & disk->total_vsectors);
17237+ if (!rc) {
17238+ /* convert bytes to 512 byte sectors */
17239+ disk->total_vsectors >>= EVMS_VSECTOR_SIZE_SHIFT;
17240+ } else
17241+#endif
17242+ {
17243+ /* try 32bit size */
17244+ ulong dev_size = 0;
17245+ rc = evms_cs_kernel_ioctl(disk, BLKGETSIZE,
17246+ (ulong) & dev_size);
17247+ disk->total_vsectors = dev_size;
17248+ }
17249+ break;
17250+ default:
17251+ if (disk->flags & EVMS_DEVICE_UNAVAILABLE) {
17252+ rc = -ENXIO;
17253+ } else {
17254+ rc = ldev_prv->bdev->bd_op->ioctl(inode, file, cmd,
17255+ arg);
17256+ }
17257+ break;
17258+ }
17259+ inode->i_bdev = save_bdev;
17260+ inode->i_rdev = save_dev;
17261+
17262+ return (rc);
17263+}
17264+
17265+/********************************************************/
17266+/* Required Module Entry Point: */
17267+/* ldev_mgr_init */
17268+/********************************************************/
17269+
17270+static int __init
17271+ldev_mgr_init(void)
17272+{
17273+ return evms_cs_register_plugin(&plugin_header);
17274+}
17275+
17276+static void __exit
17277+ldev_mgr_exit(void)
17278+{
17279+ evms_cs_unregister_plugin(&plugin_header);
17280+}
17281+
17282+module_init(ldev_mgr_init);
17283+module_exit(ldev_mgr_exit);
17284+#ifdef MODULE_LICENSE
17285+MODULE_LICENSE("GPL");
17286+#endif
17287diff -Naur linux-2002-09-30/drivers/evms/lvm_vge.c evms-2002-09-30/drivers/evms/lvm_vge.c
17288--- linux-2002-09-30/drivers/evms/lvm_vge.c Wed Dec 31 18:00:00 1969
17289+++ evms-2002-09-30/drivers/evms/lvm_vge.c Fri Sep 13 16:45:06 2002
17290@@ -0,0 +1,3734 @@
17291+/* -*- linux-c -*- */
17292+/*
17293+ * Copyright (c) International Business Machines Corp., 2000
17294+ *
17295+ * This program is free software; you can redistribute it and/or modify
17296+ * it under the terms of the GNU General Public License as published by
17297+ * the Free Software Foundation; either version 2 of the License, or
17298+ * (at your option) any later version.
17299+ *
17300+ * This program is distributed in the hope that it will be useful,
17301+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
17302+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
17303+ * the GNU General Public License for more details.
17304+ *
17305+ * You should have received a copy of the GNU General Public License
17306+ * along with this program; if not, write to the Free Software
17307+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17308+ */
17309+/*
17310+ * linux/drivers/evms/lvm_vge.c
17311+ *
17312+ * EVMS Linux LVM Region Manager
17313+ */
17314+
17315+#define LOG_PREFIX "lvm: "
17316+
17317+#include <linux/kernel.h>
17318+#include <linux/module.h>
17319+#include <linux/vmalloc.h>
17320+#include <linux/version.h>
17321+#include <asm/uaccess.h>
17322+
17323+#include <linux/evms/evms.h>
17324+#include <linux/evms/evms_lvm.h>
17325+
17326+/* Plugin API prototypes. */
17327+static int lvm_discover(struct evms_logical_node ** evms_node_list);
17328+static int lvm_discover_end(struct evms_logical_node ** evms_node_list);
17329+static int lvm_delete_node(struct evms_logical_node * logical_node);
17330+static void lvm_read(struct evms_logical_node * node, struct buffer_head * bh);
17331+static void lvm_write(struct evms_logical_node * node, struct buffer_head * bh);
17332+static int lvm_init_io(struct evms_logical_node * node,
17333+ int io_flag,
17334+ u64 sect_nr,
17335+ u64 num_sects,
17336+ void * buf_addr);
17337+static int lvm_ioctl(struct evms_logical_node * logical_node,
17338+ struct inode * inode,
17339+ struct file * file,
17340+ unsigned int cmd,
17341+ unsigned long arg);
17342+static int lvm_direct_ioctl(struct inode * inode,
17343+ struct file * file,
17344+ unsigned int cmd,
17345+ unsigned long args);
17346+
17347+static struct snapshot_map_entry * allocate_snapshot_map_entry(u64 org_sector,
17348+ u64 snap_sector);
17349+
17350+/* LVM Plugin function table and header. */
17351+static struct evms_plugin_fops lvm_fops = {
17352+ .discover = lvm_discover,
17353+ .end_discover = lvm_discover_end,
17354+ .delete = lvm_delete_node,
17355+ .read = lvm_read,
17356+ .write = lvm_write,
17357+ .init_io = lvm_init_io,
17358+ .ioctl = lvm_ioctl,
17359+ .direct_ioctl = lvm_direct_ioctl
17360+};
17361+
17362+static struct evms_plugin_header lvm_plugin_header = {
17363+ .id = SetPluginID(IBM_OEM_ID,
17364+ EVMS_REGION_MANAGER,
17365+ 0x01),
17366+ .version = {
17367+ .major = EVMS_LVM_VERSION_MAJOR,
17368+ .minor = EVMS_LVM_VERSION_MINOR,
17369+ .patchlevel = EVMS_LVM_VERSION_PATCH
17370+ },
17371+ .required_services_version = {
17372+ .major = 0,
17373+ .minor = 5,
17374+ .patchlevel = 0
17375+ },
17376+ .fops = &lvm_fops
17377+};
17378+
17379+static struct lvm_volume_group * lvm_group_list = NULL;
17380+static struct proc_dir_entry * lvm_proc = NULL;
17381+
17382+
17383+/********** Miscellaneous Functions **********/
17384+
17385+
17386+/**
17387+ * remap sector
17388+ * @node:
17389+ * @org_sector: Logical sector to remap.
17390+ * @size: Size (in sectors) of request to remap.
17391+ * @new_sector: Remapped sector.
17392+ * @new_size: New size (in sectors).
17393+ * @pe_start_sector: Starting sector of PE - needed for snapshotting.
17394+ * @pv_entry: New node for which new_sector is relative.
17395+ *
17396+ * Common function to remap LV lba to PV lba in appropriate PE. This
17397+ * function needs to deal with requests that span PEs and/or stripes. If
17398+ * this occurs, the request will simply be chopped off at the boundary of
17399+ * the first PE/stripe. It is up to the calling function to loop
17400+ * accordingly to finish the full remapping. This function is only partially
17401+ * 64-bit enabled: the striping section cannot currently eliminate mod
17402+ * operations on 64-bit values, so it truncates the sector to 32 bits.
17403+ **/
17404+static int remap_sector(struct evms_logical_node * node,
17405+ u64 org_sector,
17406+ u64 size,
17407+ u64 * new_sector,
17408+ u64 * new_size,
17409+ u64 * pe_start_sector,
17410+ struct lvm_physical_volume ** pv_entry)
17411+{
17412+ struct lvm_logical_volume * volume = node->private;
17413+ struct le_table_entry * le_entry;
17414+ u32 le, offset_in_le;
17415+
17416+ *new_size = size;
17417+
17418+ if ( volume->stripes > 1 ) {
17419+ /* Volume is striped. Reset the size if the request crosses
17420+ * a stripe boundary. Striping in LVM is not 64-bit enabled.
17421+ */
17422+ u32 column, columns, sectors_per_column;
17423+ u32 sector_in_column, stripe_in_column, le_in_column;
17424+ u32 offset_in_stripe, stripe_in_le;
17425+ u32 org_sector32 = org_sector;
17426+
17427+ sectors_per_column = volume->stripes * volume->pe_size;
17428+ column = org_sector32 / sectors_per_column;
17429+ sector_in_column = org_sector32 % sectors_per_column;
17430+ stripe_in_column = sector_in_column / volume->stripe_size;
17431+ le_in_column = stripe_in_column % volume->stripes;
17432+ columns = volume->num_le / volume->stripes;
17433+ le = column + (columns * le_in_column);
17434+
17435+ offset_in_stripe = org_sector32 % volume->stripe_size;
17436+ stripe_in_le = stripe_in_column / volume->stripes;
17437+ offset_in_le = offset_in_stripe +
17438+ stripe_in_le * volume->stripe_size;
17439+
17440+ if ( offset_in_stripe + size > volume->stripe_size ) {
17441+ *new_size = volume->stripe_size - offset_in_stripe;
17442+ }
17443+ } else {
17444+ /* Linear volume. Just find LE and offset. Reset the size if
17445+ * the request crosses an LE boundary. This path is 64-bit safe.
17446+ */
17447+ le = org_sector >> volume->pe_size_shift;
17448+ offset_in_le = org_sector & (volume->pe_size - 1);
17449+
17450+ if ( offset_in_le + size > volume->pe_size ) {
17451+ *new_size = volume->pe_size - offset_in_le;
17452+ }
17453+ }
17454+
17455+ le_entry = &volume->le_map[le];
17456+ *pe_start_sector = le_entry->pe_sector_offset;
17457+ *new_sector = le_entry->pe_sector_offset + offset_in_le;
17458+ *pv_entry = le_entry->owning_pv;
17459+
17460+ return 0;
17461+}
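+/* Worked example of the striped path (hypothetical geometry): with
+ * stripes = 2, stripe_size = 64, pe_size = 1024 and num_le = 4,
+ * org_sector 100 gives sectors_per_column = 2048, column = 0,
+ * stripe_in_column = 1, le_in_column = 1, columns = 2, so le = 2 and
+ * offset_in_le = 36. A request at sector 100 longer than 28 sectors is
+ * clamped to *new_size = 28, the distance to the next stripe boundary.
+ */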
17462+
17463+/**
17464+ * add_group_to_list
17465+ *
17466+ * Add a volume group to the end of the LVM global group list.
17467+ **/
17468+static int add_group_to_list(struct lvm_volume_group * group)
17469+{
17470+ struct lvm_volume_group ** p_group;
17471+
17472+ for ( p_group = &lvm_group_list;
17473+ *p_group; p_group = &(*p_group)->next_group ) {
17474+ ;
17475+ }
17476+
17477+ *p_group = group;
17478+ group->next_group = NULL;
17479+ return 0;
17480+}
17481+
17482+/**
17483+ * remove_group_from_list
17484+ *
17485+ * Remove an LVM volume group from the global LVM list.
17486+ **/
17487+static int remove_group_from_list(struct lvm_volume_group * group)
17488+{
17489+ struct lvm_volume_group ** p_group;
17490+
17491+ for ( p_group = &lvm_group_list;
17492+ *p_group; p_group = &(*p_group)->next_group ) {
17493+ if ( *p_group == group ) {
17494+ *p_group = (*p_group)->next_group;
17495+ group->next_group = NULL;
17496+ break;
17497+ }
17498+ }
17499+
17500+ return 0;
17501+}
17502+
17503+/**
17504+ * find_group_by_uuid
17505+ *
17506+ * Use the vg_uuid to find the desired volume group.
17507+ **/
17508+static int find_group_by_uuid(u8 * vg_uuid,
17509+ struct lvm_volume_group ** group)
17510+{
17511+ struct lvm_volume_group * gp;
17512+
17513+ for ( gp = lvm_group_list; gp; gp = gp->next_group ) {
17514+ if ( ! memcmp(vg_uuid, gp->vg_uuid, UUID_LEN) ) {
17515+ *group = gp;
17516+ return 0;
17517+ }
17518+ }
17519+ *group = NULL;
17520+ return -EINVAL;
17521+}
17522+
17523+/**
17524+ * find_pv_by_number
17525+ *
17526+ * Search the PV list of the specified volume group, looking for the
17527+ * specified PV number. If found, return a pointer to that PV.
17528+ **/
17529+static struct lvm_physical_volume *
17530+find_pv_by_number(u32 pv_number,
17531+ struct lvm_volume_group * group)
17532+{
17533+ struct lvm_physical_volume * pv_entry;
17534+
17535+ for ( pv_entry = group->pv_list; pv_entry; pv_entry = pv_entry->next ) {
17536+ if ( pv_entry->pv_number == pv_number ) {
17537+ return pv_entry;
17538+ }
17539+ }
17540+ return NULL;
17541+}
17542+
17543+/**
17544+ * translate_lv_name
17545+ * @lvm_lv_name: Input LVM-style name.
17546+ * @evms_node_name: Output EVMS-style name.
17547+ *
17548+ * In LVM, volumes have names based on their dev-node, which follow the
17549+ * pattern /dev/group_name/volume_name. In EVMS, the same volume needs
17550+ * to appear as /dev/evms/lvm/group_name/volume_name. Thus, the name from
17551+ * the lv_disk_t needs to be translated before copying to the associated
17552+ * node. evms_node_name must point to a NAME_LEN sized buffer.
17553+ **/
17554+static int translate_lv_name(char * lvm_lv_name, char * evms_node_name)
17555+{
17556+ char * ptr;
17557+
17558+ memset(evms_node_name, 0, NAME_LEN);
17559+
17560+ /* Make sure the string starts with /dev/, and skip over it. */
17561+ ptr = strstr(lvm_lv_name, DEV_DIRECTORY);
17562+ if ( ptr != lvm_lv_name ) {
17563+ LOG_SERIOUS("Invalid LV name: %s\n", lvm_lv_name);
17564+ return -EINVAL;
17565+ }
17566+ ptr = &ptr[strlen(DEV_DIRECTORY)];
17567+
17568+ /* ptr now points to "group_name/volume_name".
17569+ * Use this to create the name for the EVMS node.
17570+ */
17571+ strcpy(evms_node_name, LVM_DEV_DIRECTORY);
17572+ strncat(evms_node_name, ptr, NAME_LEN - strlen(evms_node_name) - 1);
17573+
17574+ return 0;
17575+}
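+/* Example (assuming DEV_DIRECTORY is "/dev/" and LVM_DEV_DIRECTORY is
+ * "lvm/"): an LVM name of "/dev/vg0/lvol1" becomes the EVMS node name
+ * "lvm/vg0/lvol1", which appears as /dev/evms/lvm/vg0/lvol1.
+ */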
17576+
17577+/**
17578+ * check_pv_for_lv
17579+ *
17580+ * Run through all LE maps of all LVs in this group, and make sure the
17581+ * specified PV is not being pointed to by any LEs.
17582+ **/
17583+static int check_pv_for_lv(struct lvm_physical_volume * pv_entry,
17584+ struct lvm_volume_group * group)
17585+{
17586+ struct lvm_logical_volume * volume;
17587+ int i, j;
17588+
17589+ for ( i = 1; i <= MAX_LV; i++ ) {
17590+ if ( (volume = group->volume_list[i]) ) {
17591+ for ( j = 0; j < volume->num_le; j++ ) {
17592+ if ( volume->le_map[j].owning_pv == pv_entry ) {
17593+ return -EINVAL;
17594+ }
17595+ }
17596+ }
17597+ }
17598+ return 0;
17599+}
17600+
17601+
17602+/********** Metadata I/O Functions **********/
17603+
17604+
17605+/**
17606+ * endian_convert_pv
17607+ *
17608+ * Endian-neutral conversion for PV structures.
17609+ **/
17610+static inline void endian_convert_pv(struct pv_disk * pv)
17611+{
17612+ pv->version = le16_to_cpup(&pv->version);
17613+ pv->pv_on_disk.base = le32_to_cpup(&pv->pv_on_disk.base);
17614+ pv->pv_on_disk.size = le32_to_cpup(&pv->pv_on_disk.size);
17615+ pv->vg_on_disk.base = le32_to_cpup(&pv->vg_on_disk.base);
17616+ pv->vg_on_disk.size = le32_to_cpup(&pv->vg_on_disk.size);
17617+ pv->pv_uuidlist_on_disk.base =
17618+ le32_to_cpup(&pv->pv_uuidlist_on_disk.base);
17619+ pv->pv_uuidlist_on_disk.size =
17620+ le32_to_cpup(&pv->pv_uuidlist_on_disk.size);
17621+ pv->lv_on_disk.base = le32_to_cpup(&pv->lv_on_disk.base);
17622+ pv->lv_on_disk.size = le32_to_cpup(&pv->lv_on_disk.size);
17623+ pv->pe_on_disk.base = le32_to_cpup(&pv->pe_on_disk.base);
17624+ pv->pe_on_disk.size = le32_to_cpup(&pv->pe_on_disk.size);
17625+ pv->pv_major = le32_to_cpup(&pv->pv_major);
17626+ pv->pv_number = le32_to_cpup(&pv->pv_number);
17627+ pv->pv_status = le32_to_cpup(&pv->pv_status);
17628+ pv->pv_allocatable = le32_to_cpup(&pv->pv_allocatable);
17629+ pv->pv_size = le32_to_cpup(&pv->pv_size);
17630+ pv->lv_cur = le32_to_cpup(&pv->lv_cur);
17631+ pv->pe_size = le32_to_cpup(&pv->pe_size);
17632+ pv->pe_total = le32_to_cpup(&pv->pe_total);
17633+ pv->pe_allocated = le32_to_cpup(&pv->pe_allocated);
17634+ pv->pe_start = le32_to_cpup(&pv->pe_start);
17635+}
17636+
17637+/**
17638+ * read_pv
17639+ *
17640+ * Read in the PV structure from the specified node. If it contains a
17641+ * valid PV signature, allocate a new struct pv_disk and copy the data.
17642+ **/
17643+static int read_pv(struct evms_logical_node * node, struct pv_disk ** pv)
17644+{
17645+ struct pv_disk * pv_buffer;
17646+ int rc = -ENOMEM;
17647+
17648+ *pv = NULL;
17649+
17650+ /* Buffer for reading the PV metadata. */
17651+ pv_buffer = kmalloc(LVM_PV_DISK_SIZE, GFP_NOIO);
17652+ if (!pv_buffer) {
17653+ LOG_CRITICAL("Error allocating PV metadata buffer for %s\n",
17654+ node->name);
17655+ goto out;
17656+ }
17657+
17658+ /* Read the first two sectors. */
17659+ rc = INIT_IO(node, 0, evms_cs_size_in_vsectors(LVM_PV_DISK_BASE),
17660+ evms_cs_size_in_vsectors(LVM_PV_DISK_SIZE), pv_buffer);
17661+ if (rc) {
17662+ LOG_SERIOUS("Error reading PV metadata from %s\n", node->name);
17663+ goto out_kfree;
17664+ }
17665+
17666+ /* Endian-neutral conversion of PV metadata. */
17667+ endian_convert_pv(pv_buffer);
17668+
17669+ /* Check for an LVM signature and make sure the sizes match.
17670+ * Versions 1 and 2 are both valid now. Thanks LVM! :)
17671+ */
17672+ if ( !(pv_buffer->id[0] == 'H' &&
17673+ pv_buffer->id[1] == 'M' &&
17674+ (pv_buffer->version == 1 || pv_buffer->version == 2) &&
17675+ pv_buffer->pv_size == node->total_vsectors) ) {
17676+ LOG_EXTRA("%s is not an LVM PV\n", node->name);
17677+ rc = -EINVAL;
17678+ goto out_kfree;
17679+ }
17680+
17681+ /* This is a valid PV. Allocate a new pv_disk. */
17682+ *pv = kmalloc(sizeof(struct pv_disk), GFP_NOIO);
17683+ if (!*pv) {
17684+ LOG_CRITICAL("Error allocating new PV for %s\n", node->name);
17685+ rc = -ENOMEM;
17686+ goto out_kfree;
17687+ }
17688+
17689+ /* Copy the metadata. */
17690+ memcpy(*pv, pv_buffer, sizeof(struct pv_disk));
17691+
17692+out_kfree:
17693+ kfree(pv_buffer);
17694+out:
17695+ return rc;
17696+}
17697+
17698+/**
17699+ * endian_convert_vg
17700+ *
17701+ * Endian-neutral conversion for VG structures
17702+ **/
17703+static inline void endian_convert_vg(struct vg_disk * vg)
17704+{
17705+ vg->vg_number = le32_to_cpup(&vg->vg_number);
17706+ vg->vg_access = le32_to_cpup(&vg->vg_access);
17707+ vg->vg_status = le32_to_cpup(&vg->vg_status);
17708+ vg->lv_max = le32_to_cpup(&vg->lv_max);
17709+ vg->lv_cur = le32_to_cpup(&vg->lv_cur);
17710+ vg->lv_open = le32_to_cpup(&vg->lv_open);
17711+ vg->pv_max = le32_to_cpup(&vg->pv_max);
17712+ vg->pv_cur = le32_to_cpup(&vg->pv_cur);
17713+ vg->pv_act = le32_to_cpup(&vg->pv_act);
17714+ vg->dummy = le32_to_cpup(&vg->dummy);
17715+ vg->vgda = le32_to_cpup(&vg->vgda);
17716+ vg->pe_size = le32_to_cpup(&vg->pe_size);
17717+ vg->pe_total = le32_to_cpup(&vg->pe_total);
17718+ vg->pe_allocated = le32_to_cpup(&vg->pe_allocated);
17719+ vg->pvg_total = le32_to_cpup(&vg->pvg_total);
17720+}
17721+
17722+/**
17723+ * read_vg
17724+ *
17725+ * Read in the VG structure from the specified node. Allocate a new
17726+ * struct vg_disk and copy the data.
17727+ **/
17728+static int read_vg(struct evms_logical_node * node,
17729+ struct pv_disk * pv,
17730+ struct vg_disk ** vg)
17731+{
17732+ struct vg_disk * vg_buffer;
17733+ unsigned long vg_sectors;
17734+ int rc = -ENOMEM;
17735+
17736+ /* Allocate a buffer to read the VG metadata. */
17737+ vg_sectors = evms_cs_size_in_vsectors(pv->vg_on_disk.size);
17738+ vg_buffer = kmalloc(vg_sectors << EVMS_VSECTOR_SIZE_SHIFT, GFP_NOIO);
17739+ if (!vg_buffer) {
17740+ LOG_CRITICAL("Error allocating VG metadata buffer for %s\n",
17741+ node->name);
17742+ goto out;
17743+ }
17744+
17745+ /* Read the VG metadata. */
17746+ rc = INIT_IO(node, 0, evms_cs_size_in_vsectors(pv->vg_on_disk.base),
17747+ vg_sectors, vg_buffer);
17748+ if (rc) {
17749+ LOG_SERIOUS("Error reading VG metadata from %s\n", node->name);
17750+ goto out_kfree;
17751+ }
17752+
17753+ /* Endian-neutral conversion of VG metadata. */
17754+ endian_convert_vg(vg_buffer);
17755+
17756+ /* Allocate a new struct vg_disk. */
17757+ *vg = kmalloc(sizeof(struct vg_disk), GFP_NOIO);
17758+ if (!*vg) {
17759+ LOG_CRITICAL("Error allocating new VG for %s\n", node->name);
17760+ rc = -ENOMEM;
17761+ goto out_kfree;
17762+ }
17763+
17764+ /* Copy the metadata. */
17765+ memcpy(*vg, vg_buffer, sizeof(struct vg_disk));
17766+
17767+out_kfree:
17768+ kfree(vg_buffer);
17769+out:
17770+ return rc;
17771+}
17772+
17773+/**
17774+ * read_uuid_list
17775+ **/
17776+static int read_uuid_list(struct evms_logical_node * node,
17777+ struct pv_disk * pv,
17778+ struct lvm_volume_group * group)
17779+{
17780+ u64 start_sector;
17781+ unsigned long total_sectors;
17782+ unsigned char * uuid_buffer;
17783+ unsigned long buffer_size = IO_BUFFER_SECTORS * EVMS_VSECTOR_SIZE;
17784+ unsigned long uuid_list_size;
17785+ int i, rc = 0;
17786+
17787+ if (group->uuid_list) {
17788+ LOG_EXTRA("Already read PV UUIDs for group %s\n",
17789+ group->vg_name);
17790+ goto out;
17791+ }
17792+
17793+ start_sector = evms_cs_size_in_vsectors(pv->pv_uuidlist_on_disk.base);
17794+ total_sectors = evms_cs_size_in_vsectors(pv->pv_uuidlist_on_disk.size);
17795+ uuid_list_size = round_up(total_sectors * EVMS_VSECTOR_SIZE,
17796+ buffer_size);
17797+
17798+ /* Allocate a buffer to perform the I/Os. */
17799+ uuid_buffer = kmalloc(buffer_size, GFP_NOIO);
17800+ if (!uuid_buffer) {
17801+ LOG_CRITICAL("Error allocating buffer for UUID list in group %s\n",
17802+ group->vg_name);
17803+ rc = -ENOMEM;
17804+ goto out;
17805+ }
17806+
17807+ /* Allocate memory for the UUID array for this group. */
17808+ group->uuid_list = vmalloc(uuid_list_size);
17809+ if (!group->uuid_list) {
17810+ LOG_CRITICAL("Error allocating UUID list for group %s\n",
17811+ group->vg_name);
17812+ rc = -ENOMEM;
17813+ goto out_kfree;
17814+ }
17815+ memset(group->uuid_list, 0, uuid_list_size);
17816+
17817+ for ( i = 0; i < total_sectors; i += IO_BUFFER_SECTORS ) {
17818+ rc = INIT_IO(node, 0, start_sector + i,
17819+ IO_BUFFER_SECTORS, uuid_buffer);
17820+ if (rc) {
17821+ LOG_SERIOUS("Error reading PV UUID list from %s\n",
17822+ node->name);
17823+ goto out_vfree;
17824+ }
17825+ /* Copy the I/O buffer into the UUID array. */
17826+ memcpy(&(group->uuid_list[i * EVMS_VSECTOR_SIZE]),
17827+ uuid_buffer, buffer_size);
17828+ }
17829+
17830+ /* Clear out the unused portion at the end of the uuid_list. */
17831+ memset(&(group->uuid_list[pv->pv_uuidlist_on_disk.size]), 0,
17832+ uuid_list_size - pv->pv_uuidlist_on_disk.size);
17833+
17834+out_kfree:
17835+ kfree(uuid_buffer);
17836+out:
17837+ return rc;
17838+
17839+out_vfree:
17840+ vfree(group->uuid_list);
17841+ group->uuid_list = NULL;
17842+ goto out_kfree;
17843+}
17844+
17845+/**
17846+ * endian_convert_lv
17847+ *
17848+ * Endian-neutral conversion for LV structures
17849+ **/
17850+static inline void endian_convert_lv(struct lv_disk * lv)
17851+{
17852+ lv->lv_access = le32_to_cpup(&lv->lv_access);
17853+ lv->lv_status = le32_to_cpup(&lv->lv_status);
17854+ lv->lv_open = le32_to_cpup(&lv->lv_open);
17855+ lv->lv_dev = le32_to_cpup(&lv->lv_dev);
17856+ lv->lv_number = le32_to_cpup(&lv->lv_number);
17857+ lv->lv_mirror_copies = le32_to_cpup(&lv->lv_mirror_copies);
17858+ lv->lv_recovery = le32_to_cpup(&lv->lv_recovery);
17859+ lv->lv_schedule = le32_to_cpup(&lv->lv_schedule);
17860+ lv->lv_size = le32_to_cpup(&lv->lv_size);
17861+ lv->lv_snapshot_minor = le32_to_cpup(&lv->lv_snapshot_minor);
17862+ lv->lv_chunk_size = le16_to_cpup(&lv->lv_chunk_size);
17863+ lv->dummy = le16_to_cpup(&lv->dummy);
17864+ lv->lv_allocated_le = le32_to_cpup(&lv->lv_allocated_le);
17865+ lv->lv_stripes = le32_to_cpup(&lv->lv_stripes);
17866+ lv->lv_stripesize = le32_to_cpup(&lv->lv_stripesize);
17867+ lv->lv_badblock = le32_to_cpup(&lv->lv_badblock);
17868+ lv->lv_allocation = le32_to_cpup(&lv->lv_allocation);
17869+ lv->lv_io_timeout = le32_to_cpup(&lv->lv_io_timeout);
17870+ lv->lv_read_ahead = le32_to_cpup(&lv->lv_read_ahead);
17871+}
17872+
17873+static inline void endian_convert_lvs(struct lvm_volume_group * group)
17874+{
17875+ int i;
17876+ for ( i = 0; i < group->vg->lv_max; i++ ) {
17877+ endian_convert_lv(&(group->lv_array[i]));
17878+ }
17879+}
17880+
17881+/**
17882+ * read_lv
17883+ *
17884+ * Read in the LV structures for the specified group. Do the read from
17885+ * the first PV in the group. If that one fails, keep trying on the
17886+ * remaining PVs until one works. This function will allocate a buffer
17887+ * for the group to read in the structures.
17888+ **/
17889+static int read_lv(struct lvm_volume_group * group)
17890+{
17891+ struct lvm_physical_volume * pv_entry = group->pv_list;
17892+ unsigned char * lv_buffer = NULL;
17893+ u64 start_sector;
17894+ unsigned long total_sectors, lv_array_size = 0;
17895+ unsigned long buffer_size = IO_BUFFER_SECTORS * EVMS_VSECTOR_SIZE;
17896+ int i, rc = 1;
17897+
17898+ if (group->lv_array) {
17899+ return 0;
17900+ }
17901+
17902+ if (!pv_entry) {
17903+ LOG_ERROR("Group %s has no PVs. Cannot read LV structures.\n",
17904+ group->vg_name);
17905+ return -EINVAL;
17906+ }
17907+
17908+ /* Allocate a buffer to do the actual I/Os. */
17909+ lv_buffer = kmalloc(buffer_size, GFP_NOIO);
17910+ if (!lv_buffer) {
17911+ LOG_CRITICAL("Error allocating buffer for LV structs for Group %s\n",
17912+ group->vg_name);
17913+ return -ENOMEM;
17914+ }
17915+
17916+ /* Read in the LV structures 4k at a time. If one PV returns errors,
17917+ * start over with the next PV in the group.
17918+ */
17919+ while (rc && pv_entry) {
17920+ start_sector = evms_cs_size_in_vsectors(pv_entry->pv->lv_on_disk.base);
17921+ total_sectors = evms_cs_size_in_vsectors(pv_entry->pv->lv_on_disk.size);
17922+ lv_array_size = round_up(total_sectors * EVMS_VSECTOR_SIZE,
17923+ buffer_size);
17924+
17925+ /* Allocate the buffer for this group to
17926+ * hold the entire LV array.
17927+ */
17928+ if (group->lv_array) {
17929+ vfree(group->lv_array);
17930+ group->lv_array = NULL;
17931+ }
17932+ group->lv_array = vmalloc(lv_array_size);
17933+ if (!group->lv_array) {
17934+ LOG_CRITICAL("Error allocating lv_array buffer for Group %s\n",
17935+ group->vg_name);
17936+ rc = -ENOMEM;
17937+ goto out_kfree;
17938+ }
17939+ memset(group->lv_array, 0, lv_array_size);
17940+
17941+ for ( i = 0; i < total_sectors; i += IO_BUFFER_SECTORS ) {
17942+ rc = INIT_IO(pv_entry->logical_node, 0,
17943+ start_sector + i, IO_BUFFER_SECTORS,
17944+ lv_buffer);
17945+ if (rc) {
17946+ LOG_SERIOUS("Error reading LV metadata from %s in Group %s\n",
17947+ pv_entry->logical_node->name,
17948+ group->vg_name);
17949+
17950+ /* Try the next PV if the current one
17951+ * caused any errors.
17952+ */
17953+ pv_entry = pv_entry->next;
17954+ break;
17955+ }
17956+ /* Copy the I/O buffer into the lv_array. */
17957+ memcpy(&(((char *)(group->lv_array))[i * EVMS_VSECTOR_SIZE]),
17958+ lv_buffer, buffer_size);
17959+ }
17960+ }
17961+
17962+ if (rc) {
17963+ LOG_SERIOUS("Unable to read LV metadata from any PV in Group %s\n",
17964+ group->vg_name);
17965+ goto out_vfree;
17966+ }
17967+
17968+ /* Clear out the unused portion at the end of the lv_array. */
17969+ memset(&(((char *)(group->lv_array))[pv_entry->pv->lv_on_disk.size]),
17970+ 0, lv_array_size - pv_entry->pv->lv_on_disk.size);
17971+
17972+ /* Endian-neutral conversion of the LV metadata. */
17973+ endian_convert_lvs(group);
17974+
17975+out_kfree:
17976+ kfree(lv_buffer);
17977+ return rc;
17978+
17979+out_vfree:
17980+ vfree(group->lv_array);
17981+ group->lv_array = NULL;
17982+ goto out_kfree;
17983+}
17984+
17985+/**
17986+ * endian_convert_pe_map
17987+ *
17988+ * Endian-neutral conversion for PE structures
17989+ **/
17990+static inline void endian_convert_pe_map(struct lvm_physical_volume * pv_entry)
17991+{
17992+ int i;
17993+ for ( i = 0; i < pv_entry->pv->pe_total; i++ ) {
17994+ pv_entry->pe_map[i].lv_num =
17995+ le16_to_cpup(&pv_entry->pe_map[i].lv_num);
17996+ pv_entry->pe_map[i].le_num =
17997+ le16_to_cpup(&pv_entry->pe_map[i].le_num);
17998+ }
17999+}
18000+
18001+/**
18002+ * read_pe_map
18003+ *
18004+ * Read in the PE map for the specified PV. This function will allocate a
18005+ * buffer to read in the data.
18006+ **/
18007+static int read_pe_map(struct lvm_physical_volume * pv_entry)
18008+{
18009+ struct evms_logical_node * node = pv_entry->logical_node;
18010+ struct pv_disk * pv = pv_entry->pv;
18011+ unsigned char * pe_buffer;
18012+ u64 start_sector;
18013+ unsigned long total_sectors, pe_map_size;
18014+ unsigned long buffer_size = IO_BUFFER_SECTORS * EVMS_VSECTOR_SIZE;
18015+ int i, rc = -ENOMEM;
18016+
18017+ if (pv_entry->pe_map) {
18018+ return 0;
18019+ }
18020+
18021+ start_sector = evms_cs_size_in_vsectors(pv->pe_on_disk.base);
18022+ total_sectors = evms_cs_size_in_vsectors(pv->pe_total *
18023+ sizeof(struct pe_disk));
18024+ pe_map_size = round_up(total_sectors * EVMS_VSECTOR_SIZE, buffer_size);
18025+
18026+ /* Allocate a buffer for performing the I/O. */
18027+ pe_buffer = kmalloc(buffer_size, GFP_NOIO);
18028+ if (!pe_buffer) {
18029+ LOG_CRITICAL("Error allocating buffer for PE maps for %s\n",
18030+ node->name);
18031+ goto out;
18032+ }
18033+
18034+ /* Allocate a buffer to hold the PE map for this PV. */
18035+ pv_entry->pe_map = vmalloc(pe_map_size);
18036+ if (!pv_entry->pe_map) {
18037+ LOG_CRITICAL("Error allocating PE map for %s\n", node->name);
18038+ goto out_kfree;
18039+ }
18040+ memset(pv_entry->pe_map, 0, pe_map_size);
18041+
18042+ for ( i = 0; i < total_sectors; i += IO_BUFFER_SECTORS ) {
18043+ rc = INIT_IO(node, 0, start_sector + i,
18044+ IO_BUFFER_SECTORS, pe_buffer);
18045+ if (rc) {
18046+ LOG_SERIOUS("Error reading PE maps from %s.\n",
18047+ node->name);
18048+ goto out_vfree;
18049+ }
18050+ /* Copy the data to the actual PE map. */
18051+ memcpy(&(((char *)(pv_entry->pe_map))[i * EVMS_VSECTOR_SIZE]),
18052+ pe_buffer, buffer_size);
18053+ }
18054+
18055+ /* Clear out the unused portion at the end of the PE map. */
18056+ memset(&(((char *)(pv_entry->pe_map))[total_sectors * EVMS_VSECTOR_SIZE]),
18057+ 0, pe_map_size - total_sectors * EVMS_VSECTOR_SIZE);
18058+
18059+ /* Endian-neutral conversion of the PE metadata. */
18060+ endian_convert_pe_map(pv_entry);
18061+
18062+out_kfree:
18063+ kfree(pe_buffer);
18064+out:
18065+ return rc;
18066+
18067+out_vfree:
18068+ vfree(pv_entry->pe_map);
18069+ pv_entry->pe_map = NULL;
18070+ goto out_kfree;
18071+}
18072+
18073+
18074+/********** Snapshot Manipulation Functions **********/
18075+
18076+
18077+/**
18078+ * snapshot_check_quiesce_original
18079+ *
18080+ * For this snapshot LV, check that both it and its original are quiesced.
18081+ **/
18082+static int
18083+snapshot_check_quiesce_original(struct lvm_logical_volume * snap_volume)
18084+{
18085+ struct lvm_logical_volume * org_volume = snap_volume->snapshot_org;
18086+
18087+ if ( ! (snap_volume->lv_access & EVMS_LV_QUIESCED) ) {
18088+ return -EINVAL;
18089+ }
18090+
18091+ if ( org_volume && !(org_volume->lv_access & EVMS_LV_QUIESCED) ) {
18092+ return -EINVAL;
18093+ }
18094+
18095+ return 0;
18096+}
18097+
18098+/**
18099+ * snapshot_check_quiesce_all
18100+ *
18101+ * Go through the list of all snapshots for an original volume, and make
18102+ * sure everyone is in a quiesced state.
18103+ **/
18104+static int snapshot_check_quiesce_all(struct lvm_logical_volume * org_volume)
18105+{
18106+ struct lvm_logical_volume * snap;
18107+
18108+ if ( ! (org_volume->lv_access & EVMS_LV_QUIESCED) ) {
18109+ return -EINVAL;
18110+ }
18111+
18112+ for ( snap = org_volume->snapshot_next;
18113+ snap; snap = snap->snapshot_next ) {
18114+ if ( ! (snap->lv_access & EVMS_LV_QUIESCED) ) {
18115+ return -EINVAL;
18116+ }
18117+ }
18118+
18119+ return 0;
18120+}
18121+
18122+/**
18123+ * invalidate_snapshot_volume
18124+ *
18125+ * In the event a snapshot volume becomes full or corrupted, its metadata
18126+ * must be altered in order to prevent it from being used again. Write some
18127+ * invalid data into the first entry of the COW table. If this volume is
18128+ * not fully deleted by the user/engine, this invalid COW entry will be
18129+ * detected by build_snapshot_maps(), and will cause the volume to be
18130+ * deleted before being exported to EVMS during discover. This is obviously
18131+ * a hack, but it is the same hack currently used by LVM. We're just trying
18132+ * to be compatible. :)
18133+ **/
18134+static int invalidate_snapshot_volume(struct lvm_logical_volume * snap_volume)
18135+{
18136+ struct evms_logical_node tmp_node;
18137+
18138+ tmp_node.private = snap_volume;
18139+ tmp_node.total_vsectors = snap_volume->lv_size;
18140+
18141+ if ( ! (snap_volume->lv_access & LV_SNAPSHOT) ) {
18142+ LOG_WARNING("Volume %s is not a snapshot. Cannot invalidate\n",
18143+ snap_volume->name);
18144+ return -EINVAL;
18145+ }
18146+
18147+ LOG_WARNING("Invalidating full/corrupt snapshot %s\n",
18148+ snap_volume->name);
18149+ LOG_WARNING("Run the EVMS administration tools to remove this snapshot.\n");
18150+
18151+ if (snap_volume->cow_table) {
18152+ snap_volume->cow_table[0].pv_org_rsector =
18153+ cpu_to_le64(((u64)1));
18154+ if ( lvm_init_io(&tmp_node, 4, 0, 1, snap_volume->cow_table) ) {
18155+ LOG_SERIOUS("Unable to invalidate snapshot %s\n",
18156+ snap_volume->name);
18157+ }
18158+ } else {
18159+ LOG_SERIOUS("Unable to invalidate snapshot %s\n",
18160+ snap_volume->name);
18161+ }
18162+
18163+ snap_volume->lv_status &= ~LV_ACTIVE;
18164+ return 0;
18165+}
18166+
18167+/**
18168+ * remove_snapshot_from_chain
18169+ *
18170+ * Remove a snapshot volume from its original's chain of snapshots. This
18171+ * does not delete the snapshot volume. At runtime, we cannot delete
18172+ * volumes at the region-manager level, because EVMS may have this volume
18173+ * exported, and there is no way to notify EVMS of the deletion. It will
18174+ * eventually need to be deleted in the engine, which will then tell the
18175+ * EVMS kernel services to delete the volume in the kernel.
18176+ **/
18177+static int remove_snapshot_from_chain(struct lvm_logical_volume * snap_volume)
18178+{
18179+ struct lvm_logical_volume * org_volume = snap_volume->snapshot_org;
18180+ struct lvm_logical_volume ** p_volume;
18181+
18182+ if (org_volume) {
18183+ for ( p_volume = &org_volume->snapshot_next;
18184+ *p_volume;
18185+ p_volume = &(*p_volume)->snapshot_next ) {
18186+ if ( *p_volume == snap_volume ) {
18187+ *p_volume = snap_volume->snapshot_next;
18188+ break;
18189+ }
18190+ }
18191+ }
18192+
18193+ snap_volume->snapshot_org = NULL;
18194+ snap_volume->snapshot_next = NULL;
18195+ return 0;
18196+}
18197+
18198+/**
18199+ * snapshot_hash
18200+ *
18201+ * The snapshot hash tables are NEVER going to have 4 billion entries, so
18202+ * we can safely cast the org_sector to 32 bits and just mod it by the
18203+ * hash table size.
18204+ **/
18205+static u32 snapshot_hash(u64 org_sector,
18206+ struct lvm_logical_volume * snap_volume)
18207+{
18208+ return (((u32)org_sector) % snap_volume->hash_table_size);
18209+}
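+/* Example: with a (hypothetical) hash_table_size of 128, org_sector
+ * 0x100000005 truncates to the u32 0x00000005 and hashes to bucket 5.
+ */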
18210+
18211+/**
18212+ * snapshot_search_hash_chain
18213+ *
18214+ * Search the hash chain that is anchored at the specified head pointer.
18215+ * If the sector number is found, the result pointer is set to that entry
18216+ * in the chain, and a 1 is returned. If the sector is not found, the
18217+ * result pointer is set to the previous entry and 0 is returned. If the
18218+ * result pointer is NULL, this means either the list is empty, or the
18219+ * specified sector should become the first list item.
18220+ **/
18221+static int snapshot_search_hash_chain(u64 org_sector,
18222+ struct snapshot_map_entry * head,
18223+ struct snapshot_map_entry ** result)
18224+{
18225+ struct snapshot_map_entry * curr = head;
18226+ struct snapshot_map_entry * prev = head;
18227+ while ( curr && curr->org_sector < org_sector ) {
18228+ prev = curr;
18229+ curr = curr->next;
18230+ }
18231+ if (!curr) {
18232+ /* Either an empty chain or went off the end of the chain. */
18233+ *result = prev;
18234+ return 0;
18235+ } else if ( curr->org_sector != org_sector ) {
18236+ *result = curr->prev;
18237+ return 0;
18238+ } else {
18239+ /* Found the desired sector. */
18240+ *result = curr;
18241+ return 1;
18242+ }
18243+}
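+/* Example: for a chain whose org_sectors are [16, 48, 80], searching
+ * for 48 returns 1 with *result at the 48 entry; searching for 50
+ * returns 0 with *result at the 48 entry (the insertion point); and
+ * searching for 8 returns 0 with *result == NULL, meaning 8 belongs at
+ * the head of the chain.
+ */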
18244+
18245+/**
18246+ * insert_snapshot_map_entry
18247+ *
18248+ * Insert a new entry into a snapshot hash chain, immediately following the
18249+ * specified entry. This function should not be used to add an entry into
18250+ * an empty list, or as the first entry in an existing list. For that case,
18251+ * use insert_snapshot_map_entry_at_head().
18252+ **/
18253+static int insert_snapshot_map_entry(struct snapshot_map_entry * entry,
18254+ struct snapshot_map_entry * base)
18255+{
18256+ entry->next = base->next;
18257+ entry->prev = base;
18258+ base->next = entry;
18259+ if (entry->next) {
18260+ entry->next->prev = entry;
18261+ }
18262+ return 0;
18263+}
18264+
18265+/**
18266+ * insert_snapshot_map_entry_at_head
18267+ *
18268+ * Insert a new entry into a snapshot chain as the first entry.
18269+ **/
18270+static int insert_snapshot_map_entry_at_head(struct snapshot_map_entry * entry,
18271+ struct snapshot_map_entry ** head)
18272+{
18273+ entry->next = *head;
18274+ entry->prev = NULL;
18275+ *head = entry;
18276+ if (entry->next) {
18277+ entry->next->prev = entry;
18278+ }
18279+ return 0;
18280+}
18281+
18282+/**
18283+ * add_cow_entry_to_snapshot_map
18284+ *
18285+ * Convert a cow table entry (from the on-disk data) into an appropriate
18286+ * entry for the snapshot map. Insert this new entry into the appropriate
18287+ * map for the specified volume.
18288+ *
18289+ * The cow_entry passed into this function must have already been
18290+ * endian-converted from disk-order to cpu-order.
18291+ **/
18292+static int add_cow_entry_to_snapshot_map(struct lv_COW_table_disk * cow_entry,
18293+ struct lvm_logical_volume * volume)
18294+{
18295+ struct snapshot_map_entry * new_entry, * target_entry;
18296+ struct snapshot_map_entry ** hash_table, * chain_head;
18297+ u32 hash_value;
18298+
18299+ if ( cow_entry->pv_org_number == 0 ) {
18300+ return -EINVAL;
18301+ }
18302+
18303+ new_entry = allocate_snapshot_map_entry(cow_entry->pv_org_rsector,
18304+ cow_entry->pv_snap_rsector);
18305+ if (!new_entry) {
18306+ return -ENOMEM;
18307+ }
18308+
18309+ new_entry->snap_pv = find_pv_by_number(cow_entry->pv_snap_number,
18310+ volume->group);
18311+ if (!new_entry->snap_pv) {
18312+ kfree(new_entry);
18313+ return -EINVAL;
18314+ }
18315+
18316+ hash_value = snapshot_hash(new_entry->org_sector, volume);
18317+ hash_table = volume->snapshot_map[cow_entry->pv_org_number];
18318+ chain_head = hash_table[hash_value];
18319+ if ( snapshot_search_hash_chain(new_entry->org_sector,
18320+ chain_head, &target_entry) ) {
18321+ /* In general, we should not find this entry in the snapshot
18322+		 * map already. It could happen on a re-discovery, but the
18323+		 * build_snapshot_maps function should weed out those cases.
18324+ * In either event, we can simply ignore duplicates.
18325+ */
18326+ LOG_WARNING("Detected a duplicate snapshot map entry\n");
18327+ LOG_WARNING("Snap PV "PFU64":"PFU64", Org PV "PFU64":"PFU64"\n",
18328+ cow_entry->pv_snap_number,
18329+ cow_entry->pv_snap_rsector,
18330+ cow_entry->pv_org_number,
18331+ cow_entry->pv_org_rsector);
18332+ kfree(new_entry);
18333+ } else {
18334+ if (target_entry) {
18335+ insert_snapshot_map_entry(new_entry, target_entry);
18336+ } else {
18337+ insert_snapshot_map_entry_at_head(new_entry,
18338+ &hash_table[hash_value]);
18339+ }
18340+ }
18341+
18342+ return 0;
18343+}
18344+
18345+/**
18346+ * snapshot_remap_sector
18347+ *
18348+ * Perform a sector remap on a snapshot volume. This should be called from
18349+ * the I/O read path, after the LE-to-PE translation has already been
18350+ * performed. First, determine the base sector of the chunk containing the
18351+ * specified sector, and save the remainder. Then, perform a search through
18352+ * the snapshot map for the specified volume. If a match is found, change
18353+ * the PV and sector numbers to the new values. If no match is found, leave
18354+ * the values alone, meaning the read should proceed down the original
18355+ * volume.
18356+ **/
18357+static void
18358+snapshot_remap_sector(struct lvm_logical_volume * snap_volume,
18359+ u64 pe_start_sector,
18360+ u64 * sector,
18361+ struct lvm_physical_volume ** pv_entry)
18362+{
18363+ struct snapshot_map_entry ** hash_table;
18364+ struct snapshot_map_entry * chain_head, * result;
18365+ u32 hash_value;
18366+ u64 chunk_sector, remainder;
18367+
18368+ if ( ! (snap_volume->lv_access & LV_SNAPSHOT) ) {
18369+ return;
18370+ }
18371+
18372+ chunk_sector = ((*sector - pe_start_sector) &
18373+ ((u64)(~(snap_volume->chunk_size - 1)))) +
18374+ pe_start_sector;
18375+ remainder = *sector - chunk_sector;
18376+ hash_value = snapshot_hash(chunk_sector, snap_volume);
18377+ hash_table = snap_volume->snapshot_map[(*pv_entry)->pv_number];
18378+ chain_head = hash_table[hash_value];
18379+
18380+ if ( snapshot_search_hash_chain(chunk_sector, chain_head, &result) ) {
18381+ *pv_entry = result->snap_pv;
18382+ *sector = result->snap_sector + remainder;
18383+ }
18384+}
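+
+/* Worked example of the chunk-alignment arithmetic above, with
+ * illustrative values and a chunk_size of 64 sectors:
+ *
+ *   pe_start_sector = 1000, *sector = 1130
+ *   1130 - 1000       = 130
+ *   130 & ~(64 - 1)   = 128          (round down to a chunk boundary)
+ *   chunk_sector      = 128 + 1000 = 1128
+ *   remainder         = 1130 - 1128 = 2
+ *
+ * If chunk 1128 is in the map, the read is redirected to
+ * result->snap_sector + 2 on the snapshot PV; otherwise the values are
+ * left alone and the read falls through to the original volume. Note
+ * that this arithmetic relies on chunk_size being a power of two.
+ */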
18385+
18386+/**
18387+ * snapshot_read_write_chunk
18388+ *
18389+ * This function takes care of reading one chunk of data from the
18390+ * original, and writing it to the snapshot. Since the original now has
18391+ * a fixed-size buffer for this data, we may have to loop to get the
18392+ * whole chunk copied.
18393+ **/
18394+static int snapshot_read_write_chunk(struct lvm_logical_volume * org_volume,
18395+ struct lvm_physical_volume * org_pv,
18396+ u64 chunk_sector,
18397+ struct lvm_logical_volume * snap_volume,
18398+ struct lvm_physical_volume ** snap_pv,
18399+ u64 * snap_sector)
18400+{
18401+ u32 io_size = snap_volume->chunk_size;
18402+ u64 snap_pe_start_sector, size;
18403+ int i, iterations = 1;
18404+
18405+ if ( org_volume->chunk_size < snap_volume->chunk_size ) {
18406+ iterations = snap_volume->chunk_size / org_volume->chunk_size;
18407+ io_size = org_volume->chunk_size;
18408+ }
18409+
18410+ remap_sector(snap_volume->volume_node, snap_volume->next_free_chunk, 1,
18411+ snap_sector, &size, &snap_pe_start_sector, snap_pv);
18412+
18413+ /* Check for an incomplete volume. */
18414+ if (!*snap_sector || !*snap_pv) {
18415+ invalidate_snapshot_volume(snap_volume);
18416+ return -1;
18417+ }
18418+
18419+ for ( i = 0; i < iterations; i++ ) {
18420+
18421+ /* Read the chunk from the original volume. This is a physical
18422+ * read, not logical. Thus, stripe boundary considerations are
18423+ * unnecessary. Also, chunks are always aligned with PEs, so PE
18424+ * boundary considerations are unnecessary.
18425+ */
18426+ if ( INIT_IO(org_pv->logical_node, 0,
18427+ chunk_sector + i * io_size, io_size,
18428+ org_volume->chunk_data_buffer) ) {
18429+ return 1;
18430+ }
18431+
18432+ /* Write this chunk to the snapshot volume. This does duplicate
18433+ * the local init_io code, but we need to have the remapped
18434+ * sector later on, so this is slightly more efficient. Snapshot
18435+ * volumes cannot be striped, so there is no need to consider
18436+		 * stripe-boundary conditions. And just like the read
18437+		 * above, chunks are always aligned with PEs, so we
18438+ * don't have to consider PE-boundary conditions.
18439+ */
18440+ if ( INIT_IO((*snap_pv)->logical_node, 1,
18441+ *snap_sector + i * io_size, io_size,
18442+ org_volume->chunk_data_buffer) ) {
18443+ /* An error writing the chunk to the snapshot is the
18444+ * same situation as the snapshot being full.
18445+ */
18446+ invalidate_snapshot_volume(snap_volume);
18447+ return -1;
18448+ }
18449+ }
18450+
18451+ return 0;
18452+}
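+
+/* Example of the copy splitting above, with illustrative sizes: if the
+ * snapshot uses 64-sector chunks but the original's chunk_data_buffer
+ * holds only 16 sectors, the copy becomes iterations = 64 / 16 = 4
+ * read/write passes of io_size = 16 sectors each, at offsets +0, +16,
+ * +32 and +48 from chunk_sector. When the buffer is at least one
+ * snapshot chunk in size, a single pass suffices.
+ */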
18453+
18454+/**
18455+ * snapshot_copy_data
18456+ *
18457+ * On a write to a snapshotted volume, check all snapshots to see if the
18458+ * specified chunk has already been remapped. If it has not, read the
18459+ * original data from the volume, write the data to the next available
18460+ * chunk on the snapshot, update the COW table, write the COW table to
18461+ * the snapshot, and insert a new entry into the snapshot map.
18462+ *
18463+ * This function now copies data to a single snapshot; looping over the
18464+ * original's full snapshot chain is left up to lvm_write.
18465+ **/
18466+static int snapshot_copy_data(struct lvm_logical_volume * org_volume,
18467+ struct lvm_logical_volume * snap_volume,
18468+ u64 pe_start_sector,
18469+ u64 org_sector,
18470+ struct lvm_physical_volume * org_pv)
18471+{
18472+ struct lvm_physical_volume * snap_pv;
18473+ struct snapshot_map_entry ** hash_table, * chain_head;
18474+ struct snapshot_map_entry * target_entry, * new_map_entry;
18475+ u64 chunk_sector, snap_sector;
18476+ u32 hash_value;
18477+ int rc = 0;
18478+
18479+ /* Lock out this snapshot while we are remapping. */
18480+ down(&snap_volume->snap_semaphore);
18481+
18482+ /* Make sure the snapshot has not been deactivated. */
18483+ if ( ! (snap_volume->lv_status & LV_ACTIVE) ) {
18484+ goto out;
18485+ }
18486+
18487+ /* Search the hash table to see if this sector has already been
18488+ * remapped on this snapshot.
18489+ */
18490+ chunk_sector = ((org_sector - pe_start_sector) &
18491+ ((u64)(~(snap_volume->chunk_size - 1)))) +
18492+ pe_start_sector;
18493+ hash_value = snapshot_hash(chunk_sector, snap_volume);
18494+ hash_table = snap_volume->snapshot_map[org_pv->pv_number];
18495+ chain_head = hash_table[hash_value];
18496+
18497+ if ( snapshot_search_hash_chain(chunk_sector,
18498+ chain_head, &target_entry) ) {
18499+ /* Chunk is already remapped. */
18500+ goto out;
18501+ }
18502+
18503+ /* Is there room on the snapshot to remap this chunk? */
18504+ if ( snap_volume->next_free_chunk >= snap_volume->lv_size ) {
18505+ /* At this point, the snapshot is full. Any further
18506+ * writes to the original will cause the snapshot to
18507+ * become "corrupt" because they can't be remapped.
18508+ * Take this snapshot permanently offline.
18509+ */
18510+ goto out_invalidate;
18511+ }
18512+
18513+ rc = snapshot_read_write_chunk(org_volume, org_pv, chunk_sector,
18514+ snap_volume, &snap_pv, &snap_sector);
18515+ if (rc) {
18516+ rc = (rc > 0) ? -EIO : 0;
18517+ goto out;
18518+ }
18519+
18520+ /* Fill in the appropriate COW table entry and write that
18521+ * metadata sector back to the snapshot volume. Since we are
18522+ * only writing one sector, there are no boundary conditions.
18523+ * Must endian-convert each entry as it is added.
18524+ */
18525+ snap_volume->cow_table[snap_volume->next_cow_entry].pv_org_number =
18526+ cpu_to_le64((u64)(org_pv->pv_number));
18527+ snap_volume->cow_table[snap_volume->next_cow_entry].pv_org_rsector =
18528+ cpu_to_le64p(&chunk_sector);
18529+ snap_volume->cow_table[snap_volume->next_cow_entry].pv_snap_number =
18530+ cpu_to_le64((u64)(snap_pv->pv_number));
18531+ snap_volume->cow_table[snap_volume->next_cow_entry].pv_snap_rsector =
18532+ cpu_to_le64p(&snap_sector);
18533+
18534+ if ( lvm_init_io(snap_volume->volume_node, 4,
18535+ snap_volume->current_cow_sector,
18536+ 1, snap_volume->cow_table) ) {
18537+ /* The data was written to the snapshot, but
18538+ * writing the metadata failed.
18539+ */
18540+ goto out_invalidate;
18541+ }
18542+
18543+ snap_volume->next_cow_entry++;
18544+ if ( snap_volume->next_cow_entry >=
18545+ (EVMS_VSECTOR_SIZE / sizeof (struct lv_COW_table_disk)) ) {
18546+ snap_volume->next_cow_entry = 0;
18547+ snap_volume->current_cow_sector++;
18548+ memset(snap_volume->cow_table, 0, EVMS_VSECTOR_SIZE);
18549+ if ( lvm_init_io(snap_volume->volume_node, 4,
18550+ snap_volume->current_cow_sector,
18551+ 1, snap_volume->cow_table) ) {
18552+ /* Can't clear out the next sector of metadata. */
18553+ goto out_invalidate;
18554+ }
18555+ }
18556+ snap_volume->next_free_chunk += snap_volume->chunk_size;
18557+
18558+ /* Create a new snapshot map entry and add it in the appropriate
18559+ * place in the map.
18560+ */
18561+ new_map_entry = allocate_snapshot_map_entry(chunk_sector, snap_sector);
18562+ if (!new_map_entry) {
18563+ rc = -ENOMEM;
18564+ goto out_invalidate;
18565+ }
18566+ new_map_entry->snap_pv = snap_pv;
18567+ if (target_entry) {
18568+ insert_snapshot_map_entry(new_map_entry, target_entry);
18569+ } else {
18570+ insert_snapshot_map_entry_at_head(new_map_entry,
18571+ &(hash_table[hash_value]));
18572+ }
18573+
18574+out:
18575+ up(&snap_volume->snap_semaphore);
18576+ return rc;
18577+
18578+out_invalidate:
18579+ invalidate_snapshot_volume(snap_volume);
18580+ goto out;
18581+}
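+
+/* Sizing note for the COW-table handling above: each metadata sector
+ * holds EVMS_VSECTOR_SIZE / sizeof(struct lv_COW_table_disk) entries.
+ * Assuming 512-byte vsectors and four u64 fields per entry, that is
+ * 512 / 32 = 16 remaps per sector; after the 16th entry,
+ * next_cow_entry wraps to 0, the table buffer is zeroed, and
+ * current_cow_sector advances by one.
+ */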
18582+
18583+/**
18584+ * get_snapshot_stats
18585+ **/
18586+static int get_snapshot_stats(struct lvm_snapshot_stat_ioctl * snap_stats)
18587+{
18588+ struct lvm_logical_volume * volume;
18589+ struct lvm_volume_group * group;
18590+
18591+ /* Make sure the parameters are in range. */
18592+ if ( snap_stats->lv_number < 1 || snap_stats->lv_number > MAX_LV ) {
18593+ return 1;
18594+ }
18595+
18596+ /* Make sure the specified group and volume exist, and that
18597+ * this is a snapshot volume.
18598+ */
18599+ find_group_by_uuid(snap_stats->vg_uuid, &group);
18600+ if ( ! group ||
18601+ ! (volume = group->volume_list[snap_stats->lv_number]) ||
18602+ ! (volume->lv_access & LV_SNAPSHOT) ) {
18603+ return 1;
18604+ }
18605+
18606+	/* Return the starting LBA of the next available chunk, and the LV status. */
18607+ snap_stats->next_free_chunk = volume->next_free_chunk;
18608+ snap_stats->lv_status = volume->lv_status;
18609+
18610+ return 0;
18611+}
18612+
18613+
18614+/********** Memory Allocation/Deallocation Functions **********/
18615+
18616+
18617+/**
18618+ * deallocate_physical_volume
18619+ *
18620+ * Free the memory used by this physical volume. Do not delete the EVMS
18621+ * node in this function, since this could be called during an error
18622+ * path when we want to save the logical node.
18623+ **/
18624+static int deallocate_physical_volume(struct lvm_physical_volume * pv_entry)
18625+{
18626+ if (pv_entry->pv) {
18627+ kfree(pv_entry->pv);
18628+ pv_entry->pv = NULL;
18629+ }
18630+
18631+ if (pv_entry->pe_map) {
18632+ vfree(pv_entry->pe_map);
18633+ pv_entry->pe_map = NULL;
18634+ }
18635+
18636+ kfree(pv_entry);
18637+ return 0;
18638+}
18639+
18640+/**
18641+ * allocate_physical_volume
18642+ *
18643+ * Create a new struct lvm_physical_volume for the specified node.
18644+ * Initialize the new PV with the evms node and lvm pv information.
18645+ **/
18646+static struct lvm_physical_volume *
18647+allocate_physical_volume(struct evms_logical_node * node, struct pv_disk * pv)
18648+{
18649+ struct lvm_physical_volume * new_pv;
18650+
18651+ new_pv = kmalloc(sizeof(struct lvm_physical_volume), GFP_NOIO);
18652+ if (!new_pv) {
18653+ LOG_CRITICAL("Error allocating physical volume for %s.\n",
18654+ node->name);
18655+ kfree(pv);
18656+ goto out;
18657+ }
18658+
18659+ /* Initialize the PV. */
18660+ memset(new_pv, 0, sizeof(struct lvm_physical_volume));
18661+ new_pv->logical_node = node;
18662+ new_pv->pv = pv;
18663+ new_pv->pv_number = pv->pv_number;
18664+
18665+out:
18666+ return new_pv;
18667+}
18668+
18669+/**
18670+ * allocate_snapshot_map_entry
18671+ *
18672+ * Allocate memory for a new entry in the snapshot map and fill in the
18673+ * sector values. The PV pointer is not filled in here, but can easily
18674+ * be found by using the find_pv_by_number function.
18675+ **/
18676+static struct snapshot_map_entry * allocate_snapshot_map_entry(u64 org_sector,
18677+ u64 snap_sector)
18678+{
18679+ struct snapshot_map_entry * new_entry;
18680+
18681+ new_entry = kmalloc(sizeof(struct snapshot_map_entry), GFP_NOIO);
18682+ if (!new_entry) {
18683+ goto out;
18684+ }
18685+ memset(new_entry, 0, sizeof(struct snapshot_map_entry));
18686+ new_entry->org_sector = org_sector;
18687+ new_entry->snap_sector = snap_sector;
18688+out:
18689+ return new_entry;
18690+}
18691+
18692+/**
18693+ * deallocate_snapshot_map
18694+ *
18695+ * This function will delete one hash table, which is part of the whole
18696+ * snapshot remapping structure. Each hash table is an array of pointers
18697+ * to linked lists of struct snapshot_map_entry's.
18698+ **/
18699+static int deallocate_snapshot_map(struct snapshot_map_entry ** table,
18700+ u32 table_size)
18701+{
18702+ struct snapshot_map_entry * entry, * next;
18703+ int i;
18704+
18705+ if (table) {
18706+ for ( i = 0; i < table_size; i++ ) {
18707+ for ( entry = table[i]; entry; entry = next ) {
18708+ next = entry->next;
18709+ kfree(entry);
18710+ }
18711+ }
18712+ vfree(table);
18713+ }
18714+ return 0;
18715+}
18716+
18717+/**
18718+ * deallocate_logical_volume
18719+ *
18720+ * Delete the in-memory representation of a single LVM logical volume,
18721+ * including its PE map and any snapshot data. Do not alter the parent
18722+ * volume group, except to remove this volume from its volume list.
18723+ **/
18724+static int deallocate_logical_volume(struct lvm_logical_volume * volume)
18725+{
18726+ struct lvm_volume_group * group = volume->group;
18727+ struct lvm_logical_volume * org_volume, * snap_volume;
18728+ int i;
18729+
18730+ if ( volume->lv_access & LV_SNAPSHOT ) {
18731+ /* This volume is a snapshot. Remove it from the linked
18732+ * list of volumes that are snapshotting the original.
18733+ * First, the original volume must be quiesced.
18734+ */
18735+ org_volume = volume->snapshot_org;
18736+
18737+ if ( snapshot_check_quiesce_original(volume) ) {
18738+ return -EINVAL;
18739+ }
18740+
18741+ remove_snapshot_from_chain(volume);
18742+
18743+ /* If the snapshot that was just removed was the last/only
18744+ * volume snapshotting the original, then mark the original
18745+ * as no longer being snapshotted.
18746+ */
18747+ if ( org_volume && !org_volume->snapshot_next ) {
18748+ org_volume->lv_access &= ~LV_SNAPSHOT_ORG;
18749+ }
18750+ } else if ( volume->lv_access & LV_SNAPSHOT_ORG ) {
18751+ /* If this volume is a snapshot original, all of its snapshots
18752+		 * must also be deleted. However, those deletions need to be
18753+ * taken care of by the engine. So just check that they have
18754+ * all been quiesced before removing the original.
18755+ */
18756+ if ( snapshot_check_quiesce_all(volume) ) {
18757+ return -EINVAL;
18758+ }
18759+
18760+ /* In case there are any snapshots remaining, we must clear out
18761+ * their pointers to this original to prevent errors when those
18762+ * snapshots are accessed or deleted.
18763+ */
18764+ for ( snap_volume = volume->snapshot_next;
18765+ snap_volume; snap_volume = snap_volume->snapshot_next ) {
18766+ snap_volume->snapshot_org = NULL;
18767+ }
18768+ }
18769+
18770+ if (volume->name) {
18771+ LOG_DEBUG("Deleting volume %s\n", volume->name);
18772+ }
18773+
18774+ /* Free all the memory. This includes the LE-to-PE map, any snapshot
18775+ * hash tables, the COW table, and chunk data buffer.
18776+ */
18777+ if (volume->le_map) {
18778+ vfree(volume->le_map);
18779+ volume->le_map = NULL;
18780+ }
18781+ if (volume->snapshot_map) {
18782+ for ( i = 1; i <= group->pv_count; i++ ) {
18783+ deallocate_snapshot_map(volume->snapshot_map[i],
18784+ volume->hash_table_size);
18785+ }
18786+ kfree(volume->snapshot_map);
18787+ volume->snapshot_map = NULL;
18788+ }
18789+ if (volume->cow_table) {
18790+ kfree(volume->cow_table);
18791+ volume->cow_table = NULL;
18792+ }
18793+ if (volume->chunk_data_buffer) {
18794+ kfree(volume->chunk_data_buffer);
18795+ volume->chunk_data_buffer = NULL;
18796+ }
18797+
18798+ /* Remove this volume from the group's list. */
18799+ if ( group && group->volume_list[volume->lv_number] == volume ) {
18800+ group->volume_list[volume->lv_number] = NULL;
18801+ group->volume_count--;
18802+ }
18803+
18804+ kfree(volume);
18805+ return 0;
18806+}
18807+
18808+/**
18809+ * allocate_logical_volume
18810+ *
18811+ * Allocate space for a new LVM logical volume, including space for the
18812+ * LE-to-PE map and any necessary snapshot data.
18813+ **/
18814+static struct lvm_logical_volume *
18815+allocate_logical_volume(struct lv_disk * lv, struct lvm_volume_group * group)
18816+{
18817+ struct lvm_logical_volume * new_volume;
18818+ u32 table_entries_per_chunk, table_chunks;
18819+ int i;
18820+
18821+ /* Allocate space for the new logical volume. */
18822+ new_volume = kmalloc(sizeof(struct lvm_logical_volume), GFP_NOIO);
18823+ if (!new_volume) {
18824+ LOG_CRITICAL("Error allocating new logical volume %s\n",
18825+ lv->lv_name);
18826+ goto out;
18827+ }
18828+ memset(new_volume, 0, sizeof(struct lvm_logical_volume));
18829+
18830+ /* Allocate space for the LE to PE mapping table. */
18831+ new_volume->le_map = vmalloc(lv->lv_allocated_le *
18832+ sizeof(struct le_table_entry));
18833+ if (!new_volume->le_map) {
18834+ LOG_CRITICAL("Error creating LE map for logical volume %s\n",
18835+ lv->lv_name);
18836+ goto error;
18837+ }
18838+ memset(new_volume->le_map, 0,
18839+ lv->lv_allocated_le * sizeof(struct le_table_entry));
18840+
18841+ /* Initialize the rest of the new volume.
18842+ * Need the +1 on lv_number to match the PE Map entries on the PV.
18843+ */
18844+ new_volume->lv_number = lv->lv_number + 1;
18845+ new_volume->lv_size = lv->lv_size;
18846+ new_volume->lv_access = lv->lv_access | EVMS_LV_NEW | EVMS_LV_QUIESCED;
18847+ new_volume->lv_status = lv->lv_status | LV_ACTIVE;
18848+ new_volume->lv_minor = MINOR(lv->lv_dev);
18849+ new_volume->stripes = lv->lv_stripes;
18850+ new_volume->stripe_size = lv->lv_stripesize;
18851+ new_volume->stripe_size_shift = evms_cs_log2(lv->lv_stripesize);
18852+ new_volume->pe_size = group->vg->pe_size;
18853+ new_volume->pe_size_shift = evms_cs_log2(group->vg->pe_size);
18854+ new_volume->num_le = lv->lv_allocated_le;
18855+ new_volume->group = group;
18856+ /* Different naming scheme for EVMS nodes. */
18857+ if ( translate_lv_name(lv->lv_name, new_volume->name) ) {
18858+ goto error;
18859+ }
18860+
18861+ if ( new_volume->lv_access & LV_SNAPSHOT ) {
18862+ /* This volume is a snapshot, initialize the remaining data,
18863+ * and allocate space for the remapping structures, and one
18864+ * sector's worth of COW tables.
18865+ */
18866+ new_volume->chunk_size = lv->lv_chunk_size;
18867+ new_volume->num_chunks = lv->lv_size / lv->lv_chunk_size;
18868+ new_volume->snap_org_minor = lv->lv_snapshot_minor;
18869+ new_volume->next_cow_entry = 0;
18870+ new_volume->current_cow_sector = 0;
18871+ table_entries_per_chunk = (new_volume->chunk_size <<
18872+ EVMS_VSECTOR_SIZE_SHIFT) /
18873+ sizeof(struct lv_COW_table_disk);
18874+ table_chunks = (new_volume->num_chunks +
18875+ table_entries_per_chunk - 1) /
18876+ table_entries_per_chunk;
18877+ new_volume->next_free_chunk = table_chunks *
18878+ new_volume->chunk_size;
18879+ new_volume->hash_table_size = (lv->lv_size / lv->lv_chunk_size /
18880+ MAX_HASH_CHAIN_ENTRIES) + 1;
18881+
18882+ new_volume->cow_table = kmalloc(EVMS_VSECTOR_SIZE, GFP_NOIO);
18883+ if (!new_volume->cow_table) {
18884+ LOG_CRITICAL("Error allocating COW table for logical volume %s\n",
18885+ lv->lv_name);
18886+ goto error;
18887+ }
18888+ memset(new_volume->cow_table, 0, EVMS_VSECTOR_SIZE);
18889+
18890+ new_volume->snapshot_map = kmalloc((group->pv_count + 1) *
18891+ sizeof(struct snapshot_map_entry **),
18892+ GFP_NOIO);
18893+ if (!new_volume->snapshot_map) {
18894+ LOG_CRITICAL("Error allocating snapshot map for logical volume %s\n",
18895+ lv->lv_name);
18896+ goto error;
18897+ }
18898+
18899+ new_volume->snapshot_map[0] = NULL;
18900+ for ( i = 1; i <= group->pv_count; i++ ) {
18901+ new_volume->snapshot_map[i] =
18902+ vmalloc(new_volume->hash_table_size *
18903+ sizeof(struct snapshot_map_entry *));
18904+ if (!new_volume->snapshot_map[i]) {
18905+ LOG_CRITICAL("Error allocating snapshot sub-map for logical volume %s\n",
18906+ lv->lv_name);
18907+ goto error;
18908+ }
18909+ memset(new_volume->snapshot_map[i], 0,
18910+ new_volume->hash_table_size *
18911+ sizeof(struct snapshot_map_entry *));
18912+ }
18913+ init_MUTEX(&new_volume->snap_semaphore);
18914+ } else if ( new_volume->lv_access & LV_SNAPSHOT_ORG ) {
18915+ /* This volume is a snapshot original, allocate space to use for
18916+ * copying snapshot chunks. This will now be a fixed size
18917+ * instead of being based on the chunk size of the snapshots.
18918+ */
18919+ new_volume->chunk_size = CHUNK_DATA_BUFFER_SIZE;
18920+ new_volume->chunk_data_buffer =
18921+ kmalloc(new_volume->chunk_size <<
18922+ EVMS_VSECTOR_SIZE_SHIFT, GFP_NOIO);
18923+ if (!new_volume->chunk_data_buffer) {
18924+ LOG_SERIOUS("Error allocating snapshot chunk buffer for logical volume %s\n",
18925+ lv->lv_name);
18926+ goto error;
18927+ }
18928+ memset(new_volume->chunk_data_buffer, 0,
18929+ new_volume->chunk_size << EVMS_VSECTOR_SIZE_SHIFT);
18930+ }
18931+
18932+out:
18933+ return new_volume;
18934+error:
18935+ deallocate_logical_volume(new_volume);
18936+ new_volume = NULL;
18937+ goto out;
18938+}
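+
+/* Worked example of the snapshot sizing above, with illustrative
+ * numbers: a 204800-sector snapshot LV with 64-sector chunks has
+ * num_chunks = 204800 / 64 = 3200. Assuming 512-byte vsectors,
+ * table_entries_per_chunk = (64 * 512) / 32 = 1024 COW entries per
+ * chunk, so table_chunks = (3200 + 1023) / 1024 = 4 chunks are
+ * reserved for the on-disk COW table, and user data starts at
+ * next_free_chunk = 4 * 64 = 256. If MAX_HASH_CHAIN_ENTRIES were 10,
+ * hash_table_size would come to 3200 / 10 + 1 = 321 buckets per PV.
+ */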
18939+
18940+/**
18941+ * deallocate_volume_group
18942+ *
18943+ * Delete the entire in-memory representation of an LVM volume group,
18944+ * including all PVs and logical volumes. If this group is on LVM's
18945+ * volume group list, remove it.
18946+ **/
18947+static int deallocate_volume_group(struct lvm_volume_group * group)
18948+{
18949+ struct lvm_physical_volume * pv_entry, * next_pv;
18950+ int i;
18951+
18952+ LOG_DEBUG("Deleting volume group %s\n", group->vg_name);
18953+
18954+ /* Remove the group from the global list. */
18955+ remove_group_from_list(group);
18956+
18957+ /* Delete the LV metadata array. */
18958+ if (group->lv_array) {
18959+ vfree(group->lv_array);
18960+ group->lv_array = NULL;
18961+ }
18962+
18963+ /* Delete the PV UUID list. */
18964+ if (group->uuid_list) {
18965+ vfree(group->uuid_list);
18966+ group->uuid_list = NULL;
18967+ }
18968+
18969+ /* Delete all logical volumes. */
18970+ for ( i = 1; i <= MAX_LV; i++ ) {
18971+ if (group->volume_list[i]) {
18972+ deallocate_logical_volume(group->volume_list[i]);
18973+ group->volume_list[i] = NULL;
18974+ }
18975+ }
18976+
18977+ /* Delete all PVs from the group's list. */
18978+ for ( pv_entry = group->pv_list; pv_entry; pv_entry = next_pv ) {
18979+ next_pv = pv_entry->next;
18980+ if (pv_entry->logical_node) {
18981+ /* Send a delete command down to the segment manager. */
18982+ LOG_DEBUG("Deleting PV %s from group %s\n",
18983+ pv_entry->logical_node->name, group->vg_name);
18984+ DELETE(pv_entry->logical_node);
18985+ pv_entry->logical_node = NULL;
18986+ }
18987+ deallocate_physical_volume(pv_entry);
18988+ }
18989+
18990+ /* Delete the VG metadata. */
18991+ if (group->vg) {
18992+ kfree(group->vg);
18993+ group->vg = NULL;
18994+ }
18995+
18996+ kfree(group);
18997+ return 0;
18998+}
18999+
19000+/**
19001+ * allocate_volume_group
19002+ *
19003+ * Allocate space for a new LVM volume group and all of its sub-fields.
19004+ * Initialize the appropriate fields.
19005+ * The vg parameter must already point to an allocated/initialized struct vg_disk.
19006+ **/
19007+static struct lvm_volume_group * allocate_volume_group(struct vg_disk * vg,
19008+ u8 * vg_name)
19009+{
19010+ struct lvm_volume_group * new_group;
19011+
19012+ /* The volume group itself. */
19013+ new_group = kmalloc(sizeof(struct lvm_volume_group), GFP_NOIO);
19014+ if (!new_group) {
19015+ kfree(vg);
19016+ goto out;
19017+ }
19018+
19019+ /* Initialize the new group. */
19020+ memset(new_group, 0, sizeof(struct lvm_volume_group));
19021+ memcpy(new_group->vg_uuid, vg->vg_uuid, UUID_LEN);
19022+ strncpy(new_group->vg_name, vg_name, NAME_LEN - 1);
19023+ new_group->vg = vg;
19024+ /* Default sector and block sizes. */
19025+ new_group->hard_sect_size = 512;
19026+ new_group->block_size = 1024;
19027+ new_group->flags = EVMS_VG_DIRTY;
19028+
19029+ LOG_DETAILS("Discovered volume group %s\n", new_group->vg_name);
19030+
19031+out:
19032+ return new_group;
19033+}
19034+
19035+/**
19036+ * remove_pv_from_group
19037+ *
19038+ * In the engine, when a PV is removed from a group (on a vgreduce), that
19039+ * same PV must be removed from that group in the kernel. Otherwise, when
19040+ * the rediscover occurs, that PV will still appear in the group, and
19041+ * will cause segfaults when we try to read metadata from it.
19042+ **/
19043+static int remove_pv_from_group(int pv_number, unsigned char * vg_uuid)
19044+{
19045+ struct lvm_volume_group * group;
19046+ struct lvm_physical_volume * pv_entry;
19047+ struct lvm_physical_volume ** p_pv_entry;
19048+
19049+ /* Make sure the numbers are in range. */
19050+ if ( pv_number < 0 || pv_number > MAX_PV ) {
19051+ return 0;
19052+ }
19053+
19054+ /* Make sure the group exists. */
19055+ find_group_by_uuid(vg_uuid, &group);
19056+ if (!group) {
19057+ return 0;
19058+ }
19059+
19060+ /* Make sure the PV is in this group. */
19061+ pv_entry = find_pv_by_number(pv_number, group);
19062+ if (!pv_entry) {
19063+ LOG_WARNING("Did not find PV %d in group %s\n",
19064+ pv_number, group->vg_name);
19065+ return 0;
19066+ }
19067+
19068+ /* Make sure the PV is not in use by any volumes. */
19069+ if ( check_pv_for_lv(pv_entry, group) ) {
19070+ LOG_SERIOUS("PV %d in group %s still contains LVs\n",
19071+ pv_number, group->vg_name);
19072+ return -EINVAL;
19073+ }
19074+
19075+ /* Take this PV out of the group's list. */
19076+ for ( p_pv_entry = &group->pv_list;
19077+ *p_pv_entry; p_pv_entry = &(*p_pv_entry)->next ) {
19078+ if ( *p_pv_entry == pv_entry ) {
19079+ *p_pv_entry = (*p_pv_entry)->next;
19080+ pv_entry->next = NULL;
19081+ break;
19082+ }
19083+ }
19084+
19085+ group->pv_count--;
19086+
19087+ /* There is no way that this PV was the last in this group, so the
19088+ * group never needs to be deleted at this point. The only way this
19089+ * group will exist in the kernel is if there are volumes exported from
19090+ * it. If this was the last PV, then those volumes must be on that PV,
19091+ * and it wouldn't be allowed to be removed from the group (above).
19092+ */
19093+
19094+ /* Free up the memory for this PV. Just drop the node. */
19095+ deallocate_physical_volume(pv_entry);
19096+
19097+ LOG_DEBUG("PV %d removed from group %s\n", pv_number, group->vg_name);
19098+ return 0;
19099+}
19100+
19101+
19102+/********** Consistency Checking Functions **********/
19103+
19104+
19105+/**
19106+ * clear_le_entries_for_missing_pv
19107+ *
19108+ * In the event that a PV turns up missing during a rediscover, we
19109+ * need to erase any LE map entries that might point to it.
19110+ **/
19111+static void
19112+clear_le_entries_for_missing_pv(struct lvm_volume_group * group,
19113+ struct lvm_physical_volume * pv_entry)
19114+{
19115+ struct lvm_logical_volume * volume;
19116+ int i, j;
19117+
19118+ for ( i = 1; i <= MAX_LV; i++ ) {
19119+ if (group->volume_list[i]) {
19120+ volume = group->volume_list[i];
19121+ for ( j = 0; j < volume->num_le; j++ ) {
19122+ if ( volume->le_map[j].owning_pv == pv_entry ) {
19123+ volume->le_map[j].owning_pv = NULL;
19124+ volume->le_map[j].pe_sector_offset = 0;
19125+ }
19126+ }
19127+ }
19128+ }
19129+}
19130+
19131+/**
19132+ * check_volume_groups
19133+ *
19134+ * This function performs some simple consistency checks on all dirty
19135+ * volume groups. Any groups that have no PVs are deleted. If any metadata
19136+ * structures (PV or VG) are missing, they are read in from disk.
19137+ **/
19138+static int check_volume_groups(void)
19139+{
19140+ struct lvm_volume_group * group, * next_group;
19141+ struct lvm_physical_volume * pv_entry, * next_pv;
19142+ int rc = 0;
19143+
19144+ for ( group = lvm_group_list; group; group = next_group ) {
19145+ next_group = group->next_group;
19146+
19147+ LOG_DEBUG("Checking Group %s\n", group->vg_name);
19148+
19149+		/* If a group has no PVs, warn about it; if it also has
19150+		 * no volumes, it can be safely deleted.
19151+ */
19152+ if (!group->pv_count) {
19153+ LOG_WARNING("No PVs found for Group %s.\n",
19154+ group->vg_name);
19155+ if (!group->volume_count) {
19156+ deallocate_volume_group(group);
19157+ }
19158+ continue;
19159+ }
19160+
19161+ /* Make sure all metadata for the PVs is present. On a
19162+ * rediscover, it may be missing, because we delete it at the
19163+ * end of discovery. If any is missing, read it in from disk.
19164+ * This is only necessary in the kernel. It can't happen in
19165+ * the engine.
19166+ */
19167+ for ( pv_entry = group->pv_list;
19168+ pv_entry; pv_entry = next_pv ) {
19169+ next_pv = pv_entry->next;
19170+ if (!pv_entry->pv) {
19171+ LOG_DEBUG("Re-reading PV metadata for %s\n",
19172+ pv_entry->logical_node->name);
19173+ rc = read_pv(pv_entry->logical_node,
19174+ &pv_entry->pv);
19175+ if (rc) {
19176+ /* What happens if we can't re-read the
19177+ * PV metadata? This PV must be removed
19178+ * from the group. Need to also clear
19179+ * all LE entries in all LVs that are
19180+ * pointing to this PV before it can be
19181+ * removed from the list.
19182+ */
19183+ LOG_SERIOUS("PV metadata is missing or cannot be read from %s\n",
19184+ pv_entry->logical_node->name);
19185+ clear_le_entries_for_missing_pv(group,
19186+ pv_entry);
19187+ remove_pv_from_group(pv_entry->pv_number,
19188+ group->vg_uuid);
19189+ continue;
19190+ }
19191+ pv_entry->pv_number = pv_entry->pv->pv_number;
19192+
19193+				/* Check for a "stale" PV. This case should
19194+ * already be covered, as long as the Engine is
19195+ * calling the PV_REMOVE ioctl when it does a
19196+ * vgreduce or a pvremove. If this is the last
19197+ * PV in the group, the group will be deleted.
19198+ */
19199+ if (!pv_entry->pv_number) {
19200+ remove_pv_from_group(0, group->vg_uuid);
19201+ continue;
19202+ }
19203+ }
19204+
19205+ if (!pv_entry->pe_map) {
19206+ LOG_DEBUG("Re-reading PE maps for %s\n",
19207+ pv_entry->logical_node->name);
19208+ rc = read_pe_map(pv_entry);
19209+ if (rc) {
19210+ LOG_WARNING("Error reading PE maps for %s\n",
19211+ pv_entry->logical_node->name);
19212+ LOG_WARNING("Any volumes residing on %s will be incomplete!\n",
19213+ pv_entry->logical_node->name);
19214+ }
19215+ }
19216+ }
19217+
19218+ /* Make sure the metadata for the VG is present. If it's
19219+ * missing, read it in from the first PV in the VG.
19220+ */
19221+ if (!group->vg && group->pv_count) {
19222+ LOG_DEBUG("Re-reading VG metadata for Group %s\n",
19223+ group->vg_name);
19224+ pv_entry = group->pv_list;
19225+ rc = read_vg(pv_entry->logical_node,
19226+ pv_entry->pv, &group->vg);
19227+ if (rc) {
19228+ /* What happens if we can't re-read the
19229+ * VG metadata? It's definitely bad
19230+ * news. Should we delete the VG?
19231+ */
19232+ continue;
19233+ }
19234+ }
19235+
19236+ /* Display a warning if the number of PVs found for the group
19237+ * doesn't match the number of PVs recorded for the VG.
19238+ */
19239+ if ( group->vg && group->pv_count != group->vg->pv_cur ) {
19240+ LOG_WARNING("Group %s is incomplete.\n",
19241+ group->vg_name);
19242+ LOG_WARNING(" Only %d of %d PVs found.\n",
19243+ group->pv_count, group->vg->pv_cur);
19244+ LOG_WARNING(" Volumes in this group may be incomplete.\n");
19245+ }
19246+ }
19247+
19248+ return 0;
19249+}
19250+
19251+/**
19252+ * check_le_maps
19253+ *
19254+ * Make sure all volumes in this group have valid LE-to-PE maps. Any
19255+ * volume that doesn't is marked as incomplete. This is safe for
19256+ * re-discovery because only new volumes could have corrupted LE maps.
19257+ **/
19258+static int check_le_maps(struct lvm_volume_group * group)
19259+{
19260+ struct lvm_logical_volume * volume;
19261+ int i, j, count;
19262+
19263+ for ( i = 1; i <= MAX_LV; i++ ) {
19264+ volume = group->volume_list[i];
19265+ if (!volume) {
19266+ continue;
19267+ }
19268+
19269+ if (!volume->le_map) {
19270+ /* No point in keeping the volume around if it has
19271+ * no LE map at all.
19272+ */
19273+ LOG_SERIOUS("Volume %s has no LE map.\n", volume->name);
19274+ deallocate_logical_volume(volume);
19275+ continue;
19276+ }
19277+
19278+ /* If any entries in the LE map are missing, mark this volume
19279+ * as incomplete.
19280+ */
19281+ for ( j = 0, count = 0; j < volume->num_le; j++ ) {
19282+ if ( !volume->le_map[j].owning_pv ||
19283+ !volume->le_map[j].pe_sector_offset) {
19284+ count++;
19285+ }
19286+ }
19287+ if (count) {
19288+ LOG_SERIOUS("Volume %s has incomplete LE map.\n",
19289+ volume->name);
19290+ LOG_SERIOUS(" Missing %d out of %d LEs.\n",
19291+ count, volume->num_le);
19292+ volume->lv_access |= EVMS_LV_INCOMPLETE;
19293+ }
19294+ }
19295+ return 0;
19296+}
19297+
19298+/**
19299+ * check_snapshot_map
19300+ *
19301+ * For snapshot volumes, make sure the snapshot map is intact, and that
19302+ * any existing entries in the map are in the correct order, with no
19303+ * duplicate entries.
19304+ **/
19305+static int check_snapshot_map(struct lvm_logical_volume * snap_volume)
19306+{
19307+ struct snapshot_map_entry ** table, * curr;
19308+ int i, j;
19309+
19310+ if ( ! (snap_volume->lv_access & LV_SNAPSHOT) ) {
19311+ return 0;
19312+ }
19313+ if (!snap_volume->snapshot_map) {
19314+ snap_volume->lv_access |= EVMS_LV_INVALID;
19315+ return -EINVAL;
19316+ }
19317+
19318+ for ( i = 1; i <= snap_volume->group->pv_count; i++ ) {
19319+ if (!snap_volume->snapshot_map[i]) {
19320+ snap_volume->lv_access |= EVMS_LV_INVALID;
19321+ return -EINVAL;
19322+ }
19323+ table = snap_volume->snapshot_map[i];
19324+ for ( j = 0; j < snap_volume->hash_table_size; j++ ) {
19325+ for ( curr = table[j]; curr; curr = curr->next ) {
19326+ if ( curr->next &&
19327+ curr->org_sector >=
19328+ curr->next->org_sector) {
19329+ snap_volume->lv_access |=
19330+ EVMS_LV_INVALID;
19331+ return -EINVAL;
19332+ }
19333+ }
19334+ }
19335+ }
19336+ return 0;
19337+}
19338+
19339+/**
19340+ * check_logical_volumes
19341+ *
19342+ * Perform a consistency check on all of the logical volumes that have been
19343+ * discovered. Any volume that has any inconsistencies will be marked as
19344+ * incomplete or invalid, depending on the severity of the problem. At the
19345+ * end, all invalid volumes are deleted; new, incomplete volumes are
19346+ * also deleted unless this is the final discovery pass.
19347+ **/
19348+static int check_logical_volumes(int final_discovery)
19349+{
19350+ struct lvm_volume_group * group;
19351+ struct lvm_logical_volume * volume, * snap, * next;
19352+ int count, i, j;
19353+
19354+ /* Check every valid, dirty volume group. */
19355+ for ( group = lvm_group_list; group; group = group->next_group ) {
19356+ if ( ! (group->flags & EVMS_VG_DIRTY) ) {
19357+ continue;
19358+ }
19359+ /* Check every valid volume in this group. */
19360+ for ( i = 1; i <= MAX_LV; i++ ) {
19361+ volume = group->volume_list[i];
19362+ if (!volume) {
19363+ continue;
19364+ }
19365+
19366+ LOG_DEBUG("Checking logical volume %s\n", volume->name);
19367+
19368+ if (!volume->group) {
19369+ volume->group = group;
19370+ }
19371+
19372+ /* All LE-map entries must have valid values. The I/O
19373+ * paths now detect missing LE entries.
19374+ */
19375+ if (volume->le_map) {
19376+ for ( j = 0, count = 0;
19377+ j < volume->num_le; j++ ) {
19378+ if ( !volume->le_map[j].owning_pv ||
19379+ !volume->le_map[j].pe_sector_offset ) {
19380+ count++;
19381+ }
19382+ }
19383+ if (count) {
19384+ LOG_SERIOUS("Volume %s has incomplete LE map.\n",
19385+ volume->name);
19386+ LOG_SERIOUS(" Missing %d out of %d LEs.\n",
19387+ count, volume->num_le);
19388+ volume->lv_access |= EVMS_LV_INCOMPLETE;
19389+ } else {
19390+ /* In case this volume was previously
19391+ * marked incomplete.
19392+ */
19393+ volume->lv_access &=
19394+ ~EVMS_LV_INCOMPLETE;
19395+ }
19396+ } else {
19397+ /* This should only ever happen due to
19398+ * memory corruption.
19399+ */
19400+ LOG_SERIOUS("Volume %s has no LE map.\n",
19401+ volume->name);
19402+ volume->lv_access |= EVMS_LV_INVALID;
19403+ }
19404+
19405+ if ( volume->lv_access & LV_SNAPSHOT_ORG ) {
19406+ /* For a snapshot original, check all snapshots
19407+ * in the chain, to make sure they point back to
19408+ * the original. Also, make sure there is memory
19409+ * for the chunk buffer.
19410+ */
19411+ for ( snap = volume->snapshot_next, count = 0;
19412+ snap;
19413+ snap = snap->snapshot_next, count++ ) {
19414+ if ( snap->snapshot_org != volume ) {
19415+ LOG_SERIOUS("Snapshot volume %s not pointing at correct original\n",
19416+ volume->name);
19417+ snap->snapshot_org = NULL;
19418+ snap->lv_access |=
19419+ EVMS_LV_INVALID;
19420+ }
19421+ }
19422+ if (!count) {
19423+ LOG_WARNING("No snapshots found for volume %s\n",
19424+ volume->name);
19425+ if (final_discovery) {
19426+ volume->lv_access &=
19427+ ~LV_SNAPSHOT_ORG;
19428+ }
19429+ } else if (!volume->chunk_data_buffer) {
19430+ volume->lv_access |= EVMS_LV_INVALID;
19431+ }
19432+ } else if ( volume->lv_access & LV_SNAPSHOT ) {
19433+ /* For a snapshot volume, make sure it points
19434+ * back to its original. Also make sure there is
19435+ * memory for the cow table, and that any
19436+ * existing snapshot entries in the snapshot map
19437+ * are correctly ordered.
19438+ */
19439+ /* Is there a COW table? */
19440+ if (!volume->cow_table) {
19441+ LOG_SERIOUS("Snapshot volume %s has no COW table\n",
19442+ volume->name);
19443+ volume->lv_access |= EVMS_LV_INVALID;
19444+ }
19445+ /* Is the snapshot map in order? */
19446+ if ( check_snapshot_map(volume) ) {
19447+ LOG_SERIOUS("Snapshot volume %s has snapshot map inconsistency\n",
19448+ volume->name);
19449+ volume->lv_access |= EVMS_LV_INVALID;
19450+ }
19451+ /* Is there an original volume? This is only
19452+ * a real problem during final discovery.
19453+ */
19454+ if (!volume->snapshot_org) {
19455+ LOG_SERIOUS("Snapshot volume %s not pointing at an original\n",
19456+ volume->name);
19457+ if (final_discovery) {
19458+ volume->lv_access |=
19459+ EVMS_LV_INVALID;
19460+ }
19461+ }
19462+ /* Is the original the correct one? */
19463+ else if ( volume->snap_org_minor !=
19464+ volume->snapshot_org->lv_minor ) {
19465+ LOG_SERIOUS("Snapshot volume %s not pointing at correct original\n",
19466+ volume->name);
19467+ volume->lv_access |= EVMS_LV_INVALID;
19468+ }
19469+ }
19470+ /* Delete any invalid volumes from use. Delete
19471+ * incomplete volumes as well if this is not final
19472+ * discovery. If a snapshot original is bad, delete all
19473+ * of its snapshots.
19474+ */
19475+ if ( volume->lv_access & EVMS_LV_INVALID ||
19476+ (!final_discovery &&
19477+ (volume->lv_access & EVMS_LV_INCOMPLETE) &&
19478+ (volume->lv_access & EVMS_LV_NEW)) ) {
19479+ if ( volume->lv_access & LV_SNAPSHOT_ORG ) {
19480+ for ( snap = volume->snapshot_next;
19481+ snap; snap = next ) {
19482+ next = snap->snapshot_next;
19483+ snap->snapshot_next = NULL;
19484+ snap->snapshot_org = NULL;
19485+ invalidate_snapshot_volume(snap);
19486+ deallocate_logical_volume(snap);
19487+ }
19488+ volume->snapshot_next = NULL;
19489+ } else if ( volume->lv_access & LV_SNAPSHOT ) {
19490+ invalidate_snapshot_volume(volume);
19491+ }
19492+ deallocate_logical_volume(volume);
19493+ }
19494+ }
19495+ }
19496+
19497+ return 0;
19498+}
19499+
19500+
19501+/********** Volume Group Discovery Functions **********/
19502+
19503+
19504+/**
19505+ * find_group_for_pv
19506+ *
19507+ * This is a discover-time function. It reads the VG metadata info for the
19508+ * specified node, and locates the appropriate group that owns that
19509+ * node. If that group does not already exist, it is created and
19510+ * initialized.
19511+ **/
19512+static int find_group_for_pv(struct evms_logical_node * node,
19513+ struct pv_disk * pv,
19514+ struct lvm_volume_group ** group)
19515+{
19516+ struct vg_disk * vg;
19517+ int rc;
19518+
19519+ *group = NULL;
19520+
19521+ /* Check for an unassigned PV. */
19522+ if ( pv->vg_name[0] == 0 ) {
19523+ return 0;
19524+ }
19525+
19526+ /* Read the VG on-disk info for this PV. If this succeeds, it
19527+ * allocates a new VG metadata structure.
19528+ */
19529+ rc = read_vg(node, pv, &vg);
19530+ if (rc) {
19531+ return rc;
19532+ }
19533+
19534+ /* Use the UUID from the VG metadata to determine if this group
19535+ * has already been discovered and constructed.
19536+ */
19537+ find_group_by_uuid(vg->vg_uuid, group);
19538+
19539+ if (!*group) {
19540+ /* Create a new group entry and add to the global list. */
19541+ *group = allocate_volume_group(vg, pv->vg_name);
19542+ if (!*group) {
19543+ return -ENOMEM;
19544+ }
19545+ add_group_to_list(*group);
19546+ } else if (!(*group)->vg) {
19547+ /* On a rediscover, the VG metadata for an existing group might
19548+ * be missing. Fill it in if necessary. This check is also not
19549+ * necessary in the engine, since the metadata is never deleted.
19550+ */
19551+/* Should we re-copy vg_name? (vg_uuid cannot be allowed to change).
19552+ * Or should vg_name changes be done through direct ioctl only?
19553+ */
19554+ (*group)->vg = vg;
19555+ } else {
19556+ kfree(vg);
19557+ }
19558+
19559+ /* Read in the UUID list for this group, if it isn't present. */
19560+ rc = read_uuid_list(node, pv, *group);
19561+ if (rc) {
19562+ LOG_WARNING("Error reading UUID list for group %s.\n",
19563+ (*group)->vg_name);
19564+ LOG_WARNING("May not be able to verify PV UUIDs for group %s\n",
19565+ (*group)->vg_name);
19566+ }
19567+
19568+ /* In the kernel, any time we even see a PV for a group, that group
19569+ * must be marked dirty so its volumes will be re-exported.
19570+ */
19571+ (*group)->flags |= EVMS_VG_DIRTY;
19572+
19573+ return 0;
19574+}
19575+
19576+/**
19577+ * check_for_duplicate_pv
19578+ *
19579+ * Search the list of PVs in the specified volume group. If the
19580+ * specified node already exists in the list, we can discard it.
19581+ **/
19582+static int check_for_duplicate_pv(struct evms_logical_node * node,
19583+ struct pv_disk * pv,
19584+ struct lvm_volume_group * group)
19585+{
19586+ struct lvm_physical_volume * pv_entry;
19587+
19588+ /* For re-discovery, we need to search all existing PVs in this VG to
19589+ * make sure we didn't get a duplicate from the plugin below us. The
19590+ * plugins below us should be re-exporting the same node on
19591+ * re-discovery, instead of creating a new node to represent the same
19592+ * objects, so just check the memory location.
19593+ */
19594+ for ( pv_entry = group->pv_list; pv_entry; pv_entry = pv_entry->next ) {
19595+ if ( pv_entry->logical_node == node ) {
19596+
19597+ /* We found a duplicate. Just ignore the duplicate. */
19598+ LOG_DEBUG("PV %s is already in Group %s.\n",
19599+ node->name, group->vg_name);
19600+
19601+ /* Even if the node was a duplicate, we may need to
19602+ * fill in the pv entry for this partition, since we
19603+ * always delete those at the end of discovery.
19604+ */
19605+ if (!pv_entry->pv) {
19606+ pv_entry->pv = pv;
19607+ pv_entry->pv_number = pv->pv_number;
19608+ } else {
19609+ kfree(pv);
19610+ }
19611+
19612+ return 1;
19613+ }
19614+ }
19615+
19616+ /* No duplicate was found. */
19617+ return 0;
19618+}
19619+
19620+/**
19621+ * verify_pv_uuid
19622+ *
19623+ * Verify that the specified PV belongs in the specified group by
19624+ * searching for the PV's UUID in the group's list.
19625+ **/
19626+static int verify_pv_uuid(struct lvm_physical_volume * pv_entry,
19627+ struct lvm_volume_group * group)
19628+{
19629+ int i;
19630+
19631+ /* Obviously the UUID list must be present in order to search. */
19632+ if (!group->uuid_list) {
19633+ LOG_WARNING("UUID list is missing from group %s.\n",
19634+ group->vg_name);
19635+ LOG_WARNING("Cannot verify UUID for PV %s\n",
19636+ pv_entry->logical_node->name);
19637+ return 0;
19638+ }
19639+
19640+ /* Start with the UUID entry for this PV's number. */
19641+ if ( ! memcmp(pv_entry->pv->pv_uuid,
19642+ &(group->uuid_list[(pv_entry->pv_number - 1) * NAME_LEN]),
19643+ UUID_LEN) ) {
19644+ return 0;
19645+ }
19646+
19647+ /* If it wasn't found there, then search the entire group's list. */
19648+ for ( i = 0; i < group->vg->pv_cur; i++ ) {
19649+ if ( ! memcmp(pv_entry->pv->pv_uuid,
19650+ &(group->uuid_list[i * NAME_LEN]), UUID_LEN) ) {
19651+ /* Found the UUID. */
19652+ LOG_WARNING("Detected UUID mismatch for PV %s!\n",
19653+ pv_entry->logical_node->name);
19654+ LOG_WARNING("PV %s is recorded as being at index %d,\n",
19655+ pv_entry->logical_node->name,
19656+ pv_entry->pv_number);
19657+ LOG_WARNING(" but Group %s has it recorded at index %d.\n",
19658+ group->vg_name, i + 1);
19659+ LOG_WARNING("Run the EVMS Engine to correct the problem.\n");
19660+ LOG_WARNING("If you have any snapshot regions in group %s\n",
19661+ group->vg_name);
19662+ LOG_WARNING(" it is recommended that you delete them immediately!\n");
19663+ return 0;
19664+ }
19665+ }
19666+
19667+ LOG_SERIOUS("Could not find UUID for PV %s in group %s\n",
19668+ pv_entry->logical_node->name, group->vg_name);
19669+ return -EINVAL;
19670+}
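+
+/* Layout assumed by the lookups above: uuid_list is a packed array of
+ * NAME_LEN-byte slots, one per PV, of which only the first UUID_LEN
+ * bytes are significant; slot k (0-based) belongs to PV number k + 1:
+ *
+ *   [ PV1 uuid | pad ][ PV2 uuid | pad ] ... [ PVn uuid | pad ]
+ *    <--- NAME_LEN --->
+ *
+ * Hence the fast path compares against
+ * &uuid_list[(pv_number - 1) * NAME_LEN], and only on a miss falls
+ * back to a linear scan of all pv_cur slots.
+ */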
19671+
19672+/**
19673+ * add_pv_to_group
19674+ *
19675+ * Adds the physical volume to the appropriate volume group. The PV
19676+ * passed into this function MUST be part of a valid VG.
19677+ **/
19678+static int add_pv_to_group(struct lvm_physical_volume * pv_entry,
19679+ struct lvm_volume_group * group)
19680+{
19681+ int rc;
19682+
19683+ /* Make sure this PV's UUID is listed in the group. */
19684+ rc = verify_pv_uuid(pv_entry, group);
19685+ if (rc) {
19686+ LOG_SERIOUS("PV %s does not belong in group %s!\n",
19687+ pv_entry->logical_node->name, group->vg_name);
19688+ return rc;
19689+ }
19690+
19691+ /* Add this PV to the beginning of its group's list. */
19692+ pv_entry->next = group->pv_list;
19693+ group->pv_list = pv_entry;
19694+ group->pv_count++;
19695+
19696+ /* Update the group's block and hardsector sizes as appropriate. */
19697+ group->block_size = max(pv_entry->logical_node->block_size,
19698+ group->block_size);
19699+ group->hard_sect_size = max(pv_entry->logical_node->hardsector_size,
19700+ group->hard_sect_size);
19701+
19702+ /* Check for the Partial or Removable flag on the PV. */
19703+ if ( pv_entry->logical_node->flags & EVMS_VOLUME_PARTIAL ) {
19704+ group->flags |= EVMS_VG_PARTIAL_PVS;
19705+ }
19706+ if ( pv_entry->logical_node->flags & EVMS_DEVICE_REMOVABLE ) {
19707+ group->flags |= EVMS_VG_REMOVABLE_PVS;
19708+ }
19709+
19710+ LOG_DETAILS("PV %s added to Group %s\n",
19711+ pv_entry->logical_node->name, group->vg_name);
19712+
19713+ return 0;
19714+}
19715+
19716+/**
19717+ * discover_volume_groups
19718+ *
19719+ * Examine the list of logical nodes. Any node that contains a valid PV
19720+ * structure is consumed and added to the appropriate volume group. PVs
19721+ * which do not belong to any group have their metadata freed. Everything
19722+ * else is left on the discovery list.
19723+ **/
19724+static int discover_volume_groups(struct evms_logical_node ** evms_node_list)
19725+{
19726+ struct evms_logical_node * node, * next_node;
19727+ struct pv_disk * pv;
19728+ struct lvm_volume_group * group;
19729+ struct lvm_physical_volume * pv_entry;
19730+ int rc;
19731+
19732+ LOG_EXTRA("Searching for PVs in the node list.\n");
19733+
19734+ /* Run through the discovery list. */
19735+ for ( node = *evms_node_list; node; node = next_node ) {
19736+ /* Save the next node. We may remove this one from the list. */
19737+ next_node = node->next;
19738+
19739+ /* Read the PV metadata. This will also create a new struct pv_disk
19740+ * if it finds the correct LVM signatures.
19741+ */
19742+ rc = read_pv(node, &pv);
19743+ if (rc) {
19744+ /* This node is not an LVM PV, or an error occurred.
19745+ * Just leave the node on the discovery list.
19746+ */
19747+ continue;
19748+ }
19749+
19750+ rc = find_group_for_pv(node, pv, &group);
19751+ if (rc) {
19752+ /* Error getting the group for this PV. */
19753+ kfree(pv);
19754+ continue;
19755+ }
19756+
19757+ if (!group) {
19758+ /* This node is an unassigned PV. */
19759+ LOG_DETAILS("PV %s is unassigned.\n", node->name);
19760+ kfree(pv);
19761+ continue;
19762+ }
19763+
19764+ rc = check_for_duplicate_pv(node, pv, group);
19765+ if (rc) {
19766+ /* This node is already in the group. This check is also
19767+ * only in the kernel because the engine has no notion
19768+ * of rediscover, and thus can never get a duplicate.
19769+ */
19770+ evms_cs_remove_logical_node_from_list(evms_node_list,
19771+ node);
19772+ continue;
19773+ }
19774+
19775+ /* Allocate a PV entry for this node. */
19776+ pv_entry = allocate_physical_volume(node, pv);
19777+ if (!pv_entry) {
19778+ continue;
19779+ }
19780+
19781+ /* Add this PV to the appropriate volume group. */
19782+ rc = add_pv_to_group(pv_entry, group);
19783+ if (rc) {
19784+ deallocate_physical_volume(pv_entry);
19785+ continue;
19786+ }
19787+
19788+ rc = read_pe_map(pv_entry);
19789+ if (rc) {
19790+ LOG_WARNING("Error reading PE maps for node %s\n",
19791+ node->name);
19792+ LOG_WARNING("Any volumes residing on this node will be incomplete!\n");
19793+ }
19794+
19795+ evms_cs_remove_logical_node_from_list(evms_node_list, node);
19796+ }
19797+
19798+ LOG_EXTRA("Group discovery complete.\n");
19799+ return 0;
19800+}
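+
+/* Summary of the per-node outcomes in the loop above:
+ *
+ *   read_pv fails             - not an LVM PV; node stays on the list.
+ *   unassigned PV/group error - metadata freed; node stays on the list.
+ *   duplicate PV              - node consumed (removed from the list).
+ *   PV added to its group     - node consumed (removed from the list).
+ *
+ * Only nodes left on the list remain visible to the other plugins that
+ * run after this one.
+ */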
19801+
19802+
19803+/********** Logical Volume Discovery Functions **********/
19804+
19805+
19806+/**
19807+ * build_le_maps
19808+ *
19809+ * After all logical volumes have been discovered, the mappings from
19810+ * logical extents to physical extents must be constructed. Each PV
19811+ * contains a map on-disk of its PEs. Each PE map entry contains the
19812+ * logical volume number and the logical extent number on that volume.
19813+ * Our internal map is the reverse of this map for each volume, listing
19814+ * the PV node and sector offset for every logical extent on the volume.
19815+ **/
19816+static int build_le_maps(struct lvm_volume_group * group)
19817+{
19818+ struct lvm_logical_volume ** volume_list = group->volume_list;
19819+ struct lvm_physical_volume * pv_entry;
19820+ struct evms_logical_node * node;
19821+ struct pv_disk * pv;
19822+ struct pe_disk * pe_map;
19823+ u64 offset;
19824+ u32 lv_number, le_number, first_pe_sector;
19825+ int i;
19826+
19827+ LOG_DEBUG("Building LE maps for new volumes in group %s.\n",
19828+ group->vg_name);
19829+
19830+ /* For every PV in this VG. */
19831+ for ( pv_entry = group->pv_list; pv_entry; pv_entry = pv_entry->next ) {
19832+ node = pv_entry->logical_node;
19833+ pv = pv_entry->pv;
19834+ pe_map = pv_entry->pe_map;
19835+
19836+ /* Version 1 metadata uses pe_on_disk.base + .size to find start
19837+ * of first PE. Version 2 uses pe_start.
19838+ */
19839+ if (pv->version == 1) {
19840+ first_pe_sector =
19841+ evms_cs_size_in_vsectors(pv->pe_on_disk.base +
19842+ pv->pe_on_disk.size);
19843+ } else {
19844+ first_pe_sector = pv->pe_start;
19845+ if (!first_pe_sector) {
19846+ first_pe_sector =
19847+ evms_cs_size_in_vsectors(pv->pe_on_disk.base +
19848+ pv->pe_on_disk.size);
19849+ }
19850+ }
19851+
19852+ /* For every entry in the PE map, calculate the PE's sector offset
19853+		 * and update the correct LV's LE map. An LV number of 0 marks an unused PE.
19854+ * For re-discovery, only compute entries for new volumes. If a PV
19855+ * is read-only, all LVs on that PV will also be read-only.
19856+ */
19857+ for ( i = 0; i < pv->pe_total; i++ ) {
19858+ lv_number = pe_map[i].lv_num;
19859+ if ( lv_number &&
19860+ volume_list[lv_number] &&
19861+ volume_list[lv_number]->lv_access &
19862+ (EVMS_LV_NEW | EVMS_LV_INCOMPLETE) ) {
19863+ le_number = pe_map[i].le_num;
19864+ offset = i * pv->pe_size + first_pe_sector;
19865+ volume_list[lv_number]->le_map[le_number].owning_pv =
19866+ pv_entry;
19867+ volume_list[lv_number]->le_map[le_number].pe_sector_offset =
19868+ offset;
19869+ if ( node->flags & EVMS_VOLUME_SET_READ_ONLY ) {
19870+ volume_list[lv_number]->lv_access &=
19871+ ~LV_WRITE;
19872+ }
19873+ }
19874+ }
19875+ }
19876+
19877+ return 0;
19878+}
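+
+/* Worked example of the reverse mapping above, with illustrative
+ * numbers: suppose PE index i = 5 on a PV whose first_pe_sector is 384
+ * and whose pe_size is 8192 sectors, and pe_map[5] records lv_num = 2,
+ * le_num = 7. Then volume 2 gains
+ *
+ *   le_map[7].owning_pv        = this PV
+ *   le_map[7].pe_sector_offset = 5 * 8192 + 384 = 41344
+ *
+ * i.e. logical extent 7 of LV 2 resolves to sector 41344 of that PV.
+ */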
19879+
19880+/**
19881+ * build_snapshot_maps
19882+ *
19883+ * For every volume in this group that is a snapshot, read all of the
19884+ * existing entries in the COW table, and build up the snapshot mapping
19885+ * structures accordingly.
19886+ *
19887+ * For reference, the COW tables attached to the snapshot volumes will
19888+ * always be in disk-order (little-endian), so that it can always be
19889+ * immediately written to disk. Therefore, endian conversions are necessary
19890+ * any time the COW table is accessed. This function will make a local
19891+ * copy of each COW table sector, and convert the local copy before
19892+ * building the snapshot maps.
19893+ **/
19894+static int build_snapshot_maps(struct lvm_volume_group * group)
19895+{
19896+ struct lvm_logical_volume * volume;
19897+ struct evms_logical_node tmp_node;
19898+ struct lv_COW_table_disk cow_table[EVMS_VSECTOR_SIZE /
19899+ sizeof(struct lv_COW_table_disk)];
19900+ unsigned long max_entries = EVMS_VSECTOR_SIZE /
19901+ sizeof(struct lv_COW_table_disk);
19902+ int i, j;
19903+
19904+ /* Check every volume in the group to see if it is a snapshot. Also
19905+ * check to make sure it is a new volume in the case of re-discovery.
19906+ */
19907+ for ( i = 1; i <= MAX_LV; i++ ) {
19908+
19909+ /* The volume must exist, must be new, and must be a snapshot.
19910+ */
19911+ volume = group->volume_list[i];
19912+ if ( !volume ||
19913+ !(volume->lv_access & EVMS_LV_NEW) ||
19914+ !(volume->lv_access & LV_SNAPSHOT)) {
19915+ continue;
19916+ }
19917+
19918+ /* Set up a temporary EVMS node. */
19919+ tmp_node.private = volume;
19920+
19921+ LOG_DEBUG("Building snapshot map for volume %s\n",
19922+ volume->name);
19923+
19924+ while (1) {
19925+ /* Read in one sector's worth of COW tables. */
19926+ if ( lvm_init_io(&tmp_node, 0,
19927+ volume->current_cow_sector,
19928+ 1, volume->cow_table) ) {
19929+ goto error;
19930+ }
19931+
19932+ /* Endian-conversion of this COW table
19933+ * to a local table.
19934+ */
19935+ for ( j = 0; j < max_entries; j++ ) {
19936+ cow_table[j].pv_org_number =
19937+ le64_to_cpu(volume->cow_table[j].pv_org_number);
19938+ cow_table[j].pv_org_rsector =
19939+ le64_to_cpu(volume->cow_table[j].pv_org_rsector);
19940+ cow_table[j].pv_snap_number =
19941+ le64_to_cpu(volume->cow_table[j].pv_snap_number);
19942+ cow_table[j].pv_snap_rsector =
19943+ le64_to_cpu(volume->cow_table[j].pv_snap_rsector);
19944+ }
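+			/* On little-endian hosts le64_to_cpu() is a no-op and
+			 * the local copy is taken as-is; on big-endian hosts
+			 * each field is byte-swapped into host order.
+			 */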
19945+
19946+ /* Translate every valid COW table entry into
19947+ * a snapshot map entry.
19948+ */
19949+ for ( volume->next_cow_entry = 0;
19950+ volume->next_cow_entry < max_entries &&
19951+ cow_table[volume->next_cow_entry].pv_org_number;
19952+ volume->next_cow_entry++ ) {
19953+ /* org_rsector must be a valid sector number,
19954+				 * i.e. it can't be within a PV's metadata. This
19955+ * is how we detect invalidated snapshots.
19956+ */
19957+ if ( cow_table[volume->next_cow_entry].pv_org_rsector < 10 ||
19958+ cow_table[volume->next_cow_entry].pv_org_number > group->pv_count ||
19959+ add_cow_entry_to_snapshot_map(&(cow_table[volume->next_cow_entry]), volume) ) {
19960+ /* This volume either has an invalid COW entry,
19961+ * or had an error adding that COW entry to the
19962+ * snapshot map. This snapshot is done.
19963+ */
19964+ goto error;
19965+ }
19966+ volume->next_free_chunk += volume->chunk_size;
19967+ }
19968+
19969+ /* Move on to the next sector if necessary. */
19970+ if ( volume->next_cow_entry == max_entries ) {
19971+ volume->current_cow_sector++;
19972+ } else {
19973+ break;
19974+ }
19975+ }
19976+ }
19977+
19978+out:
19979+ return 0;
19980+error:
19981+ invalidate_snapshot_volume(volume);
19982+ deallocate_logical_volume(volume);
19983+ goto out;
19984+}
19985+
19986+/**
19987+ * link_snapshot_volumes
19988+ *
19989+ * This function examines the list of logical volumes in this group and
19990+ * sets up the necessary pointers to link snapshots and their originals.
19991+ * A singly-linked list is created starting with the original volume. Also,
19992+ * all snapshot volumes point directly back to their original. This
19993+ * function should not be run until all volumes have been discovered.
19994+ * In the case of re-discovery, all of these links/lists get rebuilt as if
19995+ * they were not already there. Currently this should not pose a problem.
19996+ **/
19997+static int link_snapshot_volumes(struct lvm_volume_group * group)
19998+{
19999+ struct lvm_logical_volume * org_volume, * snap_volume;
20000+ u32 org_minor, buffer_size = 0;
20001+ int i, j;
20002+
20003+ for ( i = 1; i <= MAX_LV; i++ ) {
20004+
20005+ /* Only process snapshot-originals. */
20006+ org_volume = group->volume_list[i];
20007+ if ( !org_volume || !(org_volume->lv_access & LV_SNAPSHOT_ORG) ) {
20008+ continue;
20009+ }
20010+
20011+ /* For snapshot-originals, look for all other volumes that
20012+ * claim to be snapshotting it. For each one that is found,
20013+ * insert it at the start of the original's list of snapshots.
20014+		 * We must start with a NULL snapshot_next; otherwise we
20015+		 * could wind up with a circular list.
20016+ */
20017+ org_minor = org_volume->lv_minor;
20018+ org_volume->snapshot_next = NULL;
20019+
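+		/* Head insertion leaves the snapshots in reverse discovery
+		 * order: finding snap_a, then snap_b, then snap_c (hypothetical
+		 * names) yields org -> snap_c -> snap_b -> snap_a.
+		 */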
20020+ for ( j = 1; j <= MAX_LV; j++ ) {
20021+ snap_volume = group->volume_list[j];
20022+ if ( snap_volume &&
20023+ snap_volume->lv_access & LV_SNAPSHOT &&
20024+ (snap_volume->snap_org_minor == org_minor) ) {
20025+ snap_volume->snapshot_org = org_volume;
20026+ snap_volume->snapshot_next =
20027+ org_volume->snapshot_next;
20028+ org_volume->snapshot_next = snap_volume;
20029+ if ( snap_volume->chunk_size > buffer_size ) {
20030+ buffer_size = snap_volume->chunk_size;
20031+ }
20032+ LOG_DEBUG("Linking snapshot (%s) to original (%s)\n",
20033+ snap_volume->name, org_volume->name);
20034+ }
20035+ }
20036+
20037+ /* If no snapshots were found for a volume that claims to be
20038+ * under snapshot, mark the group dirty. If this is final
20039+ * discovery, the original will have the snapshot flag turned
20040+ * off in check_logical_volumes().
20041+ */
20042+ if (!org_volume->snapshot_next) {
20043+ LOG_WARNING("No snapshots found for original (%s)\n",
20044+ org_volume->name);
20045+ group->flags |= EVMS_VG_DIRTY;
20046+ }
20047+ }
20048+ return 0;
20049+}
20050+
20051+/**
20052+ * discover_volumes_in_group
20053+ **/
20054+static int discover_volumes_in_group(struct lvm_volume_group * group)
20055+{
20056+ struct lv_disk * lv_array = group->lv_array;
20057+ struct lvm_logical_volume * new_volume;
20058+ int i;
20059+
20060+ /* Search through the LV structs for valid LV entries. */
20061+ for ( i = 0; i < group->vg->lv_max; i++ ) {
20062+
20063+ /* Only discover valid, active volumes. */
20064+ if ( !lv_array[i].lv_name[0] ||
20065+ lv_array[i].lv_number >= MAX_LV ) {
20066+ continue;
20067+ }
20068+
20069+ /* Make sure this volume isn't already in the list. */
20070+ if (group->volume_list[lv_array[i].lv_number + 1]) {
20071+ continue;
20072+ }
20073+
20074+ /* Create a new logical volume and place it in the appropriate
20075+ * spot in this VG's volume list.
20076+ */
20077+ new_volume = allocate_logical_volume(&(lv_array[i]), group);
20078+ if (!new_volume) {
20079+ /* This volume will be missing, but other
20080+ * volumes in this group can still be built.
20081+ */
20082+ LOG_CRITICAL("Error allocating LV %s in Group %s\n",
20083+ lv_array[i].lv_name, group->vg_name);
20084+ continue;
20085+ }
20086+
20087+ group->volume_list[new_volume->lv_number] = new_volume;
20088+ group->volume_count++;
20089+ group->flags |= EVMS_VG_DIRTY;
20090+
20091+ LOG_DEBUG("Discovered volume %s in group %s.\n",
20092+ new_volume->name, group->vg_name);
20093+ }
20094+
20095+ return 0;
20096+}
20097+
20098+/**
20099+ * discover_logical_volumes
20100+ *
20101+ * After all PVs have been claimed and added to the appropriate VG list,
20102+ * the volumes for each VG must be constructed. For each group, read all
20103+ * the LV structs off the first PV in the list. Search this list of
20104+ * structs for valid LVs. For each valid LV, create a new volume and add
20105+ * it to the group.
20106+ **/
20107+static int discover_logical_volumes(int final_discovery)
20108+{
20109+ struct lvm_volume_group *group;
20110+ int rc;
20111+
20112+ /* Look for volumes in each valid VG entry. We even need to check ones
20113+	 * that aren't dirty, since we could have deleted an incomplete volume
20114+	 * on the previous pass and need to rediscover it, in case this is
20115+	 * final discovery and we now want to export it.
20116+ */
20117+ for ( group = lvm_group_list; group; group = group->next_group ) {
20118+
20119+ if ( ! group->vg ||
20120+ (! final_discovery &&
20121+ ! (group->flags & EVMS_VG_DIRTY)) ) {
20122+ continue;
20123+ }
20124+
20125+ LOG_DEBUG("Searching for volumes in group %s\n",
20126+ group->vg_name);
20127+
20128+ /* Read in the LV array from disk if necessary. */
20129+ rc = read_lv(group);
20130+ if (rc) {
20131+ LOG_WARNING("Unable to read LV metadata for group %s\n",
20132+ group->vg_name);
20133+ LOG_WARNING("No regions can be discovered for group %s\n",
20134+ group->vg_name);
20135+ continue;
20136+ }
20137+
20138+ /* Assemble each volume in the group. */
20139+ discover_volumes_in_group(group);
20140+
20141+ /* Build the LE map for each LV discovered in this group. This
20142+		 * must be done after all LVs in the group are discovered.
20143+ */
20144+ build_le_maps(group);
20145+ check_le_maps(group);
20146+
20147+ /* Set up all of the initial snapshot maps. Only the kernel
20148+ * keeps track of the snapshot maps.
20149+ */
20150+ build_snapshot_maps(group);
20151+
20152+ /* Set up the pointers to link snapshot volumes
20153+ * with their originals.
20154+ */
20155+ link_snapshot_volumes(group);
20156+ }
20157+
20158+ return 0;
20159+}
20160+
20161+/**
20162+ * export_volumes
20163+ *
20164+ * The last thing the plugin must do is take each newly constructed volume
20165+ * and place it on the evms logical node list. A return code of zero from
20166+ * this function means nothing new was added to the list; a positive
20167+ * return code means that many new items were added to the list.
20168+ **/
20169+static int export_volumes(struct evms_logical_node ** evms_node_list,
20170+ int final_discover)
20171+{
20172+ struct lvm_volume_group * group;
20173+ struct evms_logical_node * new_node;
20174+ struct lvm_logical_volume * volume;
20175+ int i, count = 0;
20176+
20177+ LOG_EXTRA("Exporting volumes\n");
20178+
20179+ /* For every valid, dirty volume group. */
20180+ for ( group = lvm_group_list; group; group = group->next_group ) {
20181+ if ( ! (group->flags & EVMS_VG_DIRTY) ) {
20182+ continue;
20183+ }
20184+
20185+ /* Export every valid volume in the group. For re-discovery,
20186+ * we re-export the same logical node.
20187+ */
20188+ for ( i = 1; i <= MAX_LV; i++ ) {
20189+ volume = group->volume_list[i];
20190+ if (!volume) {
20191+ continue;
20192+ }
20193+
20194+ /* For new volumes, create a new EVMS node and
20195+ * initialize the appropriate fields.
20196+ */
20197+ if ( volume->lv_access & EVMS_LV_NEW ) {
20198+ if ( evms_cs_allocate_logical_node(&new_node) ) {
20199+ continue;
20200+ }
20201+ MOD_INC_USE_COUNT;
20202+
20203+ volume->volume_node = new_node;
20204+ volume->lv_access &= (~EVMS_LV_QUIESCED &
20205+ ~EVMS_LV_NEW);
20206+ new_node->hardsector_size =
20207+ group->hard_sect_size;
20208+ new_node->block_size = group->block_size;
20209+ new_node->plugin = &lvm_plugin_header;
20210+ new_node->private = volume;
20211+ memcpy(new_node->name, volume->name, NAME_LEN);
20212+
20213+ /* Snapshot volumes should report the
20214+ * size of their original.
20215+ */
20216+ new_node->total_vsectors =
20217+ (volume->lv_access & LV_SNAPSHOT) ?
20218+ volume->snapshot_org->lv_size :
20219+ volume->lv_size;
20220+
20221+ /* Is the volume read-only? */
20222+ if ( ! (volume->lv_access & LV_WRITE) ) {
20223+ new_node->flags |=
20224+ EVMS_VOLUME_READ_ONLY;
20225+ LOG_DEBUG("LVM volume %s is read-only\n",
20226+ volume->name);
20227+ }
20228+
20229+ /* Is the volume incomplete? */
20230+ if ( volume->lv_access & EVMS_LV_INCOMPLETE ) {
20231+ new_node->flags |=
20232+ (EVMS_VOLUME_READ_ONLY |
20233+ EVMS_VOLUME_PARTIAL);
20234+ LOG_DEBUG("LVM volume %s is incomplete\n",
20235+ volume->name);
20236+ }
20237+
20238+ /* Does the volume group contain any partial or
20239+ * removable PVs?
20240+ */
20241+ if ( group->flags & EVMS_VG_PARTIAL_PVS ) {
20242+ new_node->flags |= EVMS_VOLUME_PARTIAL;
20243+ }
20244+ if ( group->flags & EVMS_VG_REMOVABLE_PVS ) {
20245+ new_node->flags |=
20246+ EVMS_DEVICE_REMOVABLE;
20247+ }
20248+ }
20249+
20250+ /* Export the node, only if it hasn't been exported
20251+ * during this full EVMS discover.
20252+ */
20253+ if ( ! (volume->lv_access & EVMS_LV_EXPORTED) ) {
20254+ if ( ! evms_cs_add_logical_node_to_list(evms_node_list,
20255+ volume->volume_node) ) {
20256+ LOG_DETAILS("Exporting LVM volume %s\n",
20257+ volume->name);
20258+ volume->lv_access |= EVMS_LV_EXPORTED;
20259+ count++;
20260+ }
20261+ }
20262+
20263+ if (final_discover) {
20264+ volume->lv_access &= ~EVMS_LV_EXPORTED;
20265+ }
20266+ }
20267+
20268+ /* The group is clean now. */
20269+ group->flags &= ~EVMS_VG_DIRTY;
20270+ }
20271+
20272+ return count;
20273+}
20274+
20275+/**
20276+ * lvm_cleanup
20277+ *
20278+ * This function runs through the entire lvm data structure, removing
20279+ * all items that are not needed at runtime. Currently, this is just the
20280+ * struct vg_disk structure and the struct pv_disk structure for each PV.
20281+ * Also, any groups that don't contain any volumes are deleted. All of the
20282+ * other volume_group, logical_volume and evms_logical_node structures will
20283+ * be kept around at run-time.
20284+ **/
20285+static int lvm_cleanup(void)
20286+{
20287+ struct lvm_volume_group * group, * next_group;
20288+ struct lvm_physical_volume * pv_entry;
20289+
20290+ for ( group = lvm_group_list; group; group = next_group ) {
20291+ next_group = group->next_group;
20292+
20293+ /* Delete groups with no volumes. */
20294+ if (!group->volume_count) {
20295+ LOG_WARNING("Group %s contains no logical volumes. Deleting.\n",
20296+ group->vg_name);
20297+ remove_group_from_list(group);
20298+ deallocate_volume_group(group);
20299+ /* Need to go back to the start of the list,
20300+ * just to be safe. :)
20301+ */
20302+ next_group = lvm_group_list;
20303+ continue;
20304+ }
20305+
20306+ /* Delete data structures that aren't used at runtime. */
20307+ if (group->vg) {
20308+ kfree(group->vg);
20309+ group->vg = NULL;
20310+ }
20311+
20312+ for ( pv_entry = group->pv_list;
20313+ pv_entry; pv_entry = pv_entry->next) {
20314+ if (pv_entry->pv) {
20315+ kfree(pv_entry->pv);
20316+ pv_entry->pv = NULL;
20317+ }
20318+ if (pv_entry->pe_map) {
20319+ vfree(pv_entry->pe_map);
20320+ pv_entry->pe_map = NULL;
20321+ }
20322+ }
20323+ if (group->lv_array) {
20324+ vfree(group->lv_array);
20325+ group->lv_array = NULL;
20326+ }
20327+ if (group->uuid_list) {
20328+ vfree(group->uuid_list);
20329+ group->uuid_list = NULL;
20330+ }
20331+ }
20332+ return 0;
20333+}
20334+
20335+/**
20336+ * lvm_get_bmap
20337+ *
20338+ * Support for the BMAP ioctl used by LILO to translate filesystem blocks
20339+ * to disk blocks to map kernel images for boot time.
20340+ **/
20341+static int lvm_get_bmap(struct evms_logical_node * node,
20342+ struct evms_get_bmap_pkt * bmap,
20343+ struct evms_logical_node ** pv_node)
20344+{
20345+ struct lvm_logical_volume * volume = node->private;
20346+ struct lvm_physical_volume * pv_entry;
20347+ u64 pe_start_sector, new_sector = 0, new_size = 0;
20348+ int rc = 0;
20349+
20350+ /* No kernel images allowed on snapshot LVs. */
20351+ if ( volume->lv_access & LV_SNAPSHOT ) {
20352+ return -EINVAL;
20353+ }
20354+
20355+ /* Range check. */
20356+ if ( bmap->rsector >= volume->lv_size ) {
20357+ return -EINVAL;
20358+ }
20359+
20360+ rc = remap_sector(node, bmap->rsector, 1, &new_sector,
20361+ &new_size, &pe_start_sector, &pv_entry);
20362+
20363+ if (rc || !pv_entry || !new_sector) {
20364+ return -EINVAL;
20365+ }
20366+
20367+ bmap->rsector = new_sector;
20368+ *pv_node = pv_entry->logical_node;
20369+
20370+ return 0;
20371+}
20372+
20373+/**
20374+ * lvm_global_proc_read
20375+ *
20376+ * A callback function for the lvm-global proc-fs entry. This will print
20377+ * general info about all LVM VGs, PVs, and LVs.
20378+ **/
20379+static int lvm_global_proc_read(char * page, char ** start, off_t off,
20380+ int count, int * eof, void * data)
20381+{
20382+ struct lvm_volume_group * group;
20383+ struct lvm_physical_volume * pv_entry;
20384+ struct lvm_logical_volume * volume, * snap;
20385+ int vgs = 0, lvs = 0, pvs = 0;
20386+ int i, sz = 0;
20387+
20388+ PROCPRINT("Enterprise Volume Management System: LVM Plugin\n");
20389+ PROCPRINT("Plugin ID: %x.%x.%x\n",
20390+ GetPluginOEM(lvm_plugin_header.id),
20391+ GetPluginType(lvm_plugin_header.id),
20392+ GetPluginID(lvm_plugin_header.id));
20393+ PROCPRINT("Plugin Version: %d.%d.%d\n",
20394+ lvm_plugin_header.version.major,
20395+ lvm_plugin_header.version.minor,
20396+ lvm_plugin_header.version.patchlevel);
20397+ PROCPRINT("Required EVMS Services Version: %d.%d.%d\n",
20398+ lvm_plugin_header.required_services_version.major,
20399+ lvm_plugin_header.required_services_version.minor,
20400+ lvm_plugin_header.required_services_version.patchlevel);
20401+
20402+ /* Count all existing items. */
20403+ for ( group = lvm_group_list; group; group = group->next_group ) {
20404+ lvs += group->volume_count;
20405+ pvs += group->pv_count;
20406+ vgs++;
20407+ }
20408+
20409+ PROCPRINT("\n");
20410+ PROCPRINT("Total: %d VGs %d PVs %d LVs\n", vgs, pvs, lvs);
20411+
20412+ /* Print out specifics about each VG. */
20413+ for ( group = lvm_group_list; group; group = group->next_group ) {
20414+ PROCPRINT("\n");
20415+ PROCPRINT("VG: %s [%d PV, %d LV]\n",
20416+ group->vg_name, group->pv_count, group->volume_count);
20417+ PROCPRINT("PVs:\n");
20418+ for ( pv_entry = group->pv_list;
20419+ pv_entry; pv_entry = pv_entry->next ) {
20420+ if (pv_entry->logical_node) {
20421+ PROCPRINT("\t%s\t%10Ld KB\n",
20422+ pv_entry->logical_node->name,
20423+ (long long)pv_entry->logical_node->total_vsectors / 2);
20424+ }
20425+ }
20426+ PROCPRINT("LVs:\n");
20427+ for ( i = 1; i <= MAX_LV; i++ ) {
20428+ if (group->volume_list[i]) {
20429+ volume = group->volume_list[i];
20430+ PROCPRINT("\t%s\t%10Ld KB / %5d LEs",
20431+ volume->name,
20432+ (long long)volume->lv_size / 2,
20433+ volume->num_le);
20434+ if ( volume->lv_access & LV_SNAPSHOT ) {
20435+ PROCPRINT("\tSnapshot of : ");
20436+ if (volume->snapshot_org) {
20437+ PROCPRINT("%s : ",
20438+ volume->snapshot_org->name);
20439+ } else {
20440+ PROCPRINT("(unknown) : ");
20441+ }
20442+ PROCPRINT("%ld%% full : ",
20443+ (long)(volume->next_free_chunk) *
20444+ 100 / (long)(volume->lv_size));
20445+ if ( volume->lv_status & LV_ACTIVE ) {
20446+ PROCPRINT("active");
20447+ } else {
20448+ PROCPRINT("disabled");
20449+ }
20450+ } else if ( volume->lv_access & LV_SNAPSHOT_ORG ) {
20451+ PROCPRINT("\tSnapshotted by : ");
20452+ for ( snap = volume->snapshot_next;
20453+ snap;
20454+ snap = snap->snapshot_next ) {
20455+ PROCPRINT("%s ", snap->name);
20456+ }
20457+ }
20458+ PROCPRINT("\n");
20459+ }
20460+ }
20461+ }
20462+
20463+out:
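+	/* Standard single-page proc_read bookkeeping: return only the
+	 * caller's window [off, off + count) of the text formatted above.
+	 */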
20464+ *start = page + off;
20465+ sz -= off;
20466+ if (sz < 0)
20467+ sz = 0;
20468+ return sz > count ? count : sz;
20469+}
20470+
20471+
20472+/********** Required EVMS Plugin Functions **********/
20473+
20474+
20475+/**
20476+ * lvm_discover
20477+ *
20478+ * This is the entry point into the LVM discovery process. It is a three
20479+ * phase process. First, the list of nodes are examined for PVs, and the
20480+ * appropriate volume groups are created. Then each volume group is
20481+ * examined to find all available logical volumes. Finally, each LVM
20482+ * logical volume has a new EVMS node created for it, and added to the
20483+ * list of nodes.
20484+ **/
20485+static int lvm_discover(struct evms_logical_node ** evms_node_list)
20486+{
20487+ int rc;
20488+
20489+ MOD_INC_USE_COUNT;
20490+ LOG_EXTRA("Beginning discovery.\n");
20491+
20492+ discover_volume_groups(evms_node_list);
20493+
20494+ check_volume_groups();
20495+
20496+ discover_logical_volumes(FALSE);
20497+
20498+ check_logical_volumes(FALSE);
20499+
20500+ rc = export_volumes(evms_node_list, FALSE);
20501+
20502+ LOG_EXTRA("Discovery complete.\n");
20503+ MOD_DEC_USE_COUNT;
20504+ return rc;
20505+}
20506+
20507+/**
20508+ * lvm_discover_end
20509+ *
20510+ * The discovery process at the region-manager level is now iterative,
20511+ * much like the EVMS feature level. This allows the ability to stack
20512+ * LVM on top of MD, or vice-versa. To accomplish this correctly, and
20513+ * also to accomplish partial volume discovery, a second discover
20514+ * entry point is needed, so EVMS can tell the region managers that
20515+ * discovery is over, and to finish up any discovery that is not yet
20516+ * complete. When this function is called, it should be assumed that
20517+ * the node list has had nothing new added to it since the last call
20518+ * of the regular discover function. Therefore, when this function is
20519+ * called, we do not need to try to discover any additional volume
20520+ * groups. We will, however, look for logical volumes once more. This
20521+ * gives us the ability to export (read-only) volumes that have
20522+ * partially corrupted LE maps due to missing PVs in their VG.
20523+ **/
20524+static int lvm_discover_end(struct evms_logical_node ** evms_node_list)
20525+{
20526+ int rc;
20527+
20528+ MOD_INC_USE_COUNT;
20529+ LOG_EXTRA("Beginning final discovery\n");
20530+
20531+ discover_volume_groups(evms_node_list);
20532+
20533+ check_volume_groups();
20534+
20535+ discover_logical_volumes(TRUE);
20536+
20537+ check_logical_volumes(TRUE);
20538+
20539+ rc = export_volumes(evms_node_list, TRUE);
20540+
20541+ lvm_cleanup();
20542+
20543+ LOG_EXTRA("Final discovery complete.\n");
20544+ MOD_DEC_USE_COUNT;
20545+ return rc;
20546+}
20547+
20548+/**
20549+ * lvm_delete_node
20550+ *
20551+ * This function deletes the in-memory representation of an LVM logical volume.
20552+ **/
20553+static int lvm_delete_node(struct evms_logical_node * logical_node)
20554+{
20555+ struct lvm_logical_volume * volume = logical_node->private;
20556+ struct lvm_volume_group * group = volume->group;
20557+
20558+ LOG_DEBUG("Deleting LVM node %s\n", logical_node->name);
20559+
20560+ if ( deallocate_logical_volume(volume) ) {
20561+ return -EINVAL;
20562+ }
20563+
20564+ /* If we just removed the last volume from this group, the entire group
20565+ * must also be deleted.
20566+ */
20567+ if ( group && group->volume_count == 0 ) {
20568+ remove_group_from_list(group);
20569+ deallocate_volume_group(group);
20570+ }
20571+
20572+ /* Free the logical node. */
20573+ evms_cs_deallocate_logical_node(logical_node);
20574+ MOD_DEC_USE_COUNT;
20575+ return 0;
20576+}
20577+
20578+/**
20579+ * lvm_read
20580+ **/
20581+static void lvm_read(struct evms_logical_node * node,
20582+ struct buffer_head * bh)
20583+{
20584+ struct lvm_logical_volume * volume = node->private;
20585+ struct lvm_physical_volume * pv_entry;
20586+ u64 size = bh->b_size >> EVMS_VSECTOR_SIZE_SHIFT;
20587+ u64 new_sector, new_size, pe_start_sector;
20588+
20589+ /* If this volume is a snapshot, lock the volume, and do
20590+ * the LE-PE translation on its original volume.
20591+ */
20592+ if ( volume->lv_access & LV_SNAPSHOT ) {
20593+ down(&volume->snap_semaphore);
20594+ if (!volume->snapshot_org) {
20595+ goto out_error;
20596+ }
20597+ node = volume->snapshot_org->volume_node;
20598+ }
20599+
20600+ /* Make sure the volume is active and readable. */
20601+ if ( !(volume->lv_access & LV_READ &&
20602+ volume->lv_status & LV_ACTIVE) ) {
20603+ goto out_error;
20604+ }
20605+
20606+ /* Check if I/O goes past end of logical volume. Must use the
20607+ * node, not the volume, so snapshots will work correctly.
20608+ */
20609+ if ( bh->b_rsector + size > node->total_vsectors ) {
20610+ goto out_error;
20611+ }
20612+
20613+ /* Logical-to-Physical remapping. Check for incomplete volumes.
20614+ * Check intermediate boundary conditions as well.
20615+ */
20616+ if ( remap_sector(node, bh->b_rsector, size, &new_sector,
20617+ &new_size, &pe_start_sector, &pv_entry) ||
20618+ !pe_start_sector || !pv_entry ||
20619+ size != new_size ) {
20620+ goto out_error;
20621+ }
20622+
20623+ /* For snapshot volumes, check if this sector's chunk has been
20624+ * remapped. If it has, new_sector and pv_entry will be changed
20625+ * accordingly. If not, they remain the same.
20626+ */
20627+ if ( volume->lv_access & LV_SNAPSHOT ) {
20628+ snapshot_remap_sector(volume, pe_start_sector,
20629+ &new_sector, &pv_entry);
20630+ }
20631+
20632+ bh->b_rsector = new_sector;
20633+ R_IO(pv_entry->logical_node, bh);
20634+
20635+out:
20636+ /* Unlock the snapshot. */
20637+ if ( volume->lv_access & LV_SNAPSHOT ) {
20638+ up(&volume->snap_semaphore);
20639+ }
20640+ return;
20641+
20642+out_error:
20643+ bh->b_end_io(bh, 0);
20644+ goto out;
20645+}
20646+
20647+/**
20648+ * lvm_write
20649+ **/
20650+static void lvm_write(struct evms_logical_node * node,
20651+ struct buffer_head * bh)
20652+{
20653+ struct lvm_logical_volume * volume = node->private;
20654+ struct lvm_logical_volume * snap_volume;
20655+ struct lvm_physical_volume * pv_entry;
20656+ u64 size = bh->b_size >> EVMS_VSECTOR_SIZE_SHIFT;
20657+ u64 new_sector, new_size, pe_start_sector;
20658+
20659+ /* Make sure the volume is active and writable. */
20660+ if ( !(volume->lv_access & LV_WRITE &&
20661+ volume->lv_status & LV_ACTIVE) ) {
20662+ goto out_error;
20663+ }
20664+
20665+ /* Check if I/O goes past end of logical volume. */
20666+ if ( bh->b_rsector + size > node->total_vsectors ) {
20667+ goto out_error;
20668+ }
20669+
20670+ /* Logical-to-Physical remapping. Check for incomplete volumes.
20671+ * Check intermediate boundary conditions as well.
20672+ */
20673+ if ( remap_sector(node, bh->b_rsector, size, &new_sector,
20674+ &new_size, &pe_start_sector, &pv_entry) ||
20675+ !pe_start_sector || !pv_entry ||
20676+ size != new_size ) {
20677+ goto out_error;
20678+ }
20679+
20680+ /* Copy-on-write for snapshotting. */
20681+ if ( volume->lv_access & LV_SNAPSHOT_ORG ) {
20682+ /* Originals can be snapshotted multiple times. */
20683+ for ( snap_volume = volume->snapshot_next;
20684+ snap_volume; snap_volume = snap_volume->snapshot_next ) {
20685+ if ( snapshot_copy_data(volume, snap_volume,
20686+ pe_start_sector, new_sector,
20687+ pv_entry) ) {
20688+ goto out_error;
20689+ }
20690+ }
20691+ }
20692+
20693+ bh->b_rsector = new_sector;
20694+ W_IO(pv_entry->logical_node, bh);
20695+out:
20696+ return;
20697+out_error:
20698+ bh->b_end_io(bh, 0);
20699+ goto out;
20700+}
20701+
20702+/**
20703+ * lvm_init_io
20704+ *
20705+ * Init_io on a snapshot volume treats it like a regular volume.
20706+ **/
20707+static int lvm_init_io(struct evms_logical_node * node,
20708+ int io_flag,
20709+ u64 sect_nr,
20710+ u64 num_sects,
20711+ void * buf_addr)
20712+{
20713+ struct lvm_logical_volume * volume = node->private;
20714+ struct lvm_physical_volume * pv_entry;
20715+ u64 pe_start_sector, new_sector, new_size;
20716+ int rc = 0;
20717+
20718+ /* Only allow internal writes to snapshots (io_flag==4). Disallow
20719+ * writes to snapshot originals.
20720+ */
20721+ if ( io_flag == WRITE &&
20722+ volume->lv_access & (LV_SNAPSHOT | LV_SNAPSHOT_ORG) ) {
20723+ return -EINVAL;
20724+ }
20725+
20726+ /* The node for a snapshot reports the size of the original. If a
20727+	 * request comes in within that range, just return.
20728+ */
20729+ else if ( volume->lv_access & LV_SNAPSHOT &&
20730+ sect_nr >= volume->lv_size &&
20731+ sect_nr < node->total_vsectors ) {
20732+ if ( io_flag == READ ) {
20733+ memset(buf_addr, 0,
20734+ num_sects << EVMS_VSECTOR_SIZE_SHIFT);
20735+ }
20736+ return 0;
20737+ }
20738+
20739+ /* Regular range check. */
20740+ else if ( sect_nr + num_sects > volume->lv_size ) {
20741+ return -EINVAL;
20742+ }
20743+
20744+ if ( io_flag == 4 ) {
20745+ io_flag = WRITE;
20746+ }
20747+
20748+ /* Init IO needs to deal with the possibility of a request that spans
20749+ * PEs or stripes. This is possible because there is no limit on
20750+ * num_sects. To handle this, we loop through remap_sector and
20751+ * INIT_IO until num_sects reaches zero.
20752+ */
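+	/* Worked example (hypothetical numbers): with 1024-sector PEs, a
+	 * 2048-sector request starting 512 sectors before a PE boundary is
+	 * issued as three pieces of 512, 1024, and 512 sectors, since
+	 * remap_sector() clamps new_size to the contiguous run each time.
+	 */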
20753+ while (num_sects) {
20754+ if ( remap_sector(node, sect_nr, num_sects, &new_sector,
20755+ &new_size, &pe_start_sector, &pv_entry) ) {
20756+ return -EIO;
20757+ }
20758+
20759+ /* If the volume is incomplete, clear the buffer (on a read). */
20760+ if (!pe_start_sector || !pv_entry) {
20761+ if ( io_flag == READ ) {
20762+ memset(buf_addr, 0,
20763+ new_size << EVMS_VSECTOR_SIZE_SHIFT);
20764+ }
20765+ } else {
20766+ rc = INIT_IO(pv_entry->logical_node, io_flag,
20767+ new_sector, new_size, buf_addr);
20768+ }
20769+ num_sects -= new_size;
20770+ sect_nr += new_size;
20771+ buf_addr = (void *)(((unsigned long) buf_addr) +
20772+ (unsigned long)(new_size << EVMS_VSECTOR_SIZE_SHIFT));
20773+ }
20774+
20775+ return rc;
20776+}
20777+
20778+/**
20779+ * lvm_ioctl
20780+ **/
20781+static int lvm_ioctl(struct evms_logical_node * logical_node,
20782+ struct inode * inode,
20783+ struct file * file,
20784+ unsigned int cmd,
20785+ unsigned long arg)
20786+{
20787+ struct lvm_logical_volume * volume = logical_node->private;
20788+ int rc = 0;
20789+
20790+ LOG_ENTRY_EXIT("Ioctl %d\n", cmd);
20791+
20792+ switch (cmd) {
20793+
20794+ case HDIO_GETGEO:
20795+ {
20796+ /* Fixed geometry for all LVM volumes. */
20797+ unsigned char heads = 64;
20798+ unsigned char sectors = 32;
20799+ short cylinders;
20800+ long start = 0;
20801+ struct hd_geometry * hd = (struct hd_geometry *)arg;
+		/* Divide in 64 bits before narrowing; assigning
+		 * total_vsectors straight into the short would truncate
+		 * on any volume larger than 32 MB.
+		 */
+		cylinders = logical_node->total_vsectors /
+			    (heads * sectors);
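+		/* With the fixed 64x32 geometry, e.g., a 2097152-sector
+		 * (1 GB) volume reports 1024 cylinders.
+		 */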
20804+
20805+ if (!hd) {
20806+ return -EINVAL;
20807+ }
20808+
20809+ if ( copy_to_user((char *)(&hd->heads),
20810+ &heads, sizeof(heads)) ||
20811+ copy_to_user((char *)(&hd->sectors),
20812+ &sectors, sizeof(sectors)) ||
20813+ copy_to_user((short *)(&hd->cylinders),
20814+ &cylinders, sizeof(cylinders)) ||
20815+ copy_to_user((long *)(&hd->start),
20816+ &start, sizeof(start)) ) {
20817+ return -EFAULT;
20818+ }
20819+ }
20820+ break;
20821+
20822+ case EVMS_QUIESCE_VOLUME:
20823+ {
20824+ struct evms_quiesce_vol_pkt * tmp =
20825+ (struct evms_quiesce_vol_pkt *)arg;
20826+ if (tmp->command) {
20827+ volume->lv_access |= EVMS_LV_QUIESCED;
20828+ } else {
20829+ volume->lv_access &= ~EVMS_LV_QUIESCED;
20830+ }
20831+ }
20832+ break;
20833+
20834+ case EVMS_GET_BMAP:
20835+ {
20836+ struct evms_get_bmap_pkt * bmap =
20837+ (struct evms_get_bmap_pkt *)arg;
20838+ struct evms_logical_node * pv_node;
20839+
20840+ rc = lvm_get_bmap(logical_node, bmap, &pv_node);
20841+ if (!rc) {
20842+ rc = IOCTL(pv_node, inode, file, cmd,
20843+ (unsigned long) bmap);
20844+ }
20845+ }
20846+ break;
20847+
20848+ case EVMS_GET_DISK_LIST:
20849+ case EVMS_CHECK_MEDIA_CHANGE:
20850+ case EVMS_REVALIDATE_DISK:
20851+ case EVMS_OPEN_VOLUME:
20852+ case EVMS_CLOSE_VOLUME:
20853+ case EVMS_CHECK_DEVICE_STATUS:
20854+ {
20855+			/* These six ioctls all need to
20856+			 * be broadcast to all PVs.
20857+ */
20858+ struct lvm_volume_group * group = volume->group;
20859+ struct lvm_physical_volume * pv_entry;
20860+ for ( pv_entry = group->pv_list;
20861+ pv_entry; pv_entry = pv_entry->next ) {
20862+ rc |= IOCTL(pv_entry->logical_node, inode,
20863+ file, cmd, arg);
20864+ }
20865+ }
20866+ break;
20867+
20868+ default:
20869+		/* Currently LVM does not send any other ioctls down to the
20870+ * PVs. Which PV would they go to? What would we do with
20871+ * the return codes?
20872+ */
20873+ rc = -EINVAL;
20874+ }
20875+
20876+ return rc;
20877+}
20878+
20879+/**
20880+ * lvm_direct_ioctl
20881+ *
20882+ * This function provides a method for user-space to communicate directly
20883+ * with a plugin in the kernel.
20884+ **/
20885+static int lvm_direct_ioctl(struct inode * inode,
20886+ struct file * file,
20887+ unsigned int cmd,
20888+ unsigned long args)
20889+{
20890+ struct evms_plugin_ioctl_pkt pkt, * user_pkt;
20891+ struct lvm_pv_remove_ioctl pv_remove, * user_pv_remove;
20892+ struct lvm_snapshot_stat_ioctl snap_stats, * user_snap_stats;
20893+ int rc = 0;
20894+
20895+ MOD_INC_USE_COUNT;
20896+
20897+ user_pkt = (struct evms_plugin_ioctl_pkt *)args;
20898+
20899+ /* Copy user's parameters to kernel space. */
20900+ if ( copy_from_user(&pkt, user_pkt, sizeof(pkt)) ) {
20901+ MOD_DEC_USE_COUNT;
20902+ return -EFAULT;
20903+ }
20904+
20905+ /* Make sure this is supposed to be our ioctl. */
20906+ if ( pkt.feature_id != lvm_plugin_header.id ) {
20907+ MOD_DEC_USE_COUNT;
20908+ return -EINVAL;
20909+ }
20910+
20911+ switch (pkt.feature_command) {
20912+
20913+ case EVMS_LVM_PV_REMOVE_IOCTL:
20914+ user_pv_remove =
20915+ (struct lvm_pv_remove_ioctl *)pkt.feature_ioctl_data;
20916+ if ( copy_from_user(&pv_remove, user_pv_remove,
20917+ sizeof(pv_remove)) ) {
20918+			rc = -EFAULT;
20919+ break;
20920+ }
20921+ rc = remove_pv_from_group(pv_remove.pv_number,
20922+ pv_remove.vg_uuid);
20923+ break;
20924+
20925+ case EVMS_LVM_SNAPSHOT_STAT_IOCTL:
20926+ user_snap_stats =
20927+ (struct lvm_snapshot_stat_ioctl *)pkt.feature_ioctl_data;
20928+ if ( copy_from_user(&snap_stats, user_snap_stats,
20929+ sizeof(snap_stats)) ) {
20930+			rc = -EFAULT;
20931+ break;
20932+ }
20933+ rc = get_snapshot_stats(&snap_stats);
20934+ if ( copy_to_user(user_snap_stats, &snap_stats,
20935+ sizeof(snap_stats)) ) {
20936+			rc = -EFAULT;
20937+ break;
20938+ }
20939+ break;
20940+
20941+ default:
20942+ rc = -EINVAL;
20943+ break;
20944+ }
20945+
20946+ pkt.status = rc;
20947+ copy_to_user(user_pkt, &pkt, sizeof(pkt));
20948+ MOD_DEC_USE_COUNT;
20949+ return rc;
20950+}
20951+
20952+/**
20953+ * lvm_vge_init
20954+ **/
20955+int __init lvm_vge_init(void)
20956+{
20957+ struct proc_dir_entry *pde;
20958+
20959+ lvm_group_list = NULL;
20960+ lvm_proc = NULL;
20961+
20962+ /* Register the global proc-fs entries. */
20963+ pde = evms_cs_get_evms_proc_dir();
20964+ if (pde) {
20965+ lvm_proc = create_proc_entry(LVM_PROC_NAME, S_IFDIR, pde);
20966+ if (lvm_proc) {
20967+ create_proc_read_entry(LVM_PROC_GLOBAL_NAME, S_IFREG,
20968+ lvm_proc, lvm_global_proc_read,
20969+ NULL);
20970+ }
20971+ }
20972+
20973+ /* Register this plugin with EVMS. */
20974+ return evms_cs_register_plugin(&lvm_plugin_header);
20975+}
20976+
20977+/**
20978+ * lvm_vge_exit
20979+ **/
20980+void __exit lvm_vge_exit(void)
20981+{
20982+ struct lvm_volume_group * group, * next_group;
20983+ struct proc_dir_entry * pde;
20984+ int i;
20985+
20986+ /* If LVM is called for module_exit, that means the reference
20987+ * count must be zero, which means there should be no volumes,
20988+ * and thus no volume groups. But, check anyway and delete
20989+ * any volumes and groups that are still hanging around.
20990+ */
20991+ if (lvm_group_list) {
20992+ LOG_SERIOUS("Called for module_exit, but group list is not empty!\n");
20993+ }
20994+
20995+ for ( group = lvm_group_list; group; group = next_group ) {
20996+ next_group = group->next_group;
20997+
20998+ LOG_SERIOUS("In module_exit: deleting all volumes from group %s.\n",
20999+ group->vg_name);
21000+
21001+ for ( i = 1; i <= MAX_LV; i++ ) {
21002+ if (group->volume_list[i]) {
21003+ lvm_delete_node(group->volume_list[i]->volume_node);
21004+ }
21005+ }
21006+ }
21007+
21008+ /* Unregister the proc-fs entries. */
21009+ pde = evms_cs_get_evms_proc_dir();
21010+	if (pde && lvm_proc) {
21011+		remove_proc_entry(LVM_PROC_GLOBAL_NAME, lvm_proc);
21012+		remove_proc_entry(LVM_PROC_NAME, pde);
21013+	}
21014+
21015+ /* Unregister this plugin from EVMS. */
21016+ evms_cs_unregister_plugin(&lvm_plugin_header);
21017+}
21018+
21019+module_init(lvm_vge_init);
21020+module_exit(lvm_vge_exit);
21021+#ifdef MODULE_LICENSE
21022+MODULE_LICENSE("GPL");
21023+#endif
21024+
21025diff -Naur linux-2002-09-30/drivers/evms/md_core.c evms-2002-09-30/drivers/evms/md_core.c
21026--- linux-2002-09-30/drivers/evms/md_core.c Wed Dec 31 18:00:00 1969
21027+++ evms-2002-09-30/drivers/evms/md_core.c Sun Sep 29 23:25:48 2002
21028@@ -0,0 +1,3633 @@
21029+/*
21030+ * Copyright (c) International Business Machines Corp., 2000
21031+ *
21032+ * This program is free software; you can redistribute it and/or modify
21033+ * it under the terms of the GNU General Public License as published by
21034+ * the Free Software Foundation; either version 2 of the License, or
21035+ * (at your option) any later version.
21036+ *
21037+ * This program is distributed in the hope that it will be useful,
21038+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
21039+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
21040+ * the GNU General Public License for more details.
21041+ *
21042+ * You should have received a copy of the GNU General Public License
21043+ * along with this program; if not, write to the Free Software
21044+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
21045+ *
21046+ *
21047+ * linux/drivers/evms/md_core.c
21048+ *
21049+ * EVMS Linux MD Region Manager
21050+ *
21051+ */
21052+
21053+
21054+#include <linux/module.h>
21055+#include <linux/kmod.h>
21056+#include <linux/kernel.h>
21057+#include <linux/config.h>
21058+#include <linux/genhd.h>
21059+#include <linux/string.h>
21060+#include <linux/blk.h>
21061+#include <linux/init.h>
21062+#include <linux/slab.h>
21063+#include <linux/vmalloc.h>
21064+#include <linux/evms/evms.h>
21065+#include <linux/evms/evms_md.h>
21066+#include <linux/sysctl.h>
21067+#include <asm/system.h>
21068+#include <asm/uaccess.h>
21069+
21070+#define LOG_PREFIX "md core: "
21071+
21072+/*
21073+ * Current RAID-1,4,5 parallel reconstruction 'guaranteed speed limit'
21074+ * is 100 KB/sec, so the extra system load does not show up that much.
21075+ * Increase it if you want to have more _guaranteed_ speed. Note that
21076+ * the RAID driver will use the maximum available bandwidth if the IO
21077+ * subsystem is idle. There is also an 'absolute maximum' reconstruction
21078+ * speed limit - in case reconstruction slows down your system despite
21079+ * idle IO detection.
21080+ *
21081+ * You can change them via /proc/sys/dev/evms/md/speed_limit_min and _max.
21082+ */
21083+
21084+static MD_LIST_HEAD(all_raid_disks);
21085+static MD_LIST_HEAD(pending_raid_disks);
21086+
21087+static int sysctl_speed_limit_min = 100;
21088+static int sysctl_speed_limit_max = 100000;
21089+
21090+
21091+static mdk_personality_t *pers[MAX_PERSONALITY];
21092+
21093+static int md_blocksizes[MAX_MD_DEVS];
21094+static int md_hardsect_sizes[MAX_MD_DEVS];
21095+int evms_md_size[MAX_MD_DEVS];
21096+static struct evms_thread *evms_md_recovery_thread = NULL;
21097+
21098+/*
21099+ * Allows iterating over all existing md arrays.
21100+ */
21101+static LIST_HEAD(all_mddevs);
21102+static LIST_HEAD(incomplete_mddevs);
21103+static LIST_HEAD(running_mddevs);
21104+
21105+/*
21106+ * The mapping between kdev and mddev is not necessarily a simple
21107+ * one! Eg. HSM uses several sub-devices to implement Logical
21108+ * Volumes. All these sub-devices map to the same mddev.
21109+ */
21110+struct dev_mapping evms_mddev_map[MAX_MD_DEVS];
21111+
21112+
21113+/* Support functions for discovery */
21114+static mdk_rdev_t * evms_md_find_rdev_all (struct evms_logical_node *node);
21115+static mddev_t * evms_md_find_mddev_all (struct evms_logical_node *node);
21116+static int evms_md_import_device (struct evms_logical_node **discover_list,
21117+ struct evms_logical_node *node);
21118+static void evms_md_autostart_arrays(struct evms_logical_node **discover_list);
21119+static void evms_md_run_devices (struct evms_logical_node **discover_list);
21120+static int evms_md_run_array (struct evms_logical_node ** discover_list,
21121+ mddev_t *mddev);
21122+static void evms_md_run_incomplete_array (struct evms_logical_node ** discover_list,
21123+ mddev_t *mddev);
21124+static int evms_md_create_logical_node(struct evms_logical_node **discover_list,
21125+ mddev_t *mddev, uint flags);
21126+static int evms_md_read_disk_sb (mdk_rdev_t * rdev);
21127+static int evms_md_analyze_sbs (mddev_t * mddev);
21128+static mddev_t * alloc_mddev (kdev_t dev);
21129+static void free_mddev(mddev_t * mddev);
21130+static void evms_md_create_recovery_thread(void);
21131+static void evms_md_destroy_recovery_thread(void);
21132+static int do_md_run (mddev_t * mddev);
21133+static int do_md_stop (mddev_t * mddev, int ro);
21134+
21135+static void evms_md_export_rdev (mdk_rdev_t * rdev, int delete_node);
21136+static void kick_rdev_from_array (mdk_rdev_t * rdev);
21137+static mdp_disk_t *evms_md_find_disk(mddev_t *mddev, kdev_t dev);
21138+static void remove_descriptor (mdp_disk_t *disk, mdp_super_t *sb);
21139+
21140+/* Plugin API prototypes */
21141+static int md_discover( struct evms_logical_node ** discover_list );
21142+static int md_end_discover( struct evms_logical_node ** discover_list );
21143+static int md_delete( struct evms_logical_node * node);
21144+static void md_read( struct evms_logical_node * node,
21145+ struct buffer_head * bh);
21146+static void md_write( struct evms_logical_node * node,
21147+ struct buffer_head * bh);
21148+static int md_sync_io( struct evms_logical_node *node,
21149+ int rw,
21150+ u64 sect_nr,
21151+ u64 num_sects,
21152+ void *data);
21153+static int md_ioctl( struct evms_logical_node *node,
21154+ struct inode *inode,
21155+ struct file *file,
21156+ unsigned int cmd,
21157+ unsigned long arg);
21158+static int md_ioctl_cmd_broadcast(
21159+ struct evms_logical_node *node,
21160+ struct inode *inode,
21161+ struct file *file,
21162+ unsigned long cmd,
21163+ unsigned long arg);
21164+
21165+static int md_direct_ioctl(
21166+ struct inode *inode,
21167+ struct file *file,
21168+ unsigned int cmd,
21169+ unsigned long arg);
21170+
21171+/* global MD data structures */
21172+static struct evms_plugin_fops md_fops = {
21173+ .discover = md_discover,
21174+ .end_discover = md_end_discover,
21175+ .delete = md_delete,
21176+ .read = md_read,
21177+ .write = md_write,
21178+ .init_io = md_sync_io,
21179+ .ioctl = md_ioctl,
21180+ .direct_ioctl = md_direct_ioctl
21181+};
21182+
21183+static struct evms_plugin_header md_plugin_header = {
21184+ .id = SetPluginID(IBM_OEM_ID,
21185+ EVMS_REGION_MANAGER,
21186+ EVMS_MD_ID),
21187+ .version = {
21188+ .major = EVMS_MD_MAJOR_VERSION,
21189+ .minor = EVMS_MD_MINOR_VERSION,
21190+ .patchlevel = EVMS_MD_PATCHLEVEL_VERSION
21191+ },
21192+ .required_services_version = {
21193+ .major = EVMS_MD_COMMON_SERVICES_MAJOR,
21194+ .minor = EVMS_MD_COMMON_SERVICES_MINOR,
21195+ .patchlevel = EVMS_MD_COMMON_SERVICES_PATCHLEVEL
21196+ },
21197+ .fops = &md_fops
21198+};
21199+
21200+/* global variables */
21201+static int exported_nodes; /* total # of exported devices
21202+ * produced during this discovery.
21203+ */
21204+static struct evms_logical_node **cur_discover_list = NULL;
21205+
21206+/**********************************************************/
21207+/* SYSCTL - EVMS/RAID folder */
21208+/**********************************************************/
21209+
21210+#ifdef CONFIG_PROC_FS
21211+static struct ctl_table_header *md_table_header;
21212+
21213+static ctl_table md_table[] = {
21214+ {DEV_EVMS_MD_SPEED_LIMIT_MIN, "speed_limit_min",
21215+ &sysctl_speed_limit_min, sizeof(int), 0644, NULL, &proc_dointvec},
21216+ {DEV_EVMS_MD_SPEED_LIMIT_MAX, "speed_limit_max",
21217+ &sysctl_speed_limit_max, sizeof(int), 0644, NULL, &proc_dointvec},
21218+ {0}
21219+};
21220+
21221+static ctl_table md_dir_table[] = {
21222+ {DEV_EVMS_MD, "md", NULL, 0, 0555, md_table},
21223+ {0}
21224+};
21225+
21226+static ctl_table evms_dir_table[] = {
21227+ {DEV_EVMS, "evms", NULL, 0, 0555, md_dir_table},
21228+ {0}
21229+};
21230+
21231+static ctl_table dev_dir_table[] = {
21232+ {CTL_DEV, "dev", NULL, 0, 0555, evms_dir_table},
21233+ {0}
21234+};
21235+#endif
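+/* With the tables above registered, the tunables appear as
+ * /proc/sys/dev/evms/md/speed_limit_min and speed_limit_max.
+ */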
21236+/********** Required EVMS Plugin Functions **********/
21237+
21238+/*
21239+ * Function: md_discover
21240+ * We should only export complete MD device nodes
21241+ */
21242+static int md_discover( struct evms_logical_node ** discover_list )
21243+{
21244+ MOD_INC_USE_COUNT;
21245+ LOG_ENTRY_EXIT("%s: ENTRY\n", __FUNCTION__);
21246+
21247+ /* initialize global variable */
21248+ exported_nodes = 0;
21249+ cur_discover_list = discover_list;
21250+ evms_md_autostart_arrays(discover_list);
21251+
21252+ LOG_ENTRY_EXIT("%s: EXIT (exported nodes: %d)\n", __FUNCTION__,exported_nodes);
21253+ cur_discover_list = NULL;
21254+ MOD_DEC_USE_COUNT;
21255+ return(exported_nodes);
21256+}
21257+
21258+static mddev_t * evms_md_find_incomplete_array(int level)
21259+{
21260+ mddev_t *mddev;
21261+ struct list_head *tmp,*tmp2;
21262+ mdk_rdev_t *rdev;
21263+
21264+ ITERATE_INCOMPLETE_MDDEV(mddev,tmp) {
21265+ ITERATE_RDEV(mddev, rdev, tmp2) {
21266+ if (rdev->sb && rdev->sb->level == level)
21267+ return mddev;
21268+ }
21269+ }
21270+ return NULL;
21271+}
21272+
21273+/*
21274+ * Function: md_end_discover
21275+ */
21276+static int md_end_discover( struct evms_logical_node ** discover_list )
21277+{
21278+ int rc = 0;
21279+ struct list_head *tmp;
21280+ mdk_rdev_t *rdev;
21281+ mddev_t *mddev;
21282+ struct evms_logical_node *node;
21283+ int done = FALSE;
21284+
21285+ MOD_INC_USE_COUNT;
21286+ LOG_ENTRY_EXIT("%s: ENTRY\n", __FUNCTION__);
21287+ rc = md_discover(discover_list);
21288+
21289+ do {
21290+ done = TRUE;
21291+ if ( (mddev = evms_md_find_incomplete_array(5)) != NULL) {
21292+ evms_md_run_incomplete_array(discover_list, mddev);
21293+ done = FALSE;
21294+ continue;
21295+ }
21296+ if ( (mddev = evms_md_find_incomplete_array(1)) != NULL) {
21297+ evms_md_run_incomplete_array(discover_list, mddev);
21298+ done = FALSE;
21299+ continue;
21300+ }
21301+ if ( (mddev = evms_md_find_incomplete_array(0)) != NULL) {
21302+ evms_md_run_incomplete_array(discover_list, mddev);
21303+ done = FALSE;
21304+ continue;
21305+ }
21306+ if ( (mddev = evms_md_find_incomplete_array(-1)) != NULL) {
21307+ evms_md_run_incomplete_array(discover_list, mddev);
21308+ done = FALSE;
21309+ continue;
21310+ }
21311+
21312+ } while (!done);
21313+
21314+
21315+ /*
21316+ * At this point, delete all mddevs which did not start.
21317+ */
21318+ ITERATE_MDDEV(mddev,tmp) {
21319+ if (mddev->pers == NULL) {
21320+ LOG_WARNING("%s: deleting md%d\n", __FUNCTION__, mdidx(mddev));
21321+ free_mddev(mddev);
21322+ }
21323+ }
21324+
21325+
21326+ /*
21327+	 * At this point, delete all rdevs which do not belong to any of the discovered MD arrays.
21328+ */
21329+ ITERATE_RDEV_ALL(rdev, tmp) {
21330+ if (!rdev->mddev) {
21331+ node = rdev->node;
21332+ if (node) {
21333+ if (node->plugin->id == md_plugin_header.id)
21334+ evms_md_export_rdev(rdev, FALSE);
21335+ else
21336+ evms_md_export_rdev(rdev, TRUE);
21337+ }
21338+ }
21339+ }
21340+
21341+ LOG_ENTRY_EXIT("%s: EXIT\n", __FUNCTION__);
21342+ MOD_DEC_USE_COUNT;
21343+ return rc;
21344+}
21345+
21346+
21347+/*
21348+ * Function: md_delete_node
21349+ */
21350+static int md_delete( struct evms_logical_node * node)
21351+{
21352+ struct evms_md *evms_md;
21353+ mddev_t *mddev;
21354+
21355+ evms_md = node->private;
21356+ mddev = evms_md->mddev;
21357+ LOG_DEFAULT("md_delete() [%s]\n", evms_md_partition_name(node));
21358+
21359+ if (mddev)
21360+ do_md_stop(mddev,0);
21361+ if (evms_md) {
21362+ if (evms_md->instance_plugin_hdr.fops)
21363+ kfree(evms_md->instance_plugin_hdr.fops);
21364+ kfree(evms_md);
21365+ }
21366+
21367+ evms_cs_deallocate_logical_node(node);
21368+ return 0;
21369+}
21370+
21371+
21372+/*
21373+ * Function: md_read
21374+ */
21375+static void md_read( struct evms_logical_node * node,
21376+ struct buffer_head * bh)
21377+{
21378+ struct evms_md *evms_md;
21379+ mddev_t *mddev;
21380+
21381+ evms_md = node->private;
21382+ mddev = evms_md->mddev;
21383+ if (evms_md_check_boundary(node, bh)) return;
21384+ if (mddev && mddev->pers)
21385+ mddev->pers->read(node, bh);
21386+}
21387+
21388+
21389+/*
21390+ * Function: md_write
21391+ */
21392+static void md_write( struct evms_logical_node * node,
21393+ struct buffer_head * bh)
21394+{
21395+ struct evms_md *evms_md;
21396+ mddev_t *mddev;
21397+
21398+ evms_md = node->private;
21399+ mddev = evms_md->mddev;
21400+ if (evms_md_check_boundary(node, bh)) return;
21401+	if (mddev && mddev->ro) {
21402+ LOG_ERROR("%s: read-only is set for [%s]\n", __FUNCTION__, node->name);
21403+ bh->b_end_io(bh, 0);
21404+ return;
21405+ }
21406+ if (mddev && mddev->pers)
21407+ mddev->pers->write(node, bh);
21408+}
21409+
21410+/*
21411+ * Function: md_sync_io
21412+ */
21413+static int md_sync_io(
21414+ struct evms_logical_node *node,
21415+ int rw,
21416+ u64 sect_nr,
21417+ u64 num_sects,
21418+ void *buf_addr)
21419+{
21420+ struct evms_md *evms_md;
21421+ mddev_t *mddev;
21422+ int rc = 0;
21423+
21424+ evms_md = node->private;
21425+ mddev = evms_md->mddev;
21426+
21427+ if (sect_nr + num_sects > node->total_vsectors) {
21428+ LOG_ERROR("%s: attempt to %s beyond MD device(%s) boundary("PFU64") with sect_nr("PFU64") and num_sects("PFU64")\n",
21429+ __FUNCTION__,
21430+ rw ? "WRITE" : "READ",
21431+ node->name,
21432+ node->total_vsectors,
21433+ sect_nr,num_sects);
21434+ rc = -EINVAL;
21435+ }
21436+
21437+	if (mddev && mddev->ro && (rw != READ)) {
21438+ LOG_ERROR("%s: read-only is set for [%s]\n", __FUNCTION__, node->name);
21439+ return -EINVAL;
21440+ }
21441+
21442+ if (!rc && mddev && mddev->pers) {
21443+ /*
21444+ * Check if the personality can handle synchronous I/O,
21445+ * otherwise use the generic function.
21446+ */
21447+ if (mddev->pers->sync_io)
21448+ rc = mddev->pers->sync_io(mddev, rw, sect_nr, num_sects, buf_addr);
21449+ else
21450+ rc = evms_md_sync_io(node, rw, sect_nr, num_sects, buf_addr);
21451+ } else
21452+ rc = -EINVAL;
21453+ return rc;
21454+}
21455+
21456+/**
21457+ * md_end_sync_request - End IO handler for synchronous I/O functions
21458+ **/
21459+static void md_end_sync_request(struct buffer_head *bh, int uptodate)
21460+{
21461+ struct evms_md_sync_cb * cb = (struct evms_md_sync_cb *) bh->b_private;
21462+
21463+ if (!uptodate)
21464+ cb->rc |= -EIO;
21465+ /* we are done with the bh */
21466+ evms_cs_deallocate_to_pool(evms_bh_pool, bh);
21467+
21468+ if (atomic_dec_and_test(&cb->io_count)) {
21469+ if (waitqueue_active(&cb->wait))
21470+ wake_up(&cb->wait);
21471+ }
21472+}
21473+
21474+/**
21475+ * md_sync_request_submit_bh - submit a page-size bh
21476+ * @node - target MD node
21477+ * @bh - pointer to the buffer head
21478+ * @sector - the sector number
21479+ * @data - pointer to buffer
21480+ * @rw - READ/WRITE
21481+ * @cb - MD synchronous I/O control block
21482+ **/
21483+static inline void md_sync_request_submit_bh(
21484+ struct evms_logical_node *node,
21485+ struct buffer_head *bh,
21486+ unsigned long sector,
21487+ char *data,
21488+ int rw,
21489+ struct evms_md_sync_cb *cb)
21490+{
21491+
21492+ bh->b_this_page = (struct buffer_head *)1;
21493+ bh->b_rsector = sector;
21494+ bh->b_size = PAGE_SIZE;
21495+ bh->b_state = 0;
21496+ set_bit(BH_Dirty, &bh->b_state);
21497+ set_bit(BH_Lock, &bh->b_state);
21498+ set_bit(BH_Req, &bh->b_state);
21499+ set_bit(BH_Mapped, &bh->b_state);
21500+ atomic_set(&bh->b_count, 1);
21501+ bh->b_data = data;
21502+ bh->b_page = virt_to_page(data);
21503+ bh->b_list = BUF_LOCKED;
21504+ bh->b_end_io = md_end_sync_request;
21505+ bh->b_private = cb;
21506+ atomic_inc(&cb->io_count);
21507+ if (rw == READ)
21508+ R_IO(node,bh);
21509+ else
21510+ W_IO(node,bh);
21511+}
21512+
21513+/**
21514+ * evms_md_allocate_bh
21515+ *
21516+ * Note that this function will not return unless we got a free bh
21517+ * Note that this function will not return until we get a free bh
21518+static inline struct buffer_head *evms_md_allocate_bh(void)
21519+{
21520+ struct buffer_head *bh;
21521+
21522+ while ((bh = evms_cs_allocate_from_pool(evms_bh_pool, FALSE)) == NULL)
21523+		schedule(); /* just yield until someone frees a bh */
21524+ init_waitqueue_head(&bh->b_wait);
21525+ bh->b_count = (atomic_t)ATOMIC_INIT(0);
21526+ return(bh);
21527+}
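+/* Note: the busy-wait above makes forward progress because every bh issued
+ * by the sync-I/O path is handed back to evms_bh_pool by
+ * md_end_sync_request() as soon as its I/O completes.
+ */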
21528+
21529+/**
21530+ * evms_md_partial_sync_io -
21531+ * This function handles synchronous I/O when the sector is not page aligned
21532+ * @node - evms node for the MD array
21533+ * @rw - READ/WRITE
21534+ * @sector - the starting sector
21535+ * @nsects - in: total sectors for the request; out: sectors completed
21536+ * @data - data buffer
21537+ * @data - data buffer
21538+ **/
21539+int evms_md_partial_sync_io(
21540+ struct evms_logical_node *node,
21541+ int rw,
21542+ u64 sector,
21543+ u32 *nsects,
21544+ void *data)
21545+{
21546+ int rc;
21547+ u32 offset, size;
21548+ struct buffer_head *bh;
21549+ struct evms_md_sync_cb cb;
21550+ char *page;
21551+
21552+ size = (u32)(*nsects << EVMS_VSECTOR_SIZE_SHIFT);
21553+
21554+ /* calculate byte offset */
21555+ offset = (u32)((sector & (EVMS_MD_SECTS_PER_PAGE-1)) << EVMS_VSECTOR_SIZE_SHIFT);
21556+ if (!offset && (*nsects >= EVMS_MD_SECTS_PER_PAGE)) {
21557+ *nsects = 0;
21558+ return 0; /* Nothing to do */
21559+ }
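+	/* Worked example (assuming 4 KB pages, i.e. EVMS_MD_SECTS_PER_PAGE
+	 * is 8): a request at sector 13 gives a byte offset of
+	 * (13 & 7) * 512 = 2560 into the bounce page, leaving room for at
+	 * most 3 sectors before the page boundary.
+	 */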
21560+
21561+ page = NULL;
21562+ rc = 0;
21563+
21564+ page = kmalloc(PAGE_SIZE, GFP_KERNEL);
21565+ if (!page) {
21566+ LOG_ERROR("%s: no memory!\n", __FUNCTION__);
21567+ rc = -ENOMEM;
21568+ }
21569+
+	if (!rc) {
+		/* Allocate the bh only once the bounce page is in hand;
+		 * grabbing it unconditionally would leak a pool bh on the
+		 * -ENOMEM path above.
+		 */
+		bh = evms_md_allocate_bh();
21573+ memset(&cb, 0, sizeof(cb));
21574+ init_waitqueue_head(&cb.wait);
21575+ cb.io_count = (atomic_t)ATOMIC_INIT(0);
21576+ md_sync_request_submit_bh(
21577+ node, bh,
21578+ (unsigned long)(sector & EVMS_MD_SECTS_PER_PAGE_MASK),
21579+ page, READ, &cb);
21580+ wait_disk_event(cb.wait, !atomic_read(&cb.io_count));
21581+ rc |= cb.rc;
21582+ }
21583+
21584+ if (!rc) {
21585+ size = (size <= (PAGE_SIZE - offset)) ? size : (PAGE_SIZE - offset);
21586+
21587+ switch (rw) {
21588+ case READ:
21589+ /* copy data and return */
21590+ memcpy(data, page+offset, size);
21591+ break;
21592+ case WRITE:
21593+ /* copy data and then write */
21594+ memcpy(page+offset, data, size);
21595+
21596+ bh = evms_md_allocate_bh();
21597+
21598+ md_sync_request_submit_bh(
21599+ node, bh,
21600+ (unsigned long)(sector & EVMS_MD_SECTS_PER_PAGE_MASK),
21601+ page, WRITE, &cb);
21602+ wait_disk_event(cb.wait, !atomic_read(&cb.io_count));
21603+ rc |= cb.rc;
21604+ break;
21605+ default:
21606+ rc = -EINVAL;
21607+ }
21608+ }
21609+
21610+ if (page)
21611+ kfree(page);
21612+
21613+ if (!rc)
21614+ *nsects = (u64)(size >> EVMS_VSECTOR_SIZE_SHIFT);
21615+ else
21616+ *nsects = 0;
21617+ return rc;
21618+}
21619+
21620+/**
21621+ * evms_md_sync_io - This function handles synchronous I/O
21622+ **/
21623+int evms_md_sync_io(
21624+ struct evms_logical_node *node,
21625+ int rw,
21626+ u64 sector,
21627+ u64 total_nr_sects,
21628+ void *data )
21629+{
21630+ int rc = 0;
21631+ u64 total_nr_pages, size;
21632+ u32 nsects;
21633+ struct buffer_head *bh;
21634+ struct evms_md_sync_cb cb;
21635+
21636+ if (sector % EVMS_MD_SECTS_PER_PAGE) {
21637+ nsects = total_nr_sects;
21638+ rc = evms_md_partial_sync_io(node, rw, sector, &nsects, data);
21639+ if (!rc) {
21640+ total_nr_sects -= nsects;
21641+ sector += nsects;
21642+ data += (nsects << EVMS_VSECTOR_SIZE_SHIFT);
21643+ if (total_nr_sects == 0)
21644+ return rc;
21645+ } else {
21646+ return rc;
21647+ }
21648+ }
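+	/* Worked example (assuming 8 sectors per page): a 21-sector request
+	 * at sector 5 is handled as 3 unaligned head sectors (above), two
+	 * full 8-sector pages (loop below), and a 2-sector tail via the
+	 * final evms_md_partial_sync_io() call.
+	 */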
21649+
21650+ total_nr_pages = total_nr_sects / EVMS_MD_SECTS_PER_PAGE;
21651+ size = total_nr_sects << EVMS_VSECTOR_SIZE_SHIFT;
21652+
21653+ memset(&cb, 0, sizeof(cb));
21654+ init_waitqueue_head(&cb.wait);
21655+ cb.io_count = (atomic_t)ATOMIC_INIT(0);
21656+
21657+ while (!rc && total_nr_pages) {
21658+
21659+ bh = evms_md_allocate_bh();
21660+
21661+ md_sync_request_submit_bh(node, bh,(unsigned long)sector, data, rw, &cb);
21662+
21663+ sector += EVMS_MD_SECTS_PER_PAGE;
21664+ size -= PAGE_SIZE;
21665+ total_nr_pages--;
21666+ data += PAGE_SIZE;
21667+ }
21668+ if (!rc) {
21669+ wait_disk_event(cb.wait, !atomic_read(&cb.io_count));
21670+ rc |= cb.rc;
21671+ }
21672+
21673+ if (!rc && size) {
21674+ nsects = size >> EVMS_VSECTOR_SIZE_SHIFT;
21675+ rc = evms_md_partial_sync_io(node, rw, sector, &nsects, data);
21676+ }
21677+
21678+ return(rc);
21679+}
21680+
21681+/*
21682+ * Function: md_ioctl
21683+ */
21684+static int md_ioctl(
21685+ struct evms_logical_node * node,
21686+ struct inode * inode,
21687+ struct file * file,
21688+ unsigned int cmd,
21689+ unsigned long arg)
21690+{
21691+ struct evms_md * evms_md = node->private;
21692+ mddev_t *mddev;
21693+ int rc = 0;
21694+
21695+	if (!inode || !evms_md)
21696+ rc = -EINVAL;
21697+
21698+ if (!rc) {
21699+ switch (cmd) {
21700+ /*
21701+		 * We have a problem here: there is no easy way to give a CHS
21702+		 * virtual geometry. We currently pretend that we have 2 heads and
21703+		 * 4 sectors (with a BIG number of cylinders...). This drives
21704+ * dosfs just mad... ;-)
21705+ */
21706+
21707+ case HDIO_GETGEO:
21708+ {
21709+ struct hd_geometry hdgeo;
21710+ hdgeo.heads = 2;
21711+ hdgeo.sectors = 4;
21712+ hdgeo.cylinders = ((unsigned int)node->total_vsectors) /
21713+ hdgeo.heads / hdgeo.sectors;
21714+ hdgeo.start = 0;
21715+ if (copy_to_user((int *)arg,
21716+ &hdgeo,
21717+ sizeof(hdgeo)))
21718+ rc = -EFAULT;
21719+ }
21720+ break;
21721+ case EVMS_QUIESCE_VOLUME:
21722+ case EVMS_GET_DISK_LIST:
21723+ case EVMS_CHECK_MEDIA_CHANGE:
21724+ case EVMS_REVALIDATE_DISK:
21725+ case EVMS_OPEN_VOLUME:
21726+ case EVMS_CLOSE_VOLUME:
21727+ case EVMS_CHECK_DEVICE_STATUS:
21728+ rc = md_ioctl_cmd_broadcast(
21729+ node, inode, file, cmd, arg);
21730+ break;
21731+ case EVMS_PLUGIN_IOCTL:
21732+ rc = md_direct_ioctl(
21733+ inode, file, cmd, arg);
21734+ break;
21735+ default:
21736+ mddev = evms_md->mddev;
21737+ if (mddev == NULL) {
21738+ rc = -ENODEV;
21739+ } else if (mddev->pers->evms_ioctl == NULL) {
21740+ rc = -ENOSYS;
21741+ } else {
21742+ rc = mddev->pers->evms_ioctl(mddev, inode, file, cmd, arg);
21743+ }
21744+ }
21745+ }
21746+ return(rc);
21747+}
21748+
21749+static int md_ioctl_cmd_broadcast(
21750+ struct evms_logical_node *node,
21751+ struct inode *inode,
21752+ struct file *file,
21753+ unsigned long cmd,
21754+ unsigned long arg)
21755+{
21756+ int rc = 0;
21757+ struct evms_md *evms_md;
21758+ mddev_t *mddev;
21759+ struct list_head *tmp;
21760+ mdk_rdev_t *rdev;
21761+
21762+ evms_md = node->private;
21763+ mddev = evms_md->mddev;
21764+
21765+ /* broadcast this cmd to all children */
21766+ ITERATE_RDEV(mddev,rdev,tmp) {
21767+ if (!rdev->mddev) {
21768+ MD_BUG();
21769+ continue;
21770+ }
21771+ if (!rdev->virtual_spare) {
21772+ rc |= IOCTL(rdev->node, inode, file, cmd, arg);
21773+ }
21774+ }
21775+ return (rc);
21776+}
21777+
21778+
21779+static int evms_md_add_virtual_spare (mddev_t *mddev, kdev_t dev)
21780+{
21781+ mdk_rdev_t *rdev;
21782+ mdp_disk_t *disk = NULL;
21783+ int i;
21784+
21785+ if (evms_md_find_rdev(mddev,dev))
21786+ return -EEXIST;
21787+
21788+ LOG_ENTRY_EXIT("%s ENTRY\n", __FUNCTION__);
21789+ if ((rdev = kmalloc(sizeof(*rdev),GFP_KERNEL)) == NULL)
21790+ return -ENOMEM;
21791+
21792+ memset(rdev, 0, sizeof(*rdev));
21793+
21794+ for (i = mddev->sb->raid_disks; i < MD_SB_DISKS; i++) {
21795+ disk = mddev->sb->disks + i;
21796+ if (!disk->major && !disk->minor)
21797+ break;
21798+ if (disk_removed(disk))
21799+ break;
21800+ }
21801+ if (i == MD_SB_DISKS) {
21802+ LOG_WARNING("%s : [md%d]can not hot-add to full array!\n", __FUNCTION__, mdidx(mddev));
21803+ kfree(rdev);
21804+ return -EBUSY;
21805+ }
21806+
21807+ if (disk_removed(disk)) {
21808+ /*
21809+ * reuse slot
21810+ */
21811+ if (disk->number != i) {
21812+ MD_BUG();
21813+ kfree(rdev);
21814+ return -EINVAL;
21815+ }
21816+ } else {
21817+ disk->number = i;
21818+ }
21819+
21820+ disk->raid_disk = disk->number;
21821+ disk->major = MAJOR(dev);
21822+ disk->minor = MINOR(dev);
21823+
21824+ mark_disk_spare(disk);
21825+
21826+ rdev->mddev = mddev;
21827+ rdev->dev = dev;
21828+ rdev->desc_nr = disk->number;
21829+ rdev->virtual_spare = 1;
21830+
21831+ /* bind rdev to mddev array */
21832+ list_add(&rdev->all, &all_raid_disks);
21833+ list_add(&rdev->same_set, &mddev->disks);
21834+ MD_INIT_LIST_HEAD(&rdev->pending);
21835+
21836+ mddev->sb->nr_disks++;
21837+ mddev->sb->spare_disks++;
21838+ mddev->sb->working_disks++;
21839+ mddev->nb_dev++;
21840+
21841+ mddev->sb_dirty = 1;
21842+
21843+ evms_md_update_sb(mddev);
21844+
21845+ return 0;
21846+}
21847+
21848+static int evms_md_remove_disk(mddev_t *mddev, kdev_t dev)
21849+{
21850+ mdk_rdev_t *rdev = NULL;
21851+ mdp_disk_t *disk;
21852+ int rc = 0;
21853+
21854+ disk = evms_md_find_disk(mddev,dev);
21855+ if (!disk)
21856+ return -ENODEV;
21857+
21858+ rdev = evms_md_find_rdev(mddev,dev);
21859+
21860+ if (rdev && !rdev->faulty) {
21861+ /*
21862+ * The disk is active in the array,
21863+ * must ask the personality to do it
21864+ */
21865+ if (mddev->pers && mddev->pers->diskop) {
21866+ /* Assume spare, try to remove it first. */
21867+ rc = mddev->pers->diskop(mddev, &disk, DISKOP_HOT_REMOVE_SPARE);
21868+ if (rc)
21869+ rc = mddev->pers->diskop(mddev, &disk, DISKOP_HOT_REMOVE_DISK);
21870+ } else
21871+ rc = -ENOSYS;
21872+ }
21873+
21874+ if (!rc) {
21875+ remove_descriptor(disk,mddev->sb);
21876+ if (rdev)
21877+ kick_rdev_from_array(rdev);
21878+ mddev->sb_dirty = 1;
21879+ evms_md_update_sb(mddev);
21880+
21881+ }
21882+ return rc;
21883+}
21884+
21885+
21886+/*
21887+ * Function: md_direct_ioctl
21888+ *
21889+ * This function provides a method for user-space to communicate directly
21890+ * with a plugin in the kernel.
21891+ */
21892+static int md_direct_ioctl(
21893+ struct inode * inode,
21894+ struct file * file,
21895+ unsigned int cmd,
21896+ unsigned long args )
21897+{
21898+ struct evms_plugin_ioctl_pkt argument;
21899+ kdev_t md_kdev;
21900+ mddev_t *mddev = NULL;
21901+ struct evms_md_ioctl ioctl_arg;
21902+ struct evms_md_kdev device;
21903+ struct evms_md_array_info array_info, *usr_array_info;
21904+ int rc = 0;
21905+
21906+ MOD_INC_USE_COUNT;
21907+
21908+ // Copy user's parameters to kernel space
21909+ if ( copy_from_user(&argument, (struct evms_plugin_ioctl_pkt*)args, sizeof(argument)) ) {
21910+ MOD_DEC_USE_COUNT;
21911+ return -EFAULT;
21912+ }
21913+
21914+ // Make sure this is supposed to be our ioctl.
21915+ if ( argument.feature_id != md_plugin_header.id ) {
21916+ MOD_DEC_USE_COUNT;
21917+ return -EINVAL;
21918+ }
21919+
21920+ // Copy user's md ioctl parameters to kernel space
21921+ if ( copy_from_user(&ioctl_arg,
21922+ (struct evms_md_ioctl*)argument.feature_ioctl_data,
21923+ sizeof(ioctl_arg)) )
21924+ rc = -EFAULT;
21925+ else {
21926+ if (ioctl_arg.mddev_idx < MAX_MD_DEVS) {
21927+ md_kdev = MKDEV(MD_MAJOR, ioctl_arg.mddev_idx);
21928+ mddev = kdev_to_mddev(md_kdev);
21929+ if (mddev == NULL)
21930+ rc = -ENODEV;
21931+ } else
21932+ rc = -ENODEV;
21933+ }
21934+
21935+ if (!rc) {
21936+ switch(argument.feature_command) {
21937+ case EVMS_MD_PERS_IOCTL_CMD:
21938+ if (mddev->pers->md_pers_ioctl == NULL) {
21939+ MOD_DEC_USE_COUNT;
21940+ return -ENOSYS;
21941+ }
21942+ rc = mddev->pers->md_pers_ioctl(mddev,
21943+ ioctl_arg.cmd,
21944+ ioctl_arg.arg);
21945+ copy_to_user((struct evms_md_ioctl*)argument.feature_ioctl_data,
21946+ &ioctl_arg,
21947+ sizeof(ioctl_arg));
21948+ break;
21949+
21950+ case EVMS_MD_ADD:
21951+ if ( copy_from_user(&device,
21952+ (struct evms_md_kdev *)ioctl_arg.arg,
21953+ sizeof(device)) )
21954+ rc = -EFAULT;
21955+ else
21956+ rc = evms_md_add_virtual_spare(mddev,MKDEV(device.major, device.minor));
21957+ break;
21958+
21959+ case EVMS_MD_REMOVE:
21960+ if ( copy_from_user(&device,
21961+ (struct evms_md_kdev *)ioctl_arg.arg,
21962+ sizeof(device)) )
21963+ rc = -EFAULT;
21964+ else
21965+ rc = evms_md_remove_disk(mddev,MKDEV(device.major, device.minor));
21966+ break;
21967+
21968+ case EVMS_MD_ACTIVATE:
21969+ rc = -ENOSYS;
21970+ break;
21971+
21972+ case EVMS_MD_DEACTIVATE:
21973+ rc = -ENOSYS;
21974+ break;
21975+
21976+ case EVMS_MD_GET_ARRAY_INFO:
21977+
21978+ usr_array_info = (struct evms_md_array_info *)ioctl_arg.arg;
21979+ if ( copy_from_user(&array_info, usr_array_info,
21980+ sizeof(array_info)) )
21981+ rc = -EFAULT;
21982+ else {
21983+ array_info.state = 0;
21984+ if (mddev->curr_resync)
21985+ array_info.state |= EVMS_MD_ARRAY_SYNCING;
21986+ copy_to_user(&usr_array_info->state, &array_info.state,
21987+ sizeof(usr_array_info->state));
21988+ if (copy_to_user(array_info.sb, mddev->sb,
21989+ sizeof(mdp_super_t)))
21990+ rc = -EFAULT;
21991+ }
21992+ break;
21993+ default:
21994+ rc = -ENOSYS;
21995+ break;
21996+ }
21997+ }
21998+
21999+ argument.status = rc;
22000+ copy_to_user((struct evms_plugin_ioctl_pkt*)args, &argument, sizeof(argument));
22001+ MOD_DEC_USE_COUNT;
22002+ return rc;
22003+}
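+
+/*
+ * Minimal user-space sketch of driving this path (illustrative only:
+ * the opened device and the plugin id value are assumptions, while the
+ * struct fields and command names come from the code above):
+ *
+ *	struct evms_md_array_info array_info;
+ *	struct evms_md_ioctl md_arg;
+ *	struct evms_plugin_ioctl_pkt pkt;
+ *
+ *	array_info.sb = malloc(MD_SB_BYTES);
+ *	md_arg.mddev_idx = 0;			// md0
+ *	md_arg.arg = &array_info;
+ *	pkt.feature_id = <MD plugin id>;	// must match md_plugin_header.id
+ *	pkt.feature_command = EVMS_MD_GET_ARRAY_INFO;
+ *	pkt.feature_ioctl_data = &md_arg;
+ *	ioctl(fd, EVMS_PLUGIN_IOCTL, &pkt);	// pkt.status carries the rc
+ */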
22004+
22005+
22006+
22007+
22008+void evms_md_add_mddev_mapping (mddev_t * mddev, kdev_t dev, void *data)
22009+{
22010+ unsigned int minor = MINOR(dev);
22011+
22012+ if (MAJOR(dev) != MD_MAJOR) {
22013+ MD_BUG();
22014+ return;
22015+ }
22016+ if (evms_mddev_map[minor].mddev != NULL) {
22017+ MD_BUG();
22018+ return;
22019+ }
22020+ evms_mddev_map[minor].mddev = mddev;
22021+ evms_mddev_map[minor].data = data;
22022+}
22023+
22024+void evms_md_del_mddev_mapping (mddev_t * mddev, kdev_t dev)
22025+{
22026+ unsigned int minor = MINOR(dev);
22027+
22028+ if (MAJOR(dev) != MD_MAJOR) {
22029+ MD_BUG();
22030+ return;
22031+ }
22032+ if (evms_mddev_map[minor].mddev != mddev) {
22033+ MD_BUG();
22034+ return;
22035+ }
22036+ evms_mddev_map[minor].mddev = NULL;
22037+ evms_mddev_map[minor].data = NULL;
22038+}
22039+
22040+static mddev_t * alloc_mddev (kdev_t dev)
22041+{
22042+ mddev_t *mddev;
22043+
22044+ if (MAJOR(dev) != MD_MAJOR) {
22045+ MD_BUG();
22046+ return 0;
22047+ }
22048+ mddev = (mddev_t *) kmalloc(sizeof(*mddev), GFP_KERNEL);
22049+ if (!mddev)
22050+ return NULL;
22051+
22052+ memset(mddev, 0, sizeof(*mddev));
22053+
22054+ mddev->__minor = MINOR(dev);
22055+ init_MUTEX(&mddev->reconfig_sem);
22056+ init_MUTEX(&mddev->recovery_sem);
22057+ init_MUTEX(&mddev->resync_sem);
22058+ INIT_LIST_HEAD(&mddev->disks);
22059+ INIT_LIST_HEAD(&mddev->all_mddevs);
22060+ INIT_LIST_HEAD(&mddev->incomplete_mddevs);
22061+ INIT_LIST_HEAD(&mddev->running_mddevs);
22062+ mddev->active = (atomic_t)ATOMIC_INIT(0);
22063+ mddev->recovery_active = (atomic_t)ATOMIC_INIT(0);
22064+
22065+ /*
22066+ * The 'base' mddev is the one with data NULL.
22067+ * personalities can create additional mddevs
22068+ * if necessary.
22069+ */
22070+ evms_md_add_mddev_mapping(mddev, dev, 0);
22071+ list_add(&mddev->all_mddevs, &all_mddevs);
22072+
22073+ MOD_INC_USE_COUNT;
22074+ evms_md_create_recovery_thread();
22075+
22076+ return mddev;
22077+}
22078+
22079+mdk_rdev_t * evms_md_find_rdev_nr(mddev_t *mddev, int nr)
22080+{
22081+ mdk_rdev_t * rdev;
22082+ struct list_head *tmp;
22083+
22084+ ITERATE_RDEV(mddev,rdev,tmp) {
22085+ if (rdev->desc_nr == nr)
22086+ return rdev;
22087+ }
22088+ return NULL;
22089+}
22090+
22091+
22092+mdk_rdev_t * evms_md_find_rdev(mddev_t * mddev, kdev_t dev)
22093+{
22094+ struct list_head *tmp;
22095+ mdk_rdev_t *rdev;
22096+
22097+ ITERATE_RDEV(mddev,rdev,tmp) {
22098+ if (rdev->dev == dev)
22099+ return rdev;
22100+ }
22101+ return NULL;
22102+}
22103+
22104+mdk_rdev_t * evms_md_find_rdev_from_node(mddev_t * mddev, struct evms_logical_node * node)
22105+{
22106+ struct list_head *tmp;
22107+ mdk_rdev_t *rdev;
22108+
22109+ ITERATE_RDEV(mddev,rdev,tmp) {
22110+ if (rdev->node == node)
22111+ return rdev;
22112+ }
22113+ return NULL;
22114+}
22115+
22116+static MD_LIST_HEAD(device_names);
22117+
22118+static char * org_partition_name (kdev_t dev)
22119+{
22120+ struct gendisk *hd;
22121+ static char nomem [] = "<nomem>";
22122+ dev_name_t *dname;
22123+ struct list_head *tmp = device_names.next;
22124+
22125+ while (tmp != &device_names) {
22126+ dname = list_entry(tmp, dev_name_t, list);
22127+ if (dname->dev == dev)
22128+ return dname->name;
22129+ tmp = tmp->next;
22130+ }
22131+
22132+ dname = (dev_name_t *) kmalloc(sizeof(*dname), GFP_KERNEL);
22133+
22134+ if (!dname)
22135+ return nomem;
22136+ /*
22137+ * ok, add this new device name to the list
22138+ */
22139+ hd = get_gendisk (dev);
22140+ dname->name = NULL;
22141+ if (hd)
22142+ dname->name = disk_name (hd, MINOR(dev), dname->namebuf);
22143+ if (!dname->name) {
22144+ sprintf (dname->namebuf, "[dev %s]", kdevname(dev));
22145+ dname->name = dname->namebuf;
22146+ }
22147+
22148+ dname->dev = dev;
22149+ MD_INIT_LIST_HEAD(&dname->list);
22150+ list_add(&dname->list, &device_names);
22151+
22152+ return dname->name;
22153+}
22154+
22155+
22156+#define EVMS_MD_NULL_PARTITION_NAME "<EVMS_NODE_NO_NAME>"
22157+char * evms_md_partition_name (struct evms_logical_node *node)
22158+{
22159+ if (node && node->name)
22160+ return node->name;
22161+ else
22162+ return EVMS_MD_NULL_PARTITION_NAME;
22163+}
22164+
22165+static char * get_partition_name (mdk_rdev_t *rdev)
22166+{
22167+ if (rdev->node)
22168+ return evms_md_partition_name(rdev->node);
22169+ else
22170+ return org_partition_name(rdev->dev);
22171+}
22172+
22173+/*
22174+ * Function: evms_md_calc_dev_sboffset
22175+ * return the LSN for md super block.
22176+ */
22177+static u64 evms_md_calc_dev_sboffset (struct evms_logical_node *node,mddev_t *mddev, int persistent)
22178+{
22179+ u64 size = 0;
22180+
22181+ size = node->total_vsectors;
22182+ if (persistent) {
22183+ size = MD_NEW_SIZE_SECTORS(size);
22184+ }
22185+ return size; /* size in sectors */
22186+}
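+
+/*
+ * For reference (definition as in the stock 2.4 <linux/raid/md_p.h>,
+ * assumed unchanged by this patch):
+ *
+ *	#define MD_NEW_SIZE_SECTORS(x) \
+ *		((x & ~(MD_RESERVED_SECTORS - 1)) - MD_RESERVED_SECTORS)
+ *
+ * i.e. round the device size down to a 64KB boundary, then step back one
+ * reserved 64KB block; the superblock lives in that last reserved block.
+ */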
22187+
22188+/*
22189+ * Function: evms_md_calc_dev_size
22190+ * return data size (in blocks) for an "extended" device.
22191+ */
22192+static unsigned long evms_md_calc_dev_size (struct evms_logical_node *node,
22193+ mddev_t *mddev,
22194+ int persistent)
22195+{
22196+ unsigned long size;
22197+ u64 size_in_sectors;
22198+
22199+ size_in_sectors = evms_md_calc_dev_sboffset(node, mddev, persistent);
22200+ size = size_in_sectors >> 1;
22201+ if (!mddev->sb) {
22202+ MD_BUG();
22203+ return size;
22204+ }
22205+ if (mddev->sb->chunk_size)
22206+ size &= ~(mddev->sb->chunk_size/1024 - 1);
22207+ return size;
22208+}
22209+
22210+static unsigned int zoned_raid_size (mddev_t *mddev)
22211+{
22212+ unsigned int mask;
22213+ mdk_rdev_t * rdev;
22214+ struct list_head *tmp;
22215+
22216+ if (!mddev->sb) {
22217+ MD_BUG();
22218+ return -EINVAL;
22219+ }
22220+ /*
22221+ * do size and offset calculations.
22222+ */
22223+ mask = ~(mddev->sb->chunk_size/1024 - 1);
22224+
22225+ ITERATE_RDEV(mddev,rdev,tmp) {
22226+ rdev->size &= mask;
22227+ evms_md_size[mdidx(mddev)] += rdev->size;
22228+ }
22229+ return 0;
22230+}
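+
+/*
+ * Example of the mask arithmetic above (illustrative): with a 64KB
+ * chunk_size, mask = ~(65536/1024 - 1) = ~63, so "rdev->size &= mask"
+ * rounds each member's size (counted in 1KB blocks) down to a 64KB
+ * multiple before it is added to evms_md_size[].
+ */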
22231+
22232+/*
22233+ * We check whether all devices are numbered from 0 to nb_dev-1. The
22234+ * order is guaranteed even after device name changes.
22235+ *
22236+ * Some personalities (raid0, linear) use this. Personalities that
22237+ * provide data have to be able to deal with loss of individual
22238+ * disks, so they do their checking themselves.
22239+ */
22240+int evms_md_check_ordering (mddev_t *mddev)
22241+{
22242+ int i, c;
22243+ mdk_rdev_t *rdev;
22244+ struct list_head *tmp;
22245+
22246+ /*
22247+ * First, all devices must be fully functional
22248+ */
22249+ ITERATE_RDEV(mddev,rdev,tmp) {
22250+ if (rdev->faulty) {
22251+ LOG_ERROR("evms_md_check_ordering() md%d's device %s faulty, aborting.\n",
22252+ mdidx(mddev), get_partition_name(rdev));
22253+ goto abort;
22254+ }
22255+ }
22256+
22257+ c = 0;
22258+ ITERATE_RDEV(mddev,rdev,tmp) {
22259+ c++;
22260+ }
22261+ if (c != mddev->nb_dev) {
22262+ MD_BUG();
22263+ goto abort;
22264+ }
22265+ if (mddev->nb_dev != mddev->sb->raid_disks) {
22266+ LOG_ERROR("%s: [md%d] array needs %d disks, has %d, aborting.\n",
22267+ __FUNCTION__, mdidx(mddev), mddev->sb->raid_disks, mddev->nb_dev);
22268+ goto abort;
22269+ }
22270+ /*
22271+ * Now the numbering check
22272+ */
22273+ for (i = 0; i < mddev->nb_dev; i++) {
22274+ c = 0;
22275+ ITERATE_RDEV(mddev,rdev,tmp) {
22276+ if (rdev->desc_nr == i)
22277+ c++;
22278+ }
22279+ if (!c) {
22280+ LOG_ERROR("md%d, missing disk #%d, aborting.\n",mdidx(mddev), i);
22281+ goto abort;
22282+ }
22283+ if (c > 1) {
22284+ LOG_ERROR("md%d, too many disks #%d, aborting.\n",mdidx(mddev), i);
22285+ goto abort;
22286+ }
22287+ }
22288+ return 0;
22289+abort:
22290+ return 1;
22291+}
22292+
22293+static void remove_descriptor (mdp_disk_t *disk, mdp_super_t *sb)
22294+{
22295+ if (disk_active(disk)) {
22296+ sb->working_disks--;
22297+ } else {
22298+ if (disk_spare(disk)) {
22299+ sb->spare_disks--;
22300+ sb->working_disks--;
22301+ } else {
22302+ sb->failed_disks--;
22303+ }
22304+ }
22305+ sb->nr_disks--;
22306+ disk->major = disk->minor = 0;
22307+ mark_disk_removed(disk);
22308+}
22309+
22310+#define BAD_MINOR \
22311+"%s: invalid raid minor (%x)\n"
22312+
22313+#define NO_SB \
22314+"disabled device %s, could not read superblock.\n"
22315+
22316+#define BAD_CSUM \
22317+"invalid superblock checksum on %s\n"
22318+
22319+
22320+static int alloc_array_sb (mddev_t * mddev)
22321+{
22322+ if (mddev->sb) {
22323+ MD_BUG();
22324+ return 0;
22325+ }
22326+
22327+ mddev->sb = (mdp_super_t *) __get_free_page (GFP_KERNEL);
22328+ if (!mddev->sb) {
22329+ LOG_ERROR("%s: Out of memory!\n", __FUNCTION__);
22330+ return -ENOMEM;
22331+ }
22332+ md_clear_page(mddev->sb);
22333+ return 0;
22334+}
22335+
22336+static int alloc_disk_sb (mdk_rdev_t * rdev)
22337+{
22338+ if (rdev->sb)
22339+ MD_BUG();
22340+
22341+ rdev->sb = (mdp_super_t *) __get_free_page(GFP_KERNEL);
22342+ if (!rdev->sb) {
22343+ LOG_ERROR("%s: Out of memory!\n", __FUNCTION__);
22344+ return -EINVAL;
22345+ }
22346+ md_clear_page(rdev->sb);
22347+
22348+ return 0;
22349+}
22350+
22351+/*
22352+ * Function: free_disk_sb
22353+ *
22354+ */
22355+static void free_disk_sb (mdk_rdev_t * rdev)
22356+{
22357+ if (rdev->sb) {
22358+ free_page((unsigned long) rdev->sb);
22359+ rdev->sb = NULL;
22360+ rdev->sb_offset = 0;
22361+ rdev->size = 0;
22362+ } else {
22363+ if (!rdev->virtual_spare && !rdev->faulty)
22364+ MD_BUG();
22365+ }
22366+}
22367+
22368+/*
22369+ * Function: evms_md_read_disk_sb
22370+ * Read the MD superblock.
22371+ */
22372+static int evms_md_read_disk_sb (mdk_rdev_t * rdev)
22373+{
22374+ int rc = 0;
22375+ struct evms_logical_node *node = rdev->node;
22376+ u64 sb_offset_in_sectors;
22377+
22378+ if (!rdev->sb) {
22379+ MD_BUG();
22380+ return -EINVAL;
22381+ }
22382+ if (node->total_vsectors <= MD_RESERVED_SECTORS) {
22383+ LOG_DETAILS("%s is too small, total_vsectors("PFU64")\n",
22384+ evms_md_partition_name(node), node->total_vsectors);
22385+ return -EINVAL;
22386+ }
22387+
22388+ /*
22389+ * Calculate the position of the superblock,
22390+ * it's at the end of the disk
22391+ */
22392+ sb_offset_in_sectors = evms_md_calc_dev_sboffset(node, rdev->mddev, 1);
22393+ rdev->sb_offset = (unsigned long)(sb_offset_in_sectors >> 1);
22394+ LOG_DEBUG("(read) %s's sb offset("PFU64") total_vsectors("PFU64")\n",
22395+ evms_md_partition_name(node), sb_offset_in_sectors, node->total_vsectors);
22396+
22397+ /*
22398+ * Read superblock
22399+ */
22400+ rc = INIT_IO(node, 0, sb_offset_in_sectors, MD_SB_SECTORS, rdev->sb);
22401+
22402+ return rc;
22403+}
22404+
22405+static unsigned int calc_sb_csum (mdp_super_t * sb)
22406+{
22407+ unsigned int disk_csum, csum;
22408+
22409+ disk_csum = sb->sb_csum;
22410+ sb->sb_csum = 0;
22411+ csum = csum_partial((void *)sb, MD_SB_BYTES, 0);
22412+ sb->sb_csum = disk_csum;
22413+ return csum;
22414+}
22415+
22416+
22417+
22418+/*
22419+ * Check one RAID superblock for generic plausibility
22420+ */
22421+
22422+static int check_disk_sb (mdk_rdev_t * rdev)
22423+{
22424+ mdp_super_t *sb;
22425+ int ret = -EINVAL;
22426+
22427+ sb = rdev->sb;
22428+ if (!sb) {
22429+ MD_BUG();
22430+ goto abort;
22431+ }
22432+
22433+ if (sb->md_magic != MD_SB_MAGIC) {
22434+ goto abort;
22435+ }
22436+
22437+ if (sb->md_minor >= MAX_MD_DEVS) {
22438+ LOG_ERROR(BAD_MINOR, get_partition_name(rdev), sb->md_minor);
22439+ goto abort;
22440+ }
22441+ if (calc_sb_csum(sb) != sb->sb_csum) {
22442+ LOG_ERROR(BAD_CSUM, get_partition_name(rdev));
22443+ goto abort;
22444+ }
22445+
22446+ switch (sb->level) {
22447+ case -1:
22448+ case 0:
22449+ case 1:
22450+ case 5:
22451+ break;
22452+ default:
22453+ LOG_ERROR("%s: EVMS MD does not support MD level %d\n", __FUNCTION__, sb->level);
22454+ goto abort;
22455+ }
22456+ ret = 0;
22457+abort:
22458+ return ret;
22459+}
22460+
22461+static kdev_t dev_unit(kdev_t dev)
22462+{
22463+ unsigned int mask;
22464+ struct gendisk *hd = get_gendisk(dev);
22465+
22466+ if (!hd)
22467+ return 0;
22468+ mask = ~((1 << hd->minor_shift) - 1);
22469+
22470+ return MKDEV(MAJOR(dev), MINOR(dev) & mask);
22471+}
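+
+/*
+ * e.g. for IDE disks (minor_shift == 6), /dev/hda1 (3,1) and /dev/hda2
+ * (3,2) both collapse to unit (3,0); match_dev_unit() below uses this
+ * to spot two array members sharing one physical disk.
+ */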
22472+
22473+static mdk_rdev_t * match_dev_unit(mddev_t *mddev, kdev_t dev)
22474+{
22475+ struct list_head *tmp;
22476+ mdk_rdev_t *rdev;
22477+
22478+ ITERATE_RDEV(mddev,rdev,tmp)
22479+ if (dev_unit(rdev->dev) == dev_unit(dev))
22480+ return rdev;
22481+
22482+ return NULL;
22483+}
22484+
22485+static int match_mddev_units(mddev_t *mddev1, mddev_t *mddev2)
22486+{
22487+ struct list_head *tmp;
22488+ mdk_rdev_t *rdev;
22489+
22490+ ITERATE_RDEV(mddev1,rdev,tmp)
22491+ if (match_dev_unit(mddev2, rdev->dev))
22492+ return 1;
22493+
22494+ return 0;
22495+}
22496+
22497+
22498+static void bind_rdev_to_array (mdk_rdev_t * rdev, mddev_t * mddev)
22499+{
22500+ mdk_rdev_t *same_pdev;
22501+
22502+ if (rdev->mddev) {
22503+ MD_BUG();
22504+ return;
22505+ }
22506+
22507+ same_pdev = match_dev_unit(mddev, rdev->dev);
22508+ if (same_pdev)
22509+ LOG_WARNING("[md%d] WARNING: %s appears to be on the same physical disk as %s. True\n"
22510+ " protection against single-disk failure might be compromised.\n",
22511+ mdidx(mddev), get_partition_name(rdev),get_partition_name(same_pdev));
22512+
22513+ list_add(&rdev->same_set, &mddev->disks);
22514+ rdev->mddev = mddev;
22515+ mddev->nb_dev++;
22516+ if (rdev->sb && disk_active(&rdev->sb->this_disk))
22517+ mddev->nr_raid_disks++;
22518+ LOG_DETAILS("bind<%s,%d>\n", get_partition_name(rdev), rdev->mddev->nb_dev);
22519+}
22520+
22521+static void unbind_rdev_from_array (mdk_rdev_t * rdev)
22522+{
22523+ if (!rdev->mddev) {
22524+ MD_BUG();
22525+ return;
22526+ }
22527+ list_del(&rdev->same_set);
22528+ MD_INIT_LIST_HEAD(&rdev->same_set);
22529+ rdev->mddev->nb_dev--;
22530+ if (rdev->sb && disk_active(&rdev->sb->this_disk))
22531+ rdev->mddev->nr_raid_disks--;
22532+ LOG_DETAILS("unbind<%s,%d>\n", get_partition_name(rdev), rdev->mddev->nb_dev);
22533+ rdev->mddev = NULL;
22534+}
22535+
22536+
22537+/*
22538+ * Function: evms_md_export_rdev
22539+ * EVMS MD version of export_rdev()
22540+ * Discard this MD "extended" device
22541+ */
22542+static void evms_md_export_rdev (mdk_rdev_t * rdev, int delete_node)
22543+{
22544+ LOG_DETAILS("%s: (%s)\n", __FUNCTION__ , get_partition_name(rdev));
22545+ if (rdev->mddev)
22546+ MD_BUG();
22547+ free_disk_sb(rdev);
22548+ list_del(&rdev->all);
22549+ MD_INIT_LIST_HEAD(&rdev->all);
22550+ if (rdev->pending.next != &rdev->pending) {
22551+ LOG_WARNING("%s: (%s was pending)\n",__FUNCTION__ ,get_partition_name(rdev));
22552+ list_del(&rdev->pending);
22553+ MD_INIT_LIST_HEAD(&rdev->pending);
22554+ }
22555+ if (rdev->node && delete_node) {
22556+ if (cur_discover_list) {
22557+ LOG_DETAILS("%s: remove (%s) from discover list.\n", __FUNCTION__,
22558+ get_partition_name(rdev));
22559+ evms_cs_remove_logical_node_from_list(cur_discover_list, rdev->node);
22560+ }
22561+ LOG_DETAILS("%s: deleting node %s\n", __FUNCTION__, get_partition_name(rdev));
22562+ DELETE(rdev->node);
22563+ rdev->node = NULL;
22564+ }
22565+ rdev->dev = 0;
22566+ rdev->faulty = 0;
22567+ kfree(rdev);
22568+}
22569+
22570+
22571+static void kick_rdev_from_array (mdk_rdev_t * rdev)
22572+{
22573+ LOG_DEFAULT("%s: (%s)\n", __FUNCTION__,get_partition_name(rdev));
22574+ unbind_rdev_from_array(rdev);
22575+ evms_md_export_rdev(rdev, TRUE);
22576+}
22577+
22578+static void export_array (mddev_t *mddev)
22579+{
22580+ struct list_head *tmp;
22581+ mdk_rdev_t *rdev;
22582+ mdp_super_t *sb = mddev->sb;
22583+
22584+ LOG_DEFAULT("%s: [md%d]\n",__FUNCTION__ ,mdidx(mddev));
22585+ if (mddev->sb) {
22586+ mddev->sb = NULL;
22587+ free_page((unsigned long) sb);
22588+ }
22589+
22590+ LOG_DEBUG("%s: removing all extended devices belong to md%d\n",__FUNCTION__,mdidx(mddev));
22591+ ITERATE_RDEV(mddev,rdev,tmp) {
22592+ if (!rdev->mddev) {
22593+ MD_BUG();
22594+ continue;
22595+ }
22596+ kick_rdev_from_array(rdev);
22597+ }
22598+ if (mddev->nb_dev)
22599+ MD_BUG();
22600+}
22601+
22602+static void free_mddev (mddev_t *mddev)
22603+{
22604+ struct evms_logical_node *node;
22605+ struct evms_md *evms_md;
22606+
22607+ if (!mddev) {
22608+ MD_BUG();
22609+ return;
22610+ }
22611+
22612+ node = mddev->node;
22613+
22614+ export_array(mddev);
22615+ evms_md_size[mdidx(mddev)] = 0;
22616+
22617+
22618+ /*
22619+ * Make sure nobody else is using this mddev
22620+ * (careful, we rely on the global kernel lock here)
22621+ */
22622+ while (atomic_read(&mddev->resync_sem.count) != 1)
22623+ schedule();
22624+ while (atomic_read(&mddev->recovery_sem.count) != 1)
22625+ schedule();
22626+
22627+ evms_md_del_mddev_mapping(mddev, MKDEV(MD_MAJOR, mdidx(mddev)));
22628+ list_del(&mddev->all_mddevs);
22629+ INIT_LIST_HEAD(&mddev->all_mddevs);
22630+ if (!list_empty(&mddev->running_mddevs)) {
22631+ list_del(&mddev->running_mddevs);
22632+ INIT_LIST_HEAD(&mddev->running_mddevs);
22633+ }
22634+ if (!list_empty(&mddev->incomplete_mddevs)) {
22635+ list_del(&mddev->incomplete_mddevs);
22636+ INIT_LIST_HEAD(&mddev->incomplete_mddevs);
22637+ }
22638+
22639+ kfree(mddev);
22640+ if (node) {
22641+ evms_md = node->private;
22642+ evms_md->mddev = NULL;
22643+ }
22644+ MOD_DEC_USE_COUNT;
22645+ evms_md_destroy_recovery_thread();
22646+}
22647+
22648+
22649+static void print_desc(mdp_disk_t *desc)
22650+{
22651+ printk(" DISK<N:%d,R:%d,S:%d>\n", desc->number,
22652+ desc->raid_disk,desc->state);
22653+}
22654+
22655+static void print_sb(mdp_super_t *sb)
22656+{
22657+ int i;
22658+
22659+ printk(" SB: (V:%d.%d.%d) ID:<%08x.%08x.%08x.%08x> CT:%08x\n",
22660+ sb->major_version, sb->minor_version, sb->patch_version,
22661+ sb->set_uuid0, sb->set_uuid1, sb->set_uuid2, sb->set_uuid3,
22662+ sb->ctime);
22663+ printk(" L%d S%08d ND:%d RD:%d md%d LO:%d CS:%d\n", sb->level,
22664+ sb->size, sb->nr_disks, sb->raid_disks, sb->md_minor,
22665+ sb->layout, sb->chunk_size);
22666+ printk(" UT:%08x ST:%d AD:%d WD:%d FD:%d SD:%d CSUM:%08x E:%x\n",
22667+ sb->utime, sb->state, sb->active_disks, sb->working_disks,
22668+ sb->failed_disks, sb->spare_disks,
22669+ sb->sb_csum, sb->events_lo);
22670+
22671+ for (i = 0; i < MD_SB_DISKS; i++) {
22672+ mdp_disk_t *desc;
22673+
22674+ desc = sb->disks + i;
22675+ if (desc->number || desc->major || desc->minor || desc->raid_disk || (desc->state && (desc->state != 4))) {
22676+ printk(" D %2d: ", i);
22677+ print_desc(desc);
22678+ }
22679+ }
22680+ printk(" THIS: ");
22681+ print_desc(&sb->this_disk);
22682+
22683+}
22684+
22685+static void print_rdev(mdk_rdev_t *rdev)
22686+{
22687+ printk("rdev %s: SZ:%08ld F:%d DN:%d ",
22688+ get_partition_name(rdev),
22689+ rdev->size, rdev->faulty, rdev->desc_nr);
22690+ if (rdev->sb) {
22691+ printk("rdev superblock:\n");
22692+ print_sb(rdev->sb);
22693+ } else
22694+ printk("no rdev superblock!\n");
22695+}
22696+
22697+void evms_md_print_devices (void)
22698+{
22699+ struct list_head *tmp, *tmp2;
22700+ mdk_rdev_t *rdev;
22701+ mddev_t *mddev;
22702+
22703+ printk("\n");
22704+ printk(": **********************************\n");
22705+ printk(": * <COMPLETE RAID STATE PRINTOUT> *\n");
22706+ printk(": **********************************\n");
22707+ ITERATE_MDDEV(mddev,tmp) {
22708+ printk("md%d: ", mdidx(mddev));
22709+
22710+ ITERATE_RDEV(mddev,rdev,tmp2)
22711+ printk("<%s>", get_partition_name(rdev));
22712+
22713+ if (mddev->sb) {
22714+ printk(" array superblock:\n");
22715+ print_sb(mddev->sb);
22716+ } else
22717+ printk(" no array superblock.\n");
22718+
22719+ ITERATE_RDEV(mddev,rdev,tmp2)
22720+ print_rdev(rdev);
22721+ }
22722+ printk(": **********************************\n");
22723+ printk("\n");
22724+}
22725+
22726+static int sb_equal ( mdp_super_t *sb1, mdp_super_t *sb2)
22727+{
22728+ int ret;
22729+ mdp_super_t *tmp1, *tmp2;
22730+
22731+ tmp1 = kmalloc(sizeof(*tmp1),GFP_KERNEL);
22732+ tmp2 = kmalloc(sizeof(*tmp2),GFP_KERNEL);
22733+
22734+ if (!tmp1 || !tmp2) {
22735+ ret = 0;
22736+ printk(KERN_INFO "md.c: sb_equal(): out of memory!\n");
22737+ goto abort;
22738+ }
22739+
22740+ *tmp1 = *sb1;
22741+ *tmp2 = *sb2;
22742+
22743+ /*
22744+ * nr_disks is not constant
22745+ */
22746+ tmp1->nr_disks = 0;
22747+ tmp2->nr_disks = 0;
22748+
22749+ if (memcmp(tmp1, tmp2, MD_SB_GENERIC_CONSTANT_WORDS * 4))
22750+ ret = 0;
22751+ else
22752+ ret = 1;
22753+
22754+abort:
22755+ if (tmp1)
22756+ kfree(tmp1);
22757+ if (tmp2)
22758+ kfree(tmp2);
22759+
22760+ return ret;
22761+}
22762+
22763+static int uuid_equal(mdk_rdev_t *rdev1, mdk_rdev_t *rdev2)
22764+{
22765+ if ( (rdev1->sb->set_uuid0 == rdev2->sb->set_uuid0) &&
22766+ (rdev1->sb->set_uuid1 == rdev2->sb->set_uuid1) &&
22767+ (rdev1->sb->set_uuid2 == rdev2->sb->set_uuid2) &&
22768+ (rdev1->sb->set_uuid3 == rdev2->sb->set_uuid3))
22769+
22770+ return 1;
22771+
22772+ return 0;
22773+}
22774+
22775+/*
22776+ * Function: evms_md_find_rdev_all
22777+ * EVMS MD version of find_rdev_all()
22778+ * Search entire all_raid_disks for "node"
22779+ * Return the MD "extended" device if found.
22780+ */
22781+static mdk_rdev_t * evms_md_find_rdev_all (struct evms_logical_node *node)
22782+{
22783+ struct list_head *tmp;
22784+ mdk_rdev_t *rdev;
22785+
22786+ tmp = all_raid_disks.next;
22787+ while (tmp != &all_raid_disks) {
22788+ rdev = list_entry(tmp, mdk_rdev_t, all);
22789+ if (rdev->node == node)
22790+ return rdev;
22791+ tmp = tmp->next;
22792+ }
22793+ return NULL;
22794+}
22795+
22796+/*
22797+ * Function: evms_md_find_mddev_all
22798+ */
22799+static mddev_t * evms_md_find_mddev_all (struct evms_logical_node *node)
22800+{
22801+ struct list_head *tmp;
22802+ mddev_t *mddev;
22803+
22804+ ITERATE_MDDEV(mddev,tmp) {
22805+ if (mddev->node == node)
22806+ return mddev;
22807+ }
22808+ return NULL;
22809+}
22810+
22811+
22812+/*
22813+ * Function: evms_md_write_disk_sb
22814+ * EVMS MD version of write_disk_sb
22815+ */
22816+static int evms_md_write_disk_sb(mdk_rdev_t * rdev)
22817+{
22818+ unsigned long size;
22819+ u64 sb_offset_in_sectors;
22820+
22821+ if (!rdev->sb) {
22822+ MD_BUG();
22823+ return 1;
22824+ }
22825+ if (rdev->faulty) {
22826+ MD_BUG();
22827+ return 1;
22828+ }
22829+ if (rdev->sb->md_magic != MD_SB_MAGIC) {
22830+ MD_BUG();
22831+ return 1;
22832+ }
22833+
22834+ sb_offset_in_sectors = evms_md_calc_dev_sboffset(rdev->node, rdev->mddev, 1);
22835+ if (rdev->sb_offset != (sb_offset_in_sectors >> 1)) {
22836+ LOG_WARNING("%s's sb offset has changed from blocks(%ld) to blocks(%ld), skipping\n",
22837+ get_partition_name(rdev),
22838+ rdev->sb_offset,
22839+ (unsigned long)(sb_offset_in_sectors >> 1));
22840+ goto skip;
22841+ }
22842+ /*
22843+ * If the disk went offline meanwhile and it's just a spare, then
22844+ * its size has changed to zero silently, and the MD code does
22845+ * not yet know that it's faulty.
22846+ */
22847+ size = evms_md_calc_dev_size(rdev->node, rdev->mddev, 1);
22848+ if (size != rdev->size) {
22849+ LOG_WARNING("%s's size has changed from %ld to %ld since import, skipping\n",
22850+ get_partition_name(rdev), rdev->size, size);
22851+ goto skip;
22852+ }
22853+
22854+ LOG_DETAILS("(write) %s's sb offset: "PFU64"\n",get_partition_name(rdev), sb_offset_in_sectors);
22855+
22856+ INIT_IO(rdev->node,WRITE,sb_offset_in_sectors,MD_SB_SECTORS,rdev->sb);
22857+
22858+skip:
22859+ return 0;
22860+}
22861+
22862+static int evms_md_sync_sbs(mddev_t * mddev)
22863+{
22864+ mdk_rdev_t *rdev;
22865+ struct list_head *tmp;
22866+ mdp_disk_t * disk;
22867+
22868+ ITERATE_RDEV(mddev,rdev,tmp) {
22869+ if (rdev->virtual_spare || rdev->faulty)
22870+ continue;
22871+
22872+ /* copy everything from the master */
22873+ memcpy(rdev->sb, mddev->sb, sizeof(mdp_super_t));
22874+
22875+ /* this_disk is unique, copy it from the master */
22876+// rdev->sb->this_disk = mddev->sb->disks[rdev->desc_nr];
22877+ // use the SB disk array, since if the update occurred on a normal
22878+ // shutdown the rdevs may be out of date.
22879+ disk = evms_md_find_disk(mddev, rdev->dev);
22880+ if (disk) {
22881+ rdev->sb->this_disk = *disk;
22882+ }
22883+
22884+ rdev->sb->sb_csum = calc_sb_csum(rdev->sb);
22885+ }
22886+ return 0;
22887+}
22888+
22889+static int evms_md_update_sb_sync(mddev_t * mddev, int clean)
22890+{
22891+ mdk_rdev_t *rdev;
22892+ struct list_head *tmp;
22893+ int rc = 0;
22894+ int found = FALSE;
22895+
22896+ ITERATE_RDEV(mddev,rdev,tmp) {
22897+
22898+ if (rdev->virtual_spare || rdev->faulty)
22899+ continue;
22900+
22901+ if ((rc = evms_md_read_disk_sb(rdev))) {
22902+ LOG_ERROR("%s: error reading superblock on %s!\n",
22903+ __FUNCTION__, evms_md_partition_name(rdev->node));
22904+ break;
22905+ }
22906+
22907+ if ((rc = check_disk_sb(rdev))) {
22908+ LOG_ERROR("%s: %s has invalid sb!\n",
22909+ __FUNCTION__, evms_md_partition_name(rdev->node));
22910+ break;
22911+ }
22912+
22913+ rdev->desc_nr = rdev->sb->this_disk.number;
22914+ rdev->dev = MKDEV(rdev->sb->this_disk.major, rdev->sb->this_disk.minor);
22915+
22916+ /* copy master superblock from the first good rdev */
22917+ if (!found) {
22918+ found = TRUE;
22919+ memcpy(mddev->sb, rdev->sb, sizeof(mdp_super_t));
22920+ if (clean)
22921+ mddev->sb->state |= 1 << MD_SB_CLEAN;
22922+ else
22923+ mddev->sb->state &= ~(1 << MD_SB_CLEAN);
22924+ }
22925+ }
22926+ if (!rc && found) {
22927+ evms_md_update_sb(mddev);
22928+ } else {
22929+ LOG_SERIOUS("%s: BUG! BUG! superblocks will not be updated!\n", __FUNCTION__);
22930+ }
22931+ return rc;
22932+
22933+}
22934+
22935+int evms_md_update_sb(mddev_t * mddev)
22936+{
22937+ int err, count = 100;
22938+ struct list_head *tmp;
22939+ mdk_rdev_t *rdev;
22940+
22941+
22942+repeat:
22943+ mddev->sb->utime = CURRENT_TIME;
22944+ if ((++mddev->sb->events_lo)==0)
22945+ ++mddev->sb->events_hi;
22946+
22947+ if ((mddev->sb->events_lo|mddev->sb->events_hi)==0) {
22948+ /*
22949+ * oops, this 64-bit counter should never wrap.
22950+ * Either we are in around ~1 trillion A.C., assuming
22951+ * 1 reboot per second, or we have a bug:
22952+ */
22953+ MD_BUG();
22954+ mddev->sb->events_lo = mddev->sb->events_hi = 0xffffffff;
22955+ }
22956+ evms_md_sync_sbs(mddev);
22957+
22958+ /*
22959+ * do not write anything to disk if using
22960+ * nonpersistent superblocks
22961+ */
22962+ if (mddev->sb->not_persistent)
22963+ return 0;
22964+
22965+ LOG_DETAILS("%s: updating [md%d] superblock\n",__FUNCTION__ ,mdidx(mddev));
22966+
22967+ err = 0;
22968+ ITERATE_RDEV(mddev,rdev,tmp) {
22969+ if (!rdev->virtual_spare && !rdev->faulty) {
22970+ LOG_DETAILS(" %s [events: %x]",
22971+ get_partition_name(rdev),
22972+ rdev->sb->events_lo);
22973+ err += evms_md_write_disk_sb(rdev);
22974+ } else {
22975+ if (rdev->faulty)
22976+ LOG_DETAILS(" skipping faulty %s\n", get_partition_name(rdev));
22977+ if (rdev->virtual_spare)
22978+ LOG_DETAILS(" skipping virtual spare.\n");
22979+ }
22980+ }
22981+ if (err) {
22982+ if (--count) {
22983+ LOG_WARNING("errors occurred during superblock update, repeating\n");
22984+ goto repeat;
22985+ }
22986+ LOG_ERROR("excessive errors occurred during superblock update, exiting\n");
22987+ }
22988+ return 0;
22989+}
22990+
22991+/*
22992+ * Function: evms_md_import_device
22993+ * Ensure that the node is not yet imported.
22994+ * Read and validate the MD super block on this device
22995+ * Add to the global MD "extended" devices list (all_raid_disks)
22996+ *
22997+ */
22998+static int evms_md_import_device (struct evms_logical_node **discover_list,
22999+ struct evms_logical_node *node)
23000+{
23001+ int err;
23002+ mdk_rdev_t *rdev;
23003+
23004+ LOG_ENTRY_EXIT("%s: discovering %s\n",__FUNCTION__,evms_md_partition_name(node));
23005+
23006+ if (evms_md_find_rdev_all(node)) {
23007+ LOG_DEBUG("%s exists\n", evms_md_partition_name(node));
23008+ return -EEXIST;
23009+ }
23010+
23011+ rdev = (mdk_rdev_t *) kmalloc(sizeof(*rdev), GFP_KERNEL);
23012+ if (!rdev) {
23013+ LOG_ERROR("could not alloc mem for %s!\n", evms_md_partition_name(node));
23014+ return -ENOMEM;
23015+ }
23016+ memset(rdev, 0, sizeof(*rdev));
23017+
23018+ if ((err = alloc_disk_sb(rdev)))
23019+ goto abort_free;
23020+
23021+ rdev->node = node; /* set this for evms_md_read_disk_sb() */
23022+
23023+ rdev->desc_nr = -1;
23024+ rdev->faulty = 0;
23025+
23026+ if (!node->total_vsectors) {
23027+ LOG_ERROR("%s has zero size!\n", evms_md_partition_name(node));
23028+ err = -EINVAL;
23029+ goto abort_free;
23030+ }
23031+
23032+ if ((err = evms_md_read_disk_sb(rdev))) {
23033+ LOG_EXTRA("could not read %s's sb, not importing!\n",evms_md_partition_name(node));
23034+ goto abort_free;
23035+ }
23036+ if ((err = check_disk_sb(rdev))) {
23037+ LOG_EXTRA("%s has invalid sb, not importing!\n",evms_md_partition_name(node));
23038+ goto abort_free;
23039+ }
23040+ rdev->desc_nr = rdev->sb->this_disk.number;
23041+ rdev->dev = MKDEV(rdev->sb->this_disk.major, rdev->sb->this_disk.minor);
23042+ LOG_DETAILS("FOUND %s desc_nr(%d)\n", get_partition_name(rdev), rdev->desc_nr);
23043+ list_add(&rdev->all, &all_raid_disks);
23044+ MD_INIT_LIST_HEAD(&rdev->pending);
23045+
23046+ if (rdev->faulty && rdev->sb)
23047+ free_disk_sb(rdev);
23048+
23049+ return 0;
23050+
23051+abort_free:
23052+ if (rdev->sb) {
23053+ free_disk_sb(rdev);
23054+ }
23055+ kfree(rdev);
23056+ return err;
23057+}
23058+
23059+
23060+
23061+/*
23062+ * Function: evms_md_analyze_sbs
23063+ * EVMS MD version of analyze_sbs()
23064+ */
23065+static int evms_md_analyze_sbs (mddev_t * mddev)
23066+{
23067+ int out_of_date = 0, i;
23068+ struct list_head *tmp, *tmp2;
23069+ mdk_rdev_t *rdev, *rdev2, *freshest;
23070+ mdp_super_t *sb;
23071+
23072+ LOG_ENTRY_EXIT("Analyzing all superblocks...\n");
23073+ /*
23074+ * Verify the RAID superblock on each real device
23075+ */
23076+ ITERATE_RDEV(mddev,rdev,tmp) {
23077+ if (rdev->faulty) {
23078+ MD_BUG();
23079+ goto abort;
23080+ }
23081+ if (!rdev->sb) {
23082+ MD_BUG();
23083+ goto abort;
23084+ }
23085+ if (check_disk_sb(rdev))
23086+ goto abort;
23087+ }
23088+
23089+ /*
23090+ * The superblock constant part has to be the same
23091+ * for all disks in the array.
23092+ */
23093+ sb = NULL;
23094+
23095+ ITERATE_RDEV(mddev,rdev,tmp) {
23096+ if (!sb) {
23097+ sb = rdev->sb;
23098+ continue;
23099+ }
23100+ if (!sb_equal(sb, rdev->sb)) {
23101+ LOG_WARNING("kick out %s\n",get_partition_name(rdev));
23102+ kick_rdev_from_array(rdev);
23103+ continue;
23104+ }
23105+ }
23106+
23107+ /*
23108+ * OK, we have all disks and the array is ready to run. Let's
23109+ * find the freshest superblock, that one will be the superblock
23110+ * that represents the whole array.
23111+ */
23112+ if (!mddev->sb)
23113+ if (alloc_array_sb(mddev))
23114+ goto abort;
23115+ sb = mddev->sb;
23116+ freshest = NULL;
23117+
23118+ ITERATE_RDEV(mddev,rdev,tmp) {
23119+ __u64 ev1, ev2;
23120+ /*
23121+ * if the checksum is invalid, use the superblock
23122+ * only as a last resort. (decrease its age by
23123+ * one event)
23124+ */
23125+ if (calc_sb_csum(rdev->sb) != rdev->sb->sb_csum) {
23126+ if (rdev->sb->events_lo || rdev->sb->events_hi)
23127+ if ((rdev->sb->events_lo--)==0)
23128+ rdev->sb->events_hi--;
23129+ }
23130+ LOG_DETAILS("%s's event counter: %x\n",get_partition_name(rdev), rdev->sb->events_lo);
23131+
23132+ if (!freshest) {
23133+ freshest = rdev;
23134+ continue;
23135+ }
23136+ /*
23137+ * Find the newest superblock version
23138+ */
23139+ ev1 = md_event(rdev->sb);
23140+ ev2 = md_event(freshest->sb);
23141+ if (ev1 != ev2) {
23142+ out_of_date = 1;
23143+ if (ev1 > ev2)
23144+ freshest = rdev;
23145+ }
23146+ }
23147+ if (out_of_date) {
23148+ LOG_WARNING("OUT OF DATE, freshest: %s\n",get_partition_name(freshest));
23149+ }
23150+ memcpy (sb, freshest->sb, sizeof(*sb));
23151+
23152+ /*
23153+ * at this point we have picked the 'best' superblock
23154+ * from all available superblocks.
23155+ * now we validate this superblock and kick out possibly
23156+ * failed disks.
23157+ */
23158+ ITERATE_RDEV(mddev,rdev,tmp) {
23159+ /*
23160+ * Kick all non-fresh devices
23161+ */
23162+ __u64 ev1, ev2;
23163+ ev1 = md_event(rdev->sb);
23164+ ev2 = md_event(sb);
23165+ if (ev1 < ev2) {
23166+ if (ev1) {
23167+ LOG_WARNING("kicking non-fresh %s from array!\n",get_partition_name(rdev));
23168+ kick_rdev_from_array(rdev);
23169+ continue;
23170+ } else {
23171+ LOG_DETAILS("%s is a new spare.\n",get_partition_name(rdev));
23172+ }
23173+ }
23174+ }
23175+
23176+ /*
23177+ * Remove unavailable and faulty devices ...
23178+ *
23179+ * note that if an array becomes completely unrunnable due to
23180+ * missing devices, we do not write the superblock back, so the
23181+ * administrator has a chance to fix things up. The removal thus
23182+ * only happens if it's nonfatal to the contents of the array.
23183+ */
23184+ for (i = 0; i < MD_SB_DISKS; i++) {
23185+ int found;
23186+ mdp_disk_t *desc;
23187+
23188+ desc = sb->disks + i;
23189+
23190+ /*
23191+ * We kick faulty devices/descriptors immediately.
23192+ *
23193+ * Note: multipath devices are a special case. Since we
23194+ * were able to read the superblock on the path, we don't
23195+ * care if it was previously marked as faulty, it's up now
23196+ * so enable it.
23197+ */
23198+ if (disk_faulty(desc) && mddev->sb->level != -4) {
23199+ found = 0;
23200+ ITERATE_RDEV(mddev,rdev,tmp) {
23201+ if (rdev->desc_nr != desc->number)
23202+ continue;
23203+ LOG_WARNING("[md%d] kicking faulty %s!\n",mdidx(mddev),get_partition_name(rdev));
23204+ kick_rdev_from_array(rdev);
23205+ found = 1;
23206+ break;
23207+ }
23208+ if (!found) {
23209+ LOG_WARNING("%s: [md%d] found former faulty device [number=%d]\n",
23210+ __FUNCTION__ ,mdidx(mddev), desc->number);
23211+ }
23212+ /*
23213+ * Don't call remove_descriptor(),
23214+ * let the administrator remove it from user-land */
23215+ /* remove_descriptor(desc, sb); */
23216+ continue;
23217+ } else if (disk_faulty(desc)) {
23218+ /*
23219+ * multipath entry marked as faulty, unfaulty it
23220+ */
23221+ kdev_t dev;
23222+
23223+ dev = MKDEV(desc->major, desc->minor);
23224+
23225+ rdev = evms_md_find_rdev(mddev, dev);
23226+ if (rdev)
23227+ mark_disk_spare(desc);
23228+ else {
23229+ LOG_WARNING("%s: [md%d] (MULTIPATH) found former faulty device [number=%d]\n",
23230+ __FUNCTION__ ,mdidx(mddev), desc->number);
23231+ /*
23232+ * Don't call remove_descriptor(),
23233+ * let the administrator remove it from user-land */
23234+ /* remove_descriptor(desc, sb); */
23235+ }
23236+ }
23237+
23238+ /*
23239+ * Is this device present in the rdev ring?
23240+ */
23241+ found = 0;
23242+ ITERATE_RDEV(mddev,rdev,tmp) {
23243+ /*
23244+ * Multi-path IO special-case: since we have no
23245+ * this_disk descriptor at auto-detect time,
23246+ * we cannot check rdev->number.
23247+ * We can check the device though.
23248+ */
23249+ if ((sb->level == -4) && (rdev->dev ==
23250+ MKDEV(desc->major,desc->minor))) {
23251+ found = 1;
23252+ break;
23253+ }
23254+ if (rdev->desc_nr == desc->number) {
23255+ found = 1;
23256+ break;
23257+ }
23258+ }
23259+ if (found)
23260+ continue;
23261+
23262+ LOG_WARNING(" [md%d]: former device [number=%d] is unavailable!\n",
23263+ mdidx(mddev), desc->number);
23264+ remove_descriptor(desc, sb);
23265+ }
23266+
23267+ /*
23268+ * Kick all rdevs that are not in the
23269+ * descriptor array:
23270+ */
23271+ ITERATE_RDEV(mddev,rdev,tmp) {
23272+ if (rdev->desc_nr == -1)
23273+ kick_rdev_from_array(rdev);
23274+ }
23275+
23276+ /*
23277+ * Do a final reality check.
23278+ */
23279+ if (mddev->sb->level != -4) {
23280+ ITERATE_RDEV(mddev,rdev,tmp) {
23281+ if (rdev->desc_nr == -1) {
23282+ MD_BUG();
23283+ goto abort;
23284+ }
23285+ /*
23286+ * is the desc_nr unique?
23287+ */
23288+ ITERATE_RDEV(mddev,rdev2,tmp2) {
23289+ if ((rdev2 != rdev) &&
23290+ (rdev2->desc_nr == rdev->desc_nr)) {
23291+ MD_BUG();
23292+ goto abort;
23293+ }
23294+ }
23295+ }
23296+ }
23297+
23298+#define OLD_VERSION KERN_ALERT \
23299+"md%d: unsupported raid array version %d.%d.%d\n"
23300+
23301+#define NOT_CLEAN_IGNORE KERN_ERR \
23302+"md%d: raid array is not clean -- starting background reconstruction\n"
23303+
23304+ /*
23305+ * Check if we can support this RAID array
23306+ */
23307+ if (sb->major_version != MD_MAJOR_VERSION ||
23308+ sb->minor_version > MD_MINOR_VERSION) {
23309+
23310+ LOG_ERROR("[md%d] unsupported raid array version %d.%d.%d\n",
23311+ mdidx(mddev),
23312+ sb->major_version,
23313+ sb->minor_version,
23314+ sb->patch_version);
23315+ goto abort;
23316+ }
23317+
23318+ if ((sb->state != (1 << MD_SB_CLEAN)) && ((sb->level == 1) ||
23319+ (sb->level == 4) || (sb->level == 5)))
23320+ LOG_WARNING("[md%d, level=%d] raid array is not clean -- starting background reconstruction\n",
23321+ mdidx(mddev), sb->level);
23322+
23323+ LOG_ENTRY_EXIT("analysis of all superblocks is OK!\n");
23324+ return 0;
23325+abort:
23326+ LOG_WARNING("ABORT analyze_sbs()!!!\n");
23327+ return 1;
23328+}
23329+
23330+
23331+static int device_size_calculation (mddev_t * mddev)
23332+{
23333+ int data_disks = 0, persistent;
23334+ //unsigned int readahead;
23335+ mdp_super_t *sb = mddev->sb;
23336+ struct list_head *tmp;
23337+ mdk_rdev_t *rdev;
23338+
23339+ /*
23340+ * Do device size calculation. Bail out if too small.
23341+ * (we have to do this after having validated chunk_size,
23342+ * because device size has to be modulo chunk_size)
23343+ */
23344+ persistent = !mddev->sb->not_persistent;
23345+ ITERATE_RDEV(mddev,rdev,tmp) {
23346+ if (rdev->faulty)
23347+ continue;
23348+ if (rdev->size) {
23349+ LOG_DEFAULT("%s: already calculated %s\n", __FUNCTION__, get_partition_name(rdev));
23350+ continue;
23351+ }
23352+ rdev->size = evms_md_calc_dev_size(rdev->node, mddev, persistent);
23353+ if (rdev->size < sb->chunk_size / 1024) {
23354+ LOG_WARNING("Dev %s smaller than chunk_size: %ldk < %dk\n",
23355+ get_partition_name(rdev), rdev->size, sb->chunk_size / 1024);
23356+ return -EINVAL;
23357+ }
23358+ }
23359+
23360+ switch (sb->level) {
23361+ case -4:
23362+ data_disks = 1;
23363+ break;
23364+ case -3:
23365+ data_disks = 1;
23366+ break;
23367+ case -2:
23368+ data_disks = 1;
23369+ break;
23370+ case -1:
23371+ zoned_raid_size(mddev);
23372+ data_disks = 1;
23373+ break;
23374+ case 0:
23375+ zoned_raid_size(mddev);
23376+ data_disks = sb->raid_disks;
23377+ break;
23378+ case 1:
23379+ data_disks = 1;
23380+ break;
23381+ case 4:
23382+ case 5:
23383+ data_disks = sb->raid_disks-1;
23384+ break;
23385+ default:
23386+ LOG_ERROR("[md%d] unkown level %d\n", mdidx(mddev), sb->level);
23387+ goto abort;
23388+ }
23389+ if (!evms_md_size[mdidx(mddev)])
23390+ evms_md_size[mdidx(mddev)] = sb->size * data_disks;
23391+
23392+ return 0;
23393+abort:
23394+ return 1;
23395+}
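+
+/*
+ * Example (illustrative): a RAID5 array of four equal members has
+ * data_disks = 3, so evms_md_size[] becomes sb->size * 3; one member's
+ * worth of capacity is consumed by parity.
+ */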
23396+
23397+
23398+#define TOO_BIG_CHUNKSIZE KERN_ERR \
23399+"too big chunk_size: %d > %d\n"
23400+
23401+#define TOO_SMALL_CHUNKSIZE KERN_ERR \
23402+"too small chunk_size: %d < %ld\n"
23403+
23404+#define BAD_CHUNKSIZE KERN_ERR \
23405+"no chunksize specified, see 'man raidtab'\n"
23406+
23407+static int do_md_run (mddev_t * mddev)
23408+{
23409+ int pnum, err;
23410+ int chunk_size;
23411+ struct list_head *tmp;
23412+ mdk_rdev_t *rdev;
23413+
23414+
23415+ if (!mddev->nb_dev) {
23416+ MD_BUG();
23417+ return -EINVAL;
23418+ }
23419+
23420+ if (mddev->pers)
23421+ return -EBUSY;
23422+
23423+ /*
23424+ * Resize disks to align partitions size on a given
23425+ * chunk size.
23426+ */
23427+ evms_md_size[mdidx(mddev)] = 0;
23428+
23429+ /*
23430+ * Analyze all RAID superblock(s)
23431+ */
23432+ if (evms_md_analyze_sbs(mddev)) {
23433+ MD_BUG();
23434+ return -EINVAL;
23435+ }
23436+
23437+ mddev->chunk_size = chunk_size = mddev->sb->chunk_size;
23438+ pnum = level_to_pers(mddev->sb->level);
23439+
23440+ if ((pnum != MULTIPATH) && (pnum != RAID1)) {
23441+ if (!chunk_size) {
23442+ /*
23443+ * 'default chunksize' in the old md code used to
23444+ * be PAGE_SIZE, baaad.
23445+ * we abort here to be on the safe side. We don't
23446+ * want to continue the bad practice.
23447+ */
23448+ printk(BAD_CHUNKSIZE);
23449+ return -EINVAL;
23450+ }
23451+ if (chunk_size > MAX_CHUNK_SIZE) {
23452+ printk(TOO_BIG_CHUNKSIZE, chunk_size, MAX_CHUNK_SIZE);
23453+ return -EINVAL;
23454+ }
23455+ /*
23456+ * chunk-size has to be a power of 2 and a multiple of PAGE_SIZE
23457+ */
23458+ if ( (1 << ffz(~chunk_size)) != chunk_size) {
23459+ MD_BUG();
23460+ return -EINVAL;
23461+ }
23462+ if (chunk_size < PAGE_SIZE) {
23463+ printk(TOO_SMALL_CHUNKSIZE, chunk_size, PAGE_SIZE);
23464+ return -EINVAL;
23465+ }
23466+ } else
23467+ if (chunk_size)
23468+ printk(KERN_INFO "RAID level %d does not need chunksize! Continuing anyway.\n", mddev->sb->level);
23469+
23470+ if (pnum >= MAX_PERSONALITY) {
23471+ MD_BUG();
23472+ return -EINVAL;
23473+ }
23474+ if (!pers[pnum])
23475+ {
23476+#ifdef CONFIG_KMOD
23477+ char module_name[80];
23478+ sprintf (module_name, "md-personality-%d", pnum);
23479+ request_module (module_name);
23480+ if (!pers[pnum])
23481+#endif
23482+ {
23483+ printk(KERN_ERR "personality %d is not loaded!\n",
23484+ pnum);
23485+ return -EINVAL;
23486+ }
23487+ }
23488+ if (device_size_calculation(mddev))
23489+ return -EINVAL;
23490+
23491+ /*
23492+ * Drop all container device buffers, from now on
23493+ * the only valid external interface is through the md
23494+ * device.
23495+ * Also find largest hardsector size
23496+ */
23497+ md_hardsect_sizes[mdidx(mddev)] = 512;
23498+ ITERATE_RDEV(mddev,rdev,tmp) {
23499+ if (rdev->faulty)
23500+ continue;
23501+ invalidate_device(rdev->dev, 1);
23502+/* if (get_hardsect_size(rdev->dev)
23503+ > md_hardsect_sizes[mdidx(mddev)])
23504+ md_hardsect_sizes[mdidx(mddev)] =
23505+ get_hardsect_size(rdev->dev); */
23506+ if (rdev->node->hardsector_size > md_hardsect_sizes[mdidx(mddev)]) {
23507+ md_hardsect_sizes[mdidx(mddev)] = rdev->node->hardsector_size;
23508+ }
23509+
23510+ }
23511+ md_blocksizes[mdidx(mddev)] = 1024;
23512+ if (md_blocksizes[mdidx(mddev)] < md_hardsect_sizes[mdidx(mddev)])
23513+ md_blocksizes[mdidx(mddev)] = md_hardsect_sizes[mdidx(mddev)];
23514+
23515+ mddev->pers = pers[pnum];
23516+
23517+ err = mddev->pers->run(mddev);
23518+ if (err) {
23519+ LOG_WARNING("%s: pers->run() failed.\n", __FUNCTION__);
23520+ mddev->pers = NULL;
23521+ return -EINVAL;
23522+ }
23523+ mddev->sb->state &= ~(1 << MD_SB_CLEAN);
23524+
23525+ evms_md_update_sb(mddev);
23526+
23527+ if (incomplete_mddev(mddev)) {
23528+ LOG_DEFAULT("%s: [md%d] was incomplete!\n", __FUNCTION__, mdidx(mddev));
23529+ list_del(&mddev->incomplete_mddevs);
23530+ INIT_LIST_HEAD(&mddev->incomplete_mddevs);
23531+ }
23532+
23533+ list_add(&mddev->running_mddevs, &running_mddevs);
23534+
23535+ return (0);
23536+}
23537+
23538+#undef TOO_BIG_CHUNKSIZE
23539+#undef BAD_CHUNKSIZE
23540+
23541+
23542+#define OUT(x) do { err = (x); goto out; } while (0)
23543+
23544+
23545+#define STILL_MOUNTED KERN_WARNING \
23546+"md%d still mounted.\n"
23547+#define STILL_IN_USE \
23548+"md%d still in use.\n"
23549+
23550+static int do_md_stop (mddev_t * mddev, int ro)
23551+{
23552+ int err = 0, resync_interrupted = 0, clean = 0;
23553+ kdev_t dev = mddev_to_kdev(mddev);
23554+
23555+ if (atomic_read(&mddev->active)>1) {
23556+ printk(STILL_IN_USE, mdidx(mddev));
23557+ OUT(-EBUSY);
23558+ }
23559+
23560+ if (mddev->pers) {
23561+ /*
23562+ * It is safe to call stop here, it only frees private
23563+ * data. Also, it tells us if a device is unstoppable
23564+ * (eg. resyncing is in progress)
23565+ */
23566+ if (mddev->pers->stop_resync)
23567+ if (mddev->pers->stop_resync(mddev))
23568+ resync_interrupted = 1;
23569+
23570+ if (mddev->recovery_running)
23571+ evms_cs_interrupt_thread(evms_md_recovery_thread);
23572+
23573+ /*
23574+ * This synchronizes with signal delivery to the
23575+ * resync or reconstruction thread. It also nicely
23576+ * hangs the process if some reconstruction has not
23577+ * finished.
23578+ */
23579+ down(&mddev->recovery_sem);
23580+ up(&mddev->recovery_sem);
23581+
23582+ invalidate_device(dev, 1);
23583+
23584+ if (ro) {
23585+ if (mddev->ro)
23586+ OUT(-ENXIO);
23587+ mddev->ro = 1;
23588+ mddev->node->plugin = &md_plugin_header;
23589+ } else {
23590+ if (mddev->ro)
23591+ set_device_ro(dev, 0);
23592+ if (mddev->pers->stop(mddev)) {
23593+ if (mddev->ro)
23594+ set_device_ro(dev, 1);
23595+ OUT(-EBUSY);
23596+ }
23597+ if (mddev->ro)
23598+ mddev->ro = 0;
23599+ }
23600+ if (mddev->sb) {
23601+ /*
23602+ * mark it clean only if there was no resync
23603+ * interrupted.
23604+ */
23605+ if (!mddev->recovery_running && !resync_interrupted) {
23606+ LOG_DEBUG("%s: marking sb clean...\n", __FUNCTION__);
23607+ clean = 1;
23608+ }
23609+ evms_md_update_sb_sync(mddev, clean);
23610+ }
23611+ if (ro)
23612+ set_device_ro(dev, 1);
23613+ }
23614+
23615+ /*
23616+ * Free resources if final stop
23617+ */
23618+ if (!ro) {
23619+ printk (KERN_INFO "md%d stopped.\n", mdidx(mddev));
23620+ free_mddev(mddev);
23621+
23622+ } else
23623+ printk (KERN_INFO
23624+ "md%d switched to read-only mode.\n", mdidx(mddev));
23625+out:
23626+ return err;
23627+}
23628+
23629+
23630+static int evms_md_run_array (struct evms_logical_node ** discover_list, mddev_t *mddev)
23631+{
23632+ mdk_rdev_t *rdev;
23633+ struct list_head *tmp;
23634+ int err = 0;
23635+ uint flags = 0;
23636+
23637+ if (mddev->disks.prev == &mddev->disks) {
23638+ MD_BUG();
23639+ return -EINVAL;
23640+ }
23641+
23642+ LOG_DETAILS("%s: trying to run array md%d\n", __FUNCTION__,mdidx(mddev) );
23643+
23644+ ITERATE_RDEV(mddev,rdev,tmp) {
23645+ LOG_DETAILS(" <%s>\n", get_partition_name(rdev));
23646+ }
23647+
23648+ err = do_md_run (mddev);
23649+ if (!err) {
23650+ /*
23651+ * remove all nodes consumed by this md device from the discover list
23652+ */
23653+ ITERATE_RDEV(mddev,rdev,tmp) {
23654+ LOG_DETAILS(" removing %s from discover list.\n", get_partition_name(rdev));
23655+ evms_cs_remove_logical_node_from_list(discover_list,rdev->node);
23656+ flags |= rdev->node->flags;
23657+ }
23658+ err = evms_md_create_logical_node(discover_list,mddev,flags);
23659+ if (!err) {
23660+ exported_nodes++;
23661+ }
23662+ } else {
23663+ LOG_WARNING("%s: could not start [md%d] containing: \n",__FUNCTION__,mdidx(mddev));
23664+ ITERATE_RDEV(mddev,rdev,tmp) {
23665+ LOG_WARNING(" (%s, desc_nr=%d)\n", get_partition_name(rdev), rdev->desc_nr);
23666+ }
23667+ LOG_WARNING("%s: will try restart [md%d] again later.\n",__FUNCTION__,mdidx(mddev));
23668+
23669+ mddev->sb_dirty = 0;
23670+ }
23671+ return err;
23672+}
23673+
23674+static void evms_md_run_incomplete_array (struct evms_logical_node ** discover_list, mddev_t *mddev)
23675+{
23676+ mdk_rdev_t *rdev;
23677+
23678+ LOG_DEFAULT("%s [md%d]\n",
23679+ __FUNCTION__, mdidx(mddev));
23680+ if (evms_md_run_array(discover_list,mddev) == 0) {
23681+ /*
23682+ * We succeeded running this MD device.
23683+ * Now read MD superblock on this newly created MD node.
23684+ */
23685+ if (mddev->node &&
23686+ (evms_md_import_device(discover_list,mddev->node) == 0)) {
23687+ /*
23688+ * Yes, there is a superblock on this MD node.
23689+ * We probably have a MD stacking case here.
23690+ */
23691+ rdev = evms_md_find_rdev_all(mddev->node);
23692+ if (rdev) {
23693+ list_add(&rdev->pending, &pending_raid_disks);
23694+ evms_md_run_devices(discover_list);
23695+ } else {
23696+ LOG_WARNING("%s: imported %s but no rdev was found!\n",
23697+ __FUNCTION__,
23698+ evms_md_partition_name(mddev->node));
23699+ }
23700+ }
23701+ }
23702+ if (incomplete_mddev(mddev)) {
23703+ list_del(&mddev->incomplete_mddevs);
23704+ INIT_LIST_HEAD(&mddev->incomplete_mddevs);
23705+ }
23706+}
23707+
23708+/*
23709+ * let's try to run arrays based on all disks that have arrived
23710+ * until now. (those are in the ->pending list)
23711+ *
23712+ * the method: pick the first pending disk, collect all disks with
23713+ * the same UUID, remove all from the pending list and put them into
23714+ * the 'same_array' list. Then order this list based on superblock
23715+ * update time (freshest comes first), kick out 'old' disks and
23716+ * compare superblocks. If everything's fine then run it.
23717+ *
23718+ * If "unit" is allocated, then bump its reference count
23719+ */
23720+static void evms_md_run_devices (struct evms_logical_node **discover_list)
23721+{
23722+ struct list_head candidates;
23723+ struct list_head *tmp;
23724+ mdk_rdev_t *rdev0, *rdev;
23725+ mddev_t *mddev;
23726+ kdev_t md_kdev;
23727+
23728+
23729+ LOG_ENTRY_EXIT("%s: ENTRY\n", __FUNCTION__);
23730+ while (pending_raid_disks.next != &pending_raid_disks) {
23731+ rdev0 = list_entry(pending_raid_disks.next,
23732+ mdk_rdev_t, pending);
23733+ MD_INIT_LIST_HEAD(&candidates);
23734+ ITERATE_RDEV_PENDING(rdev,tmp) {
23735+ if (uuid_equal(rdev0, rdev)) {
23736+ if (!sb_equal(rdev0->sb, rdev->sb)) {
23737+ LOG_DETAILS("%s has same UUID as %s, but superblocks differ ...\n",\
23738+ get_partition_name(rdev),get_partition_name(rdev0));
23739+ continue;
23740+ }
23741+ list_del(&rdev->pending);
23742+ list_add(&rdev->pending, &candidates);
23743+ }
23744+ }
23745+
23746+ /*
23747+ * now we have a set of devices, with all of them having
23748+ * mostly sane superblocks. It's time to allocate the
23749+ * mddev.
23750+ */
23751+ md_kdev = MKDEV(MD_MAJOR, rdev0->sb->md_minor);
23752+ mddev = kdev_to_mddev(md_kdev);
23753+ if (mddev && (!incomplete_mddev(mddev))) {
23754+ LOG_DETAILS("md%d already running, cannot run %s\n",
23755+ mdidx(mddev), get_partition_name(rdev0));
23756+
23757+ ITERATE_RDEV(mddev,rdev,tmp) {
23758+ /*
23759+ * This is EVMS re-discovery!
23760+ * Remove all nodes consumed by this md device from the discover list
23761+ */
23762+ evms_cs_remove_logical_node_from_list(discover_list,rdev->node);
23763+ }
23764+
23765+ ITERATE_RDEV_GENERIC(candidates,pending,rdev,tmp) {
23766+ if (evms_md_find_mddev_all(rdev->node))
23767+ /*
23768+ * We have found an MD superblock on top of a running MD array.
23769+ * Delete rdev but keep the MD array.
23770+ */
23771+ evms_md_export_rdev(rdev, FALSE);
23772+ else
23773+ evms_md_export_rdev(rdev, TRUE);
23774+ }
23775+ continue;
23776+ }
23777+
23778+ if (!mddev) {
23779+ mddev = alloc_mddev(md_kdev);
23780+ if (mddev == NULL) {
23781+ LOG_ERROR("cannot allocate memory for md drive.\n");
23782+ break;
23783+ }
23784+ LOG_DETAILS("created md%d\n", mdidx(mddev));
23785+ } else {
23786+ LOG_DETAILS("%s: found INCOMPLETE md%d\n", __FUNCTION__, mdidx(mddev));
23787+ }
23788+
23789+ ITERATE_RDEV_GENERIC(candidates,pending,rdev,tmp) {
23790+ bind_rdev_to_array(rdev, mddev);
23791+ list_del(&rdev->pending);
23792+ MD_INIT_LIST_HEAD(&rdev->pending);
23793+ }
23794+
23795+ if ((mddev->nr_raid_disks >= rdev0->sb->raid_disks) ||
23796+ (mddev->nb_dev == rdev0->sb->nr_disks)) {
23797+ evms_md_run_array(discover_list,mddev);
23798+ } else {
23799+ LOG_DETAILS("THIS md%d IS INCOMPLETE, found %d devices, need %d\n",
23800+ mdidx(mddev), mddev->nr_raid_disks, rdev0->sb->raid_disks);
23801+ list_add(&mddev->incomplete_mddevs, &incomplete_mddevs);
23802+ ITERATE_RDEV(mddev,rdev,tmp) {
23803+ evms_cs_remove_logical_node_from_list(discover_list,rdev->node);
23804+ }
23805+ }
23806+ }
23807+ LOG_ENTRY_EXIT("%s: EXIT\n", __FUNCTION__);
23808+}
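
The grouping step above reduces to a short selection loop: take the first pending disk, keep every other pending disk whose UUID matches and whose superblock agrees. A minimal userspace sketch of that rule follows; toy_rdev, the single-int UUID and the sb_ok flag are illustrative stand-ins, not the kernel's mdk_rdev_t or its uuid_equal()/sb_equal() helpers.

#include <stdio.h>

struct toy_rdev {
	int uuid;	/* stands in for the 4-word md superblock UUID */
	int sb_ok;	/* stands in for sb_equal() agreeing with rdev0 */
};

int main(void)
{
	struct toy_rdev pending[] = {
		{ .uuid = 7, .sb_ok = 1 },
		{ .uuid = 9, .sb_ok = 1 },	/* different array */
		{ .uuid = 7, .sb_ok = 0 },	/* same UUID, stale superblock */
		{ .uuid = 7, .sb_ok = 1 },
	};
	int i, n = sizeof(pending) / sizeof(pending[0]);
	struct toy_rdev *rdev0 = &pending[0];

	/* collect the candidate set for rdev0's array */
	for (i = 0; i < n; i++) {
		if (pending[i].uuid != rdev0->uuid)
			continue;	/* belongs to some other array */
		if (!pending[i].sb_ok) {
			printf("disk %d: same UUID but superblocks differ, skipped\n", i);
			continue;
		}
		printf("disk %d: candidate for rdev0's array\n", i);
	}
	return 0;
}
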
23809+
23810+void evms_md_recover_arrays(void)
23811+{
23812+ if (!evms_md_recovery_thread) {
23813+ MD_BUG();
23814+ return;
23815+ }
23816+ evms_cs_wakeup_thread(evms_md_recovery_thread);
23817+}
23818+
23819+int evms_md_error_dev(
23820+ mddev_t *mddev,
23821+ kdev_t dev)
23822+{
23823+ mdk_rdev_t * rdev;
23824+
23825+ rdev = evms_md_find_rdev(mddev, dev);
23826+ if (rdev) {
23827+ return evms_md_error(mddev,rdev->node);
23828+ } else {
23829+ LOG_ERROR("%s: could not find %s in md%d\n",
23830+ __FUNCTION__, org_partition_name(dev), mdidx(mddev));
23831+ return 0;
23832+ }
23833+}
23834+
23835+int evms_md_error(
23836+ mddev_t *mddev,
23837+ struct evms_logical_node *node)
23838+{
23839+ mdk_rdev_t * rrdev;
23840+
23841+ /* check for NULL first */
23842+ if (!mddev) {
23843+ MD_BUG();
23844+ return 0;
23845+ }
23846+ LOG_ERROR("evms_md_error dev:(md%d), node:(%s), (caller: %p,%p,%p,%p).\n",
23847+ mdidx(mddev), node->name,
23848+ __builtin_return_address(0),__builtin_return_address(1),
23849+ __builtin_return_address(2),__builtin_return_address(3));
23850+
23851+ rrdev = evms_md_find_rdev_from_node(mddev, node);
23852+ if (!rrdev || rrdev->faulty)
23853+ return 0;
23854+ if (!mddev->pers->error_handler
23855+ || mddev->pers->error_handler(mddev,node) <= 0) {
23856+ free_disk_sb(rrdev);
23857+ rrdev->faulty = 1;
23858+ } else
23859+ return 1;
23860+ /*
23861+ * if recovery was running, stop it now.
23862+ */
23863+ if (mddev->pers->stop_resync)
23864+ mddev->pers->stop_resync(mddev);
23865+ if (mddev->recovery_running)
23866+ evms_cs_interrupt_thread(evms_md_recovery_thread);
23867+ evms_md_recover_arrays();
23868+
23869+ return 0;
23870+}
23871+
23872+int evms_register_md_personality (int pnum, mdk_personality_t *p)
23873+{
23874+ if (pnum >= MAX_PERSONALITY) {
23875+ MD_BUG();
23876+ return -EINVAL;
23877+ }
23878+
23879+ if (pers[pnum]) {
23880+ MD_BUG();
23881+ return -EBUSY;
23882+ }
23883+
23884+ pers[pnum] = p;
23885+ LOG_DETAILS("%s personality registered as nr %d\n",p->name, pnum);
23886+ return 0;
23887+}
23888+
23889+int evms_unregister_md_personality (int pnum)
23890+{
23891+ if (pnum >= MAX_PERSONALITY) {
23892+ MD_BUG();
23893+ return -EINVAL;
23894+ }
23895+
23896+ printk(KERN_INFO "%s personality unregistered\n", pers[pnum]->name);
23897+ pers[pnum] = NULL;
23898+ return 0;
23899+}
23900+
23901+mdp_disk_t *evms_md_get_spare(mddev_t *mddev)
23902+{
23903+ mdp_super_t *sb = mddev->sb;
23904+ mdp_disk_t *disk;
23905+ mdk_rdev_t *rdev;
23906+ int i, j;
23907+
23908+ for (i = 0, j = 0; j < mddev->nb_dev; i++) {
23909+ rdev = evms_md_find_rdev_nr(mddev, i);
23910+ if (rdev == NULL)
23911+ continue;
23912+ j++;
23913+ if (rdev->faulty)
23914+ continue;
23915+ if (!rdev->sb) {
23916+ if (!rdev->virtual_spare)
23917+ MD_BUG();
23918+ continue;
23919+ }
23920+ disk = &sb->disks[rdev->desc_nr];
23921+ if (disk_faulty(disk)) {
23922+ MD_BUG();
23923+ continue;
23924+ }
23925+ if (disk_active(disk))
23926+ continue;
23927+ return disk;
23928+ }
23929+ return NULL;
23930+}
23931+
23932+static mdp_disk_t *evms_md_find_disk(mddev_t *mddev, kdev_t dev)
23933+{
23934+ mdp_super_t *sb = mddev->sb;
23935+ mdp_disk_t *disk;
23936+ int i;
23937+
23938+ for (i=0; i < MD_SB_DISKS; i++) {
23939+ disk = &sb->disks[i];
23940+ if ((disk->major == MAJOR(dev)) && (disk->minor == MINOR(dev)))
23941+ return disk;
23942+ }
23943+ return NULL;
23944+}
23945+
23946+static unsigned int sync_io[DK_MAX_MAJOR][DK_MAX_DISK];
23947+void evms_md_sync_acct(
23948+ kdev_t dev,
23949+ unsigned long nr_sectors)
23950+{
23951+ unsigned int major = MAJOR(dev);
23952+ unsigned int index;
23953+
23954+ index = disk_index(dev);
23955+ if ((index >= DK_MAX_DISK) || (major >= DK_MAX_MAJOR))
23956+ return;
23957+
23958+ sync_io[major][index] += nr_sectors;
23959+}
23960+
23961+static int is_mddev_idle(mddev_t *mddev)
23962+{
23963+ mdk_rdev_t * rdev;
23964+ struct list_head *tmp;
23965+ int idle;
23966+ unsigned long curr_events;
23967+
23968+ idle = 1;
23969+ ITERATE_RDEV(mddev,rdev,tmp) {
23970+ int major = MAJOR(rdev->dev);
23971+ int idx = disk_index(rdev->dev);
23972+
23973+ if ((idx >= DK_MAX_DISK) || (major >= DK_MAX_MAJOR))
23974+ continue;
23975+
23976+ curr_events = kstat.dk_drive_rblk[major][idx] +
23977+ kstat.dk_drive_wblk[major][idx] ;
23978+ curr_events -= sync_io[major][idx];
23979+ if ((curr_events - rdev->last_events) > 32) {
23980+ rdev->last_events = curr_events;
23981+ idle = 0;
23982+ }
23983+ }
23984+ return idle;
23985+}
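
is_mddev_idle() above treats a member disk as busy when its kstat I/O counters, minus the resync traffic accounted via evms_md_sync_acct(), advanced by more than 32 events since the previous poll. A standalone sketch of that delta heuristic, with made-up counter values:

#include <stdio.h>

struct toy_disk {
	unsigned long total_events;	/* cumulative r+w blocks, kstat-style */
	unsigned long sync_events;	/* traffic generated by the resync itself */
	unsigned long last_events;	/* baseline from the previous poll */
};

static int toy_is_idle(struct toy_disk *d, int n)
{
	int i, idle = 1;

	for (i = 0; i < n; i++) {
		unsigned long curr = d[i].total_events - d[i].sync_events;

		if (curr - d[i].last_events > 32) {
			d[i].last_events = curr;	/* new baseline */
			idle = 0;			/* foreign I/O seen */
		}
	}
	return idle;
}

int main(void)
{
	struct toy_disk disks[2] = {
		{ .total_events = 1000, .sync_events = 900, .last_events = 0 },
		{ .total_events =  500, .sync_events = 500, .last_events = 0 },
	};

	/* first poll initializes the baselines (and reports busy) ... */
	printf("first poll:  idle=%d\n", toy_is_idle(disks, 2));
	/* ... no foreign I/O since then, so the array now looks idle */
	printf("second poll: idle=%d\n", toy_is_idle(disks, 2));
	return 0;
}
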
23986+
23987+MD_DECLARE_WAIT_QUEUE_HEAD(evms_resync_wait);
23988+
23989+void evms_md_done_sync(mddev_t *mddev, int blocks, int ok)
23990+{
23991+ /* another "blocks" (512byte) blocks have been synced */
23992+ atomic_sub(blocks, &mddev->recovery_active);
23993+ wake_up(&mddev->recovery_wait);
23994+ if (!ok) {
23995+ // stop recovery, signal do_sync ....
23996+ }
23997+}
23998+
23999+#define SYNC_MARKS 10
24000+#define SYNC_MARK_STEP (3*HZ)
24001+int evms_md_do_sync(mddev_t *mddev, mdp_disk_t *spare)
24002+{
24003+ mddev_t *mddev2;
24004+ unsigned int max_sectors, currspeed,
24005+ j, window, err, serialize;
24006+ unsigned long mark[SYNC_MARKS];
24007+ unsigned long mark_cnt[SYNC_MARKS];
24008+ int last_mark,m;
24009+ struct list_head *tmp;
24010+ unsigned long last_check;
24011+
24012+
24013+ err = down_interruptible(&mddev->resync_sem);
24014+ if (err)
24015+ goto out_nolock;
24016+
24017+recheck:
24018+ serialize = 0;
24019+ ITERATE_MDDEV(mddev2,tmp) {
24020+ if (mddev2 == mddev)
24021+ continue;
24022+ if (mddev2->curr_resync && match_mddev_units(mddev,mddev2)) {
24023+ LOG_DEFAULT("delaying resync of md%d until md%d "
24024+ "has finished resync (they share one or more physical units)\n",
24025+ mdidx(mddev), mdidx(mddev2));
24026+ serialize = 1;
24027+ break;
24028+ }
24029+ }
24030+ if (serialize) {
24031+ interruptible_sleep_on(&evms_resync_wait);
24032+ if (md_signal_pending(current)) {
24033+ md_flush_signals();
24034+ err = -EINTR;
24035+ goto out;
24036+ }
24037+ goto recheck;
24038+ }
24039+
24040+ mddev->curr_resync = 1;
24041+
24042+ max_sectors = mddev->sb->size<<1;
24043+
24044+ LOG_DEFAULT("syncing RAID array md%d\n", mdidx(mddev));
24045+ LOG_DEFAULT("minimum _guaranteed_ reconstruction speed: %d KB/sec/disc.\n",
24046+ sysctl_speed_limit_min);
24047+ LOG_DEFAULT("using maximum available idle IO bandwith "
24048+ "(but not more than %d KB/sec) for reconstruction.\n",
24049+ sysctl_speed_limit_max);
24050+
24051+ /*
24052+ * Resync has low priority.
24053+ */
24054+#ifdef O1_SCHEDULER
24055+ set_user_nice(current,19);
24056+#else
24057+ current->nice = 19;
24058+#endif
24059+
24060+ is_mddev_idle(mddev); /* this also initializes IO event counters */
24061+ for (m = 0; m < SYNC_MARKS; m++) {
24062+ mark[m] = jiffies;
24063+ mark_cnt[m] = 0;
24064+ }
24065+ last_mark = 0;
24066+ mddev->resync_mark = mark[last_mark];
24067+ mddev->resync_mark_cnt = mark_cnt[last_mark];
24068+
24069+ /*
24070+ * Tune reconstruction:
24071+ */
24072+ window = MD_READAHEAD*(PAGE_SIZE/512);
24073+ LOG_DEFAULT("using %dk window, over a total of %d blocks.\n",
24074+ window/2,max_sectors/2);
24075+
24076+ atomic_set(&mddev->recovery_active, 0);
24077+ init_waitqueue_head(&mddev->recovery_wait);
24078+ last_check = 0;
24079+ for (j = 0; j < max_sectors;) {
24080+ int sectors;
24081+
24082+ sectors = mddev->pers->sync_request(mddev, j);
24083+
24084+ if (sectors < 0) {
24085+ err = sectors;
24086+ goto out;
24087+ }
24088+ atomic_add(sectors, &mddev->recovery_active);
24089+ j += sectors;
24090+ mddev->curr_resync = j;
24091+
24092+ if (last_check + window > j)
24093+ continue;
24094+
24095+ last_check = j;
24096+
24097+ run_task_queue(&tq_disk);
24098+
24099+ repeat:
24100+ if (jiffies >= mark[last_mark] + SYNC_MARK_STEP ) {
24101+ /* step marks */
24102+ int next = (last_mark+1) % SYNC_MARKS;
24103+
24104+ mddev->resync_mark = mark[next];
24105+ mddev->resync_mark_cnt = mark_cnt[next];
24106+ mark[next] = jiffies;
24107+ mark_cnt[next] = j - atomic_read(&mddev->recovery_active);
24108+ last_mark = next;
24109+ }
24110+
24111+
24112+ if (md_signal_pending(current)) {
24113+ /*
24114+ * got a signal, exit.
24115+ */
24116+ mddev->curr_resync = 0;
24117+ LOG_DEFAULT("evms_md_do_sync() got signal ... exiting\n");
24118+ md_flush_signals();
24119+ err = -EINTR;
24120+ goto out;
24121+ }
24122+
24123+ /*
24124+		 * this loop exits only when we are slower than the
24125+		 * 'hard' speed limit, or when the system has been
24126+		 * IO-idle for a jiffy.
24127+ * the system might be non-idle CPU-wise, but we only care
24128+ * about not overloading the IO subsystem. (things like an
24129+ * e2fsck being done on the RAID array should execute fast)
24130+ */
24131+ if (md_need_resched(current))
24132+ schedule();
24133+
24134+ currspeed = (j-mddev->resync_mark_cnt)/2/((jiffies-mddev->resync_mark)/HZ +1) +1;
24135+
24136+ if (currspeed > sysctl_speed_limit_min) {
24137+#ifdef O1_SCHEDULER
24138+ set_user_nice(current,19);
24139+#else
24140+ current->nice = 19;
24141+#endif
24142+
24143+ if ((currspeed > sysctl_speed_limit_max) ||
24144+ !is_mddev_idle(mddev)) {
24145+#ifdef O1_SCHEDULER
24146+ set_current_state(TASK_INTERRUPTIBLE);
24147+#else
24148+ current->state = TASK_INTERRUPTIBLE;
24149+#endif
24150+ md_schedule_timeout(HZ/4);
24151+ goto repeat;
24152+ }
24153+ } else
24154+#ifdef O1_SCHEDULER
24155+ set_user_nice(current,-20);
24156+#else
24157+ current->nice = -20;
24158+#endif
24159+ }
24160+ LOG_DEFAULT("md%d: sync done.\n",mdidx(mddev));
24161+ err = 0;
24162+ /*
24163+ * this also signals 'finished resyncing' to md_stop
24164+ */
24165+out:
24166+ wait_event(mddev->recovery_wait, atomic_read(&mddev->recovery_active)==0);
24167+ up(&mddev->resync_sem);
24168+out_nolock:
24169+ mddev->curr_resync = 0;
24170+ wake_up(&evms_resync_wait);
24171+ return err;
24172+}
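
The throttling above measures speed against a ring of SYNC_MARKS (timestamp, sectors-done) pairs that steps every SYNC_MARK_STEP, so currspeed reflects roughly the last 30 seconds rather than the whole run. A compressed userspace sketch of the same sliding window, with fake tick values standing in for jiffies:

#include <stdio.h>

#define MARKS		10
#define MARK_STEP	300	/* stands in for 3*HZ jiffies */

int main(void)
{
	unsigned long mark[MARKS] = { 0 }, mark_cnt[MARKS] = { 0 };
	unsigned long now, done = 0, speed = 0;
	int last_mark = 0;

	/* simulate 6000 ticks of resync at 2 sectors per tick */
	for (now = 1; now <= 6000; now++) {
		done += 2;
		if (now >= mark[last_mark] + MARK_STEP) {
			int next = (last_mark + 1) % MARKS;

			/* the slot about to be overwritten is the oldest
			 * mark; measure against it, as resync_mark does */
			speed = (done - mark_cnt[next]) /
				((now - mark[next]) / 100 + 1);
			mark[next] = now;
			mark_cnt[next] = done;
			last_mark = next;
		}
		if (now % 3000 == 0)
			printf("tick %lu: ~%lu sectors/100 ticks\n", now, speed);
	}
	return 0;
}
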
24173+
24174+
24175+
24176+/*
24177+ * This is a kernel thread which syncs a spare disk with the active array
24178+ *
24179+ * the amount of foolproofing might seem to be a tad excessive, but an
24180+ * early (not so error-safe) version of raid1syncd synced the first 0.5 gigs
24181+ * of my root partition with the first 0.5 gigs of my /home partition ... so
24182+ * i'm a bit nervous ;)
24183+ */
24184+void evms_md_do_recovery(void *data)
24185+{
24186+ int err;
24187+ mddev_t *mddev;
24188+ mdp_super_t *sb;
24189+ mdp_disk_t *spare;
24190+ struct list_head *tmp;
24191+
24192+ LOG_DEFAULT("recovery thread got woken up ...\n");
24193+restart:
24194+ ITERATE_MDDEV(mddev,tmp) {
24195+
24196+ sb = mddev->sb;
24197+ if (!sb)
24198+ continue;
24199+ if (mddev->recovery_running)
24200+ continue;
24201+ if (sb->active_disks == sb->raid_disks)
24202+ continue;
24203+ if (!sb->spare_disks) {
24204+ LOG_ERROR(" [md%d] no spare disk to reconstruct array! "
24205+ "-- continuing in degraded mode\n", mdidx(mddev));
24206+ continue;
24207+ }
24208+
24209+ spare = NULL;
24210+
24211+ if (!spare) {
24212+ /*
24213+ * now here we get the spare and resync it.
24214+ */
24215+ spare = evms_md_get_spare(mddev);
24216+ }
24217+ if (!spare)
24218+ continue;
24219+
24220+ LOG_DEFAULT(" [md%d] resyncing spare disk %s to replace failed disk\n",
24221+ mdidx(mddev), org_partition_name(MKDEV(spare->major,spare->minor)));
24222+ if (!mddev->pers->diskop)
24223+ continue;
24224+
24225+ if (mddev->pers->diskop(mddev, &spare, DISKOP_SPARE_WRITE))
24226+ continue;
24227+
24228+ down(&mddev->recovery_sem);
24229+ mddev->recovery_running = 1;
24230+ err = evms_md_do_sync(mddev, spare);
24231+ if (err == -EIO) {
24232+ LOG_DEFAULT("[md%d] spare disk %s failed, skipping to next spare.\n",
24233+ mdidx(mddev), org_partition_name(MKDEV(spare->major,spare->minor)));
24234+ if (!disk_faulty(spare)) {
24235+ mddev->pers->diskop(mddev,&spare,DISKOP_SPARE_INACTIVE);
24236+ mark_disk_faulty(spare);
24237+ mark_disk_nonsync(spare);
24238+ mark_disk_inactive(spare);
24239+ sb->spare_disks--;
24240+ sb->working_disks--;
24241+ sb->failed_disks++;
24242+ }
24243+ } else
24244+ if (disk_faulty(spare))
24245+ mddev->pers->diskop(mddev, &spare,
24246+ DISKOP_SPARE_INACTIVE);
24247+ if (err == -EINTR || err == -ENOMEM) {
24248+ /*
24249+ * Recovery got interrupted, or ran out of mem ...
24250+ * signal back that we have finished using the array.
24251+ */
24252+ mddev->pers->diskop(mddev, &spare,
24253+ DISKOP_SPARE_INACTIVE);
24254+ up(&mddev->recovery_sem);
24255+ mddev->recovery_running = 0;
24256+ continue;
24257+ } else {
24258+ mddev->recovery_running = 0;
24259+ up(&mddev->recovery_sem);
24260+ }
24261+ if (!disk_faulty(spare)) {
24262+ /*
24263+ * the SPARE_ACTIVE diskop possibly changes the
24264+ * pointer too
24265+ */
24266+ mddev->pers->diskop(mddev, &spare, DISKOP_SPARE_ACTIVE);
24267+ mark_disk_sync(spare);
24268+ mark_disk_active(spare);
24269+ sb->active_disks++;
24270+ sb->spare_disks--;
24271+ }
24272+ mddev->sb_dirty = 1;
24273+ evms_md_update_sb(mddev);
24274+ goto restart;
24275+ }
24276+ LOG_DEFAULT("recovery thread finished ...\n");
24277+
24278+}
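
The error paths above adjust a handful of mdp_super_t counters; the invariant is that a disk leaves the spare count exactly once, into either active_disks or failed_disks. A toy illustration of that bookkeeping, with made-up starting counts:

#include <stdio.h>

struct toy_sb {
	int active_disks, working_disks, spare_disks, failed_disks;
};

static void spare_ok(struct toy_sb *sb)	/* DISKOP_SPARE_ACTIVE path above */
{
	sb->active_disks++;
	sb->spare_disks--;
}

static void spare_failed(struct toy_sb *sb)	/* the -EIO path above */
{
	sb->spare_disks--;
	sb->working_disks--;
	sb->failed_disks++;
}

int main(void)
{
	struct toy_sb sb = { .active_disks = 1, .working_disks = 3,
			     .spare_disks = 2, .failed_disks = 1 };

	spare_failed(&sb);	/* first spare dies during the resync */
	spare_ok(&sb);		/* second spare completes and goes active */
	printf("active=%d working=%d spare=%d failed=%d\n",
	       sb.active_disks, sb.working_disks,
	       sb.spare_disks, sb.failed_disks);
	return 0;
}
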
24279+
24280+static void evms_md_create_recovery_thread(void)
24281+{
24282+ static char * name = "evms_mdrecoveryd";
24283+
24284+ if (!evms_md_recovery_thread) {
24285+ /* Create MD recovery thread */
24286+ evms_md_recovery_thread = evms_cs_register_thread(evms_md_do_recovery, NULL, name);
24287+ if (!evms_md_recovery_thread)
24288+			LOG_SERIOUS("%s: evms_cs_register_thread failed\n", __FUNCTION__);
24289+ }
24290+}
24291+
24292+static void evms_md_destroy_recovery_thread(void)
24293+{
24294+ if (evms_md_recovery_thread && !MOD_IN_USE) {
24295+ /* Destroy MD recovery thread */
24296+ evms_cs_unregister_thread(evms_md_recovery_thread);
24297+ evms_md_recovery_thread = NULL;
24298+ }
24299+}
24300+
24301+/**
24302+ * evms_md_create_logical_node
24303+ **/
24304+static int evms_md_create_logical_node(
24305+ struct evms_logical_node **discover_list,
24306+ mddev_t *mddev,
24307+ uint flags)
24308+{
24309+ int rc;
24310+ struct evms_md *evms_md = NULL;
24311+ struct evms_logical_node *newnode = NULL;
24312+ struct evms_plugin_header *hdr = NULL;
24313+ struct evms_plugin_fops *fops = NULL;
24314+
24315+ rc = evms_cs_allocate_logical_node(&newnode);
24316+ if (!rc) {
24317+ evms_md = kmalloc(sizeof(*evms_md), GFP_KERNEL);
24318+ if (!evms_md) {
24319+ rc = -ENOMEM;
24320+ } else {
24321+
24322+ memset(evms_md,0,sizeof(*evms_md));
24323+ evms_md->mddev = mddev;
24324+
24325+ fops = kmalloc(sizeof(*fops), GFP_KERNEL);
24326+ if (fops) {
24327+ /* copy MD plugin header
24328+ * copy function table
24329+ * replace read and write function pointers.
24330+ */
24331+ evms_md->instance_plugin_hdr = md_plugin_header;
24332+ memcpy(fops, &md_fops, sizeof(*fops));
24333+ fops->read = mddev->pers->read;
24334+ fops->write = mddev->pers->write;
24335+ evms_md->instance_plugin_hdr.fops = fops;
24336+ hdr = &evms_md->instance_plugin_hdr;
24337+ } else {
24338+ LOG_WARNING("%s: No memory to copy function table\n",__FUNCTION__);
24339+ rc = 0; /* clear rc and continue */
24340+ hdr = &md_plugin_header;
24341+ }
24342+ }
24343+ }
24344+
24345+ if (!rc && hdr) {
24346+ memset(newnode,0,sizeof(*newnode));
24347+ newnode->plugin = hdr;
24348+ newnode->total_vsectors = (u64)evms_md_size[mdidx(mddev)] * 2;
24349+ newnode->block_size = md_blocksizes[mdidx(mddev)];
24350+ newnode->hardsector_size = md_hardsect_sizes[mdidx(mddev)];
24351+ sprintf(newnode->name,"md/md%d",mdidx(mddev));
24352+ newnode->private = evms_md;
24353+ newnode->flags = flags;
24354+
24355+ rc = evms_cs_add_logical_node_to_list(discover_list, newnode);
24356+ if (rc) {
24357+ LOG_ERROR("%s: could not add md node %s\n", __FUNCTION__, newnode->name);
24358+ } else {
24359+ LOG_DEBUG("%s: added [%s] to discover list (total_vsectors="PFU64")\n",
24360+ __FUNCTION__, newnode->name, newnode->total_vsectors);
24361+ }
24362+ }
24363+
24364+ if (!rc) {
24365+ mddev->node = newnode;
24366+ } else {
24367+ if (evms_md) {
24368+ if (fops)
24369+ kfree(fops);
24370+ kfree(evms_md);
24371+ }
24372+ if (newnode)
24373+ evms_cs_deallocate_logical_node(newnode);
24374+ }
24375+ return rc;
24376+}
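
The fops handling above is a per-instance override: the plugin-wide function table is copied, and only read/write are repointed at the personality's routines so per-I/O calls skip a dispatch layer, while ioctl and the rest stay shared. A self-contained sketch of that pattern with toy types:

#include <stdio.h>
#include <string.h>

struct toy_fops {
	void (*read)(const char *who);
	void (*write)(const char *who);
	void (*ioctl)(const char *who);
};

static void generic_read(const char *w)  { printf("%s: generic read\n", w); }
static void generic_write(const char *w) { printf("%s: generic write\n", w); }
static void generic_ioctl(const char *w) { printf("%s: generic ioctl\n", w); }
static void raid1_read(const char *w)    { printf("%s: raid1 read\n", w); }
static void raid1_write(const char *w)   { printf("%s: raid1 write\n", w); }

static const struct toy_fops plugin_fops = {
	generic_read, generic_write, generic_ioctl
};

int main(void)
{
	struct toy_fops instance;

	memcpy(&instance, &plugin_fops, sizeof(instance));
	instance.read  = raid1_read;	/* like fops->read  = mddev->pers->read  */
	instance.write = raid1_write;	/* like fops->write = mddev->pers->write */

	instance.read("md0");	/* personality routine */
	instance.ioctl("md0");	/* everything else stays shared */
	return 0;
}
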
24377+
24378+
24379+/*
24380+ * Function: evms_md_autostart_arrays
24381+ * Discover MD "extended" devices
24382+ * Add MD "extended" devices to pending list for further processing
24383+ */
24384+static void evms_md_autostart_arrays (struct evms_logical_node **discover_list)
24385+{
24386+ struct evms_logical_node *node, *next_node;
24387+ mdk_rdev_t *rdev;
24388+ int rc=0;
24389+
24390+ LOG_ENTRY_EXIT(":autostart_arrays() ENTRY\n");
24391+
24392+ /* examine each node on the discover list */
24393+ next_node = *discover_list;
24394+ while(next_node) {
24395+ node = next_node;
24396+ next_node = node->next;
24397+
24398+ rc = evms_md_import_device(discover_list, node);
24399+ if (rc && (rc != -EEXIST)) {
24400+			LOG_EXTRA("autostart_arrays(): %s is not an MD device\n",evms_md_partition_name(node));
24401+ continue;
24402+ }
24403+
24404+ /*
24405+ * Sanity checks:
24406+ */
24407+ rdev = evms_md_find_rdev_all(node);
24408+ if (!rdev) {
24409+ LOG_ERROR("find_rdev_all() failed\n");
24410+ continue;
24411+ }
24412+ if (rdev->faulty) {
24413+ MD_BUG();
24414+ continue;
24415+ }
24416+
24417+ if (!rc) {
24418+ list_add(&rdev->pending, &pending_raid_disks);
24419+ } else if (rc == -EEXIST) {
24420+ struct evms_logical_node *md_node;
24421+ /*
24422+ * Must be in a re-discovery process here.
24423+ * Find the EVMS MD node that this rdev is a member of
24424+ */
24425+ if (rdev->mddev) {
24426+ md_node = rdev->mddev->node;
24427+ if (md_node) {
24428+ rc = evms_cs_add_logical_node_to_list(discover_list,md_node);
24429+ switch (rc) {
24430+ case 0:
24431+ exported_nodes++;
24432+ LOG_DETAILS("Added MD node (%s) to discover list\n",
24433+ md_node->name);
24434+ break;
24435+ case 1: /* already on the list */
24436+ case 2: /* already on the list */
24437+ break;
24438+ default:
24439+ LOG_WARNING("could not add md node (%s), rc=%d\n",
24440+ md_node->name, rc);
24441+ }
24442+ } else {
24443+ LOG_ERROR("This MD device [md%d] does not have an EVMS logical node.\n",
24444+ rdev->mddev->__minor);
24445+ }
24446+ } else {
24447+ LOG_ERROR("This device [%s] does not belong to any array!\n",
24448+ get_partition_name(rdev));
24449+ evms_md_export_rdev(rdev, TRUE);
24450+ }
24451+ evms_cs_remove_logical_node_from_list(discover_list,node);
24452+ }
24453+ }
24454+
24455+ evms_md_run_devices(discover_list);
24456+ LOG_DETAILS("EVMD MD:autostart_arrays() EXIT (exported_nodes=%d)\n",exported_nodes);
24457+}
24458+
24459+#ifdef CONFIG_PROC_FS
24460+static int status_resync(char * page, off_t * offset, int count, mddev_t * mddev)
24461+{
24462+ int sz = 0;
24463+ off_t off = *offset;
24464+ unsigned long max_blocks, resync, res, dt, db, rt;
24465+
24466+ resync = (mddev->curr_resync - atomic_read(&mddev->recovery_active))/2;
24467+ max_blocks = mddev->sb->size;
24468+
24469+ /*
24470+ * Should not happen.
24471+ */
24472+ if (!max_blocks) {
24473+ MD_BUG();
24474+ return 0;
24475+ }
24476+ res = (resync/1024)*1000/(max_blocks/1024 + 1);
24477+ {
24478+ int i, x = res/50, y = 20-x;
24479+ PROCPRINT("[");
24480+ for (i = 0; i < x; i++)
24481+ PROCPRINT("=");
24482+ sz += sprintf(page + sz, ">");
24483+ for (i = 0; i < y; i++)
24484+ PROCPRINT(".");
24485+ PROCPRINT("] ");
24486+ }
24487+ if (!mddev->recovery_running)
24488+ /*
24489+ * true resync
24490+ */
24491+ PROCPRINT(" resync =%3lu.%lu%% (%lu/%lu)",
24492+ res/10, res % 10, resync, max_blocks);
24493+ else
24494+ /*
24495+ * recovery ...
24496+ */
24497+ PROCPRINT(" recovery =%3lu.%lu%% (%lu/%lu)",
24498+ res/10, res % 10, resync, max_blocks);
24499+
24500+ /*
24501+ * We do not want to overflow, so the order of operands and
24502+ * the * 100 / 100 trick are important. We do a +1 to be
24503+ * safe against division by zero. We only estimate anyway.
24504+ *
24505+ * dt: time from mark until now
24506+ * db: blocks written from mark until now
24507+ * rt: remaining time
24508+ */
24509+ dt = ((jiffies - mddev->resync_mark) / HZ);
24510+ if (!dt) dt++;
24511+ db = resync - (mddev->resync_mark_cnt/2);
24512+ rt = (dt * ((max_blocks-resync) / (db/100+1)))/100;
24513+
24514+ PROCPRINT(" finish=%lu.%lumin", rt / 60, (rt % 60)/6);
24515+
24516+ PROCPRINT(" speed=%ldK/sec", db/dt);
24517+
24518+out:
24519+ *offset = off;
24520+ return sz;
24521+}
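
Plugging sample numbers into the estimate above makes the operand ordering visible: dividing by (db/100+1) first keeps the intermediate product inside an unsigned long on 32-bit, at the cost of about 1% rounding. The values below are made up:

#include <stdio.h>

int main(void)
{
	unsigned long max_blocks = 4000000;	/* 1K blocks in the array */
	unsigned long resync	 = 1000000;	/* blocks done so far */
	unsigned long dt	 = 50;		/* seconds since the mark */
	unsigned long db	 = 100000;	/* blocks done since the mark */
	unsigned long rt;

	/* rt = (dt * ((max_blocks-resync) / (db/100+1)))/100, as above */
	rt = (dt * ((max_blocks - resync) / (db / 100 + 1))) / 100;

	printf("speed  = %luK/sec\n", db / dt);			 /* 2000K/sec */
	printf("finish = %lu.%lumin\n", rt / 60, (rt % 60) / 6); /* ~24.9min */
	return 0;
}
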
24522+
24523+static int evms_md_status_read_proc(char *page, char **start, off_t off,
24524+ int count, int *eof, void *data)
24525+{
24526+ int sz = 0, j, size;
24527+ struct list_head *tmp, *tmp2;
24528+ mdk_rdev_t *rdev;
24529+ mddev_t *mddev;
24530+
24531+ PROCPRINT("Enterprise Volume Management System: MD Status\n");
24532+ PROCPRINT("Personalities : ");
24533+ for (j = 0; j < MAX_PERSONALITY; j++)
24534+ if (pers[j])
24535+ PROCPRINT("[%s] ", pers[j]->name);
24536+
24537+ PROCPRINT("\n");
24538+
24539+
24540+ ITERATE_MDDEV(mddev,tmp) {
24541+ PROCPRINT("md%d : %sactive", mdidx(mddev),
24542+ mddev->pers ? "" : "in");
24543+ if (mddev->pers) {
24544+ if (mddev->ro)
24545+ PROCPRINT(" (read-only)");
24546+ PROCPRINT(" %s", mddev->pers->name);
24547+ }
24548+
24549+ size = 0;
24550+ ITERATE_RDEV(mddev,rdev,tmp2) {
24551+ PROCPRINT(" %s[%d]",
24552+ rdev->node->name, rdev->desc_nr);
24553+ if (rdev->faulty) {
24554+ PROCPRINT("(F)");
24555+ continue;
24556+ }
24557+ size += rdev->size;
24558+ }
24559+
24560+ if (mddev->nb_dev) {
24561+ if (mddev->pers)
24562+ PROCPRINT("\n "PFU64" blocks",
24563+ mddev->node->total_vsectors >> 1);
24564+ else
24565+ PROCPRINT("\n %d blocks", size);
24566+ }
24567+
24568+ if (!mddev->pers) {
24569+ PROCPRINT("\n");
24570+ continue;
24571+ }
24572+
24573+ sz += mddev->pers->status (page+sz, mddev);
24574+
24575+ PROCPRINT("\n ");
24576+ if (mddev->curr_resync) {
24577+ sz += status_resync (page+sz, &off, count, mddev);
24578+ } else {
24579+ if (atomic_read(&mddev->resync_sem.count) != 1)
24580+ PROCPRINT(" resync=DELAYED");
24581+ }
24582+
24583+ PROCPRINT("\n");
24584+ }
24585+ *eof = 1;
24586+out:
24587+ *start = page + off;
24588+ sz -= off;
24589+ if (sz < 0)
24590+ sz = 0;
24591+ return sz > count ? count : sz;
24592+}
24593+#endif
24594+
24595+/* Function: md_core_init
24596+ */
24597+int __init md_core_init(void)
24598+{
24599+#ifdef CONFIG_PROC_FS
24600+ struct proc_dir_entry *evms_proc_dir;
24601+#endif
24602+
24603+#ifdef CONFIG_PROC_FS
24604+ evms_proc_dir = evms_cs_get_evms_proc_dir();
24605+ if (evms_proc_dir) {
24606+ create_proc_read_entry("mdstat", 0, evms_proc_dir, evms_md_status_read_proc, NULL);
24607+ }
24608+ md_table_header = register_sysctl_table(dev_dir_table, 1);
24609+#endif
24610+
24611+ return evms_cs_register_plugin(&md_plugin_header);
24612+}
24613+
24614+static void __exit md_core_exit(void)
24615+{
24616+#ifdef CONFIG_PROC_FS
24617+ struct proc_dir_entry *evms_proc_dir;
24618+
24619+ evms_proc_dir = evms_cs_get_evms_proc_dir();
24620+ if (evms_proc_dir) {
24621+ remove_proc_entry("mdstat", evms_proc_dir);
24622+ }
24623+ unregister_sysctl_table(md_table_header);
24624+#endif
24625+ evms_cs_unregister_plugin(&md_plugin_header);
24626+}
24627+
24628+module_init(md_core_init);
24629+module_exit(md_core_exit);
24630+#ifdef MODULE_LICENSE
24631+MODULE_LICENSE("GPL");
24632+#endif
24633+
24634+/*
24635+ * In order for this EVMS plugin to coexist with the original MD
24636+ * module, the symbols exported by this plugin are prefixed with "evms_"
24637+ */
24638+
24639+MD_EXPORT_SYMBOL(evms_md_size);
24640+MD_EXPORT_SYMBOL(evms_register_md_personality);
24641+MD_EXPORT_SYMBOL(evms_unregister_md_personality);
24642+ /* Export the following function for use with rdev->node in evms_md_k.h */
24643+MD_EXPORT_SYMBOL(evms_md_partition_name);
24644+ /* Export the following function for use with disks[] in md_p.h */
24645+MD_EXPORT_SYMBOL(evms_md_error);
24646+MD_EXPORT_SYMBOL(evms_md_error_dev);
24647+MD_EXPORT_SYMBOL(evms_md_update_sb);
24648+MD_EXPORT_SYMBOL(evms_md_find_rdev_nr);
24649+MD_EXPORT_SYMBOL(evms_md_find_rdev);
24650+MD_EXPORT_SYMBOL(evms_md_find_rdev_from_node);
24651+MD_EXPORT_SYMBOL(evms_md_print_devices);
24652+MD_EXPORT_SYMBOL(evms_mddev_map);
24653+MD_EXPORT_SYMBOL(evms_md_check_ordering);
24654+MD_EXPORT_SYMBOL(evms_md_partial_sync_io);
24655+MD_EXPORT_SYMBOL(evms_md_sync_io);
24656+MD_EXPORT_SYMBOL(evms_md_do_sync);
24657+MD_EXPORT_SYMBOL(evms_md_sync_acct);
24658+MD_EXPORT_SYMBOL(evms_md_done_sync);
24659+MD_EXPORT_SYMBOL(evms_md_recover_arrays);
24660+MD_EXPORT_SYMBOL(evms_md_get_spare);
24661+
24662diff -Naur linux-2002-09-30/drivers/evms/md_linear.c evms-2002-09-30/drivers/evms/md_linear.c
24663--- linux-2002-09-30/drivers/evms/md_linear.c Wed Dec 31 18:00:00 1969
24664+++ evms-2002-09-30/drivers/evms/md_linear.c Thu Aug 15 13:50:12 2002
24665@@ -0,0 +1,285 @@
24666+/*
24667+ linear.c : Multiple Devices driver for Linux
24668+ Copyright (C) 1994-96 Marc ZYNGIER
24669+ <zyngier@ufr-info-p7.ibp.fr> or
24670+ <maz@gloups.fdn.fr>
24671+
24672+ Linear mode management functions.
24673+
24674+ This program is free software; you can redistribute it and/or modify
24675+ it under the terms of the GNU General Public License as published by
24676+ the Free Software Foundation; either version 2, or (at your option)
24677+ any later version.
24678+
24679+ You should have received a copy of the GNU General Public License
24680+ (for example /usr/src/linux/COPYING); if not, write to the Free
24681+ Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24682+*/
24683+
24684+#include <linux/module.h>
24685+#include <linux/evms/evms_md.h>
24686+#include <linux/evms/evms_linear.h>
24687+#include <linux/slab.h>
24688+
24689+
24690+#define MAJOR_NR MD_MAJOR
24691+#define MD_DRIVER
24692+#define MD_PERSONALITY
24693+
24694+#define LOG_PREFIX "md linear: "
24695+static int linear_run (mddev_t *mddev)
24696+{
24697+ linear_conf_t *conf;
24698+ struct linear_hash *table;
24699+ mdk_rdev_t *rdev;
24700+ int size, i, j, nb_zone;
24701+ unsigned int curr_offset;
24702+
24703+ MOD_INC_USE_COUNT;
24704+
24705+ conf = kmalloc (sizeof (*conf), GFP_KERNEL);
24706+ if (!conf)
24707+ goto out;
24708+ mddev->private = conf;
24709+
24710+ if (evms_md_check_ordering(mddev)) {
24711+ printk("linear: disks are not ordered, aborting!\n");
24712+ goto out;
24713+ }
24714+
24715+ /*
24716+ * Find the smallest device.
24717+ */
24718+
24719+ conf->smallest = NULL;
24720+ curr_offset = 0;
24721+ ITERATE_RDEV_ORDERED(mddev,rdev,j) {
24722+ dev_info_t *disk = conf->disks + j;
24723+ disk->node = rdev->node;
24724+ disk->dev = rdev->dev;
24725+ disk->size = rdev->size;
24726+ disk->offset = curr_offset;
24727+
24728+ curr_offset += disk->size;
24729+
24730+ if (!conf->smallest || (disk->size < conf->smallest->size))
24731+ conf->smallest = disk;
24732+ }
24733+
24734+ nb_zone = conf->nr_zones = evms_md_size[mdidx(mddev)] / conf->smallest->size +
24735+ ((evms_md_size[mdidx(mddev)] % conf->smallest->size) ? 1 : 0);
24736+
24737+ conf->hash_table = kmalloc (sizeof (struct linear_hash) * nb_zone,
24738+ GFP_KERNEL);
24739+ if (!conf->hash_table)
24740+ goto out;
24741+
24742+ /*
24743+ * Here we generate the linear hash table
24744+ */
24745+ table = conf->hash_table;
24746+ i = 0;
24747+ size = 0;
24748+ for (j = 0; j < mddev->nb_dev; j++) {
24749+ dev_info_t *disk = conf->disks + j;
24750+
24751+ if (size < 0) {
24752+ table[-1].dev1 = disk;
24753+ }
24754+ size += disk->size;
24755+
24756+ while (size>0) {
24757+ table->dev0 = disk;
24758+ table->dev1 = NULL;
24759+ size -= conf->smallest->size;
24760+ table++;
24761+ }
24762+ }
24763+ if (table-conf->hash_table != nb_zone)
24764+ BUG();
24765+ LOG_DETAILS("%s: nr_zones=%d, smallest=%lu\n",
24766+ __FUNCTION__, conf->nr_zones, conf->smallest->size);
24767+ return 0;
24768+
24769+out:
24770+ if (conf)
24771+ kfree(conf);
24772+ MOD_DEC_USE_COUNT;
24773+ return 1;
24774+}
24775+
24776+static int linear_stop (mddev_t *mddev)
24777+{
24778+ linear_conf_t *conf = mddev_to_conf(mddev);
24779+
24780+ kfree(conf->hash_table);
24781+ kfree(conf);
24782+
24783+ MOD_DEC_USE_COUNT;
24784+
24785+ return 0;
24786+}
24787+
24788+/*
24789+ * Function: linear_map
24790+ */
24791+static int linear_map(
24792+ mddev_t *mddev,
24793+ struct evms_logical_node **node,
24794+ struct buffer_head *bh)
24795+{
24796+ linear_conf_t *conf = mddev_to_conf(mddev);
24797+ struct linear_hash *hash;
24798+ dev_info_t *tmp_dev;
24799+ unsigned long block;
24800+
24801+ block = (bh->b_rsector >> 1);
24802+ hash = conf->hash_table + (block / conf->smallest->size);
24803+ if (block >= (hash->dev0->size + hash->dev0->offset)) {
24804+ if (!hash->dev1) {
24805+ LOG_ERROR("%s: hash->dev1==NULL for block %ld\n", __FUNCTION__, block);
24806+ return -ENXIO;
24807+ }
24808+ tmp_dev = hash->dev1;
24809+ } else
24810+ tmp_dev = hash->dev0;
24811+
24812+ if ( (block + (bh->b_size >> 10)) > (tmp_dev->size + tmp_dev->offset)
24813+ || block < tmp_dev->offset) {
24814+ LOG_ERROR("%s: Block %ld out of bounds on node %s size %ld offset %ld\n",
24815+ __FUNCTION__,
24816+ block,
24817+ tmp_dev->node->name,
24818+ tmp_dev->size,
24819+ tmp_dev->offset);
24820+ return -ENXIO;
24821+ }
24822+ bh->b_rsector -= (tmp_dev->offset << 1);
24823+ *node = tmp_dev->node;
24824+ return 0;
24825+}
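
linear_map() above is a constant-time lookup: the hash table has one slot per zone of conf->smallest->size 1K blocks, each slot naming the device that zone starts in (dev0) and, where a zone straddles a device boundary, the device it spills into (dev1). A userspace sketch with two hypothetical devices:

#include <stdio.h>

struct toy_dev {
	const char *name;
	unsigned long size, offset;	/* both in 1K blocks */
};

int main(void)
{
	/* two concatenated devices: 100 blocks, then 300 blocks */
	struct toy_dev a = { "sda1", 100, 0 }, b = { "sdb1", 300, 100 };
	unsigned long smallest = 100;	/* zone granularity */

	/* hash[i] covers blocks [i*smallest, (i+1)*smallest); here every
	 * zone lies inside a single device, so dev1 stays NULL */
	struct { struct toy_dev *dev0, *dev1; } hash[4] = {
		{ &a, NULL }, { &b, NULL }, { &b, NULL }, { &b, NULL },
	};

	unsigned long block = 250;	/* request at 1K-block 250 */
	struct toy_dev *t = hash[block / smallest].dev0;

	if (block >= t->size + t->offset)	/* spills past dev0's end */
		t = hash[block / smallest].dev1;

	printf("block %lu -> %s, local block %lu\n",
	       block, t->name, block - t->offset);	/* sdb1, 150 */
	return 0;
}
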
24826+
24827+static void linear_read(
24828+ struct evms_logical_node *md_node,
24829+ struct buffer_head *bh)
24830+{
24831+ mddev_t *mddev = EVMS_MD_NODE_TO_MDDEV(md_node);
24832+ struct evms_logical_node *node;
24833+
24834+ if (evms_md_check_boundary(md_node, bh)) return;
24835+
24836+ if (!linear_map(mddev, &node, bh)) {
24837+ R_IO(node, bh);
24838+ } else {
24839+ bh->b_end_io(bh, 0);
24840+ }
24841+}
24842+
24843+static void linear_write(
24844+ struct evms_logical_node *md_node,
24845+ struct buffer_head *bh)
24846+{
24847+ mddev_t *mddev = EVMS_MD_NODE_TO_MDDEV(md_node);
24848+ struct evms_logical_node *node;
24849+
24850+ if (evms_md_check_boundary(md_node, bh)) return;
24851+
24852+ if (!linear_map(mddev, &node, bh)) {
24853+ W_IO(node, bh);
24854+ } else {
24855+ bh->b_end_io(bh, 0);
24856+ }
24857+}
24858+
24859+static int linear_status (char *page, mddev_t *mddev)
24860+{
24861+ int sz = 0;
24862+
24863+#undef MD_DEBUG
24864+#ifdef MD_DEBUG
24865+ int j;
24866+ linear_conf_t *conf = mddev_to_conf(mddev);
24867+
24868+ sz += sprintf(page+sz, " ");
24869+ for (j = 0; j < conf->nr_zones; j++)
24870+ {
24871+ sz += sprintf(page+sz, "[%s",
24872+ partition_name(conf->hash_table[j].dev0->dev));
24873+
24874+ if (conf->hash_table[j].dev1)
24875+ sz += sprintf(page+sz, "/%s] ",
24876+ partition_name(conf->hash_table[j].dev1->dev));
24877+ else
24878+ sz += sprintf(page+sz, "] ");
24879+ }
24880+ sz += sprintf(page+sz, "\n");
24881+#endif
24882+ sz += sprintf(page+sz, " %dk rounding", mddev->chunk_size/1024);
24883+ return sz;
24884+}
24885+
24886+static int linear_evms_ioctl (
24887+ mddev_t * mddev,
24888+ struct inode * inode,
24889+ struct file * file,
24890+ unsigned int cmd,
24891+ unsigned long arg)
24892+{
24893+ int rc = 0;
24894+ struct evms_logical_node *node;
24895+
24896+ switch (cmd) {
24897+ case EVMS_GET_BMAP:
24898+ {
24899+ struct evms_get_bmap_pkt *bmap = (struct evms_get_bmap_pkt *)arg;
24900+ struct buffer_head *bh =
24901+ evms_cs_allocate_from_pool(evms_bh_pool, FALSE);
24902+ if (bh) {
24903+ bh->b_rsector = (unsigned long)bmap->rsector;
24904+ bh->b_size = node->block_size;
24905+ rc = linear_map(mddev, &node, bh);
24906+ if (!rc) {
24907+ bmap->rsector = (u64)bh->b_rsector;
24908+ if (node)
24909+ rc = IOCTL(node, inode, file, cmd, arg);
24910+ else
24911+ rc = -ENODEV;
24912+ }
24913+ evms_cs_deallocate_to_pool(evms_bh_pool, bh);
24914+ } else
24915+ rc = -ENOMEM;
24916+ break;
24917+ }
24918+
24919+ default:
24920+ rc = -EINVAL;
24921+ }
24922+ return rc;
24923+}
24924+
24925+static mdk_personality_t linear_personality = {
24926+ .name = "evms_linear",
24927+ .read = linear_read,
24928+ .write = linear_write,
24929+ .run = linear_run,
24930+ .stop = linear_stop,
24931+ .status = linear_status,
24932+ .evms_ioctl = linear_evms_ioctl
24933+};
24934+
24935+static int md__init linear_init (void)
24936+{
24937+ return evms_register_md_personality (LINEAR, &linear_personality);
24938+}
24939+
24940+static void linear_exit (void)
24941+{
24942+ evms_unregister_md_personality (LINEAR);
24943+}
24944+
24945+
24946+module_init(linear_init);
24947+module_exit(linear_exit);
24948+#ifdef MODULE_LICENSE
24949+MODULE_LICENSE("GPL");
24950+#endif
24951diff -Naur linux-2002-09-30/drivers/evms/md_raid0.c evms-2002-09-30/drivers/evms/md_raid0.c
24952--- linux-2002-09-30/drivers/evms/md_raid0.c Wed Dec 31 18:00:00 1969
24953+++ evms-2002-09-30/drivers/evms/md_raid0.c Thu Aug 15 13:50:12 2002
24954@@ -0,0 +1,448 @@
24955+/*
24956+ raid0.c : Multiple Devices driver for Linux
24957+ Copyright (C) 1994-96 Marc ZYNGIER
24958+ <zyngier@ufr-info-p7.ibp.fr> or
24959+ <maz@gloups.fdn.fr>
24960+ Copyright (C) 1999, 2000 Ingo Molnar, Red Hat
24961+
24962+
24963+ RAID-0 management functions.
24964+
24965+ This program is free software; you can redistribute it and/or modify
24966+ it under the terms of the GNU General Public License as published by
24967+ the Free Software Foundation; either version 2, or (at your option)
24968+ any later version.
24969+
24970+ You should have received a copy of the GNU General Public License
24971+ (for example /usr/src/linux/COPYING); if not, write to the Free
24972+ Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24973+*/
24974+
24975+#include <linux/module.h>
24976+#include <linux/evms/evms_raid0.h>
24977+
24978+#define MAJOR_NR MD_MAJOR
24979+#define MD_DRIVER
24980+#define MD_PERSONALITY
24981+
24982+#define LOG_PREFIX "md raid0: "
24983+
24984+static int create_strip_zones (mddev_t *mddev)
24985+{
24986+ int i, c, j, j1, j2;
24987+ unsigned long current_offset, curr_zone_offset, rdev_size_in_sects;
24988+ raid0_conf_t *conf = mddev_to_conf(mddev);
24989+ mdk_rdev_t *smallest, *rdev1, *rdev2, *rdev;
24990+
24991+ /*
24992+ * The number of 'same size groups'
24993+ */
24994+ conf->nr_strip_zones = 0;
24995+
24996+ ITERATE_RDEV_ORDERED(mddev,rdev1,j1) {
24997+ LOG_DEBUG(" looking at %s\n", evms_md_partition_name(rdev1->node));
24998+ c = 0;
24999+ ITERATE_RDEV_ORDERED(mddev,rdev2,j2) {
25000+ LOG_DEBUG(" comparing %s(%ld sectors) with %s(%ld sectors)\n",
25001+ evms_md_partition_name(rdev1->node), rdev1->size << 1,
25002+ evms_md_partition_name(rdev2->node), rdev2->size << 1);
25003+ if (rdev2 == rdev1) {
25004+ LOG_DEBUG(" END\n");
25005+ break;
25006+ }
25007+ if (rdev2->size == rdev1->size)
25008+ {
25009+ /*
25010+			 * Not unique, don't count it as a new
25011+ * group
25012+ */
25013+ LOG_DEBUG(" EQUAL\n");
25014+ c = 1;
25015+ break;
25016+ }
25017+ LOG_DEBUG(" NOT EQUAL\n");
25018+ }
25019+ if (!c) {
25020+ LOG_DEBUG(" ==> UNIQUE\n");
25021+ conf->nr_strip_zones++;
25022+ LOG_DEBUG(" %d zones\n",conf->nr_strip_zones);
25023+ }
25024+ }
25025+ LOG_DEBUG(" FINAL %d zones\n",conf->nr_strip_zones);
25026+
25027+ conf->strip_zone = vmalloc(sizeof(struct strip_zone)*
25028+ conf->nr_strip_zones);
25029+ if (!conf->strip_zone)
25030+ return 1;
25031+
25032+
25033+ conf->smallest = NULL;
25034+ current_offset = 0;
25035+ curr_zone_offset = 0;
25036+
25037+ for (i = 0; i < conf->nr_strip_zones; i++)
25038+ {
25039+ struct strip_zone *zone = conf->strip_zone + i;
25040+
25041+ LOG_DEBUG(" zone %d\n", i);
25042+ zone->dev_offset = current_offset;
25043+ smallest = NULL;
25044+ c = 0;
25045+
25046+ ITERATE_RDEV_ORDERED(mddev,rdev,j) {
25047+
25048+ LOG_DEBUG(" checking %s ...",evms_md_partition_name(rdev->node));
25049+ rdev_size_in_sects = rdev->size << 1;
25050+ if (rdev_size_in_sects > current_offset)
25051+ {
25052+ LOG_DEBUG(" contained as device %d\n", c);
25053+ zone->node[c] = rdev->node;
25054+ c++;
25055+ if (!smallest || (rdev_size_in_sects < (smallest->size <<1) )) {
25056+ smallest = rdev;
25057+ LOG_DEBUG(" (%ld) is smallest!.\n", rdev_size_in_sects);
25058+ }
25059+ } else
25060+ LOG_DEBUG(" nope.\n");
25061+ }
25062+
25063+ zone->nb_dev = c;
25064+ zone->size_in_sects = ((smallest->size <<1) - current_offset) * c;
25065+ LOG_DEBUG(" zone->nb_dev: %d, size: %ld\n",
25066+ zone->nb_dev,zone->size_in_sects);
25067+
25068+ if (!conf->smallest || (zone->size_in_sects < conf->smallest->size_in_sects))
25069+ conf->smallest = zone;
25070+
25071+ zone->zone_offset = curr_zone_offset;
25072+ curr_zone_offset += zone->size_in_sects;
25073+
25074+ current_offset = smallest->size << 1;
25075+ LOG_DEBUG(" current zone offset: %ld\n",current_offset);
25076+ }
25077+ LOG_DEBUG(" done.\n");
25078+ return 0;
25079+}
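
The zone construction above is easiest to see with numbers. For member sizes of 100, 100 and 200 sectors, zone 0 stripes the first 100 sectors of all three devices (300 sectors wide) and zone 1 is the remaining 100-sector tail of the large device alone. The standalone loop below reproduces the same arithmetic, minus the unique-size precount:

#include <stdio.h>

int main(void)
{
	unsigned long size[] = { 100, 100, 200 };	/* per-device sectors */
	unsigned long current_offset = 0, zone_offset = 0, smallest, zsize;
	int i, c, zone, n = 3;

	for (zone = 0; ; zone++) {
		smallest = 0;
		c = 0;
		for (i = 0; i < n; i++) {
			if (size[i] <= current_offset)
				continue;	/* device fully consumed */
			c++;
			if (!smallest || size[i] < smallest)
				smallest = size[i];
		}
		if (!c)
			break;
		zsize = (smallest - current_offset) * c;
		printf("zone %d: %d devs, dev_offset=%lu size=%lu zone_offset=%lu\n",
		       zone, c, current_offset, zsize, zone_offset);
		zone_offset += zsize;
		current_offset = smallest;
	}
	printf("total: %lu sectors\n", zone_offset);	/* 400 = 100+100+200 */
	return 0;
}
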
25080+
25081+static int raid0_run (mddev_t *mddev)
25082+{
25083+ unsigned long cur=0, i=0, size, zone0_size, nb_zone;
25084+ unsigned long mddev_size_in_sects = evms_md_size[mdidx(mddev)] << 1;
25085+ raid0_conf_t *conf;
25086+
25087+ MOD_INC_USE_COUNT;
25088+
25089+ conf = vmalloc(sizeof (raid0_conf_t));
25090+ if (!conf)
25091+ goto out;
25092+ mddev->private = (void *)conf;
25093+
25094+ if (evms_md_check_ordering(mddev)) {
25095+ LOG_ERROR("disks are not ordered, aborting!\n");
25096+ goto out_free_conf;
25097+ }
25098+
25099+ if (create_strip_zones (mddev))
25100+ goto out_free_conf;
25101+
25102+ LOG_DETAILS("evms_md_size is %ld sectors.\n", mddev_size_in_sects);
25103+ LOG_DETAILS("conf->smallest->size_in_sects is %ld sectors.\n", conf->smallest->size_in_sects);
25104+ nb_zone = mddev_size_in_sects / conf->smallest->size_in_sects +
25105+ (mddev_size_in_sects % conf->smallest->size_in_sects ? 1 : 0);
25106+ LOG_DETAILS("nb_zone is %ld.\n", nb_zone);
25107+ conf->nr_zones = nb_zone;
25108+
25109+ LOG_DEBUG("Allocating %ld bytes for hash.\n", nb_zone*sizeof(struct raid0_hash));
25110+
25111+ conf->hash_table = vmalloc (sizeof (struct raid0_hash)*nb_zone);
25112+ if (!conf->hash_table)
25113+ goto out_free_zone_conf;
25114+ size = conf->strip_zone[cur].size_in_sects;
25115+
25116+ i = 0;
25117+ while (cur < conf->nr_strip_zones) {
25118+ conf->hash_table[i].zone0 = conf->strip_zone + cur;
25119+
25120+ /*
25121+ * If we completely fill the slot
25122+ */
25123+ if (size >= conf->smallest->size_in_sects) {
25124+ conf->hash_table[i++].zone1 = NULL;
25125+ size -= conf->smallest->size_in_sects;
25126+
25127+ if (!size) {
25128+ if (++cur == conf->nr_strip_zones)
25129+ continue;
25130+ size = conf->strip_zone[cur].size_in_sects;
25131+ }
25132+ continue;
25133+ }
25134+ if (++cur == conf->nr_strip_zones) {
25135+ /*
25136+ * Last dev, set unit1 as NULL
25137+ */
25138+ conf->hash_table[i].zone1=NULL;
25139+ continue;
25140+ }
25141+
25142+ /*
25143+ * Here we use a 2nd dev to fill the slot
25144+ */
25145+ zone0_size = size;
25146+ size = conf->strip_zone[cur].size_in_sects;
25147+ conf->hash_table[i++].zone1 = conf->strip_zone + cur;
25148+ size -= (conf->smallest->size_in_sects - zone0_size);
25149+ }
25150+ return 0;
25151+
25152+out_free_zone_conf:
25153+ vfree(conf->strip_zone);
25154+ conf->strip_zone = NULL;
25155+
25156+out_free_conf:
25157+ vfree(conf);
25158+ mddev->private = NULL;
25159+out:
25160+ MOD_DEC_USE_COUNT;
25161+ return 1;
25162+}
25163+
25164+static int raid0_stop (mddev_t *mddev)
25165+{
25166+ raid0_conf_t *conf = mddev_to_conf(mddev);
25167+
25168+ vfree (conf->hash_table);
25169+ conf->hash_table = NULL;
25170+ vfree (conf->strip_zone);
25171+ conf->strip_zone = NULL;
25172+ vfree (conf);
25173+ mddev->private = NULL;
25174+
25175+ MOD_DEC_USE_COUNT;
25176+ return 0;
25177+}
25178+
25179+
25180+/*
25181+ * Function: raid0_map
25182+ *
25183+ * Return 0 for success, else error
25184+ *
25185+ */
25186+
25187+static inline int raid0_map(
25188+ mddev_t *mddev,
25189+ unsigned long lsn,
25190+ unsigned long size,
25191+ struct evms_logical_node **node,
25192+ unsigned long *new_lsn,
25193+ unsigned long *new_size)
25194+{
25195+ unsigned int sect_in_chunk, chunksize_bits, chunk_size_in_sects;
25196+ raid0_conf_t *conf = mddev_to_conf(mddev);
25197+ struct raid0_hash *hash;
25198+ struct strip_zone *zone;
25199+ unsigned long chunk;
25200+
25201+ chunk_size_in_sects = mddev->chunk_size >> EVMS_VSECTOR_SIZE_SHIFT;
25202+ chunksize_bits = ffz(~chunk_size_in_sects);
25203+ hash = conf->hash_table + (lsn / conf->smallest->size_in_sects);
25204+
25205+ /* Sanity check */
25206+ if (!hash)
25207+ goto bad_hash;
25208+
25209+ if (!hash->zone0)
25210+ goto bad_zone0;
25211+
25212+ if (lsn >= (hash->zone0->size_in_sects + hash->zone0->zone_offset)) {
25213+ if (!hash->zone1)
25214+ goto bad_zone1;
25215+ zone = hash->zone1;
25216+ } else
25217+ zone = hash->zone0;
25218+
25219+ sect_in_chunk = lsn & (chunk_size_in_sects - 1);
25220+ chunk = (lsn - zone->zone_offset) / (zone->nb_dev << chunksize_bits);
25221+ *node = zone->node[(lsn >> chunksize_bits) % zone->nb_dev];
25222+
25223+ *new_lsn = ((chunk << chunksize_bits) + zone->dev_offset) + sect_in_chunk;
25224+
25225+ *new_size = (size <= chunk_size_in_sects - sect_in_chunk) ?
25226+ size : chunk_size_in_sects - sect_in_chunk;
25227+
25228+ return 0;
25229+
25230+bad_hash:
25231+ LOG_ERROR("%s: bug: hash==NULL for lsn %lu\n", __FUNCTION__, lsn);
25232+ goto outerr;
25233+bad_zone0:
25234+ LOG_ERROR("%s: bug: hash->zone0==NULL for lsn %lu\n", __FUNCTION__, lsn);
25235+ goto outerr;
25236+bad_zone1:
25237+ LOG_ERROR("%s: bug: hash->zone1==NULL for lsn %lu\n", __FUNCTION__, lsn);
25238+outerr:
25239+ return -EINVAL;
25240+}
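
Since chunk sizes are powers of two, the mapping above is all masks and shifts. A worked example for a single zone of two devices and 16-sector chunks: logical sector 100 lands in global chunk 6 (the 7th), which the alternation places on device 0 as its chunk 3, sector 52. The caller then relies on *new_size being clamped to the chunk end; requests that would cross a chunk boundary are rejected in this version, as the BUGBUG comment below notes.

#include <stdio.h>

int main(void)
{
	unsigned long chunk_sects = 16;		/* sectors per chunk */
	unsigned int bits = 4;			/* log2(chunk_sects), cf. ffz(~x) */
	unsigned long nb_dev = 2, dev_offset = 0, zone_offset = 0;
	unsigned long lsn = 100;		/* logical sector in */
	unsigned long sect_in_chunk, chunk, devnum, new_lsn;

	sect_in_chunk = lsn & (chunk_sects - 1);		/* 4 */
	chunk = (lsn - zone_offset) / (nb_dev << bits);		/* 3 */
	devnum = (lsn >> bits) % nb_dev;			/* 0 */
	new_lsn = (chunk << bits) + dev_offset + sect_in_chunk;	/* 52 */

	printf("lsn %lu -> device %lu, sector %lu (chunk %lu, +%lu)\n",
	       lsn, devnum, new_lsn, chunk, sect_in_chunk);
	return 0;
}
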
25241+
25242+void raid0_error(int rw, struct evms_logical_node *node, struct buffer_head *bh)
25243+{
25244+ LOG_ERROR(" %s FAILED on node(%s) rsector(%lu) size(%d)\n",
25245+ (rw == READ) ? "READ" : "WRITE",
25246+ node->name,
25247+ bh->b_rsector,
25248+ bh->b_size);
25249+
25250+ bh->b_end_io(bh, 0);
25251+}
25252+
25253+static inline void raid0_rw (
25254+ struct evms_logical_node *md_node,
25255+ struct buffer_head *bh,
25256+ int rw)
25257+{
25258+ mddev_t *mddev = EVMS_MD_NODE_TO_MDDEV(md_node);
25259+ struct evms_logical_node *node;
25260+ unsigned long new_lsn, size_in_sects, new_size;
25261+
25262+ if (evms_md_check_boundary(md_node, bh)) return;
25263+ size_in_sects = bh->b_size >> EVMS_VSECTOR_SIZE_SHIFT;
25264+ if (!raid0_map(mddev, bh->b_rsector, size_in_sects, &node, &new_lsn, &new_size)) {
25265+ if (new_size == size_in_sects) {
25266+ /*
25267+ * This is the normal case:
25268+ * the request is entirely within the stripe boundary
25269+ */
25270+ bh->b_rsector = new_lsn;
25271+ if (rw == READ) {
25272+ R_IO(node, bh);
25273+ } else {
25274+ W_IO(node, bh);
25275+ }
25276+ return;
25277+ } else {
25278+ /*
25279+ * BUGBUG!
25280+ * Need more processing here (ie. break up the request)
25281+ */
25282+ LOG_ERROR("This version of EVMS RAID0 does not support I/O requests that are:\n");
25283+ LOG_ERROR(" - larger than the stripe size\n");
25284+ LOG_ERROR(" - cross the stripe boundary\n");
25285+ }
25286+ }
25287+ raid0_error(rw, node, bh);
25288+}
25289+
25290+static void raid0_read(
25291+ struct evms_logical_node *md_node,
25292+ struct buffer_head *bh)
25293+{
25294+ raid0_rw(md_node, bh, READ);
25295+}
25296+
25297+static void raid0_write(
25298+ struct evms_logical_node *md_node,
25299+ struct buffer_head *bh)
25300+{
25301+ raid0_rw(md_node, bh, WRITE);
25302+}
25303+
25304+static int raid0_status (char *page, mddev_t *mddev)
25305+{
25306+ int sz = 0;
25307+#undef MD_DEBUG
25308+#ifdef MD_DEBUG
25309+ int j, k;
25310+ raid0_conf_t *conf = mddev_to_conf(mddev);
25311+
25312+ sz += sprintf(page + sz, " ");
25313+ for (j = 0; j < conf->nr_zones; j++) {
25314+ sz += sprintf(page + sz, "[z%d",
25315+ conf->hash_table[j].zone0 - conf->strip_zone);
25316+ if (conf->hash_table[j].zone1)
25317+ sz += sprintf(page+sz, "/z%d] ",
25318+ conf->hash_table[j].zone1 - conf->strip_zone);
25319+ else
25320+ sz += sprintf(page+sz, "] ");
25321+ }
25322+
25323+ sz += sprintf(page + sz, "\n");
25324+
25325+ for (j = 0; j < conf->nr_strip_zones; j++) {
25326+ sz += sprintf(page + sz, " z%d=[", j);
25327+ for (k = 0; k < conf->strip_zone[j].nb_dev; k++)
25328+ sz += sprintf (page+sz, "%s/", conf->strip_zone[j].node[k]->name);
25329+ sz--;
25330+ sz += sprintf (page+sz, "] zo=%d do=%d s=%d\n",
25331+ conf->strip_zone[j].zone_offset,
25332+ conf->strip_zone[j].dev_offset,
25333+ conf->strip_zone[j].size_in_sects);
25334+ }
25335+#endif
25336+ sz += sprintf(page + sz, " %dk chunks", mddev->chunk_size/1024);
25337+ return sz;
25338+}
25339+
25340+static int raid0_evms_ioctl (
25341+ mddev_t * mddev,
25342+ struct inode * inode,
25343+ struct file * file,
25344+ unsigned int cmd,
25345+ unsigned long arg)
25346+{
25347+ int rc = 0;
25348+ struct evms_logical_node *node;
25349+
25350+ switch (cmd) {
25351+ case EVMS_GET_BMAP:
25352+ {
25353+ struct evms_get_bmap_pkt *bmap = (struct evms_get_bmap_pkt *)arg;
25354+ unsigned long new_lsn, new_size;
25355+ unsigned long size = mddev->node->block_size >> EVMS_VSECTOR_SIZE_SHIFT;
25356+ rc = raid0_map(mddev,
25357+ (unsigned long)bmap->rsector,
25358+ size,
25359+ &node,
25360+ &new_lsn,
25361+ &new_size);
25362+ if (!rc) {
25363+ if (node) {
25364+ bmap->rsector = (u64)new_lsn;
25365+ rc = IOCTL(node, inode, file, cmd, arg);
25366+ } else
25367+ rc = -ENODEV;
25368+ }
25369+ break;
25370+ }
25371+
25372+ default:
25373+ rc = -EINVAL;
25374+ }
25375+ return rc;
25376+}
25377+
25378+static mdk_personality_t raid0_personality = {
25379+ .name = "evms_raid0",
25380+ .read = raid0_read,
25381+ .write = raid0_write,
25382+ .run = raid0_run,
25383+ .stop = raid0_stop,
25384+ .status = raid0_status,
25385+ .evms_ioctl = raid0_evms_ioctl
25386+};
25387+
25388+static int md__init raid0_init (void)
25389+{
25390+ return evms_register_md_personality (RAID0, &raid0_personality);
25391+}
25392+
25393+static void raid0_exit (void)
25394+{
25395+ evms_unregister_md_personality (RAID0);
25396+}
25397+
25398+module_init(raid0_init);
25399+module_exit(raid0_exit);
25400+#ifdef MODULE_LICENSE
25401+MODULE_LICENSE("GPL");
25402+#endif
25403diff -Naur linux-2002-09-30/drivers/evms/md_raid1.c evms-2002-09-30/drivers/evms/md_raid1.c
25404--- linux-2002-09-30/drivers/evms/md_raid1.c Wed Dec 31 18:00:00 1969
25405+++ evms-2002-09-30/drivers/evms/md_raid1.c Mon Sep 30 00:02:48 2002
25406@@ -0,0 +1,1935 @@
25407+/*
25408+ * md_raid1.c : Multiple Devices driver for Linux
25409+ *
25410+ * Copyright (C) 1999, 2000 Ingo Molnar, Red Hat
25411+ *
25412+ * Copyright (C) 1996, 1997, 1998 Ingo Molnar, Miguel de Icaza, Gadi Oxman
25413+ *
25414+ * RAID-1 management functions.
25415+ *
25416+ * Better read-balancing code written by Mika Kuoppala <miku@iki.fi>, 2000
25417+ *
25418