--- linux/include/linux/lvm.h.orig Mon Sep 17 22:25:26 2001 +++ linux/include/linux/lvm.h Tue Nov 13 09:46:51 2001 @@ -3,28 +3,28 @@ * kernel/lvm.h * tools/lib/lvm.h * - * Copyright (C) 1997 - 2000 Heinz Mauelshagen, Sistina Software + * Copyright (C) 1997 - 2001 Heinz Mauelshagen, Sistina Software * * February-November 1997 * May-July 1998 * January-March,July,September,October,Dezember 1999 * January,February,July,November 2000 - * January 2001 + * January-March,June,July 2001 * * lvm is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2, or (at your option) * any later version. - * + * * lvm is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. - * + * * You should have received a copy of the GNU General Public License * along with GNU CC; see the file COPYING. If not, write to * the Free Software Foundation, 59 Temple Place - Suite 330, - * Boston, MA 02111-1307, USA. + * Boston, MA 02111-1307, USA. * */ @@ -52,7 +52,7 @@ * 08/12/1999 - changed LVM_LV_SIZE_MAX macro to reflect current 1TB limit * 01/01/2000 - extended lv_v2 core structure by wait_queue member * 12/02/2000 - integrated Andrea Arcagnelli's snapshot work - * 18/02/2000 - seperated user and kernel space parts by + * 18/02/2000 - seperated user and kernel space parts by * #ifdef them with __KERNEL__ * 08/03/2000 - implemented cluster/shared bits for vg_access * 26/06/2000 - implemented snapshot persistency and resizing support @@ -60,6 +60,17 @@ * 12/11/2000 - removed unneeded timestamp definitions * 24/12/2000 - removed LVM_TO_{CORE,DISK}*, use cpu_{from, to}_le* * instead - Christoph Hellwig + * 22/01/2001 - Change ulong to uint32_t + * 14/02/2001 - changed LVM_SNAPSHOT_MIN_CHUNK to 1 page + * 20/02/2001 - incremented IOP version to 11 because of incompatible + * change in VG activation (in order to support devfs better) + * 01/03/2001 - Revert to IOP10 and add VG_CREATE_OLD call for compatibility + * 08/03/2001 - new lv_t (in core) version number 5: changed page member + * to (struct kiobuf *) to use for COW exception table io + * 26/03/2001 - changed lv_v4 to lv_v5 in structure definition (HM) + * 21/06/2001 - changed BLOCK_SIZE back to 1024 for non S/390 + * 22/06/2001 - added Andreas Dilger's PE on 4k boundary alignment enhancements + * 19/07/2001 - added rwsem compatibility macros for 2.2 kernels * */ @@ -67,9 +78,11 @@ #ifndef _LVM_H_INCLUDE #define _LVM_H_INCLUDE -#define _LVM_KERNEL_H_VERSION "LVM 0.9.1_beta2 (18/01/2001)" +#define LVM_RELEASE_NAME "1.0.1-rc4" +#define LVM_RELEASE_DATE "03/10/2001" + +#define _LVM_KERNEL_H_VERSION "LVM "LVM_RELEASE_NAME" ("LVM_RELEASE_DATE")" -#include #include /* @@ -90,13 +103,18 @@ #define DEBUG_READ #define DEBUG_GENDISK #define DEBUG_VG_CREATE - #define DEBUG_LVM_BLK_OPEN + #define DEBUG_DEVICE #define DEBUG_KFREE */ -#endif /* #ifdef __KERNEL__ */ #include #include +#else +#define __KERNEL__ +#include +#include +#undef __KERNEL__ +#endif /* #ifndef __KERNEL__ */ #include #include @@ -107,6 +125,7 @@ #include #endif /* #ifdef __KERNEL__ */ + #include #if !defined ( LVM_BLK_MAJOR) || !defined ( LVM_CHAR_MAJOR) @@ -117,7 +136,7 @@ #undef BLOCK_SIZE #endif -#ifdef CONFIG_ARCH_S390 +#ifdef CONFIG_ARCH_S390 #define BLOCK_SIZE 4096 #else #define BLOCK_SIZE 1024 @@ -127,24 +146,11 @@ #define 
SECTOR_SIZE 512 #endif -#define LVM_STRUCT_VERSION 1 /* structure version */ +/* structure version */ +#define LVM_STRUCT_VERSION 1 #define LVM_DIR_PREFIX "/dev/" -/* set the default structure version */ -#if ( LVM_STRUCT_VERSION == 1) -#define pv_t pv_v2_t -#define lv_t lv_v4_t -#define vg_t vg_v3_t -#define pv_disk_t pv_disk_v2_t -#define lv_disk_t lv_disk_v3_t -#define vg_disk_t vg_disk_v2_t -#define lv_block_exception_t lv_block_exception_v1_t -#define lv_COW_table_disk_t lv_COW_table_disk_v1_t -#endif - - - /* * i/o protocol version * @@ -218,40 +224,11 @@ */ /* DONT TOUCH THESE !!! */ -/* base of PV structure in disk partition */ -#define LVM_PV_DISK_BASE 0L -/* size reserved for PV structure on disk */ -#define LVM_PV_DISK_SIZE 1024L -/* base of VG structure in disk partition */ -#define LVM_VG_DISK_BASE LVM_PV_DISK_SIZE -/* size reserved for VG structure */ -#define LVM_VG_DISK_SIZE ( 9 * 512L) - -/* size reserved for timekeeping */ -#define LVM_TIMESTAMP_DISK_BASE ( LVM_VG_DISK_BASE + LVM_VG_DISK_SIZE) -#define LVM_TIMESTAMP_DISK_SIZE 512L /* reserved for timekeeping */ - -/* name list of physical volumes on disk */ -#define LVM_PV_UUIDLIST_DISK_BASE ( LVM_TIMESTAMP_DISK_BASE + \ - LVM_TIMESTAMP_DISK_SIZE) - -/* now for the dynamically calculated parts of the VGDA */ -#define LVM_LV_DISK_OFFSET(a, b) ( (a)->lv_on_disk.base + \ - sizeof ( lv_disk_t) * b) -#define LVM_DISK_SIZE(pv) ( (pv)->pe_on_disk.base + \ - (pv)->pe_on_disk.size) -#define LVM_PE_DISK_OFFSET(pe, pv) ( pe * pv->pe_size + \ - ( LVM_DISK_SIZE ( pv) / SECTOR_SIZE)) -#define LVM_PE_ON_DISK_BASE(pv) \ - { int rest; \ - pv->pe_on_disk.base = pv->lv_on_disk.base + pv->lv_on_disk.size; \ - if ( ( rest = pv->pe_on_disk.base % SECTOR_SIZE) != 0) \ - pv->pe_on_disk.base += ( SECTOR_SIZE - rest); \ - } -/* END default disk spaces and offsets for PVs */ + + /* @@ -284,7 +261,7 @@ #define LVM_MAX_SIZE ( 1024LU * 1024 / SECTOR_SIZE * 1024 * 1024) /* 1TB[sectors] */ #define LVM_MAX_MIRRORS 2 /* future use */ #define LVM_MIN_READ_AHEAD 0 /* minimum read ahead sectors */ -#define LVM_DEFAULT_READ_AHEAD 1024 /* default read ahead sectors for 512k scsi segments */ +#define LVM_DEFAULT_READ_AHEAD 1024 /* sectors for 512k scsi segments */ #define LVM_MAX_READ_AHEAD 10000 /* maximum read ahead sectors */ #define LVM_MAX_LV_IO_TIMEOUT 60 /* seconds I/O timeout (future use) */ #define LVM_PARTITION 0xfe /* LVM partition id */ @@ -296,28 +273,15 @@ #define LVM_SNAPSHOT_MIN_CHUNK (PAGE_SIZE/1024) /* 4 or 8 KB */ #define UNDEF -1 -#define FALSE 0 -#define TRUE 1 - - -#define LVM_GET_COW_TABLE_CHUNKS_PER_PE(vg, lv) ( \ - vg->pe_size / lv->lv_chunk_size) - -#define LVM_GET_COW_TABLE_ENTRIES_PER_PE(vg, lv) ( \ -{ \ - int COW_table_entries_per_PE; \ - int COW_table_chunks_per_PE; \ -\ - COW_table_entries_per_PE = LVM_GET_COW_TABLE_CHUNKS_PER_PE(vg, lv); \ - COW_table_chunks_per_PE = ( COW_table_entries_per_PE * sizeof(lv_COW_table_disk_t) / SECTOR_SIZE + lv->lv_chunk_size - 1) / lv->lv_chunk_size; \ - COW_table_entries_per_PE - COW_table_chunks_per_PE;}) - /* * ioctls + * FIXME: the last parameter to _IO{W,R,WR} is a data type. The macro will + * expand this using sizeof(), so putting "1" there is misleading + * because sizeof(1) = sizeof(int) = sizeof(2) = 4 on a 32-bit machine! 
*/ /* volume group */ -#define VG_CREATE _IOW ( 0xfe, 0x00, 1) +#define VG_CREATE_OLD _IOW ( 0xfe, 0x00, 1) #define VG_REMOVE _IOW ( 0xfe, 0x01, 1) #define VG_EXTEND _IOW ( 0xfe, 0x03, 1) @@ -330,6 +294,8 @@ #define VG_SET_EXTENDABLE _IOW ( 0xfe, 0x08, 1) #define VG_RENAME _IOW ( 0xfe, 0x09, 1) +/* Since 0.9beta6 */ +#define VG_CREATE _IOW ( 0xfe, 0x0a, 1) /* logical volume */ #define LV_CREATE _IOW ( 0xfe, 0x20, 1) @@ -376,7 +342,12 @@ #endif /* lock the logical volume manager */ +#if LVM_DRIVER_IOP_VERSION > 11 +#define LVM_LOCK_LVM _IO ( 0xfe, 0x9A) +#else +/* This is actually the same as _IO ( 0xff, 0x00), oops. Remove for IOP 12+ */ #define LVM_LOCK_LVM _IO ( 0xfe, 0x100) +#endif /* END ioctls */ @@ -412,6 +383,9 @@ #define PV_ALLOCATABLE 0x02 /* pv_allocatable */ +/* misc */ +#define LVM_SNAPSHOT_DROPPED_SECTOR 1 + /* * Structure definitions core/disk follow * @@ -424,21 +398,21 @@ #define UUID_LEN 32 /* don't change!!! */ /* copy on write tables in disk format */ -typedef struct { +typedef struct lv_COW_table_disk_v1 { uint64_t pv_org_number; uint64_t pv_org_rsector; uint64_t pv_snap_number; uint64_t pv_snap_rsector; -} lv_COW_table_disk_v1_t; +} lv_COW_table_disk_t; /* remap physical sector/rdev pairs including hash */ -typedef struct { +typedef struct lv_block_exception_v1 { struct list_head hash; - ulong rsector_org; - kdev_t rdev_org; - ulong rsector_new; - kdev_t rdev_new; -} lv_block_exception_v1_t; + uint32_t rsector_org; + kdev_t rdev_org; + uint32_t rsector_new; + kdev_t rdev_new; +} lv_block_exception_t; /* disk stored pe information */ typedef struct { @@ -454,37 +428,11 @@ /* - * Structure Physical Volume (PV) Version 1 + * physical volume structures */ /* core */ -typedef struct { - char id[2]; /* Identifier */ - unsigned short version; /* HM lvm version */ - lvm_disk_data_t pv_on_disk; - lvm_disk_data_t vg_on_disk; - lvm_disk_data_t pv_namelist_on_disk; - lvm_disk_data_t lv_on_disk; - lvm_disk_data_t pe_on_disk; - char pv_name[NAME_LEN]; - char vg_name[NAME_LEN]; - char system_id[NAME_LEN]; /* for vgexport/vgimport */ - kdev_t pv_dev; - uint pv_number; - uint pv_status; - uint pv_allocatable; - uint pv_size; /* HM */ - uint lv_cur; - uint pe_size; - uint pe_total; - uint pe_allocated; - uint pe_stale; /* for future use */ - pe_disk_t *pe; /* HM */ - struct inode *inode; /* HM */ -} pv_v1_t; - -/* core */ -typedef struct { +typedef struct pv_v2 { char id[2]; /* Identifier */ unsigned short version; /* HM lvm version */ lvm_disk_data_t pv_on_disk; @@ -506,36 +454,17 @@ uint pe_allocated; uint pe_stale; /* for future use */ pe_disk_t *pe; /* HM */ - struct inode *inode; /* HM */ + struct block_device *bd; char pv_uuid[UUID_LEN+1]; -} pv_v2_t; +#ifndef __KERNEL__ + uint32_t pe_start; /* in sectors */ +#endif +} pv_t; -/* disk */ -typedef struct { - uint8_t id[2]; /* Identifier */ - uint16_t version; /* HM lvm version */ - lvm_disk_data_t pv_on_disk; - lvm_disk_data_t vg_on_disk; - lvm_disk_data_t pv_namelist_on_disk; - lvm_disk_data_t lv_on_disk; - lvm_disk_data_t pe_on_disk; - uint8_t pv_name[NAME_LEN]; - uint8_t vg_name[NAME_LEN]; - uint8_t system_id[NAME_LEN]; /* for vgexport/vgimport */ - uint32_t pv_major; - uint32_t pv_number; - uint32_t pv_status; - uint32_t pv_allocatable; - uint32_t pv_size; /* HM */ - uint32_t lv_cur; - uint32_t pe_size; - uint32_t pe_total; - uint32_t pe_allocated; -} pv_disk_v1_t; /* disk */ -typedef struct { +typedef struct pv_disk_v2 { uint8_t id[2]; /* Identifier */ uint16_t version; /* HM lvm version */ lvm_disk_data_t pv_on_disk; @@ -555,7 
+484,11 @@ uint32_t pe_size; uint32_t pe_total; uint32_t pe_allocated; -} pv_disk_v2_t; + + /* new in struct version 2 */ + uint32_t pe_start; /* in sectors */ + +} pv_disk_t; /* @@ -565,17 +498,17 @@ /* core PE information */ typedef struct { kdev_t dev; - ulong pe; /* to be changed if > 2TB */ - ulong reads; - ulong writes; + uint32_t pe; /* to be changed if > 2TB */ + uint32_t reads; + uint32_t writes; } pe_t; typedef struct { char lv_name[NAME_LEN]; kdev_t old_dev; kdev_t new_dev; - ulong old_pe; - ulong new_pe; + uint32_t old_pe; + uint32_t new_pe; } le_remap_req_t; typedef struct lv_bmap { @@ -588,7 +521,7 @@ */ /* core */ -typedef struct lv_v4 { +typedef struct lv_v5 { char lv_name[NAME_LEN]; char vg_name[NAME_LEN]; uint lv_access; @@ -611,9 +544,9 @@ uint lv_read_ahead; /* delta to version 1 starts here */ - struct lv_v4 *lv_snapshot_org; - struct lv_v4 *lv_snapshot_prev; - struct lv_v4 *lv_snapshot_next; + struct lv_v5 *lv_snapshot_org; + struct lv_v5 *lv_snapshot_prev; + struct lv_v5 *lv_snapshot_next; lv_block_exception_t *lv_block_exception; uint lv_remap_ptr; uint lv_remap_end; @@ -621,23 +554,23 @@ uint lv_snapshot_minor; #ifdef __KERNEL__ struct kiobuf *lv_iobuf; - struct semaphore lv_snapshot_sem; + struct kiobuf *lv_COW_table_iobuf; + struct rw_semaphore lv_lock; struct list_head *lv_snapshot_hash_table; - ulong lv_snapshot_hash_table_size; - ulong lv_snapshot_hash_mask; - struct page *lv_COW_table_page; + uint32_t lv_snapshot_hash_table_size; + uint32_t lv_snapshot_hash_mask; wait_queue_head_t lv_snapshot_wait; int lv_snapshot_use_rate; - void *vg; + struct vg_v3 *vg; uint lv_allocated_snapshot_le; #else char dummy[200]; #endif -} lv_v4_t; +} lv_t; /* disk */ -typedef struct { +typedef struct lv_disk_v3 { uint8_t lv_name[NAME_LEN]; uint8_t vg_name[NAME_LEN]; uint32_t lv_access; @@ -659,36 +592,14 @@ uint32_t lv_allocation; uint32_t lv_io_timeout; /* for future use */ uint32_t lv_read_ahead; /* HM */ -} lv_disk_v3_t; +} lv_disk_t; /* * Structure Volume Group (VG) Version 1 */ /* core */ -typedef struct { - char vg_name[NAME_LEN]; /* volume group name */ - uint vg_number; /* volume group number */ - uint vg_access; /* read/write */ - uint vg_status; /* active or not */ - uint lv_max; /* maximum logical volumes */ - uint lv_cur; /* current logical volumes */ - uint lv_open; /* open logical volumes */ - uint pv_max; /* maximum physical volumes */ - uint pv_cur; /* current physical volumes FU */ - uint pv_act; /* active physical volumes */ - uint dummy; /* was obsolete max_pe_per_pv */ - uint vgda; /* volume group descriptor arrays FU */ - uint pe_size; /* physical extent size in sectors */ - uint pe_total; /* total of physical extents */ - uint pe_allocated; /* allocated physical extents */ - uint pvg_total; /* physical volume groups FU */ - struct proc_dir_entry *proc; - pv_t *pv[ABS_MAX_PV + 1]; /* physical volume struct pointers */ - lv_t *lv[ABS_MAX_LV + 1]; /* logical volume struct pointers */ -} vg_v1_t; - -typedef struct { +typedef struct vg_v3 { char vg_name[NAME_LEN]; /* volume group name */ uint vg_number; /* volume group number */ uint vg_access; /* read/write */ @@ -716,30 +627,11 @@ #else char dummy1[200]; #endif -} vg_v3_t; +} vg_t; /* disk */ -typedef struct { - uint8_t vg_name[NAME_LEN]; /* volume group name */ - uint32_t vg_number; /* volume group number */ - uint32_t vg_access; /* read/write */ - uint32_t vg_status; /* active or not */ - uint32_t lv_max; /* maximum logical volumes */ - uint32_t lv_cur; /* current logical volumes */ - uint32_t lv_open; /* 
open logical volumes */ - uint32_t pv_max; /* maximum physical volumes */ - uint32_t pv_cur; /* current physical volumes FU */ - uint32_t pv_act; /* active physical volumes */ - uint32_t dummy; - uint32_t vgda; /* volume group descriptor arrays FU */ - uint32_t pe_size; /* physical extent size in sectors */ - uint32_t pe_total; /* total of physical extents */ - uint32_t pe_allocated; /* allocated physical extents */ - uint32_t pvg_total; /* physical volume groups FU */ -} vg_disk_v1_t; - -typedef struct { +typedef struct vg_disk_v2 { uint8_t vg_uuid[UUID_LEN]; /* volume group UUID */ uint8_t vg_name_dummy[NAME_LEN-UUID_LEN]; /* rest of v1 VG name */ uint32_t vg_number; /* volume group number */ @@ -757,7 +649,7 @@ uint32_t pe_total; /* total of physical extents */ uint32_t pe_allocated; /* allocated physical extents */ uint32_t pvg_total; /* physical volume groups FU */ -} vg_disk_v2_t; +} vg_disk_t; /* @@ -785,7 +677,7 @@ struct { kdev_t lv_dev; kdev_t pv_dev; - ulong pv_offset; + uint32_t pv_offset; } data; } pe_lock_req_t; @@ -798,7 +690,7 @@ /* Request structure LV_STATUS_BYINDEX */ typedef struct { - ulong lv_index; + uint32_t lv_index; lv_t *lv; /* Transfer size because user space and kernel space differ */ ushort size; @@ -807,7 +699,7 @@ /* Request structure LV_STATUS_BYDEV... */ typedef struct { dev_t dev; - pv_t *lv; + lv_t *lv; } lv_status_bydev_req_t; @@ -817,4 +709,41 @@ int rate; } lv_snapshot_use_rate_req_t; + + +/* useful inlines */ +static inline ulong round_up(ulong n, ulong size) { + size--; + return (n + size) & ~size; +} + +static inline ulong div_up(ulong n, ulong size) { + return round_up(n, size) / size; +} + +/* FIXME: nasty capital letters */ +static int inline LVM_GET_COW_TABLE_CHUNKS_PER_PE(vg_t *vg, lv_t *lv) { + return vg->pe_size / lv->lv_chunk_size; +} + +static int inline LVM_GET_COW_TABLE_ENTRIES_PER_PE(vg_t *vg, lv_t *lv) { + ulong chunks = vg->pe_size / lv->lv_chunk_size; + ulong entry_size = sizeof(lv_COW_table_disk_t); + ulong chunk_size = lv->lv_chunk_size * SECTOR_SIZE; + ulong entries = (vg->pe_size * SECTOR_SIZE) / + (entry_size + chunk_size); + + if(chunks < 2) + return 0; + + for(; entries; entries--) + if((div_up(entries * entry_size, chunk_size) + entries) <= + chunks) + break; + + return entries; +} + + #endif /* #ifndef _LVM_H_INCLUDE */ + --- linux/drivers/md/lvm.c.orig Thu Oct 25 22:58:35 2001 +++ linux/drivers/md/lvm.c Tue Nov 13 09:46:52 2001 @@ -1,13 +1,13 @@ /* * kernel/lvm.c * - * Copyright (C) 1997 - 2000 Heinz Mauelshagen, Sistina Software + * Copyright (C) 1997 - 2001 Heinz Mauelshagen, Sistina Software * * February-November 1997 * April-May,July-August,November 1998 * January-March,May,July,September,October 1999 * January,February,July,September-November 2000 - * January 2001 + * January-April 2001 * * * LVM driver is free software; you can redistribute it and/or modify @@ -43,7 +43,8 @@ * support for free (eg. longer) logical volume names * 12/05/1998 - added spin_locks (thanks to Pascal van Dam * ) - * 25/05/1998 - fixed handling of locked PEs in lvm_map() and lvm_chr_ioctl() + * 25/05/1998 - fixed handling of locked PEs in lvm_map() and + * lvm_chr_ioctl() * 26/05/1998 - reactivated verify_area by access_ok * 07/06/1998 - used vmalloc/vfree instead of kmalloc/kfree to go * beyond 128/256 KB max allocation limit per call @@ -125,7 +126,8 @@ * 14/02/2000 - support for 2.3.43 * - integrated Andrea Arcagneli's snapshot code * 25/06/2000 - james (chip) , IKKHAYD! 
roffl - * 26/06/2000 - enhanced lv_extend_reduce for snapshot logical volume support + * 26/06/2000 - enhanced lv_extend_reduce for snapshot logical volume + * support * 06/09/2000 - added devfs support * 07/09/2000 - changed IOP version to 9 * - started to add new char ioctl LV_STATUS_BYDEV_T to support @@ -148,28 +150,87 @@ * procfs is always supported now. (JT) * 12/01/2001 - avoided flushing logical volume in case of shrinking * because of unecessary overhead in case of heavy updates - * 05/04/2001 - lvm_map bugs: don't use b_blocknr/b_dev in lvm_map, it - * destroys stacking devices. call b_end_io on failed maps. - * (Jens Axboe) + * 25/01/2001 - Allow RO open of an inactive LV so it can be reactivated. + * 31/01/2001 - removed blk_init_queue/blk_cleanup_queue queueing will be + * handled by the proper devices. + * - If you try and BMAP a snapshot you now get an -EPERM + * 01/01/2001 - lvm_map() now calls buffer_IO_error on error for 2.4 + * - factored __remap_snapshot out of lvm_map + * 12/02/2001 - move devfs code to create VG before LVs + * 13/02/2001 - allow VG_CREATE on /dev/lvm + * 14/02/2001 - removed modversions.h + * - tidied device defines for blk.h + * - tidied debug statements + * - bug: vg[] member not set back to NULL if activation fails + * - more lvm_map tidying + * 15/02/2001 - register /dev/lvm with devfs correctly (major/minor + * were swapped) + * 19/02/2001 - preallocated buffer_heads for rawio when using + * snapshots [JT] + * 28/02/2001 - introduced the P_DEV macro and changed some internel + * functions to be static [AD] + * 28/02/2001 - factored lvm_get_snapshot_use_rate out of blk_ioctl [AD] + * - fixed user address accessing bug in lvm_do_lv_create() + * where the check for an existing LV takes place right at + * the beginning + * 01/03/2001 - Add VG_CREATE_OLD for IOP 10 compatibility + * 02/03/2001 - Don't destroy usermode pointers in lv_t structures duing + * LV_STATUS_BYxxx + * and remove redundant lv_t variables from same. + * - avoid compilation of lvm_dummy_device_request in case of + * Linux >= 2.3.0 to avoid a warning + * - added lvm_name argument to printk in buffer allocation + * in order to avoid a warning + * 04/03/2001 - moved linux/version.h above first use of KERNEL_VERSION + * macros + * 05/03/2001 - restore copying pe_t array in lvm_do_lv_status_byname. For + * lvdisplay -v (PC) + * - restore copying pe_t array in lvm_do_lv_status_byindex (HM) + * - added copying pe_t array in lvm_do_lv_status_bydev (HM) + * - enhanced lvm_do_lv_status_by{name,index,dev} to be capable + * to copy the lv_block_exception_t array to userspace (HM) + * 08/03/2001 - initialize new lv_ptr->lv_COW_table_iobuf for snapshots; + * removed obsolete lv_ptr->lv_COW_table_page initialization + * - factored lvm_do_pv_flush out of lvm_chr_ioctl (HM) + * 09/03/2001 - Added _lock_open_count to ensure we only drop the lock + * when the locking process closes. + * 05/04/2001 - Defer writes to an extent that is being moved [JT] + * 05/04/2001 - use b_rdev and b_rsector rather than b_dev and b_blocknr in + * lvm_map() in order to make stacking devices more happy (HM) + * 11/04/2001 - cleaned up the pvmove queue code. 
I no longer retain the + * rw flag, instead WRITEA's are just dropped [JT] + * 30/04/2001 - added KERNEL_VERSION > 2.4.3 get_hardsect_size() rather + * than get_hardblocksize() call + * 03/05/2001 - Use copy_to/from_user to preserve pointers in + * lvm_do_status_by* + * 11/05/2001 - avoid accesses to inactive snapshot data in + * __update_hardsectsize() and lvm_do_lv_extend_reduce() (JW) + * 28/05/2001 - implemented missing BLKSSZGET ioctl + * 05/06/2001 - Move _pe_lock out of fast path for lvm_map when no PEs + * locked. Make buffer queue flush not need locking. + * Fix lvm_user_bmap() to set b_rsector for new lvm_map(). [AED] + * 30/06/2001 - Speed up __update_hardsectsize() by checking if PVs have + * the same hardsectsize (very likely) before scanning all LEs + * in the LV each time. [AED] + * 12/10/2001 - Use add/del_gendisk() routines in 2.4.10+ + * 01/11/2001 - Backport read_ahead change from Linus kernel [AED] * */ +#include -static char *lvm_version = "LVM version 0.9.1_beta2 by Heinz Mauelshagen (18/01/2001)\n"; -static char *lvm_short_version = "version 0.9.1_beta2 (18/01/2001)"; - -#define MAJOR_NR LVM_BLK_MAJOR -#define DEVICE_OFF(device) +#define MAJOR_NR LVM_BLK_MAJOR +#define DEVICE_OFF(device) +#define LOCAL_END_REQUEST /* lvm_do_lv_create calls fsync_dev_lockfs()/unlockfs() */ /* #define LVM_VFS_ENHANCEMENT */ #include -#include #include - #include #include + #include #include @@ -180,6 +241,9 @@ #include #include #include + + +#include #include #include #include @@ -195,7 +259,7 @@ #include #include -#include "lvm-snap.h" +#include "lvm-internal.h" #define LVM_CORRECT_READ_AHEAD(a) \ do { \ @@ -209,24 +273,6 @@ # define WRITEA WRITE #endif -/* debug macros */ -#ifdef DEBUG_IOCTL -#define P_IOCTL(fmt, args...) printk(KERN_DEBUG "lvm ioctl: " fmt, ## args) -#else -#define P_IOCTL(fmt, args...) -#endif - -#ifdef DEBUG_MAP -#define P_MAP(fmt, args...) printk(KERN_DEBUG "lvm map: " fmt, ## args) -#else -#define P_MAP(fmt, args...) -#endif - -#ifdef DEBUG_KFREE -#define P_KFREE(fmt, args...) printk(KERN_DEBUG "lvm kfree: " fmt, ## args) -#else -#define P_KFREE(fmt, args...) 
-#endif /* * External function prototypes @@ -236,27 +282,14 @@ static int lvm_blk_ioctl(struct inode *, struct file *, uint, ulong); static int lvm_blk_open(struct inode *, struct file *); -static int lvm_chr_open(struct inode *, struct file *); - -static int lvm_chr_close(struct inode *, struct file *); static int lvm_blk_close(struct inode *, struct file *); +static int lvm_get_snapshot_use_rate(lv_t *lv_ptr, void *arg); static int lvm_user_bmap(struct inode *, struct lv_bmap *); +static int lvm_chr_open(struct inode *, struct file *); +static int lvm_chr_close(struct inode *, struct file *); static int lvm_chr_ioctl(struct inode *, struct file *, uint, ulong); -int lvm_proc_read_vg_info(char *, char **, off_t, int, int *, void *); -int lvm_proc_read_lv_info(char *, char **, off_t, int, int *, void *); -int lvm_proc_read_pv_info(char *, char **, off_t, int, int *, void *); -static int lvm_proc_get_global_info(char *, char **, off_t, int, int *, void *); - -void lvm_do_create_devfs_entry_of_vg ( vg_t *); - -void lvm_do_create_proc_entry_of_vg ( vg_t *); -void lvm_do_remove_proc_entry_of_vg ( vg_t *); -void lvm_do_create_proc_entry_of_lv ( vg_t *, lv_t *); -void lvm_do_remove_proc_entry_of_lv ( vg_t *, lv_t *); -void lvm_do_create_proc_entry_of_pv ( vg_t *, pv_t *); -void lvm_do_remove_proc_entry_of_pv ( vg_t *, pv_t *); /* End external function prototypes */ @@ -288,34 +321,41 @@ static int lvm_do_pv_change(vg_t*, void*); static int lvm_do_pv_status(vg_t *, void *); +static int lvm_do_pv_flush(void *); -static int lvm_do_vg_create(int, void *); +static int lvm_do_vg_create(void *, int minor); static int lvm_do_vg_extend(vg_t *, void *); static int lvm_do_vg_reduce(vg_t *, void *); static int lvm_do_vg_rename(vg_t *, void *); static int lvm_do_vg_remove(int); static void lvm_geninit(struct gendisk *); -static char *lvm_show_uuid ( char *); +static void __update_hardsectsize(lv_t *lv); + + +static void _queue_io(struct buffer_head *bh, int rw); +static struct buffer_head *_dequeue_io(void); +static void _flush_io(struct buffer_head *bh); + +static int _open_pv(pv_t *pv); +static void _close_pv(pv_t *pv); + +static unsigned long _sectors_to_k(unsigned long sect); + #ifdef LVM_HD_NAME void lvm_hd_name(char *, int); #endif /* END Internal function prototypes */ -/* volume group descriptor area pointers */ -static vg_t *vg[ABS_MAX_VG]; +/* variables */ +char *lvm_version = "LVM version "LVM_RELEASE_NAME"("LVM_RELEASE_DATE")"; +ushort lvm_iop_version = LVM_DRIVER_IOP_VERSION; +int loadtime = 0; +const char *const lvm_name = LVM_NAME; -static devfs_handle_t lvm_devfs_handle; -static devfs_handle_t vg_devfs_handle[MAX_VG]; -static devfs_handle_t ch_devfs_handle[MAX_VG]; -static devfs_handle_t lv_devfs_handle[MAX_LV]; - -static pv_t *pvp = NULL; -static lv_t *lvp = NULL; -static pe_t *pep = NULL; -static pe_t *pep1 = NULL; -static char *basename = NULL; +/* volume group descriptor area pointers */ +vg_t *vg[ABS_MAX_VG]; /* map from block minor number to VG and LV numbers */ typedef struct { @@ -327,9 +367,8 @@ /* Request structures (lvm_chr_ioctl()) */ static pv_change_req_t pv_change_req; -static pv_flush_req_t pv_flush_req; static pv_status_req_t pv_status_req; -static pe_lock_req_t pe_lock_req; +volatile static pe_lock_req_t pe_lock_req; static le_remap_req_t le_remap_req; static lv_req_t lv_req; @@ -339,36 +378,29 @@ static char pv_name[NAME_LEN]; /* static char rootvg[NAME_LEN] = { 0, }; */ -const char *const lvm_name = LVM_NAME; static int lock = 0; -static int loadtime = 0; +static int 
_lock_open_count = 0; static uint vg_count = 0; static long lvm_chr_open_count = 0; -static ushort lvm_iop_version = LVM_DRIVER_IOP_VERSION; static DECLARE_WAIT_QUEUE_HEAD(lvm_wait); -static DECLARE_WAIT_QUEUE_HEAD(lvm_map_wait); static spinlock_t lvm_lock = SPIN_LOCK_UNLOCKED; static spinlock_t lvm_snapshot_lock = SPIN_LOCK_UNLOCKED; -static struct proc_dir_entry *lvm_proc_dir = NULL; -static struct proc_dir_entry *lvm_proc_vg_subdir = NULL; -struct proc_dir_entry *pde = NULL; +static struct buffer_head *_pe_requests; +static DECLARE_RWSEM(_pe_lock); -static struct file_operations lvm_chr_fops = -{ - owner: THIS_MODULE, + +struct file_operations lvm_chr_fops = { open: lvm_chr_open, release: lvm_chr_close, ioctl: lvm_chr_ioctl, }; - /* block device operations structure needed for 2.3.38? and above */ -static struct block_device_operations lvm_blk_dops = +struct block_device_operations lvm_blk_dops = { - owner: THIS_MODULE, - open: lvm_blk_open, + open: lvm_blk_open, release: lvm_blk_close, ioctl: lvm_blk_ioctl, }; @@ -376,10 +408,10 @@ /* gendisk structures */ static struct hd_struct lvm_hd_struct[MAX_LV]; -static int lvm_blocksizes[MAX_LV] = -{0,}; -static int lvm_size[MAX_LV] = -{0,}; +static int lvm_blocksizes[MAX_LV]; +static int lvm_hardsectsizes[MAX_LV]; +static int lvm_size[MAX_LV]; + static struct gendisk lvm_gendisk = { major: MAJOR_NR, @@ -391,38 +423,33 @@ nr_real: MAX_LV, }; + /* * Driver initialization... */ int lvm_init(void) { - if (register_chrdev(LVM_CHAR_MAJOR, lvm_name, &lvm_chr_fops) < 0) { - printk(KERN_ERR "%s -- register_chrdev failed\n", lvm_name); + if (devfs_register_chrdev(LVM_CHAR_MAJOR, + lvm_name, &lvm_chr_fops) < 0) { + printk(KERN_ERR "%s -- devfs_register_chrdev failed\n", + lvm_name); return -EIO; } - if (register_blkdev(MAJOR_NR, lvm_name, &lvm_blk_dops) < 0) + if (devfs_register_blkdev(MAJOR_NR, lvm_name, &lvm_blk_dops) < 0) { - printk("%s -- register_blkdev failed\n", lvm_name); - if (unregister_chrdev(LVM_CHAR_MAJOR, lvm_name) < 0) - printk(KERN_ERR "%s -- unregister_chrdev failed\n", lvm_name); + printk("%s -- devfs_register_blkdev failed\n", lvm_name); + if (devfs_unregister_chrdev(LVM_CHAR_MAJOR, lvm_name) < 0) + printk(KERN_ERR + "%s -- devfs_unregister_chrdev failed\n", + lvm_name); return -EIO; } - lvm_devfs_handle = devfs_register( - 0 , "lvm", 0, 0, LVM_CHAR_MAJOR, - S_IFCHR | S_IRUSR | S_IWUSR | S_IRGRP, - &lvm_chr_fops, NULL); - - lvm_proc_dir = create_proc_entry (LVM_DIR, S_IFDIR, &proc_root); - if (lvm_proc_dir != NULL) { - lvm_proc_vg_subdir = create_proc_entry (LVM_VG_SUBDIR, S_IFDIR, lvm_proc_dir); - pde = create_proc_entry(LVM_GLOBAL, S_IFREG, lvm_proc_dir); - if ( pde != NULL) pde->read_proc = &lvm_proc_get_global_info; - } - + lvm_init_fs(); lvm_init_vars(); lvm_geninit(&lvm_gendisk); + /* insert our gendisk at the corresponding major */ add_gendisk(&lvm_gendisk); #ifdef LVM_HD_NAME @@ -433,65 +460,64 @@ blk_queue_make_request(BLK_DEFAULT_QUEUE(MAJOR_NR), lvm_make_request_fn); + /* initialise the pe lock */ + pe_lock_req.lock = UNLOCK_PE; + /* optional read root VGDA */ /* if ( *rootvg != 0) vg_read_with_pv_and_lv ( rootvg, &vg); */ - printk(KERN_INFO - "%s%s -- " #ifdef MODULE - "Module" + printk(KERN_INFO "%s module loaded\n", lvm_version); #else - "Driver" + printk(KERN_INFO "%s\n", lvm_version); #endif - " successfully initialized\n", - lvm_version, lvm_name); return 0; } /* lvm_init() */ - /* * cleanup... 
*/ + static void lvm_cleanup(void) { - devfs_unregister (lvm_devfs_handle); + if (devfs_unregister_chrdev(LVM_CHAR_MAJOR, lvm_name) < 0) + printk(KERN_ERR "%s -- devfs_unregister_chrdev failed\n", + lvm_name); + if (devfs_unregister_blkdev(MAJOR_NR, lvm_name) < 0) + printk(KERN_ERR "%s -- devfs_unregister_blkdev failed\n", + lvm_name); - if (unregister_chrdev(LVM_CHAR_MAJOR, lvm_name) < 0) { - printk(KERN_ERR "%s -- unregister_chrdev failed\n", lvm_name); - } - if (unregister_blkdev(MAJOR_NR, lvm_name) < 0) { - printk(KERN_ERR "%s -- unregister_blkdev failed\n", lvm_name); - } + /* delete our gendisk from chain */ del_gendisk(&lvm_gendisk); blk_size[MAJOR_NR] = NULL; blksize_size[MAJOR_NR] = NULL; hardsect_size[MAJOR_NR] = NULL; - remove_proc_entry(LVM_GLOBAL, lvm_proc_dir); - remove_proc_entry(LVM_VG_SUBDIR, lvm_proc_dir); - remove_proc_entry(LVM_DIR, &proc_root); - #ifdef LVM_HD_NAME /* reference from linux/drivers/block/genhd.c */ lvm_hd_name_ptr = NULL; #endif + /* unregister with procfs and devfs */ + lvm_fin_fs(); + +#ifdef MODULE printk(KERN_INFO "%s -- Module successfully deactivated\n", lvm_name); +#endif return; } /* lvm_cleanup() */ - /* * support function to initialize lvm variables */ -void __init lvm_init_vars(void) +static void __init lvm_init_vars(void) { int v; @@ -500,8 +526,8 @@ lvm_lock = lvm_snapshot_lock = SPIN_LOCK_UNLOCKED; pe_lock_req.lock = UNLOCK_PE; - pe_lock_req.data.lv_dev = \ - pe_lock_req.data.pv_dev = \ + pe_lock_req.data.lv_dev = 0; + pe_lock_req.data.pv_dev = 0; pe_lock_req.data.pv_offset = 0; /* Initialize VG pointers */ @@ -524,19 +550,18 @@ * ********************************************************************/ +#define MODE_TO_STR(mode) (mode) & FMODE_READ ? "READ" : "", \ + (mode) & FMODE_WRITE ? "WRITE" : "" + /* * character device open routine */ -static int lvm_chr_open(struct inode *inode, - struct file *file) +static int lvm_chr_open(struct inode *inode, struct file *file) { int minor = MINOR(inode->i_rdev); -#ifdef DEBUG - printk(KERN_DEBUG - "%s -- lvm_chr_open MINOR: %d VG#: %d mode: 0x%X lock: %d\n", - lvm_name, minor, VG_CHR(minor), file->f_mode, lock); -#endif + P_DEV("chr_open MINOR: %d VG#: %d mode: %s%s lock: %d\n", + minor, VG_CHR(minor), MODE_TO_STR(file->f_mode), lock); /* super user validation */ if (!capable(CAP_SYS_ADMIN)) return -EACCES; @@ -544,8 +569,15 @@ /* Group special file open */ if (VG_CHR(minor) > MAX_VG) return -ENXIO; + spin_lock(&lvm_lock); + if(lock == current->pid) + _lock_open_count++; + spin_unlock(&lvm_lock); + lvm_chr_open_count++; + MOD_INC_USE_COUNT; + return 0; } /* lvm_chr_open() */ @@ -569,9 +601,8 @@ /* otherwise cc will complain about unused variables */ (void) lvm_lock; - P_IOCTL("%s -- lvm_chr_ioctl: command: 0x%X MINOR: %d " - "VG#: %d mode: 0x%X\n", - lvm_name, command, minor, VG_CHR(minor), file->f_mode); + P_IOCTL("chr MINOR: %d command: 0x%X arg: %p VG#: %d mode: %s%s\n", + minor, command, arg, VG_CHR(minor), MODE_TO_STR(file->f_mode)); #ifdef LVM_TOTAL_RESET if (lvm_reset_spindown > 0) return -EACCES; @@ -619,9 +650,13 @@ physical volume (move's done in user space's pvmove) */ return lvm_do_pe_lock_unlock(vg_ptr,arg); - case VG_CREATE: + case VG_CREATE_OLD: /* create a VGDA */ - return lvm_do_vg_create(minor, arg); + return lvm_do_vg_create(arg, minor); + + case VG_CREATE: + /* create a VGDA, assume VG number is filled in */ + return lvm_do_vg_create(arg, -1); case VG_EXTEND: /* extend a volume group */ @@ -672,7 +707,7 @@ case VG_STATUS_GET_NAMELIST: - /* get volume group count */ + /* get 
volume group names */ for (l = v = 0; v < ABS_MAX_VG; v++) { if (vg[v] != NULL) { if (copy_to_user(arg + l * NAME_LEN, @@ -727,6 +762,7 @@ case LV_STATUS_BYDEV: + /* get status of a logical volume by device */ return lvm_do_lv_status_bydev(vg_ptr, arg); @@ -742,18 +778,12 @@ case PV_FLUSH: /* physical volume buffer flush/invalidate */ - if (copy_from_user(&pv_flush_req, arg, - sizeof(pv_flush_req)) != 0) - return -EFAULT; - - fsync_dev(pv_flush_req.pv_dev); - invalidate_buffers(pv_flush_req.pv_dev); - return 0; + return lvm_do_pv_flush(arg); default: printk(KERN_WARNING - "%s -- lvm_chr_ioctl: unknown command %x\n", + "%s -- lvm_chr_ioctl: unknown command 0x%x\n", lvm_name, command); return -EINVAL; } @@ -767,11 +797,8 @@ */ static int lvm_chr_close(struct inode *inode, struct file *file) { -#ifdef DEBUG - int minor = MINOR(inode->i_rdev); - printk(KERN_DEBUG - "%s -- lvm_chr_close VG#: %d\n", lvm_name, VG_CHR(minor)); -#endif + P_DEV("chr_close MINOR: %d VG#: %d\n", + MINOR(inode->i_rdev), VG_CHR(MINOR(inode->i_rdev))); #ifdef LVM_TOTAL_RESET if (lvm_reset_spindown > 0) { @@ -781,10 +808,19 @@ #endif if (lvm_chr_open_count > 0) lvm_chr_open_count--; - if (lock == current->pid) { - lock = 0; /* release lock */ - wake_up_interruptible(&lvm_wait); + + spin_lock(&lvm_lock); + if(lock == current->pid) { + if(!_lock_open_count) { + P_DEV("chr_close: unlocking LVM for pid %d\n", lock); + lock = 0; + wake_up_interruptible(&lvm_wait); + } else + _lock_open_count--; } + spin_unlock(&lvm_lock); + + MOD_DEC_USE_COUNT; return 0; } /* lvm_chr_close() */ @@ -806,11 +842,8 @@ lv_t *lv_ptr; vg_t *vg_ptr = vg[VG_BLK(minor)]; -#ifdef DEBUG_LVM_BLK_OPEN - printk(KERN_DEBUG - "%s -- lvm_blk_open MINOR: %d VG#: %d LV#: %d mode: 0x%X\n", - lvm_name, minor, VG_BLK(minor), LV_BLK(minor), file->f_mode); -#endif + P_DEV("blk_open MINOR: %d VG#: %d LV#: %d mode: %s%s\n", + minor, VG_BLK(minor), LV_BLK(minor), MODE_TO_STR(file->f_mode)); #ifdef LVM_TOTAL_RESET if (lvm_reset_spindown > 0) @@ -827,8 +860,12 @@ if (lv_ptr->lv_status & LV_SPINDOWN) return -EPERM; /* Check inactive LV and open for read/write */ - if (!(lv_ptr->lv_status & LV_ACTIVE)) - return -EPERM; + /* We need to be able to "read" an inactive LV + to re-activate it again */ + if ((file->f_mode & FMODE_WRITE) && + (!(lv_ptr->lv_status & LV_ACTIVE))) + return -EPERM; + if (!(lv_ptr->lv_access & LV_WRITE) && (file->f_mode & FMODE_WRITE)) return -EACCES; @@ -838,12 +875,9 @@ if (lv_ptr->lv_open == 0) vg_ptr->lv_open++; lv_ptr->lv_open++; -#ifdef DEBUG_LVM_BLK_OPEN - printk(KERN_DEBUG - "%s -- lvm_blk_open MINOR: %d VG#: %d LV#: %d size: %d\n", - lvm_name, minor, VG_BLK(minor), LV_BLK(minor), - lv_ptr->lv_size); -#endif + MOD_INC_USE_COUNT; + + P_DEV("blk_open OK, LV size %d\n", lv_ptr->lv_size); return 0; } @@ -863,31 +897,34 @@ void *arg = (void *) a; struct hd_geometry *hd = (struct hd_geometry *) a; - P_IOCTL("%s -- lvm_blk_ioctl MINOR: %d command: 0x%X arg: %X " - "VG#: %dl LV#: %d\n", - lvm_name, minor, command, (ulong) arg, - VG_BLK(minor), LV_BLK(minor)); + P_IOCTL("blk MINOR: %d command: 0x%X arg: %p VG#: %d LV#: %d " + "mode: %s%s\n", minor, command, arg, VG_BLK(minor), + LV_BLK(minor), MODE_TO_STR(file->f_mode)); switch (command) { + case BLKSSZGET: + /* get block device sector size as needed e.g. 
by fdisk */ + return put_user(lvm_sectsize(inode->i_rdev), (int *) arg); + case BLKGETSIZE: /* return device size */ - P_IOCTL("%s -- lvm_blk_ioctl -- BLKGETSIZE: %u\n", - lvm_name, lv_ptr->lv_size); + P_IOCTL("BLKGETSIZE: %u\n", lv_ptr->lv_size); if (put_user(lv_ptr->lv_size, (unsigned long *)arg)) return -EFAULT; break; +#ifdef BLKGETSIZE64 case BLKGETSIZE64: if (put_user((u64)lv_ptr->lv_size << 9, (u64 *)arg)) return -EFAULT; break; - +#endif case BLKFLSBUF: /* flush buffer cache */ if (!capable(CAP_SYS_ADMIN)) return -EACCES; - P_IOCTL("%s -- lvm_blk_ioctl -- BLKFLSBUF\n", lvm_name); + P_IOCTL("BLKFLSBUF\n"); fsync_dev(inode->i_rdev); invalidate_buffers(inode->i_rdev); @@ -898,8 +935,8 @@ /* set read ahead for block device */ if (!capable(CAP_SYS_ADMIN)) return -EACCES; - P_IOCTL("%s -- lvm_blk_ioctl -- BLKRASET: %d sectors for %02X:%02X\n", - lvm_name, (long) arg, MAJOR(inode->i_rdev), minor); + P_IOCTL("BLKRASET: %ld sectors for %s\n", + (long) arg, kdevname(inode->i_rdev)); if ((long) arg < LVM_MIN_READ_AHEAD || (long) arg > LVM_MAX_READ_AHEAD) @@ -911,7 +948,7 @@ case BLKRAGET: /* get current read ahead setting */ - P_IOCTL("%s -- lvm_blk_ioctl -- BLKRAGET\n", lvm_name); + P_IOCTL("BLKRAGET %d\n", lv_ptr->lv_read_ahead); if (put_user(lv_ptr->lv_read_ahead, (long *)arg)) return -EFAULT; break; @@ -937,10 +974,10 @@ copy_to_user((long *) &hd->start, &start, sizeof(start)) != 0) return -EFAULT; - } - P_IOCTL("%s -- lvm_blk_ioctl -- cylinders: %d\n", - lvm_name, lv_ptr->lv_size / heads / sectors); + P_IOCTL("%s -- lvm_blk_ioctl -- cylinders: %d\n", + lvm_name, cylinders); + } break; @@ -979,40 +1016,11 @@ break; case LV_SNAPSHOT_USE_RATE: - if (!(lv_ptr->lv_access & LV_SNAPSHOT)) return -EPERM; - { - lv_snapshot_use_rate_req_t lv_snapshot_use_rate_req; - - if (copy_from_user(&lv_snapshot_use_rate_req, arg, - sizeof(lv_snapshot_use_rate_req_t))) - return -EFAULT; - if (lv_snapshot_use_rate_req.rate < 0 || - lv_snapshot_use_rate_req.rate > 100) return -EFAULT; - - switch (lv_snapshot_use_rate_req.block) - { - case 0: - lv_ptr->lv_snapshot_use_rate = lv_snapshot_use_rate_req.rate; - if (lv_ptr->lv_remap_ptr * 100 / lv_ptr->lv_remap_end < lv_ptr->lv_snapshot_use_rate) - interruptible_sleep_on (&lv_ptr->lv_snapshot_wait); - break; - - case O_NONBLOCK: - break; - - default: - return -EFAULT; - } - lv_snapshot_use_rate_req.rate = lv_ptr->lv_remap_ptr * 100 / lv_ptr->lv_remap_end; - if (copy_to_user(arg, &lv_snapshot_use_rate_req, - sizeof(lv_snapshot_use_rate_req_t))) - return -EFAULT; - } - break; + return lvm_get_snapshot_use_rate(lv_ptr, arg); default: printk(KERN_WARNING - "%s -- lvm_blk_ioctl: unknown command %d\n", + "%s -- lvm_blk_ioctl: unknown command 0x%x\n", lvm_name, command); return -EINVAL; } @@ -1030,18 +1038,49 @@ vg_t *vg_ptr = vg[VG_BLK(minor)]; lv_t *lv_ptr = vg_ptr->lv[LV_BLK(minor)]; -#ifdef DEBUG - printk(KERN_DEBUG - "%s -- lvm_blk_close MINOR: %d VG#: %d LV#: %d\n", - lvm_name, minor, VG_BLK(minor), LV_BLK(minor)); -#endif + P_DEV("blk_close MINOR: %d VG#: %d LV#: %d\n", + minor, VG_BLK(minor), LV_BLK(minor)); if (lv_ptr->lv_open == 1) vg_ptr->lv_open--; lv_ptr->lv_open--; + MOD_DEC_USE_COUNT; + return 0; } /* lvm_blk_close() */ +static int lvm_get_snapshot_use_rate(lv_t *lv, void *arg) +{ + lv_snapshot_use_rate_req_t lv_rate_req; + + if (!(lv->lv_access & LV_SNAPSHOT)) + return -EPERM; + + if (copy_from_user(&lv_rate_req, arg, sizeof(lv_rate_req))) + return -EFAULT; + + if (lv_rate_req.rate < 0 || lv_rate_req.rate > 100) + return -EINVAL; + + switch 
(lv_rate_req.block) { + case 0: + lv->lv_snapshot_use_rate = lv_rate_req.rate; + if (lv->lv_remap_ptr * 100 / lv->lv_remap_end < + lv->lv_snapshot_use_rate) + interruptible_sleep_on(&lv->lv_snapshot_wait); + break; + + case O_NONBLOCK: + break; + + default: + return -EINVAL; + } + lv_rate_req.rate = lv->lv_remap_ptr * 100 / lv->lv_remap_end; + + return copy_to_user(arg, &lv_rate_req, + sizeof(lv_rate_req)) ? -EFAULT : 0; +} static int lvm_user_bmap(struct inode *inode, struct lv_bmap *user_result) { @@ -1056,6 +1095,7 @@ bh.b_blocknr = block; bh.b_dev = bh.b_rdev = inode->i_rdev; bh.b_size = lvm_get_blksize(bh.b_dev); + bh.b_rsector = block * (bh.b_size >> 9); if ((err=lvm_map(&bh, READ)) < 0) { printk("lvm map failed: %d\n", err); return -EINVAL; @@ -1063,562 +1103,206 @@ return put_user(kdev_t_to_nr(bh.b_rdev), &user_result->lv_dev) || put_user(bh.b_rsector/(bh.b_size>>9), &user_result->lv_block) ? - -EFAULT : 0; + -EFAULT : 0; } /* - * provide VG info for proc filesystem use (global) + * block device support function for /usr/src/linux/drivers/block/ll_rw_blk.c + * (see init_module/lvm_init) */ -int lvm_vg_info(vg_t *vg_ptr, char *buf) { - int sz = 0; - char inactive_flag = ' '; - - if (!(vg_ptr->vg_status & VG_ACTIVE)) inactive_flag = 'I'; - sz = sprintf(buf, - "\nVG: %c%s [%d PV, %d LV/%d open] " - " PE Size: %d KB\n" - " Usage [KB/PE]: %d /%d total " - "%d /%d used %d /%d free", - inactive_flag, - vg_ptr->vg_name, - vg_ptr->pv_cur, - vg_ptr->lv_cur, - vg_ptr->lv_open, - vg_ptr->pe_size >> 1, - vg_ptr->pe_size * vg_ptr->pe_total >> 1, - vg_ptr->pe_total, - vg_ptr->pe_allocated * vg_ptr->pe_size >> 1, - vg_ptr->pe_allocated, - (vg_ptr->pe_total - vg_ptr->pe_allocated) * - vg_ptr->pe_size >> 1, - vg_ptr->pe_total - vg_ptr->pe_allocated); - return sz; -} +static void __remap_snapshot(kdev_t rdev, ulong rsector, + ulong pe_start, lv_t *lv, vg_t *vg) { + /* copy a chunk from the origin to a snapshot device */ + down_write(&lv->lv_lock); -/* - * provide LV info for proc filesystem use (global) - */ -int lvm_lv_info(vg_t *vg_ptr, lv_t *lv_ptr, char *buf) { - int sz = 0; - char inactive_flag = 'A', allocation_flag = ' ', - stripes_flag = ' ', rw_flag = ' '; - - if (!(lv_ptr->lv_status & LV_ACTIVE)) - inactive_flag = 'I'; - rw_flag = 'R'; - if (lv_ptr->lv_access & LV_WRITE) - rw_flag = 'W'; - allocation_flag = 'D'; - if (lv_ptr->lv_allocation & LV_CONTIGUOUS) - allocation_flag = 'C'; - stripes_flag = 'L'; - if (lv_ptr->lv_stripes > 1) - stripes_flag = 'S'; - sz += sprintf(buf+sz, - "[%c%c%c%c", - inactive_flag, - rw_flag, - allocation_flag, - stripes_flag); - if (lv_ptr->lv_stripes > 1) - sz += sprintf(buf+sz, "%-2d", - lv_ptr->lv_stripes); - else - sz += sprintf(buf+sz, " "); - basename = strrchr(lv_ptr->lv_name, '/'); - if ( basename == 0) basename = lv_ptr->lv_name; - else basename++; - sz += sprintf(buf+sz, "] %-25s", basename); - if (strlen(basename) > 25) - sz += sprintf(buf+sz, - "\n "); - sz += sprintf(buf+sz, "%9d /%-6d ", - lv_ptr->lv_size >> 1, - lv_ptr->lv_size / vg_ptr->pe_size); + /* we must redo lvm_snapshot_remap_block in order to avoid a + race condition in the gap where no lock was held */ + if (!lvm_snapshot_remap_block(&rdev, &rsector, pe_start, lv) && + !lvm_snapshot_COW(rdev, rsector, pe_start, rsector, vg, lv)) + lvm_write_COW_table_block(vg, lv); - if (lv_ptr->lv_open == 0) - sz += sprintf(buf+sz, "close"); - else - sz += sprintf(buf+sz, "%dx open", - lv_ptr->lv_open); - - return sz; + up_write(&lv->lv_lock); } - -/* - * provide PV info for proc filesystem use (global) 
- */ -int lvm_pv_info(pv_t *pv_ptr, char *buf) { - int sz = 0; - char inactive_flag = 'A', allocation_flag = ' '; - char *pv_name = NULL; - - if (!(pv_ptr->pv_status & PV_ACTIVE)) - inactive_flag = 'I'; - allocation_flag = 'A'; - if (!(pv_ptr->pv_allocatable & PV_ALLOCATABLE)) - allocation_flag = 'N'; - pv_name = strrchr(pv_ptr->pv_name+1,'/'); - if ( pv_name == 0) pv_name = pv_ptr->pv_name; - else pv_name++; - sz = sprintf(buf, - "[%c%c] %-21s %8d /%-6d " - "%8d /%-6d %8d /%-6d", - inactive_flag, - allocation_flag, - pv_name, - pv_ptr->pe_total * - pv_ptr->pe_size >> 1, - pv_ptr->pe_total, - pv_ptr->pe_allocated * - pv_ptr->pe_size >> 1, - pv_ptr->pe_allocated, - (pv_ptr->pe_total - - pv_ptr->pe_allocated) * - pv_ptr->pe_size >> 1, - pv_ptr->pe_total - - pv_ptr->pe_allocated); - return sz; +static inline void _remap_snapshot(kdev_t rdev, ulong rsector, + ulong pe_start, lv_t *lv, vg_t *vg) { + int r; + + /* check to see if this chunk is already in the snapshot */ + down_read(&lv->lv_lock); + r = lvm_snapshot_remap_block(&rdev, &rsector, pe_start, lv); + up_read(&lv->lv_lock); + + if (!r) + /* we haven't yet copied this block to the snapshot */ + __remap_snapshot(rdev, rsector, pe_start, lv, vg); } /* - * Support functions /proc-Filesystem + * extents destined for a pe that is on the move should be deferred */ +static inline int _should_defer(kdev_t pv, ulong sector, uint32_t pe_size) { + return ((pe_lock_req.lock == LOCK_PE) && + (pv == pe_lock_req.data.pv_dev) && + (sector >= pe_lock_req.data.pv_offset) && + (sector < (pe_lock_req.data.pv_offset + pe_size))); +} -#define LVM_PROC_BUF ( i == 0 ? dummy_buf : &buf[sz]) - -/* - * provide global LVM information - */ -static int lvm_proc_get_global_info(char *page, char **start, off_t pos, int count, int *eof, void *data) +static inline int _defer_extent(struct buffer_head *bh, int rw, + kdev_t pv, ulong sector, uint32_t pe_size) { - int c, i, l, p, v, vg_counter, pv_counter, lv_counter, lv_open_counter, - lv_open_total, pe_t_bytes, hash_table_bytes, lv_block_exception_t_bytes, seconds; - static off_t sz; - off_t sz_last; - static char *buf = NULL; - static char dummy_buf[160]; /* sized for 2 lines */ - vg_t *vg_ptr; - lv_t *lv_ptr; - pv_t *pv_ptr; - - -#ifdef DEBUG_LVM_PROC_GET_INFO - printk(KERN_DEBUG - "%s - lvm_proc_get_global_info CALLED pos: %lu count: %d whence: %d\n", - lvm_name, pos, count, whence); -#endif - - MOD_INC_USE_COUNT; - - if (pos == 0 || buf == NULL) { - sz_last = vg_counter = pv_counter = lv_counter = lv_open_counter = \ - lv_open_total = pe_t_bytes = hash_table_bytes = \ - lv_block_exception_t_bytes = 0; - - /* search for activity */ - for (v = 0; v < ABS_MAX_VG; v++) { - if ((vg_ptr = vg[v]) != NULL) { - vg_counter++; - pv_counter += vg_ptr->pv_cur; - lv_counter += vg_ptr->lv_cur; - if (vg_ptr->lv_cur > 0) { - for (l = 0; l < vg[v]->lv_max; l++) { - if ((lv_ptr = vg_ptr->lv[l]) != NULL) { - pe_t_bytes += lv_ptr->lv_allocated_le; - hash_table_bytes += lv_ptr->lv_snapshot_hash_table_size; - if (lv_ptr->lv_block_exception != NULL) - lv_block_exception_t_bytes += lv_ptr->lv_remap_end; - if (lv_ptr->lv_open > 0) { - lv_open_counter++; - lv_open_total += lv_ptr->lv_open; - } - } - } - } - } - } - pe_t_bytes *= sizeof(pe_t); - lv_block_exception_t_bytes *= sizeof(lv_block_exception_t); - - if (buf != NULL) { - P_KFREE("%s -- vfree %d\n", lvm_name, __LINE__); - lock_kernel(); - vfree(buf); - unlock_kernel(); - buf = NULL; - } - /* 2 times: first to get size to allocate buffer, - 2nd to fill the malloced buffer */ - for (i = 0; 
i < 2; i++) { - sz = 0; - sz += sprintf(LVM_PROC_BUF, - "LVM " -#ifdef MODULE - "module" -#else - "driver" -#endif - " %s\n\n" - "Total: %d VG%s %d PV%s %d LV%s ", - lvm_short_version, - vg_counter, vg_counter == 1 ? "" : "s", - pv_counter, pv_counter == 1 ? "" : "s", - lv_counter, lv_counter == 1 ? "" : "s"); - sz += sprintf(LVM_PROC_BUF, - "(%d LV%s open", - lv_open_counter, - lv_open_counter == 1 ? "" : "s"); - if (lv_open_total > 0) - sz += sprintf(LVM_PROC_BUF, - " %d times)\n", - lv_open_total); - else - sz += sprintf(LVM_PROC_BUF, ")"); - sz += sprintf(LVM_PROC_BUF, - "\nGlobal: %lu bytes malloced IOP version: %d ", - vg_counter * sizeof(vg_t) + - pv_counter * sizeof(pv_t) + - lv_counter * sizeof(lv_t) + - pe_t_bytes + hash_table_bytes + lv_block_exception_t_bytes + sz_last, - lvm_iop_version); - - seconds = CURRENT_TIME - loadtime; - if (seconds < 0) - loadtime = CURRENT_TIME + seconds; - if (seconds / 86400 > 0) { - sz += sprintf(LVM_PROC_BUF, "%d day%s ", - seconds / 86400, - seconds / 86400 == 0 || - seconds / 86400 > 1 ? "s" : ""); - } - sz += sprintf(LVM_PROC_BUF, "%d:%02d:%02d active\n", - (seconds % 86400) / 3600, - (seconds % 3600) / 60, - seconds % 60); - - if (vg_counter > 0) { - for (v = 0; v < ABS_MAX_VG; v++) { - /* volume group */ - if ((vg_ptr = vg[v]) != NULL) { - sz += lvm_vg_info(vg_ptr, LVM_PROC_BUF); - - /* physical volumes */ - sz += sprintf(LVM_PROC_BUF, - "\n PV%s ", - vg_ptr->pv_cur == 1 ? ": " : "s:"); - c = 0; - for (p = 0; p < vg_ptr->pv_max; p++) { - if ((pv_ptr = vg_ptr->pv[p]) != NULL) { - sz += lvm_pv_info(pv_ptr, LVM_PROC_BUF); - - c++; - if (c < vg_ptr->pv_cur) - sz += sprintf(LVM_PROC_BUF, - "\n "); - } - } - - /* logical volumes */ - sz += sprintf(LVM_PROC_BUF, - "\n LV%s ", - vg_ptr->lv_cur == 1 ? ": " : "s:"); - c = 0; - for (l = 0; l < vg_ptr->lv_max; l++) { - if ((lv_ptr = vg_ptr->lv[l]) != NULL) { - sz += lvm_lv_info(vg_ptr, lv_ptr, LVM_PROC_BUF); - c++; - if (c < vg_ptr->lv_cur) - sz += sprintf(LVM_PROC_BUF, - "\n "); - } - } - if (vg_ptr->lv_cur == 0) sz += sprintf(LVM_PROC_BUF, "none"); - sz += sprintf(LVM_PROC_BUF, "\n"); - } - } - } - if (buf == NULL) { - lock_kernel(); - buf = vmalloc(sz); - unlock_kernel(); - if (buf == NULL) { - sz = 0; - MOD_DEC_USE_COUNT; - return sprintf(page, "%s - vmalloc error at line %d\n", - lvm_name, __LINE__); - } - } - sz_last = sz; + if (pe_lock_req.lock == LOCK_PE) { + down_read(&_pe_lock); + if (_should_defer(pv, sector, pe_size)) { + up_read(&_pe_lock); + down_write(&_pe_lock); + if (_should_defer(pv, sector, pe_size)) + _queue_io(bh, rw); + up_write(&_pe_lock); + return 1; } + up_read(&_pe_lock); } - MOD_DEC_USE_COUNT; - if (pos > sz - 1) { - lock_kernel(); - vfree(buf); - unlock_kernel(); - buf = NULL; - return 0; - } - *start = &buf[pos]; - if (sz - pos < count) - return sz - pos; - else - return count; -} /* lvm_proc_get_global_info() */ - - -/* - * provide VG information - */ -int lvm_proc_read_vg_info(char *page, char **start, off_t off, - int count, int *eof, void *data) { - int sz = 0; - vg_t *vg = data; - - sz += sprintf ( page+sz, "name: %s\n", vg->vg_name); - sz += sprintf ( page+sz, "size: %u\n", - vg->pe_total * vg->pe_size / 2); - sz += sprintf ( page+sz, "access: %u\n", vg->vg_access); - sz += sprintf ( page+sz, "status: %u\n", vg->vg_status); - sz += sprintf ( page+sz, "number: %u\n", vg->vg_number); - sz += sprintf ( page+sz, "LV max: %u\n", vg->lv_max); - sz += sprintf ( page+sz, "LV current: %u\n", vg->lv_cur); - sz += sprintf ( page+sz, "LV open: %u\n", vg->lv_open); - sz += sprintf 
( page+sz, "PV max: %u\n", vg->pv_max); - sz += sprintf ( page+sz, "PV current: %u\n", vg->pv_cur); - sz += sprintf ( page+sz, "PV active: %u\n", vg->pv_act); - sz += sprintf ( page+sz, "PE size: %u\n", vg->pe_size / 2); - sz += sprintf ( page+sz, "PE total: %u\n", vg->pe_total); - sz += sprintf ( page+sz, "PE allocated: %u\n", vg->pe_allocated); - sz += sprintf ( page+sz, "uuid: %s\n", lvm_show_uuid(vg->vg_uuid)); - - return sz; -} - - -/* - * provide LV information - */ -int lvm_proc_read_lv_info(char *page, char **start, off_t off, - int count, int *eof, void *data) { - int sz = 0; - lv_t *lv = data; - - sz += sprintf ( page+sz, "name: %s\n", lv->lv_name); - sz += sprintf ( page+sz, "size: %u\n", lv->lv_size); - sz += sprintf ( page+sz, "access: %u\n", lv->lv_access); - sz += sprintf ( page+sz, "status: %u\n", lv->lv_status); - sz += sprintf ( page+sz, "number: %u\n", lv->lv_number); - sz += sprintf ( page+sz, "open: %u\n", lv->lv_open); - sz += sprintf ( page+sz, "allocation: %u\n", lv->lv_allocation); - sz += sprintf ( page+sz, "device: %02u:%02u\n", - MAJOR(lv->lv_dev), MINOR(lv->lv_dev)); - - return sz; -} - - -/* - * provide PV information - */ -int lvm_proc_read_pv_info(char *page, char **start, off_t off, - int count, int *eof, void *data) { - int sz = 0; - pv_t *pv = data; - - sz += sprintf ( page+sz, "name: %s\n", pv->pv_name); - sz += sprintf ( page+sz, "size: %u\n", pv->pv_size); - sz += sprintf ( page+sz, "status: %u\n", pv->pv_status); - sz += sprintf ( page+sz, "number: %u\n", pv->pv_number); - sz += sprintf ( page+sz, "allocatable: %u\n", pv->pv_allocatable); - sz += sprintf ( page+sz, "LV current: %u\n", pv->lv_cur); - sz += sprintf ( page+sz, "PE size: %u\n", pv->pe_size / 2); - sz += sprintf ( page+sz, "PE total: %u\n", pv->pe_total); - sz += sprintf ( page+sz, "PE allocated: %u\n", pv->pe_allocated); - sz += sprintf ( page+sz, "device: %02u:%02u\n", - MAJOR(pv->pv_dev), MINOR(pv->pv_dev)); - sz += sprintf ( page+sz, "uuid: %s\n", lvm_show_uuid(pv->pv_uuid)); - - - return sz; + return 0; } -/* - * block device support function for /usr/src/linux/drivers/block/ll_rw_blk.c - * (see init_module/lvm_init) - */ static int lvm_map(struct buffer_head *bh, int rw) { int minor = MINOR(bh->b_rdev); - int ret = 0; ulong index; ulong pe_start; ulong size = bh->b_size >> 9; - ulong rsector_tmp = bh->b_rsector; - ulong rsector_sav; - kdev_t rdev_tmp = bh->b_rdev; - kdev_t rdev_sav; + ulong rsector_org = bh->b_rsector; + ulong rsector_map; + kdev_t rdev_map; vg_t *vg_this = vg[VG_BLK(minor)]; lv_t *lv = vg_this->lv[LV_BLK(minor)]; + down_read(&lv->lv_lock); if (!(lv->lv_status & LV_ACTIVE)) { printk(KERN_ALERT "%s - lvm_map: ll_rw_blk for inactive LV %s\n", lvm_name, lv->lv_name); - return -1; + goto bad; } if ((rw == WRITE || rw == WRITEA) && !(lv->lv_access & LV_WRITE)) { printk(KERN_CRIT - "%s - lvm_map: ll_rw_blk write for readonly LV %s\n", + "%s - lvm_map: ll_rw_blk write for readonly LV %s\n", lvm_name, lv->lv_name); - return -1; + goto bad; } - P_MAP("%s - lvm_map minor:%d *rdev: %02d:%02d *rsector: %lu " - "size:%lu\n", + P_MAP("%s - lvm_map minor: %d *rdev: %s *rsector: %lu size:%lu\n", lvm_name, minor, - MAJOR(rdev_tmp), - MINOR(rdev_tmp), - rsector_tmp, size); + kdevname(bh->b_rdev), + rsector_org, size); - if (rsector_tmp + size > lv->lv_size) { + if (rsector_org + size > lv->lv_size) { printk(KERN_ALERT "%s - lvm_map access beyond end of device; *rsector: " "%lu or size: %lu wrong for minor: %2d\n", - lvm_name, rsector_tmp, size, minor); - return -1; + lvm_name, 
rsector_org, size, minor); + goto bad; } - rsector_sav = rsector_tmp; - rdev_sav = rdev_tmp; -lvm_second_remap: - /* linear mapping */ - if (lv->lv_stripes < 2) { + + if (lv->lv_stripes < 2) { /* linear mapping */ /* get the index */ - index = rsector_tmp / vg_this->pe_size; + index = rsector_org / vg_this->pe_size; pe_start = lv->lv_current_pe[index].pe; - rsector_tmp = lv->lv_current_pe[index].pe + - (rsector_tmp % vg_this->pe_size); - rdev_tmp = lv->lv_current_pe[index].dev; - - P_MAP("lv_current_pe[%ld].pe: %ld rdev: %02d:%02d " - "rsector:%ld\n", - index, - lv->lv_current_pe[index].pe, - MAJOR(rdev_tmp), - MINOR(rdev_tmp), - rsector_tmp); + rsector_map = lv->lv_current_pe[index].pe + + (rsector_org % vg_this->pe_size); + rdev_map = lv->lv_current_pe[index].dev; + + P_MAP("lv_current_pe[%ld].pe: %d rdev: %s rsector:%ld\n", + index, lv->lv_current_pe[index].pe, + kdevname(rdev_map), rsector_map); - /* striped mapping */ - } else { + } else { /* striped mapping */ ulong stripe_index; ulong stripe_length; stripe_length = vg_this->pe_size * lv->lv_stripes; - stripe_index = (rsector_tmp % stripe_length) / lv->lv_stripesize; - index = rsector_tmp / stripe_length + - (stripe_index % lv->lv_stripes) * - (lv->lv_allocated_le / lv->lv_stripes); + stripe_index = (rsector_org % stripe_length) / + lv->lv_stripesize; + index = rsector_org / stripe_length + + (stripe_index % lv->lv_stripes) * + (lv->lv_allocated_le / lv->lv_stripes); pe_start = lv->lv_current_pe[index].pe; - rsector_tmp = lv->lv_current_pe[index].pe + - (rsector_tmp % stripe_length) - - (stripe_index % lv->lv_stripes) * lv->lv_stripesize - - stripe_index / lv->lv_stripes * - (lv->lv_stripes - 1) * lv->lv_stripesize; - rdev_tmp = lv->lv_current_pe[index].dev; - } - - P_MAP("lv_current_pe[%ld].pe: %ld rdev: %02d:%02d rsector:%ld\n" - "stripe_length: %ld stripe_index: %ld\n", - index, - lv->lv_current_pe[index].pe, - MAJOR(rdev_tmp), - MINOR(rdev_tmp), - rsector_tmp, - stripe_length, - stripe_index); + rsector_map = lv->lv_current_pe[index].pe + + (rsector_org % stripe_length) - + (stripe_index % lv->lv_stripes) * lv->lv_stripesize - + stripe_index / lv->lv_stripes * + (lv->lv_stripes - 1) * lv->lv_stripesize; + rdev_map = lv->lv_current_pe[index].dev; + + P_MAP("lv_current_pe[%ld].pe: %d rdev: %s rsector:%ld\n" + "stripe_length: %ld stripe_index: %ld\n", + index, lv->lv_current_pe[index].pe, kdevname(rdev_map), + rsector_map, stripe_length, stripe_index); + } + + /* + * Queue writes to physical extents on the move until move completes. + * Don't get _pe_lock until there is a reasonable expectation that + * we need to queue this request, because this is in the fast path. 
+ */ + if (rw == WRITE || rw == WRITEA) { + if(_defer_extent(bh, rw, rdev_map, + rsector_map, vg_this->pe_size)) { - /* handle physical extents on the move */ - if (pe_lock_req.lock == LOCK_PE) { - if (rdev_tmp == pe_lock_req.data.pv_dev && - rsector_tmp >= pe_lock_req.data.pv_offset && - rsector_tmp < (pe_lock_req.data.pv_offset + - vg_this->pe_size)) { - sleep_on(&lvm_map_wait); - rsector_tmp = rsector_sav; - rdev_tmp = rdev_sav; - goto lvm_second_remap; + up_read(&lv->lv_lock); + return 0; } - } - /* statistic */ - if (rw == WRITE || rw == WRITEA) - lv->lv_current_pe[index].writes++; - else - lv->lv_current_pe[index].reads++; + + lv->lv_current_pe[index].writes++; /* statistic */ + } else + lv->lv_current_pe[index].reads++; /* statistic */ /* snapshot volume exception handling on physical device address base */ - if (lv->lv_access & (LV_SNAPSHOT|LV_SNAPSHOT_ORG)) { - /* original logical volume */ - if (lv->lv_access & LV_SNAPSHOT_ORG) { - /* Serializes the access to the lv_snapshot_next list */ - down(&lv->lv_snapshot_sem); - if (rw == WRITE || rw == WRITEA) - { - lv_t *lv_ptr; - - /* start with first snapshot and loop thrugh all of them */ - for (lv_ptr = lv->lv_snapshot_next; - lv_ptr != NULL; - lv_ptr = lv_ptr->lv_snapshot_next) { - /* Check for inactive snapshot */ - if (!(lv_ptr->lv_status & LV_ACTIVE)) continue; - /* Serializes the COW with the accesses to the snapshot device */ - down(&lv_ptr->lv_snapshot_sem); - /* do we still have exception storage for this snapshot free? */ - if (lv_ptr->lv_block_exception != NULL) { - rdev_sav = rdev_tmp; - rsector_sav = rsector_tmp; - if (!lvm_snapshot_remap_block(&rdev_tmp, - &rsector_tmp, - pe_start, - lv_ptr)) { - /* create a new mapping */ - if (!(ret = lvm_snapshot_COW(rdev_tmp, - rsector_tmp, - pe_start, - rsector_sav, - lv_ptr))) - ret = lvm_write_COW_table_block(vg_this, - lv_ptr); - } - rdev_tmp = rdev_sav; - rsector_tmp = rsector_sav; - } - up(&lv_ptr->lv_snapshot_sem); - } - } - up(&lv->lv_snapshot_sem); - } else { - /* remap snapshot logical volume */ - down(&lv->lv_snapshot_sem); - if (lv->lv_block_exception != NULL) - lvm_snapshot_remap_block(&rdev_tmp, &rsector_tmp, pe_start, lv); - up(&lv->lv_snapshot_sem); + if (!(lv->lv_access & (LV_SNAPSHOT|LV_SNAPSHOT_ORG))) + goto out; + + if (lv->lv_access & LV_SNAPSHOT) { /* remap snapshot */ + if (lvm_snapshot_remap_block(&rdev_map, &rsector_map, + pe_start, lv) < 0) + goto bad; + + } else if (rw == WRITE || rw == WRITEA) { /* snapshot origin */ + lv_t *snap; + + /* start with first snapshot and loop through all of + them */ + for (snap = lv->lv_snapshot_next; snap; + snap = snap->lv_snapshot_next) { + /* Check for inactive snapshot */ + if (!(snap->lv_status & LV_ACTIVE)) + continue; + + /* Serializes the COW with the accesses to the + snapshot device */ + _remap_snapshot(rdev_map, rsector_map, + pe_start, snap, vg_this); } } - bh->b_rdev = rdev_tmp; - bh->b_rsector = rsector_tmp; - return ret; + out: + bh->b_rdev = rdev_map; + bh->b_rsector = rsector_map; + up_read(&lv->lv_lock); + return 1; + + bad: + buffer_IO_error(bh); + up_read(&lv->lv_lock); + return -1; } /* lvm_map() */ @@ -1646,6 +1330,8 @@ #endif + + /* * make request function */ @@ -1653,11 +1339,7 @@ int rw, struct buffer_head *bh) { - if (lvm_map(bh, rw) >= 0) - return 1; - - buffer_IO_error(bh); - return 0; + return (lvm_map(bh, rw) <= 0) ? 
0 : 1; } @@ -1674,8 +1356,7 @@ lock_try_again: spin_lock(&lvm_lock); if (lock != 0 && lock != current->pid) { - P_IOCTL("lvm_do_lock_lvm: %s is locked by pid %d ...\n", - lvm_name, lock); + P_DEV("lvm_do_lock_lvm: locked by pid %d ...\n", lock); spin_unlock(&lvm_lock); interruptible_sleep_on(&lvm_wait); if (current->sigpending != 0) @@ -1687,6 +1368,7 @@ goto lock_try_again; } lock = current->pid; + P_DEV("lvm_do_lock_lvm: locking LVM for pid %d\n", lock); spin_unlock(&lvm_lock); return 0; } /* lvm_do_lock_lvm */ @@ -1697,33 +1379,60 @@ */ static int lvm_do_pe_lock_unlock(vg_t *vg_ptr, void *arg) { + pe_lock_req_t new_lock; + struct buffer_head *bh; uint p; if (vg_ptr == NULL) return -ENXIO; - if (copy_from_user(&pe_lock_req, arg, - sizeof(pe_lock_req_t)) != 0) return -EFAULT; + if (copy_from_user(&new_lock, arg, sizeof(new_lock)) != 0) + return -EFAULT; - switch (pe_lock_req.lock) { + switch (new_lock.lock) { case LOCK_PE: for (p = 0; p < vg_ptr->pv_max; p++) { if (vg_ptr->pv[p] != NULL && - pe_lock_req.data.pv_dev == - vg_ptr->pv[p]->pv_dev) + new_lock.data.pv_dev == vg_ptr->pv[p]->pv_dev) break; } if (p == vg_ptr->pv_max) return -ENXIO; - pe_lock_req.lock = UNLOCK_PE; + /* + * this sync releaves memory pressure to lessen the + * likelyhood of pvmove being paged out - resulting in + * deadlock. + * + * This method of doing a pvmove is broken + */ fsync_dev(pe_lock_req.data.lv_dev); + + down_write(&_pe_lock); + if (pe_lock_req.lock == LOCK_PE) { + up_write(&_pe_lock); + return -EBUSY; + } + + /* Should we do to_kdev_t() on the pv_dev and lv_dev??? */ pe_lock_req.lock = LOCK_PE; + pe_lock_req.data.lv_dev = new_lock.data.lv_dev; + pe_lock_req.data.pv_dev = new_lock.data.pv_dev; + pe_lock_req.data.pv_offset = new_lock.data.pv_offset; + up_write(&_pe_lock); + + /* some requests may have got through since the fsync */ + fsync_dev(pe_lock_req.data.pv_dev); break; case UNLOCK_PE: + down_write(&_pe_lock); pe_lock_req.lock = UNLOCK_PE; - pe_lock_req.data.lv_dev = \ - pe_lock_req.data.pv_dev = \ + pe_lock_req.data.lv_dev = 0; + pe_lock_req.data.pv_dev = 0; pe_lock_req.data.pv_offset = 0; - wake_up(&lvm_map_wait); + bh = _dequeue_io(); + up_write(&_pe_lock); + + /* handle all deferred io for this PE */ + _flush_io(bh); break; default: @@ -1760,6 +1469,8 @@ le_remap_req.new_dev; lv_ptr->lv_current_pe[le].pe = le_remap_req.new_pe; + + __update_hardsectsize(lv_ptr); return 0; } } @@ -1773,7 +1484,7 @@ /* * character device support function VGDA create */ -int lvm_do_vg_create(int minor, void *arg) +static int lvm_do_vg_create(void *arg, int minor) { int ret = 0; ulong l, ls = 0, p, size; @@ -1781,8 +1492,6 @@ vg_t *vg_ptr; lv_t **snap_lv_ptr; - if (vg[VG_CHR(minor)] != NULL) return -EPERM; - if ((vg_ptr = kmalloc(sizeof(vg_t),GFP_KERNEL)) == NULL) { printk(KERN_CRIT "%s -- VG_CREATE: kmalloc error VG at line %d\n", @@ -1791,35 +1500,51 @@ } /* get the volume group structure */ if (copy_from_user(vg_ptr, arg, sizeof(vg_t)) != 0) { + P_IOCTL("lvm_do_vg_create ERROR: copy VG ptr %p (%d bytes)\n", + arg, sizeof(vg_t)); kfree(vg_ptr); return -EFAULT; } + /* VG_CREATE now uses minor number in VG structure */ + if (minor == -1) minor = vg_ptr->vg_number; + + /* Validate it */ + if (vg[VG_CHR(minor)] != NULL) { + P_IOCTL("lvm_do_vg_create ERROR: VG %d in use\n", minor); + kfree(vg_ptr); + return -EPERM; + } + /* we are not that active so far... 
*/ vg_ptr->vg_status &= ~VG_ACTIVE; - vg[VG_CHR(minor)] = vg_ptr; - vg[VG_CHR(minor)]->pe_allocated = 0; + vg_ptr->pe_allocated = 0; if (vg_ptr->pv_max > ABS_MAX_PV) { printk(KERN_WARNING "%s -- Can't activate VG: ABS_MAX_PV too small\n", lvm_name); kfree(vg_ptr); - vg[VG_CHR(minor)] = NULL; return -EPERM; } + if (vg_ptr->lv_max > ABS_MAX_LV) { printk(KERN_WARNING "%s -- Can't activate VG: ABS_MAX_LV too small for %u\n", lvm_name, vg_ptr->lv_max); kfree(vg_ptr); - vg_ptr = NULL; return -EPERM; } + /* create devfs and procfs entries */ + lvm_fs_create_vg(vg_ptr); + + vg[VG_CHR(minor)] = vg_ptr; + /* get the physical volume structures */ vg_ptr->pv_act = vg_ptr->pv_cur = 0; for (p = 0; p < vg_ptr->pv_max; p++) { + pv_t *pvp; /* user space address */ if ((pvp = vg_ptr->pv[p]) != NULL) { ret = lvm_do_pv_create(pvp, vg_ptr, p); @@ -1843,9 +1568,12 @@ /* get the logical volume structures */ vg_ptr->lv_cur = 0; for (l = 0; l < vg_ptr->lv_max; l++) { + lv_t *lvp; /* user space address */ if ((lvp = vg_ptr->lv[l]) != NULL) { if (copy_from_user(&lv, lvp, sizeof(lv_t)) != 0) { + P_IOCTL("ERROR: copying LV ptr %p (%d bytes)\n", + lvp, sizeof(lv_t)); lvm_do_vg_remove(minor); return -EFAULT; } @@ -1864,12 +1592,10 @@ } } - lvm_do_create_devfs_entry_of_vg ( vg_ptr); - /* Second path to correct snapshot logical volumes which are not in place during first path above */ for (l = 0; l < ls; l++) { - lvp = snap_lv_ptr[l]; + lv_t *lvp = snap_lv_ptr[l]; if (copy_from_user(&lv, lvp, sizeof(lv_t)) != 0) { lvm_do_vg_remove(minor); return -EFAULT; @@ -1880,8 +1606,6 @@ } } - lvm_do_create_proc_entry_of_vg ( vg_ptr); - vfree(snap_lv_ptr); vg_count++; @@ -1913,7 +1637,6 @@ if ( ret != 0) return ret; pv_ptr = vg_ptr->pv[p]; vg_ptr->pe_total += pv_ptr->pe_total; - lvm_do_create_proc_entry_of_pv(vg_ptr, pv_ptr); return 0; } } @@ -1963,10 +1686,13 @@ lv_t *lv_ptr = NULL; pv_t *pv_ptr = NULL; + /* If the VG doesn't exist in the kernel then just exit */ + if (!vg_ptr) return 0; + if (copy_from_user(vg_name, arg, sizeof(vg_name)) != 0) return -EFAULT; - lvm_do_remove_proc_entry_of_vg ( vg_ptr); + lvm_fs_remove_vg(vg_ptr); strncpy ( vg_ptr->vg_name, vg_name, sizeof ( vg_name)-1); for ( l = 0; l < vg_ptr->lv_max; l++) @@ -1988,7 +1714,7 @@ strncpy(pv_ptr->vg_name, vg_name, NAME_LEN); } - lvm_do_create_proc_entry_of_vg ( vg_ptr); + lvm_fs_create_vg(vg_ptr); return 0; } /* lvm_do_vg_rename */ @@ -2015,6 +1741,9 @@ /* let's go inactive */ vg_ptr->vg_status &= ~VG_ACTIVE; + /* remove from procfs and devfs */ + lvm_fs_remove_vg(vg_ptr); + /* free LVs */ /* first free snapshot logical volumes */ for (i = 0; i < vg_ptr->lv_max; i++) { @@ -2042,11 +1771,6 @@ } } - devfs_unregister (ch_devfs_handle[vg_ptr->vg_number]); - devfs_unregister (vg_devfs_handle[vg_ptr->vg_number]); - - lvm_do_remove_proc_entry_of_vg ( vg_ptr); - P_KFREE("%s -- kfree %d\n", lvm_name, __LINE__); kfree(vg_ptr); vg[VG_CHR(minor)] = NULL; @@ -2063,66 +1787,138 @@ * character device support function physical volume create */ static int lvm_do_pv_create(pv_t *pvp, vg_t *vg_ptr, ulong p) { - pv_t *pv_ptr = NULL; + pv_t *pv; + int err; - pv_ptr = vg_ptr->pv[p] = kmalloc(sizeof(pv_t),GFP_KERNEL); - if (pv_ptr == NULL) { + pv = kmalloc(sizeof(pv_t),GFP_KERNEL); + if (pv == NULL) { printk(KERN_CRIT - "%s -- VG_CREATE: kmalloc error PV at line %d\n", + "%s -- PV_CREATE: kmalloc error PV at line %d\n", lvm_name, __LINE__); return -ENOMEM; } - if (copy_from_user(pv_ptr, pvp, sizeof(pv_t)) != 0) { + + memset(pv, 0, sizeof(*pv)); + + if (copy_from_user(pv, pvp, sizeof(pv_t)) 
!= 0) { + P_IOCTL("lvm_do_pv_create ERROR: copy PV ptr %p (%d bytes)\n", + pvp, sizeof(pv_t)); + kfree(pv); return -EFAULT; } + + if ((err = _open_pv(pv))) { + kfree(pv); + return err; + } + /* We don't need the PE list in kernel space as with LVs pe_t list (see below) */ - pv_ptr->pe = NULL; - pv_ptr->pe_allocated = 0; - pv_ptr->pv_status = PV_ACTIVE; + pv->pe = NULL; + pv->pe_allocated = 0; + pv->pv_status = PV_ACTIVE; vg_ptr->pv_act++; vg_ptr->pv_cur++; + lvm_fs_create_pv(vg_ptr, pv); + vg_ptr->pv[p] = pv; return 0; } /* lvm_do_pv_create() */ /* - * character device support function physical volume create + * character device support function physical volume remove */ static int lvm_do_pv_remove(vg_t *vg_ptr, ulong p) { - pv_t *pv_ptr = vg_ptr->pv[p]; + pv_t *pv = vg_ptr->pv[p]; - lvm_do_remove_proc_entry_of_pv ( vg_ptr, pv_ptr); - vg_ptr->pe_total -= pv_ptr->pe_total; + lvm_fs_remove_pv(vg_ptr, pv); + + vg_ptr->pe_total -= pv->pe_total; vg_ptr->pv_cur--; vg_ptr->pv_act--; -#ifdef LVM_GET_INODE - lvm_clear_inode(pv_ptr->inode); -#endif - kfree(pv_ptr); + + _close_pv(pv); + kfree(pv); + vg_ptr->pv[p] = NULL; return 0; } +static void __update_hardsectsize(lv_t *lv) +{ + int max_hardsectsize = 0, hardsectsize = 0; + int p; + + /* Check PVs first to see if they all have same sector size */ + for (p = 0; p < lv->vg->pv_cur; p++) { + pv_t *pv = lv->vg->pv[p]; + if (pv && (hardsectsize = lvm_sectsize(pv->pv_dev))) { + if (max_hardsectsize == 0) + max_hardsectsize = hardsectsize; + else if (hardsectsize != max_hardsectsize) { + P_DEV("%s PV[%d] (%s) sector size %d, not %d\n", + lv->lv_name, p, kdevname(pv->pv_dev), + hardsectsize, max_hardsectsize); + break; + } + } + } + + /* PVs have different block size, need to check each LE sector size */ + if (hardsectsize != max_hardsectsize) { + int le; + for (le = 0; le < lv->lv_allocated_le; le++) { + hardsectsize = lvm_sectsize(lv->lv_current_pe[le].dev); + if (hardsectsize > max_hardsectsize) { + P_DEV("%s LE[%d] (%s) blocksize %d not %d\n", + lv->lv_name, le, + kdevname(lv->lv_current_pe[le].dev), + hardsectsize, max_hardsectsize); + max_hardsectsize = hardsectsize; + } + } + + /* only perform this operation on active snapshots */ + if ((lv->lv_access & LV_SNAPSHOT) && + (lv->lv_status & LV_ACTIVE)) { + int e; + for (e = 0; e < lv->lv_remap_end; e++) { + hardsectsize = lvm_sectsize(lv->lv_block_exception[e].rdev_new); + if (hardsectsize > max_hardsectsize) + max_hardsectsize = hardsectsize; + } + } + } + + if (max_hardsectsize == 0) + max_hardsectsize = SECTOR_SIZE; + P_DEV("hardblocksize for LV %s is %d\n", + kdevname(lv->lv_dev), max_hardsectsize); + lvm_hardsectsizes[MINOR(lv->lv_dev)] = max_hardsectsize; +} + /* * character device support function logical volume create */ static int lvm_do_lv_create(int minor, char *lv_name, lv_t *lv) { - int e, ret, l, le, l_new, p, size; + int e, ret, l, le, l_new, p, size, activate = 1; ulong lv_status_save; lv_block_exception_t *lvbe = lv->lv_block_exception; vg_t *vg_ptr = vg[VG_CHR(minor)]; lv_t *lv_ptr = NULL; + pe_t *pep; - if ((pep = lv->lv_current_pe) == NULL) return -EINVAL; - if (lv->lv_chunk_size > LVM_SNAPSHOT_MAX_CHUNK) + if (!(pep = lv->lv_current_pe)) return -EINVAL; - for (l = 0; l < vg_ptr->lv_max; l++) { + if (_sectors_to_k(lv->lv_chunk_size) > LVM_SNAPSHOT_MAX_CHUNK) + return -EINVAL; + + for (l = 0; l < vg_ptr->lv_cur; l++) { if (vg_ptr->lv[l] != NULL && strcmp(vg_ptr->lv[l]->lv_name, lv_name) == 0) return -EEXIST; @@ -2151,23 +1947,26 @@ lv_status_save = lv_ptr->lv_status; 
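The snapshot chunk size check above now converts units before comparing: lv_chunk_size is carried in sectors, while LVM_SNAPSHOT_MAX_CHUNK appears to be expressed in kilobytes (LVM_SNAPSHOT_MIN_CHUNK is PAGE_SIZE/1024), which is what _sectors_to_k(), defined further down in this patch, reconciles. A small worked example, assuming 512-byte sectors; the concrete numbers are illustrative only.

/*
 * Illustration only: 128 sectors of 512 bytes are 64 KB, and it is the
 * 64, not the 128, that must be compared against LVM_SNAPSHOT_MAX_CHUNK.
 */
static void _chunk_size_example(void)
{
	ulong chunk_sectors = 128;			/* lv_chunk_size     */
	ulong chunk_kb = chunk_sectors / (1024 / 512);	/* _sectors_to_k: 64 */

	if (chunk_kb > LVM_SNAPSHOT_MAX_CHUNK)
		return;		/* lvm_do_lv_create() returns -EINVAL here */
}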
lv_ptr->lv_status &= ~LV_ACTIVE; - lv_ptr->lv_snapshot_org = \ - lv_ptr->lv_snapshot_prev = \ + lv_ptr->lv_snapshot_org = NULL; + lv_ptr->lv_snapshot_prev = NULL; lv_ptr->lv_snapshot_next = NULL; lv_ptr->lv_block_exception = NULL; lv_ptr->lv_iobuf = NULL; + lv_ptr->lv_COW_table_iobuf = NULL; lv_ptr->lv_snapshot_hash_table = NULL; lv_ptr->lv_snapshot_hash_table_size = 0; lv_ptr->lv_snapshot_hash_mask = 0; - lv_ptr->lv_COW_table_page = NULL; - init_MUTEX(&lv_ptr->lv_snapshot_sem); + init_rwsem(&lv_ptr->lv_lock); + lv_ptr->lv_snapshot_use_rate = 0; + vg_ptr->lv[l] = lv_ptr; /* get the PE structures from user space if this - is no snapshot logical volume */ + is not a snapshot logical volume */ if (!(lv_ptr->lv_access & LV_SNAPSHOT)) { size = lv_ptr->lv_allocated_le * sizeof(pe_t); + if ((lv_ptr->lv_current_pe = vmalloc(size)) == NULL) { printk(KERN_CRIT "%s -- LV_CREATE: vmalloc error LV_CURRENT_PE of %d Byte " @@ -2179,6 +1978,8 @@ return -ENOMEM; } if (copy_from_user(lv_ptr->lv_current_pe, pep, size)) { + P_IOCTL("ERROR: copying PE ptr %p (%d bytes)\n", + pep, sizeof(size)); vfree(lv_ptr->lv_current_pe); kfree(lv_ptr); vg_ptr->lv[l] = NULL; @@ -2200,6 +2001,15 @@ vg_ptr->lv[LV_BLK(lv_ptr->lv_snapshot_minor)]; if (lv_ptr->lv_snapshot_org != NULL) { size = lv_ptr->lv_remap_end * sizeof(lv_block_exception_t); + + if (!size) { + printk(KERN_WARNING + "%s -- zero length exception table requested\n", + lvm_name); + kfree(lv_ptr); + return -EINVAL; + } + if ((lv_ptr->lv_block_exception = vmalloc(size)) == NULL) { printk(KERN_CRIT "%s -- lvm_do_lv_create: vmalloc error LV_BLOCK_EXCEPTION " @@ -2217,6 +2027,16 @@ vg_ptr->lv[l] = NULL; return -EFAULT; } + + if(lv_ptr->lv_block_exception[0].rsector_org == + LVM_SNAPSHOT_DROPPED_SECTOR) + { + printk(KERN_WARNING + "%s -- lvm_do_lv_create: snapshot has been dropped and will not be activated\n", + lvm_name); + activate = 0; + } + /* point to the original logical volume */ lv_ptr = lv_ptr->lv_snapshot_org; @@ -2250,10 +2070,13 @@ lv_ptr->lv_block_exception[e].rsector_org, lv_ptr); /* need to fill the COW exception table data into the page for disk i/o */ - lvm_snapshot_fill_COW_page(vg_ptr, lv_ptr); + if(lvm_snapshot_fill_COW_page(vg_ptr, lv_ptr)) { + kfree(lv_ptr); + vg_ptr->lv[l] = NULL; + return -EINVAL; + } init_waitqueue_head(&lv_ptr->lv_snapshot_wait); } else { - vfree(lv_ptr->lv_block_exception); kfree(lv_ptr); vg_ptr->lv[l] = NULL; return -EFAULT; @@ -2274,22 +2097,9 @@ LVM_CORRECT_READ_AHEAD(lv_ptr->lv_read_ahead); vg_ptr->lv_cur++; lv_ptr->lv_status = lv_status_save; + lv_ptr->vg = vg_ptr; - { - char *lv_tmp, *lv_buf = lv->lv_name; - - strtok(lv->lv_name, "/"); /* /dev */ - while((lv_tmp = strtok(NULL, "/")) != NULL) - lv_buf = lv_tmp; - - lv_devfs_handle[lv->lv_number] = devfs_register( - vg_devfs_handle[vg_ptr->vg_number], lv_buf, - DEVFS_FL_DEFAULT, LVM_BLK_MAJOR, lv->lv_number, - S_IFBLK | S_IRUSR | S_IWUSR | S_IRGRP, - &lvm_blk_dops, NULL); - } - - lvm_do_create_proc_entry_of_lv ( vg_ptr, lv_ptr); + __update_hardsectsize(lv_ptr); /* optionally add our new snapshot LV */ if (lv_ptr->lv_access & LV_SNAPSHOT) { @@ -2302,19 +2112,24 @@ fsync_dev_lockfs(org->lv_dev); #endif - down(&org->lv_snapshot_sem); + down_write(&org->lv_lock); org->lv_access |= LV_SNAPSHOT_ORG; lv_ptr->lv_access &= ~LV_SNAPSHOT_ORG; /* this can only hide an userspace bug */ + /* Link in the list of snapshot volumes */ for (last = org; last->lv_snapshot_next; last = last->lv_snapshot_next); lv_ptr->lv_snapshot_prev = last; last->lv_snapshot_next = lv_ptr; - 
up(&org->lv_snapshot_sem); + up_write(&org->lv_lock); } /* activate the logical volume */ - lv_ptr->lv_status |= LV_ACTIVE; + if(activate) + lv_ptr->lv_status |= LV_ACTIVE; + else + lv_ptr->lv_status &= ~LV_ACTIVE; + if ( lv_ptr->lv_access & LV_WRITE) set_device_ro(lv_ptr->lv_dev, 0); else @@ -2322,13 +2137,12 @@ #ifdef LVM_VFS_ENHANCEMENT /* VFS function call to unlock the filesystem */ - if (lv_ptr->lv_access & LV_SNAPSHOT) { + if (lv_ptr->lv_access & LV_SNAPSHOT) unlockfs(lv_ptr->lv_snapshot_org->lv_dev); - } #endif - lv_ptr->vg = vg_ptr; - + lvm_gendisk.part[MINOR(lv_ptr->lv_dev)].de = + lvm_fs_create_lv(vg_ptr, lv_ptr); return 0; } /* lvm_do_lv_create() */ @@ -2366,13 +2180,15 @@ lv_ptr->lv_snapshot_next != NULL) return -EPERM; + lvm_fs_remove_lv(vg_ptr, lv_ptr); + if (lv_ptr->lv_access & LV_SNAPSHOT) { /* * Atomically make the the snapshot invisible * to the original lv before playing with it. */ lv_t * org = lv_ptr->lv_snapshot_org; - down(&org->lv_snapshot_sem); + down_write(&org->lv_lock); /* remove this snapshot logical volume from the chain */ lv_ptr->lv_snapshot_prev->lv_snapshot_next = lv_ptr->lv_snapshot_next; @@ -2380,11 +2196,13 @@ lv_ptr->lv_snapshot_next->lv_snapshot_prev = lv_ptr->lv_snapshot_prev; } - up(&org->lv_snapshot_sem); /* no more snapshots? */ - if (!org->lv_snapshot_next) + if (!org->lv_snapshot_next) { org->lv_access &= ~LV_SNAPSHOT_ORG; + } + up_write(&org->lv_lock); + lvm_snapshot_release(lv_ptr); /* Update the VG PE(s) used by snapshot reserve space. */ @@ -2404,6 +2222,7 @@ /* reset generic hd */ lvm_gendisk.part[MINOR(lv_ptr->lv_dev)].start_sect = -1; lvm_gendisk.part[MINOR(lv_ptr->lv_dev)].nr_sects = 0; + lvm_gendisk.part[MINOR(lv_ptr->lv_dev)].de = 0; lvm_size[MINOR(lv_ptr->lv_dev)] = 0; /* reset VG/LV mapping */ @@ -2427,10 +2246,6 @@ vfree(lv_ptr->lv_current_pe); } - devfs_unregister(lv_devfs_handle[lv_ptr->lv_number]); - - lvm_do_remove_proc_entry_of_lv ( vg_ptr, lv_ptr); - P_KFREE("%s -- kfree %d\n", lvm_name, __LINE__); kfree(lv_ptr); vg_ptr->lv[l] = NULL; @@ -2440,204 +2255,215 @@ /* - * character device support function logical volume extend / reduce + * logical volume extend / reduce */ -static int lvm_do_lv_extend_reduce(int minor, char *lv_name, lv_t *lv) -{ - ulong end, l, le, p, size, old_allocated_le; - vg_t *vg_ptr = vg[VG_CHR(minor)]; - lv_t *lv_ptr; - pe_t *pe; +static int __extend_reduce_snapshot(vg_t *vg_ptr, lv_t *old_lv, lv_t *new_lv) { + ulong size; + lv_block_exception_t *lvbe; - if ((pep = lv->lv_current_pe) == NULL) return -EINVAL; + if (!new_lv->lv_block_exception) + return -ENXIO; - for (l = 0; l < vg_ptr->lv_max; l++) { - if (vg_ptr->lv[l] != NULL && - strcmp(vg_ptr->lv[l]->lv_name, lv_name) == 0) - break; + size = new_lv->lv_remap_end * sizeof(lv_block_exception_t); + if ((lvbe = vmalloc(size)) == NULL) { + printk(KERN_CRIT + "%s -- lvm_do_lv_extend_reduce: vmalloc " + "error LV_BLOCK_EXCEPTION of %lu Byte at line %d\n", + lvm_name, size, __LINE__); + return -ENOMEM; } - if (l == vg_ptr->lv_max) return -ENXIO; - lv_ptr = vg_ptr->lv[l]; - /* check for active snapshot */ - if (lv->lv_access & LV_SNAPSHOT) - { - ulong e; - lv_block_exception_t *lvbe, *lvbe_old; - struct list_head * lvs_hash_table_old; - - if (lv->lv_block_exception == NULL) return -ENXIO; - size = lv->lv_remap_end * sizeof ( lv_block_exception_t); - if ((lvbe = vmalloc(size)) == NULL) - { - printk(KERN_CRIT - "%s -- lvm_do_lv_extend_reduce: vmalloc error LV_BLOCK_EXCEPTION " - "of %lu Byte at line %d\n", - lvm_name, size, __LINE__); - return -ENOMEM; - } - if 
(lv->lv_remap_end > lv_ptr->lv_remap_end) - { - if (copy_from_user(lvbe, lv->lv_block_exception, size)) - { - vfree(lvbe); - return -EFAULT; - } - } - - lvbe_old = lv_ptr->lv_block_exception; - lvs_hash_table_old = lv_ptr->lv_snapshot_hash_table; - - /* we need to play on the safe side here... */ - down(&lv_ptr->lv_snapshot_org->lv_snapshot_sem); - if (lv_ptr->lv_block_exception == NULL || - lv_ptr->lv_remap_ptr > lv_ptr->lv_remap_end) - { - up(&lv_ptr->lv_snapshot_org->lv_snapshot_sem); - vfree(lvbe); - return -EPERM; - } - memcpy(lvbe, - lv_ptr->lv_block_exception, - (lv->lv_remap_end > lv_ptr->lv_remap_end ? - lv_ptr->lv_remap_ptr : lv->lv_remap_end) * sizeof(lv_block_exception_t)); - - lv_ptr->lv_block_exception = lvbe; - lv_ptr->lv_remap_end = lv->lv_remap_end; - if (lvm_snapshot_alloc_hash_table(lv_ptr) != 0) - { - lvm_drop_snapshot(lv_ptr, "no memory for hash table"); - up(&lv_ptr->lv_snapshot_org->lv_snapshot_sem); - vfree(lvbe_old); - vfree(lvs_hash_table_old); - return -ENOMEM; - } - - for (e = 0; e < lv_ptr->lv_remap_ptr; e++) - lvm_hash_link (lv_ptr->lv_block_exception + e, - lv_ptr->lv_block_exception[e].rdev_org, - lv_ptr->lv_block_exception[e].rsector_org, lv_ptr); - - up(&lv_ptr->lv_snapshot_org->lv_snapshot_sem); - - vfree(lvbe_old); - vfree(lvs_hash_table_old); + if ((new_lv->lv_remap_end > old_lv->lv_remap_end) && + (copy_from_user(lvbe, new_lv->lv_block_exception, size))) { + vfree(lvbe); + return -EFAULT; + } + new_lv->lv_block_exception = lvbe; - return 0; + if (lvm_snapshot_alloc_hash_table(new_lv)) { + vfree(new_lv->lv_block_exception); + return -ENOMEM; } + return 0; +} - /* we drop in here in case it is an original logical volume */ - if ((pe = vmalloc(size = lv->lv_current_le * sizeof(pe_t))) == NULL) { +static int __extend_reduce(vg_t *vg_ptr, lv_t *old_lv, lv_t *new_lv) { + ulong size, l, p, end; + pe_t *pe; + + /* allocate space for new pe structures */ + size = new_lv->lv_current_le * sizeof(pe_t); + if ((pe = vmalloc(size)) == NULL) { printk(KERN_CRIT - "%s -- lvm_do_lv_extend_reduce: vmalloc error LV_CURRENT_PE " - "of %lu Byte at line %d\n", + "%s -- lvm_do_lv_extend_reduce: " + "vmalloc error LV_CURRENT_PE of %lu Byte at line %d\n", lvm_name, size, __LINE__); return -ENOMEM; } + /* get the PE structures from user space */ - if (copy_from_user(pe, pep, size)) { + if (copy_from_user(pe, new_lv->lv_current_pe, size)) { + if(old_lv->lv_access & LV_SNAPSHOT) + vfree(new_lv->lv_snapshot_hash_table); vfree(pe); return -EFAULT; } + new_lv->lv_current_pe = pe; + /* reduce allocation counters on PV(s) */ - for (le = 0; le < lv_ptr->lv_allocated_le; le++) { + for (l = 0; l < old_lv->lv_allocated_le; l++) { vg_ptr->pe_allocated--; for (p = 0; p < vg_ptr->pv_cur; p++) { if (vg_ptr->pv[p]->pv_dev == - lv_ptr->lv_current_pe[le].dev) { + old_lv->lv_current_pe[l].dev) { vg_ptr->pv[p]->pe_allocated--; break; } } } - - /* save pointer to "old" lv/pe pointer array */ - pep1 = lv_ptr->lv_current_pe; - end = lv_ptr->lv_current_le; - - /* save open counter... 
*/ - lv->lv_open = lv_ptr->lv_open; - lv->lv_snapshot_prev = lv_ptr->lv_snapshot_prev; - lv->lv_snapshot_next = lv_ptr->lv_snapshot_next; - lv->lv_snapshot_org = lv_ptr->lv_snapshot_org; - - lv->lv_current_pe = pe; - - /* save # of old allocated logical extents */ - old_allocated_le = lv_ptr->lv_allocated_le; - - /* copy preloaded LV */ - memcpy((char *) lv_ptr, (char *) lv, sizeof(lv_t)); - - lvm_gendisk.part[MINOR(lv_ptr->lv_dev)].start_sect = 0; - lvm_gendisk.part[MINOR(lv_ptr->lv_dev)].nr_sects = lv_ptr->lv_size; - lvm_size[MINOR(lv_ptr->lv_dev)] = lv_ptr->lv_size >> 1; - /* vg_lv_map array doesn't have to be changed here */ - - LVM_CORRECT_READ_AHEAD(lv_ptr->lv_read_ahead); + /* extend the PE count in PVs */ + for (l = 0; l < new_lv->lv_allocated_le; l++) { + vg_ptr->pe_allocated++; + for (p = 0; p < vg_ptr->pv_cur; p++) { + if (vg_ptr->pv[p]->pv_dev == + new_lv->lv_current_pe[l].dev) { + vg_ptr->pv[p]->pe_allocated++; + break; + } + } + } /* save availiable i/o statistic data */ - /* linear logical volume */ - if (lv_ptr->lv_stripes < 2) { - /* Check what last LE shall be used */ - if (end > lv_ptr->lv_current_le) end = lv_ptr->lv_current_le; - for (le = 0; le < end; le++) { - lv_ptr->lv_current_pe[le].reads += pep1[le].reads; - lv_ptr->lv_current_pe[le].writes += pep1[le].writes; + if (old_lv->lv_stripes < 2) { /* linear logical volume */ + end = min(old_lv->lv_current_le, new_lv->lv_current_le); + for (l = 0; l < end; l++) { + new_lv->lv_current_pe[l].reads += + old_lv->lv_current_pe[l].reads; + + new_lv->lv_current_pe[l].writes += + old_lv->lv_current_pe[l].writes; } - /* striped logical volume */ - } else { + + } else { /* striped logical volume */ uint i, j, source, dest, end, old_stripe_size, new_stripe_size; - old_stripe_size = old_allocated_le / lv_ptr->lv_stripes; - new_stripe_size = lv_ptr->lv_allocated_le / lv_ptr->lv_stripes; - end = old_stripe_size; - if (end > new_stripe_size) end = new_stripe_size; - for (i = source = dest = 0; - i < lv_ptr->lv_stripes; i++) { + old_stripe_size = old_lv->lv_allocated_le / old_lv->lv_stripes; + new_stripe_size = new_lv->lv_allocated_le / new_lv->lv_stripes; + end = min(old_stripe_size, new_stripe_size); + + for (i = source = dest = 0; i < new_lv->lv_stripes; i++) { for (j = 0; j < end; j++) { - lv_ptr->lv_current_pe[dest + j].reads += - pep1[source + j].reads; - lv_ptr->lv_current_pe[dest + j].writes += - pep1[source + j].writes; + new_lv->lv_current_pe[dest + j].reads += + old_lv->lv_current_pe[source + j].reads; + new_lv->lv_current_pe[dest + j].writes += + old_lv->lv_current_pe[source + j].writes; } source += old_stripe_size; dest += new_stripe_size; } } - /* extend the PE count in PVs */ - for (le = 0; le < lv_ptr->lv_allocated_le; le++) { - vg_ptr->pe_allocated++; - for (p = 0; p < vg_ptr->pv_cur; p++) { - if (vg_ptr->pv[p]->pv_dev == - lv_ptr->lv_current_pe[le].dev) { - vg_ptr->pv[p]->pe_allocated++; - break; - } - } - } + return 0; +} + +static int lvm_do_lv_extend_reduce(int minor, char *lv_name, lv_t *new_lv) +{ + int r; + ulong l, e, size; + vg_t *vg_ptr = vg[VG_CHR(minor)]; + lv_t *old_lv; + pe_t *pe; - vfree ( pep1); - pep1 = NULL; + if ((pe = new_lv->lv_current_pe) == NULL) + return -EINVAL; - if (lv->lv_access & LV_SNAPSHOT_ORG) - { - /* Correct the snapshot size information */ - while ((lv_ptr = lv_ptr->lv_snapshot_next) != NULL) - { - lv_ptr->lv_current_pe = lv_ptr->lv_snapshot_org->lv_current_pe; - lv_ptr->lv_allocated_le = lv_ptr->lv_snapshot_org->lv_allocated_le; - lv_ptr->lv_current_le = 
lv_ptr->lv_snapshot_org->lv_current_le; - lv_ptr->lv_size = lv_ptr->lv_snapshot_org->lv_size; - lvm_gendisk.part[MINOR(lv_ptr->lv_dev)].nr_sects = lv_ptr->lv_size; - lvm_size[MINOR(lv_ptr->lv_dev)] = lv_ptr->lv_size >> 1; + for (l = 0; l < vg_ptr->lv_max; l++) + if (vg_ptr->lv[l] && !strcmp(vg_ptr->lv[l]->lv_name, lv_name)) + break; + + if (l == vg_ptr->lv_max) + return -ENXIO; + + old_lv = vg_ptr->lv[l]; + + if (old_lv->lv_access & LV_SNAPSHOT) { + /* only perform this operation on active snapshots */ + if (old_lv->lv_status & LV_ACTIVE) + r = __extend_reduce_snapshot(vg_ptr, old_lv, new_lv); + else + r = -EPERM; + + } else + r = __extend_reduce(vg_ptr, old_lv, new_lv); + + if(r) + return r; + + /* copy relevent fields */ + down_write(&old_lv->lv_lock); + + if(new_lv->lv_access & LV_SNAPSHOT) { + size = (new_lv->lv_remap_end > old_lv->lv_remap_end) ? + old_lv->lv_remap_ptr : new_lv->lv_remap_end; + size *= sizeof(lv_block_exception_t); + memcpy(new_lv->lv_block_exception, + old_lv->lv_block_exception, size); + + old_lv->lv_remap_end = new_lv->lv_remap_end; + old_lv->lv_block_exception = new_lv->lv_block_exception; + old_lv->lv_snapshot_hash_table = + new_lv->lv_snapshot_hash_table; + old_lv->lv_snapshot_hash_table_size = + new_lv->lv_snapshot_hash_table_size; + old_lv->lv_snapshot_hash_mask = + new_lv->lv_snapshot_hash_mask; + + for (e = 0; e < new_lv->lv_remap_ptr; e++) + lvm_hash_link(new_lv->lv_block_exception + e, + new_lv->lv_block_exception[e].rdev_org, + new_lv->lv_block_exception[e].rsector_org, + new_lv); + + } else { + + vfree(old_lv->lv_current_pe); + vfree(old_lv->lv_snapshot_hash_table); + + old_lv->lv_size = new_lv->lv_size; + old_lv->lv_allocated_le = new_lv->lv_allocated_le; + old_lv->lv_current_le = new_lv->lv_current_le; + old_lv->lv_current_pe = new_lv->lv_current_pe; + lvm_gendisk.part[MINOR(old_lv->lv_dev)].nr_sects = + old_lv->lv_size; + lvm_size[MINOR(old_lv->lv_dev)] = old_lv->lv_size >> 1; + + if (old_lv->lv_access & LV_SNAPSHOT_ORG) { + lv_t *snap; + for(snap = old_lv->lv_snapshot_next; snap; + snap = snap->lv_snapshot_next) { + down_write(&snap->lv_lock); + snap->lv_current_pe = old_lv->lv_current_pe; + snap->lv_allocated_le = + old_lv->lv_allocated_le; + snap->lv_current_le = old_lv->lv_current_le; + snap->lv_size = old_lv->lv_size; + + lvm_gendisk.part[MINOR(snap->lv_dev)].nr_sects + = old_lv->lv_size; + lvm_size[MINOR(snap->lv_dev)] = + old_lv->lv_size >> 1; + __update_hardsectsize(snap); + up_write(&snap->lv_lock); + } } } + __update_hardsectsize(old_lv); + up_write(&old_lv->lv_lock); + return 0; } /* lvm_do_lv_extend_reduce() */ @@ -2648,10 +2474,10 @@ static int lvm_do_lv_status_byname(vg_t *vg_ptr, void *arg) { uint l; - ulong size; - lv_t lv; - lv_t *lv_ptr; lv_status_byname_req_t lv_status_byname_req; + void *saved_ptr1; + void *saved_ptr2; + lv_t *lv_ptr; if (vg_ptr == NULL) return -ENXIO; if (copy_from_user(&lv_status_byname_req, arg, @@ -2659,28 +2485,30 @@ return -EFAULT; if (lv_status_byname_req.lv == NULL) return -EINVAL; - if (copy_from_user(&lv, lv_status_byname_req.lv, - sizeof(lv_t)) != 0) - return -EFAULT; for (l = 0; l < vg_ptr->lv_max; l++) { - lv_ptr = vg_ptr->lv[l]; - if (lv_ptr != NULL && + if ((lv_ptr = vg_ptr->lv[l]) != NULL && strcmp(lv_ptr->lv_name, - lv_status_byname_req.lv_name) == 0) { - if (copy_to_user(lv_status_byname_req.lv, + lv_status_byname_req.lv_name) == 0) { + /* Save usermode pointers */ + if (copy_from_user(&saved_ptr1, &lv_status_byname_req.lv->lv_current_pe, sizeof(void*)) != 0) + return -EFAULT; + if 
(copy_from_user(&saved_ptr2, &lv_status_byname_req.lv->lv_block_exception, sizeof(void*)) != 0) + return -EFAULT; + if (copy_to_user(lv_status_byname_req.lv, lv_ptr, sizeof(lv_t)) != 0) return -EFAULT; - - if (lv.lv_current_pe != NULL) { - size = lv_ptr->lv_allocated_le * - sizeof(pe_t); - if (copy_to_user(lv.lv_current_pe, + if (saved_ptr1 != NULL) { + if (copy_to_user(saved_ptr1, lv_ptr->lv_current_pe, - size) != 0) + lv_ptr->lv_allocated_le * + sizeof(pe_t)) != 0) return -EFAULT; } + /* Restore usermode pointers */ + if (copy_to_user(&lv_status_byname_req.lv->lv_current_pe, &saved_ptr1, sizeof(void*)) != 0) + return -EFAULT; return 0; } } @@ -2693,34 +2521,41 @@ */ static int lvm_do_lv_status_byindex(vg_t *vg_ptr,void *arg) { - ulong size; - lv_t lv; - lv_t *lv_ptr; lv_status_byindex_req_t lv_status_byindex_req; + void *saved_ptr1; + void *saved_ptr2; + lv_t *lv_ptr; if (vg_ptr == NULL) return -ENXIO; if (copy_from_user(&lv_status_byindex_req, arg, sizeof(lv_status_byindex_req)) != 0) return -EFAULT; - if ((lvp = lv_status_byindex_req.lv) == NULL) + if (lv_status_byindex_req.lv == NULL) return -EINVAL; if ( ( lv_ptr = vg_ptr->lv[lv_status_byindex_req.lv_index]) == NULL) return -ENXIO; - if (copy_from_user(&lv, lvp, sizeof(lv_t)) != 0) - return -EFAULT; + /* Save usermode pointers */ + if (copy_from_user(&saved_ptr1, &lv_status_byindex_req.lv->lv_current_pe, sizeof(void*)) != 0) + return -EFAULT; + if (copy_from_user(&saved_ptr2, &lv_status_byindex_req.lv->lv_block_exception, sizeof(void*)) != 0) + return -EFAULT; - if (copy_to_user(lvp, lv_ptr, sizeof(lv_t)) != 0) + if (copy_to_user(lv_status_byindex_req.lv, lv_ptr, sizeof(lv_t)) != 0) return -EFAULT; - - if (lv.lv_current_pe != NULL) { - size = lv_ptr->lv_allocated_le * sizeof(pe_t); - if (copy_to_user(lv.lv_current_pe, - lv_ptr->lv_current_pe, - size) != 0) + if (saved_ptr1 != NULL) { + if (copy_to_user(saved_ptr1, + lv_ptr->lv_current_pe, + lv_ptr->lv_allocated_le * + sizeof(pe_t)) != 0) return -EFAULT; } + + /* Restore usermode pointers */ + if (copy_to_user(&lv_status_byindex_req.lv->lv_current_pe, &saved_ptr1, sizeof(void *)) != 0) + return -EFAULT; + return 0; } /* lvm_do_lv_status_byindex() */ @@ -2731,6 +2566,9 @@ static int lvm_do_lv_status_bydev(vg_t * vg_ptr, void * arg) { int l; lv_status_bydev_req_t lv_status_bydev_req; + void *saved_ptr1; + void *saved_ptr2; + lv_t *lv_ptr; if (vg_ptr == NULL) return -ENXIO; if (copy_from_user(&lv_status_bydev_req, arg, @@ -2743,10 +2581,26 @@ } if ( l == vg_ptr->lv_max) return -ENXIO; + lv_ptr = vg_ptr->lv[l]; - if (copy_to_user(lv_status_bydev_req.lv, - vg_ptr->lv[l], sizeof(lv_t)) != 0) + /* Save usermode pointers */ + if (copy_from_user(&saved_ptr1, &lv_status_bydev_req.lv->lv_current_pe, sizeof(void*)) != 0) + return -EFAULT; + if (copy_from_user(&saved_ptr2, &lv_status_bydev_req.lv->lv_block_exception, sizeof(void*)) != 0) + return -EFAULT; + + if (copy_to_user(lv_status_bydev_req.lv, lv_ptr, sizeof(lv_t)) != 0) return -EFAULT; + if (saved_ptr1 != NULL) { + if (copy_to_user(saved_ptr1, + lv_ptr->lv_current_pe, + lv_ptr->lv_allocated_le * + sizeof(pe_t)) != 0) + return -EFAULT; + } + /* Restore usermode pointers */ + if (copy_to_user(&lv_status_bydev_req.lv->lv_current_pe, &saved_ptr1, sizeof(void *)) != 0) + return -EFAULT; return 0; } /* lvm_do_lv_status_bydev() */ @@ -2766,11 +2620,9 @@ if ( (lv_ptr = vg_ptr->lv[l]) == NULL) continue; if (lv_ptr->lv_dev == lv->lv_dev) { - lvm_do_remove_proc_entry_of_lv ( vg_ptr, lv_ptr); - strncpy(lv_ptr->lv_name, - lv_req->lv_name, - NAME_LEN); - 
lvm_do_create_proc_entry_of_lv ( vg_ptr, lv_ptr); + lvm_fs_remove_lv(vg_ptr, lv_ptr); + strncpy(lv_ptr->lv_name, lv_req->lv_name, NAME_LEN); + lvm_fs_create_lv(vg_ptr, lv_ptr); break; } } @@ -2787,9 +2639,7 @@ { uint p; pv_t *pv_ptr; -#ifdef LVM_GET_INODE - struct inode *inode_sav; -#endif + struct block_device *bd; if (vg_ptr == NULL) return -ENXIO; if (copy_from_user(&pv_change_req, arg, @@ -2801,20 +2651,17 @@ if (pv_ptr != NULL && strcmp(pv_ptr->pv_name, pv_change_req.pv_name) == 0) { -#ifdef LVM_GET_INODE - inode_sav = pv_ptr->inode; -#endif + + bd = pv_ptr->bd; if (copy_from_user(pv_ptr, pv_change_req.pv, sizeof(pv_t)) != 0) return -EFAULT; + pv_ptr->bd = bd; /* We don't need the PE list in kernel space as with LVs pe_t list */ pv_ptr->pe = NULL; -#ifdef LVM_GET_INODE - pv_ptr->inode = inode_sav; -#endif return 0; } } @@ -2850,160 +2697,27 @@ } /* lvm_do_pv_status() */ - /* - * create a devfs entry for a volume group + * character device support function flush and invalidate all buffers of a PV */ -void lvm_do_create_devfs_entry_of_vg ( vg_t *vg_ptr) { - vg_devfs_handle[vg_ptr->vg_number] = devfs_mk_dir(0, vg_ptr->vg_name, NULL); - ch_devfs_handle[vg_ptr->vg_number] = devfs_register( - vg_devfs_handle[vg_ptr->vg_number] , "group", - DEVFS_FL_DEFAULT, LVM_CHAR_MAJOR, vg_ptr->vg_number, - S_IFCHR | S_IRUSR | S_IWUSR | S_IRGRP, - &lvm_chr_fops, NULL); -} - - -/* - * create a /proc entry for a logical volume - */ -void lvm_do_create_proc_entry_of_lv ( vg_t *vg_ptr, lv_t *lv_ptr) { - char *basename; - - if ( vg_ptr->lv_subdir_pde != NULL) { - basename = strrchr(lv_ptr->lv_name, '/'); - if (basename == NULL) basename = lv_ptr->lv_name; - else basename++; - pde = create_proc_entry(basename, S_IFREG, - vg_ptr->lv_subdir_pde); - if ( pde != NULL) { - pde->read_proc = lvm_proc_read_lv_info; - pde->data = lv_ptr; - } - } -} - - -/* - * remove a /proc entry for a logical volume - */ -void lvm_do_remove_proc_entry_of_lv ( vg_t *vg_ptr, lv_t *lv_ptr) { - char *basename; - - if ( vg_ptr->lv_subdir_pde != NULL) { - basename = strrchr(lv_ptr->lv_name, '/'); - if (basename == NULL) basename = lv_ptr->lv_name; - else basename++; - remove_proc_entry(basename, vg_ptr->lv_subdir_pde); - } -} - - -/* - * create a /proc entry for a physical volume - */ -void lvm_do_create_proc_entry_of_pv ( vg_t *vg_ptr, pv_t *pv_ptr) { - int offset = 0; - char *basename; - char buffer[NAME_LEN]; - - basename = pv_ptr->pv_name; - if (strncmp(basename, "/dev/", 5) == 0) offset = 5; - strncpy(buffer, basename + offset, sizeof(buffer)); - basename = buffer; - while ( ( basename = strchr ( basename, '/')) != NULL) *basename = '_'; - pde = create_proc_entry(buffer, S_IFREG, vg_ptr->pv_subdir_pde); - if ( pde != NULL) { - pde->read_proc = lvm_proc_read_pv_info; - pde->data = pv_ptr; - } -} - - -/* - * remove a /proc entry for a physical volume - */ -void lvm_do_remove_proc_entry_of_pv ( vg_t *vg_ptr, pv_t *pv_ptr) { - char *basename; - - basename = strrchr(pv_ptr->pv_name, '/'); - if ( vg_ptr->pv_subdir_pde != NULL) { - basename = strrchr(pv_ptr->pv_name, '/'); - if (basename == NULL) basename = pv_ptr->pv_name; - else basename++; - remove_proc_entry(basename, vg_ptr->pv_subdir_pde); - } -} - - -/* - * create a /proc entry for a volume group - */ -void lvm_do_create_proc_entry_of_vg ( vg_t *vg_ptr) { - int l, p; - pv_t *pv_ptr; - lv_t *lv_ptr; +static int lvm_do_pv_flush(void *arg) +{ + pv_flush_req_t pv_flush_req; - pde = create_proc_entry(vg_ptr->vg_name, S_IFDIR, - lvm_proc_vg_subdir); - if ( pde != NULL) { - 
vg_ptr->vg_dir_pde = pde; - pde = create_proc_entry("group", S_IFREG, - vg_ptr->vg_dir_pde); - if ( pde != NULL) { - pde->read_proc = lvm_proc_read_vg_info; - pde->data = vg_ptr; - } - pde = create_proc_entry(LVM_LV_SUBDIR, S_IFDIR, - vg_ptr->vg_dir_pde); - if ( pde != NULL) { - vg_ptr->lv_subdir_pde = pde; - for ( l = 0; l < vg_ptr->lv_max; l++) { - if ( ( lv_ptr = vg_ptr->lv[l]) == NULL) continue; - lvm_do_create_proc_entry_of_lv ( vg_ptr, lv_ptr); - } - } - pde = create_proc_entry(LVM_PV_SUBDIR, S_IFDIR, - vg_ptr->vg_dir_pde); - if ( pde != NULL) { - vg_ptr->pv_subdir_pde = pde; - for ( p = 0; p < vg_ptr->pv_max; p++) { - if ( ( pv_ptr = vg_ptr->pv[p]) == NULL) continue; - lvm_do_create_proc_entry_of_pv ( vg_ptr, pv_ptr); - } - } - } -} + if (copy_from_user(&pv_flush_req, arg, sizeof(pv_flush_req)) != 0) + return -EFAULT; -/* - * remove a /proc entry for a volume group - */ -void lvm_do_remove_proc_entry_of_vg ( vg_t *vg_ptr) { - int l, p; - lv_t *lv_ptr; - pv_t *pv_ptr; + fsync_dev(pv_flush_req.pv_dev); + invalidate_buffers(pv_flush_req.pv_dev); - for ( l = 0; l < vg_ptr->lv_max; l++) { - if ( ( lv_ptr = vg_ptr->lv[l]) == NULL) continue; - lvm_do_remove_proc_entry_of_lv ( vg_ptr, vg_ptr->lv[l]); - } - for ( p = 0; p < vg_ptr->pv_max; p++) { - if ( ( pv_ptr = vg_ptr->pv[p]) == NULL) continue; - lvm_do_remove_proc_entry_of_pv ( vg_ptr, vg_ptr->pv[p]); - } - if ( vg_ptr->vg_dir_pde != NULL) { - remove_proc_entry(LVM_LV_SUBDIR, vg_ptr->vg_dir_pde); - remove_proc_entry(LVM_PV_SUBDIR, vg_ptr->vg_dir_pde); - remove_proc_entry("group", vg_ptr->vg_dir_pde); - remove_proc_entry(vg_ptr->vg_name, lvm_proc_vg_subdir); - } + return 0; } /* * support function initialize gendisk variables */ -void __init lvm_geninit(struct gendisk *lvm_gdisk) +static void __init lvm_geninit(struct gendisk *lvm_gdisk) { int i = 0; @@ -3019,36 +2733,94 @@ blk_size[MAJOR_NR] = lvm_size; blksize_size[MAJOR_NR] = lvm_blocksizes; - hardsect_size[MAJOR_NR] = lvm_blocksizes; + hardsect_size[MAJOR_NR] = lvm_hardsectsizes; return; } /* lvm_gen_init() */ + +/* Must have down_write(_pe_lock) when we enqueue buffers */ +static void _queue_io(struct buffer_head *bh, int rw) { + if (bh->b_reqnext) BUG(); + bh->b_reqnext = _pe_requests; + _pe_requests = bh; +} + +/* Must have down_write(_pe_lock) when we dequeue buffers */ +static struct buffer_head *_dequeue_io(void) +{ + struct buffer_head *bh = _pe_requests; + _pe_requests = NULL; + return bh; +} + /* - * return a pointer to a '-' padded uuid + * We do not need to hold _pe_lock to flush buffers. bh should be taken from + * _pe_requests under down_write(_pe_lock), and then _pe_requests can be set + * NULL and we drop _pe_lock. Any new buffers defered at this time will be + * added to a new list, and the old buffers can have their I/O restarted + * asynchronously. + * + * If, for some reason, the same PE is locked again before all of these writes + * have finished, then these buffers will just be re-queued (i.e. no danger). 
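For readability, here is how the deferred-io helpers in this hunk fit together; it simply restates the UNLOCK_PE branch of lvm_do_pe_lock_unlock() earlier in the patch, so treat it as a usage sketch rather than additional code. Buffers are pushed and popped only under down_write(&_pe_lock); the resubmission happens after the lock is dropped.

/*
 * Usage sketch for _queue_io()/_dequeue_io()/_flush_io(), mirroring the
 * UNLOCK_PE path: grab the whole list atomically, then resubmit outside
 * the lock so new writes can be deferred onto a fresh list meanwhile.
 */
static void _drain_deferred_io_sketch(void)
{
	struct buffer_head *bh;

	down_write(&_pe_lock);
	pe_lock_req.lock = UNLOCK_PE;
	bh = _dequeue_io();		/* take the whole deferred list */
	up_write(&_pe_lock);

	_flush_io(bh);			/* re-enters lvm_map() via
					   generic_make_request(WRITE, bh) */
}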
*/ -static char *lvm_show_uuid ( char *uuidstr) { - int i, j; - static char uuid[NAME_LEN] = { 0, }; +static void _flush_io(struct buffer_head *bh) +{ + while (bh) { + struct buffer_head *next = bh->b_reqnext; + bh->b_reqnext = NULL; + /* resubmit this buffer head */ + generic_make_request(WRITE, bh); + bh = next; + } +} - memset ( uuid, 0, NAME_LEN); - i = 6; - memcpy ( uuid, uuidstr, i); - uuidstr += i; +/* + * we must open the pv's before we use them + */ +static int _open_pv(pv_t *pv) { + int err; + struct block_device *bd; + + if (!(bd = bdget(kdev_t_to_nr(pv->pv_dev)))) + return -ENOMEM; - for ( j = 0; j < 6; j++) { - uuid[i++] = '-'; - memcpy ( &uuid[i], uuidstr, 4); - uuidstr += 4; - i += 4; + err = blkdev_get(bd, FMODE_READ|FMODE_WRITE, 0, BDEV_FILE); + if (err) { + bdput(bd); + return err; } - memcpy ( &uuid[i], uuidstr, 2 ); + pv->bd = bd; + return 0; +} + +static void _close_pv(pv_t *pv) { + if(!pv || !pv->bd) + return; - return uuid; + blkdev_put(pv->bd, BDEV_FILE); + bdput(pv->bd); + pv->bd = 0; } + +static unsigned long _sectors_to_k(unsigned long sect) +{ + if(SECTOR_SIZE > 1024) { + return sect * (SECTOR_SIZE / 1024); + } + + return sect / (1024 / SECTOR_SIZE); +} + +MODULE_AUTHOR("Heinz Mauelshagen, Sistina Software"); +MODULE_DESCRIPTION("Logical Volume Manager"); +#ifdef MODULE_LICENSE +MODULE_LICENSE("GPL"); +#endif + module_init(lvm_init); module_exit(lvm_cleanup); --- linux/drivers/md/lvm-internal.h.orig Tue Nov 13 08:46:52 2001 +++ linux/drivers/md/lvm-internal.h Tue Nov 13 08:46:52 2001 @@ -0,0 +1,108 @@ + +/* + * kernel/lvm_internal.h + * + * Copyright (C) 2001 Sistina Software + * + * + * LVM driver is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * LVM driver is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU CC; see the file COPYING. If not, write to + * the Free Software Foundation, 59 Temple Place - Suite 330, + * Boston, MA 02111-1307, USA. + * + */ + +/* + * Changelog + * + * 05/01/2001 - Factored this file out of lvm.c (Joe Thornber) + * 11/01/2001 - Renamed lvm_internal and added declarations + * for lvm_fs.c stuff + * + */ + +#ifndef LVM_INTERNAL_H +#define LVM_INTERNAL_H + +#include + +#define _LVM_INTERNAL_H_VERSION "LVM "LVM_RELEASE_NAME" ("LVM_RELEASE_DATE")" + +/* global variables, defined in lvm.c */ +extern char *lvm_version; +extern ushort lvm_iop_version; +extern int loadtime; +extern const char *const lvm_name; + + +extern uint vg_count; +extern vg_t *vg[]; +extern struct file_operations lvm_chr_fops; + +extern struct block_device_operations lvm_blk_dops; + +#define lvm_sectsize(dev) get_hardsect_size(dev) + +/* 2.4.8 had no global min/max macros, and 2.4.9's were flawed */ + +/* debug macros */ +#ifdef DEBUG_IOCTL +#define P_IOCTL(fmt, args...) printk(KERN_DEBUG "lvm ioctl: " fmt, ## args) +#else +#define P_IOCTL(fmt, args...) +#endif + +#ifdef DEBUG_MAP +#define P_MAP(fmt, args...) printk(KERN_DEBUG "lvm map: " fmt, ## args) +#else +#define P_MAP(fmt, args...) +#endif + +#ifdef DEBUG_KFREE +#define P_KFREE(fmt, args...) 
printk(KERN_DEBUG "lvm kfree: " fmt, ## args) +#else +#define P_KFREE(fmt, args...) +#endif + +#ifdef DEBUG_DEVICE +#define P_DEV(fmt, args...) printk(KERN_DEBUG "lvm device: " fmt, ## args) +#else +#define P_DEV(fmt, args...) +#endif + + +/* lvm-snap.c */ +int lvm_get_blksize(kdev_t); +int lvm_snapshot_alloc(lv_t *); +int lvm_snapshot_fill_COW_page(vg_t *, lv_t *); +int lvm_snapshot_COW(kdev_t, ulong, ulong, ulong, vg_t *vg, lv_t *); +int lvm_snapshot_remap_block(kdev_t *, ulong *, ulong, lv_t *); +void lvm_snapshot_release(lv_t *); +int lvm_write_COW_table_block(vg_t *, lv_t *); +void lvm_hash_link(lv_block_exception_t *, kdev_t, ulong, lv_t *); +int lvm_snapshot_alloc_hash_table(lv_t *); +void lvm_drop_snapshot(vg_t *vg, lv_t *, const char *); + + +/* lvm_fs.c */ +void lvm_init_fs(void); +void lvm_fin_fs(void); + +void lvm_fs_create_vg(vg_t *vg_ptr); +void lvm_fs_remove_vg(vg_t *vg_ptr); +devfs_handle_t lvm_fs_create_lv(vg_t *vg_ptr, lv_t *lv); +void lvm_fs_remove_lv(vg_t *vg_ptr, lv_t *lv); +void lvm_fs_create_pv(vg_t *vg_ptr, pv_t *pv); +void lvm_fs_remove_pv(vg_t *vg_ptr, pv_t *pv); + +#endif --- linux/drivers/md/lvm-snap.c.orig Mon Sep 10 17:00:55 2001 +++ linux/drivers/md/lvm-snap.c Tue Nov 13 09:46:52 2001 @@ -2,22 +2,22 @@ * kernel/lvm-snap.c * * Copyright (C) 2000 Andrea Arcangeli SuSE - * Heinz Mauelshagen, Sistina Software (persistent snapshots) + * 2000 - 2001 Heinz Mauelshagen, Sistina Software * * LVM snapshot driver is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2, or (at your option) * any later version. - * + * * LVM snapshot driver is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. - * + * * You should have received a copy of the GNU General Public License * along with GNU CC; see the file COPYING. If not, write to * the Free Software Foundation, 59 Temple Place - Suite 330, - * Boston, MA 02111-1307, USA. + * Boston, MA 02111-1307, USA. * */ @@ -26,6 +26,21 @@ * * 05/07/2000 - implemented persistent snapshot support * 23/11/2000 - used cpu_to_le64 rather than my own macro + * 25/01/2001 - Put LockPage back in + * 01/02/2001 - A dropped snapshot is now set as inactive + * 14/02/2001 - tidied debug statements + * 19/02/2001 - changed rawio calls to pass in preallocated buffer_heads + * 26/02/2001 - introduced __brw_kiovec to remove a lot of conditional + * compiles. + * 07/03/2001 - fixed COW exception table not persistent on 2.2 (HM) + * 12/03/2001 - lvm_pv_get_number changes: + * o made it static + * o renamed it to _pv_get_number + * o pv number is returned in new uint * arg + * o -1 returned on error + * lvm_snapshot_fill_COW_table has a return value too. 
+ * 15/10/2001 - fix snapshot alignment problem [CM] + * - fix snapshot full oops (always check lv_block_exception) [CM] * */ @@ -36,28 +51,52 @@ #include #include #include +#include + +#include "lvm-internal.h" -#include "lvm-snap.h" +static char *lvm_snap_version __attribute__ ((unused)) = "LVM "LVM_RELEASE_NAME" snapshot code ("LVM_RELEASE_DATE")\n"; -static char *lvm_snap_version __attribute__ ((unused)) = "LVM 0.9.1_beta2 snapshot code (18/01/2001)\n"; extern const char *const lvm_name; extern int lvm_blocksizes[]; void lvm_snapshot_release(lv_t *); -uint lvm_pv_get_number(vg_t * vg, kdev_t rdev) +static int _write_COW_table_block(vg_t *vg, lv_t *lv, int idx, + const char **reason); +static void _disable_snapshot(vg_t *vg, lv_t *lv); + + +static inline int __brw_kiovec(int rw, int nr, struct kiobuf *iovec[], + kdev_t dev, unsigned long b[], int size, + lv_t *lv) { + return brw_kiovec(rw, nr, iovec, dev, b, size); +} + + +static int _pv_get_number(vg_t * vg, kdev_t rdev, uint *pvn) { uint p; + for (p = 0; p < vg->pv_max; p++) { + if (vg->pv[p] == NULL) + continue; - for ( p = 0; p < vg->pv_max; p++) - { - if ( vg->pv[p] == NULL) continue; - if ( vg->pv[p]->pv_dev == rdev) break; + if (vg->pv[p]->pv_dev == rdev) + break; } - return vg->pv[p]->pv_number; + if (p >= vg->pv_max) { + /* bad news, the snapshot COW table is probably corrupt */ + printk(KERN_ERR + "%s -- _pv_get_number failed for rdev = %u\n", + lvm_name, rdev); + return -1; + } + + *pvn = vg->pv[p]->pv_number; + return 0; } @@ -105,10 +144,20 @@ unsigned long mask = lv->lv_snapshot_hash_mask; int chunk_size = lv->lv_chunk_size; + if (!hash_table) + BUG(); hash_table = &hash_table[hashfn(org_dev, org_start, mask, chunk_size)]; list_add(&exception->hash, hash_table); } +/* + * Determine if we already have a snapshot chunk for this block. + * Return: 1 if it the chunk already exists + * 0 if we need to COW this block and allocate a new chunk + * -1 if the snapshot was disabled because it ran out of space + * + * We need to be holding at least a read lock on lv->lv_lock. 
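lvm_hash_link() above files every block exception under hashfn(org_dev, org_start, mask, chunk_size), so the remap path only ever has to walk a single bucket. The lookup helper itself lives in a part of lvm-snap.c not touched by this hunk; the sketch below only shows the shape of that walk, with org_start assumed to be already rounded down to a chunk boundary as lvm_snapshot_remap_block() does.

/*
 * Sketch of the bucket walk lvm_snapshot_remap_block() relies on; the
 * 'hash' list member and hashfn() arguments are the ones lvm_hash_link()
 * uses, but the real helper may be organised differently.
 */
static inline lv_block_exception_t *
_find_exception_sketch(lv_t *lv, kdev_t org_dev, ulong org_start)
{
	struct list_head *hash_table = lv->lv_snapshot_hash_table, *next;
	unsigned long mask = lv->lv_snapshot_hash_mask;
	int chunk_size = lv->lv_chunk_size;
	lv_block_exception_t *be;

	hash_table = &hash_table[hashfn(org_dev, org_start, mask, chunk_size)];

	for (next = hash_table->next; next != hash_table; next = next->next) {
		be = list_entry(next, lv_block_exception_t, hash);
		if (be->rdev_org == org_dev && be->rsector_org == org_start)
			return be;	/* chunk already copied */
	}
	return NULL;			/* first write: needs a COW */
}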
+ */ int lvm_snapshot_remap_block(kdev_t * org_dev, unsigned long * org_sector, unsigned long pe_start, lv_t * lv) { @@ -118,6 +167,9 @@ int chunk_size = lv->lv_chunk_size; lv_block_exception_t * exception; + if (!lv->lv_block_exception) + return -1; + pe_off = pe_start % chunk_size; pe_adjustment = (*org_sector-pe_off) % chunk_size; __org_start = *org_sector - pe_adjustment; @@ -133,7 +185,7 @@ return ret; } -void lvm_drop_snapshot(lv_t * lv_snap, const char * reason) +void lvm_drop_snapshot(vg_t *vg, lv_t *lv_snap, const char *reason) { kdev_t last_dev; int i; @@ -142,6 +194,9 @@ or error on this snapshot --> release it */ invalidate_buffers(lv_snap->lv_dev); + /* wipe the snapshot since it's inconsistent now */ + _disable_snapshot(vg, lv_snap); + for (i = last_dev = 0; i < lv_snap->lv_remap_ptr; i++) { if ( lv_snap->lv_block_exception[i].rdev_new != last_dev) { last_dev = lv_snap->lv_block_exception[i].rdev_new; @@ -150,26 +205,33 @@ } lvm_snapshot_release(lv_snap); + lv_snap->lv_status &= ~LV_ACTIVE; printk(KERN_INFO - "%s -- giving up to snapshot %s on %s due %s\n", + "%s -- giving up to snapshot %s on %s: %s\n", lvm_name, lv_snap->lv_snapshot_org->lv_name, lv_snap->lv_name, reason); } -static inline void lvm_snapshot_prepare_blocks(unsigned long * blocks, +static inline int lvm_snapshot_prepare_blocks(unsigned long *blocks, unsigned long start, int nr_sectors, int blocksize) { int i, sectors_per_block, nr_blocks; - sectors_per_block = blocksize >> 9; + sectors_per_block = blocksize / SECTOR_SIZE; + + if (start & (sectors_per_block - 1)) + return 0; + nr_blocks = nr_sectors / sectors_per_block; start /= sectors_per_block; for (i = 0; i < nr_blocks; i++) blocks[i] = start++; + + return 1; } inline int lvm_get_blksize(kdev_t dev) @@ -209,128 +271,61 @@ #endif -void lvm_snapshot_fill_COW_page(vg_t * vg, lv_t * lv_snap) +int lvm_snapshot_fill_COW_page(vg_t * vg, lv_t * lv_snap) { - int id = 0, is = lv_snap->lv_remap_ptr; - ulong blksize_snap; - lv_COW_table_disk_t * lv_COW_table = - ( lv_COW_table_disk_t *) page_address(lv_snap->lv_COW_table_page); + int id = 0, is = lv_snap->lv_remap_ptr; + ulong blksize_snap; + lv_COW_table_disk_t * lv_COW_table = (lv_COW_table_disk_t *) + page_address(lv_snap->lv_COW_table_iobuf->maplist[0]); + + if (is == 0) + return 0; - if (is == 0) return; is--; - blksize_snap = lvm_get_blksize(lv_snap->lv_block_exception[is].rdev_new); - is -= is % (blksize_snap / sizeof(lv_COW_table_disk_t)); + blksize_snap = + lvm_get_blksize(lv_snap->lv_block_exception[is].rdev_new); + is -= is % (blksize_snap / sizeof(lv_COW_table_disk_t)); memset(lv_COW_table, 0, blksize_snap); for ( ; is < lv_snap->lv_remap_ptr; is++, id++) { /* store new COW_table entry */ - lv_COW_table[id].pv_org_number = cpu_to_le64(lvm_pv_get_number(vg, lv_snap->lv_block_exception[is].rdev_org)); - lv_COW_table[id].pv_org_rsector = cpu_to_le64(lv_snap->lv_block_exception[is].rsector_org); - lv_COW_table[id].pv_snap_number = cpu_to_le64(lvm_pv_get_number(vg, lv_snap->lv_block_exception[is].rdev_new)); - lv_COW_table[id].pv_snap_rsector = cpu_to_le64(lv_snap->lv_block_exception[is].rsector_new); - } -} + lv_block_exception_t *be = lv_snap->lv_block_exception + is; + uint pvn; + if (_pv_get_number(vg, be->rdev_org, &pvn)) + goto bad; -/* - * writes a COW exception table sector to disk (HM) - * - */ - -int lvm_write_COW_table_block(vg_t * vg, lv_t * lv_snap) -{ - int blksize_snap; - int end_of_table; - int idx = lv_snap->lv_remap_ptr, idx_COW_table; - int nr_pages_tmp; - int length_tmp; - ulong 
snap_pe_start, COW_table_sector_offset, - COW_entries_per_pe, COW_chunks_per_pe, COW_entries_per_block; - const char * reason; - kdev_t snap_phys_dev; - struct kiobuf * iobuf = lv_snap->lv_iobuf; - struct page * page_tmp; - lv_COW_table_disk_t * lv_COW_table = - ( lv_COW_table_disk_t *) page_address(lv_snap->lv_COW_table_page); + lv_COW_table[id].pv_org_number = cpu_to_le64(pvn); + lv_COW_table[id].pv_org_rsector = cpu_to_le64(be->rsector_org); - idx--; + if (_pv_get_number(vg, be->rdev_new, &pvn)) + goto bad; - COW_chunks_per_pe = LVM_GET_COW_TABLE_CHUNKS_PER_PE(vg, lv_snap); - COW_entries_per_pe = LVM_GET_COW_TABLE_ENTRIES_PER_PE(vg, lv_snap); - - /* get physical addresse of destination chunk */ - snap_phys_dev = lv_snap->lv_block_exception[idx].rdev_new; - snap_pe_start = lv_snap->lv_block_exception[idx - (idx % COW_entries_per_pe)].rsector_new - lv_snap->lv_chunk_size; - - blksize_snap = lvm_get_blksize(snap_phys_dev); - - COW_entries_per_block = blksize_snap / sizeof(lv_COW_table_disk_t); - idx_COW_table = idx % COW_entries_per_pe % COW_entries_per_block; - - if ( idx_COW_table == 0) memset(lv_COW_table, 0, blksize_snap); - - /* sector offset into the on disk COW table */ - COW_table_sector_offset = (idx % COW_entries_per_pe) / (SECTOR_SIZE / sizeof(lv_COW_table_disk_t)); - - /* COW table block to write next */ - iobuf->blocks[0] = (snap_pe_start + COW_table_sector_offset) >> (blksize_snap >> 10); - - /* store new COW_table entry */ - lv_COW_table[idx_COW_table].pv_org_number = cpu_to_le64(lvm_pv_get_number(vg, lv_snap->lv_block_exception[idx].rdev_org)); - lv_COW_table[idx_COW_table].pv_org_rsector = cpu_to_le64(lv_snap->lv_block_exception[idx].rsector_org); - lv_COW_table[idx_COW_table].pv_snap_number = cpu_to_le64(lvm_pv_get_number(vg, snap_phys_dev)); - lv_COW_table[idx_COW_table].pv_snap_rsector = cpu_to_le64(lv_snap->lv_block_exception[idx].rsector_new); - - length_tmp = iobuf->length; - iobuf->length = blksize_snap; - page_tmp = iobuf->maplist[0]; - iobuf->maplist[0] = lv_snap->lv_COW_table_page; - nr_pages_tmp = iobuf->nr_pages; - iobuf->nr_pages = 1; - - if (brw_kiovec(WRITE, 1, &iobuf, snap_phys_dev, - iobuf->blocks, blksize_snap) != blksize_snap) - goto fail_raw_write; - - - /* initialization of next COW exception table block with zeroes */ - end_of_table = idx % COW_entries_per_pe == COW_entries_per_pe - 1; - if (idx_COW_table % COW_entries_per_block == COW_entries_per_block - 1 || end_of_table) - { - /* don't go beyond the end */ - if (idx + 1 >= lv_snap->lv_remap_end) goto good_out; - - memset(lv_COW_table, 0, blksize_snap); - - if (end_of_table) - { - idx++; - snap_phys_dev = lv_snap->lv_block_exception[idx].rdev_new; - snap_pe_start = lv_snap->lv_block_exception[idx - (idx % COW_entries_per_pe)].rsector_new - lv_snap->lv_chunk_size; - blksize_snap = lvm_get_blksize(snap_phys_dev); - iobuf->blocks[0] = snap_pe_start >> (blksize_snap >> 10); - } else iobuf->blocks[0]++; - - if (brw_kiovec(WRITE, 1, &iobuf, snap_phys_dev, - iobuf->blocks, blksize_snap) != blksize_snap) - goto fail_raw_write; + lv_COW_table[id].pv_snap_number = cpu_to_le64(pvn); + lv_COW_table[id].pv_snap_rsector = cpu_to_le64(be->rsector_new); } - - good_out: - iobuf->length = length_tmp; - iobuf->maplist[0] = page_tmp; - iobuf->nr_pages = nr_pages_tmp; return 0; - /* slow path */ - out: - lvm_drop_snapshot(lv_snap, reason); - return 1; + bad: + printk(KERN_ERR "%s -- lvm_snapshot_fill_COW_page failed", lvm_name); + return -1; +} - fail_raw_write: - reason = "write error"; - goto out; + +/* + * writes a 
COW exception table sector to disk (HM) + * + * We need to hold a write lock on lv_snap->lv_lock. + */ +int lvm_write_COW_table_block(vg_t * vg, lv_t *lv_snap) +{ + int r; + const char *err; + if((r = _write_COW_table_block(vg, lv_snap, + lv_snap->lv_remap_ptr - 1, &err))) + lvm_drop_snapshot(vg, lv_snap, err); + return r; } /* @@ -340,17 +335,23 @@ * if there is no exception storage space free any longer --> release snapshot. * * this routine gets called for each _first_ write to a physical chunk. + * + * We need to hold a write lock on lv_snap->lv_lock. It is assumed that + * lv->lv_block_exception is non-NULL (checked by lvm_snapshot_remap_block()) + * when this function is called. */ int lvm_snapshot_COW(kdev_t org_phys_dev, unsigned long org_phys_sector, unsigned long org_pe_start, unsigned long org_virt_sector, - lv_t * lv_snap) + vg_t *vg, lv_t* lv_snap) { const char * reason; unsigned long org_start, snap_start, snap_phys_dev, virt_start, pe_off; + unsigned long phys_start; int idx = lv_snap->lv_remap_ptr, chunk_size = lv_snap->lv_chunk_size; struct kiobuf * iobuf; + unsigned long blocks[KIO_MAX_SECTORS]; int blksize_snap, blksize_org, min_blksize, max_blksize; int max_sectors, nr_sectors; @@ -370,13 +371,11 @@ #ifdef DEBUG_SNAPSHOT printk(KERN_INFO "%s -- COW: " - "org %02d:%02d faulting %lu start %lu, " - "snap %02d:%02d start %lu, " + "org %s faulting %lu start %lu, snap %s start %lu, " "size %d, pe_start %lu pe_off %lu, virt_sec %lu\n", lvm_name, - MAJOR(org_phys_dev), MINOR(org_phys_dev), org_phys_sector, - org_start, - MAJOR(snap_phys_dev), MINOR(snap_phys_dev), snap_start, + kdevname(org_phys_dev), org_phys_sector, org_start, + kdevname(snap_phys_dev), snap_start, chunk_size, org_pe_start, pe_off, org_virt_sector); @@ -384,8 +383,8 @@ iobuf = lv_snap->lv_iobuf; - blksize_org = lvm_get_blksize(org_phys_dev); - blksize_snap = lvm_get_blksize(snap_phys_dev); + blksize_org = lvm_sectsize(org_phys_dev); + blksize_snap = lvm_sectsize(snap_phys_dev); max_blksize = max(blksize_org, blksize_snap); min_blksize = min(blksize_org, blksize_snap); max_sectors = KIO_MAX_SECTORS * (min_blksize>>9); @@ -393,6 +392,9 @@ if (chunk_size % (max_blksize>>9)) goto fail_blksize; + /* Don't change org_start, we need it to fill in the exception table */ + phys_start = org_start; + while (chunk_size) { nr_sectors = min(chunk_size, max_sectors); @@ -400,17 +402,24 @@ iobuf->length = nr_sectors << 9; - lvm_snapshot_prepare_blocks(iobuf->blocks, org_start, - nr_sectors, blksize_org); - if (brw_kiovec(READ, 1, &iobuf, org_phys_dev, - iobuf->blocks, blksize_org) != (nr_sectors<<9)) + if (!lvm_snapshot_prepare_blocks(blocks, phys_start, + nr_sectors, blksize_org)) + goto fail_prepare; + + if (__brw_kiovec(READ, 1, &iobuf, org_phys_dev, blocks, + blksize_org, lv_snap) != (nr_sectors<<9)) goto fail_raw_read; - lvm_snapshot_prepare_blocks(iobuf->blocks, snap_start, - nr_sectors, blksize_snap); - if (brw_kiovec(WRITE, 1, &iobuf, snap_phys_dev, - iobuf->blocks, blksize_snap) != (nr_sectors<<9)) + if (!lvm_snapshot_prepare_blocks(blocks, snap_start, + nr_sectors, blksize_snap)) + goto fail_prepare; + + if (__brw_kiovec(WRITE, 1, &iobuf, snap_phys_dev, blocks, + blksize_snap, lv_snap) != (nr_sectors<<9)) goto fail_raw_write; + + phys_start += nr_sectors; + snap_start += nr_sectors; } #ifdef DEBUG_SNAPSHOT @@ -434,52 +443,57 @@ return 0; /* slow path */ - out: - lvm_drop_snapshot(lv_snap, reason); +out: + lvm_drop_snapshot(vg, lv_snap, reason); return 1; - fail_out_of_space: +fail_out_of_space: reason = "out of 
space"; goto out; - fail_raw_read: +fail_raw_read: reason = "read error"; goto out; - fail_raw_write: +fail_raw_write: reason = "write error"; goto out; - fail_blksize: +fail_blksize: reason = "blocksize error"; goto out; + +fail_prepare: + reason = "couldn't prepare kiovec blocks " + "(start probably isn't block aligned)"; + goto out; } int lvm_snapshot_alloc_iobuf_pages(struct kiobuf * iobuf, int sectors) { int bytes, nr_pages, err, i; - bytes = sectors << 9; + bytes = sectors * SECTOR_SIZE; nr_pages = (bytes + ~PAGE_MASK) >> PAGE_SHIFT; err = expand_kiobuf(iobuf, nr_pages); - if (err) - goto out; + if (err) goto out; err = -ENOMEM; - iobuf->locked = 0; + iobuf->locked = 1; iobuf->nr_pages = 0; for (i = 0; i < nr_pages; i++) { struct page * page; page = alloc_page(GFP_KERNEL); - if (!page) - goto out; + if (!page) goto out; iobuf->maplist[i] = page; + LockPage(page); iobuf->nr_pages++; } iobuf->offset = 0; err = 0; - out: + +out: return err; } @@ -521,40 +535,46 @@ while (buckets--) INIT_LIST_HEAD(hash+buckets); err = 0; - out: +out: return err; } int lvm_snapshot_alloc(lv_t * lv_snap) { - int err, blocksize, max_sectors; + int ret, max_sectors; - err = alloc_kiovec(1, &lv_snap->lv_iobuf); - if (err) - goto out; + /* allocate kiovec to do chunk io */ + ret = alloc_kiovec(1, &lv_snap->lv_iobuf); + if (ret) goto out; - blocksize = lvm_blocksizes[MINOR(lv_snap->lv_dev)]; max_sectors = KIO_MAX_SECTORS << (PAGE_SHIFT-9); - err = lvm_snapshot_alloc_iobuf_pages(lv_snap->lv_iobuf, max_sectors); - if (err) - goto out_free_kiovec; + ret = lvm_snapshot_alloc_iobuf_pages(lv_snap->lv_iobuf, max_sectors); + if (ret) goto out_free_kiovec; - err = lvm_snapshot_alloc_hash_table(lv_snap); - if (err) - goto out_free_kiovec; + /* allocate kiovec to do exception table io */ + ret = alloc_kiovec(1, &lv_snap->lv_COW_table_iobuf); + if (ret) goto out_free_kiovec; + ret = lvm_snapshot_alloc_iobuf_pages(lv_snap->lv_COW_table_iobuf, + PAGE_SIZE/SECTOR_SIZE); + if (ret) goto out_free_both_kiovecs; - lv_snap->lv_COW_table_page = alloc_page(GFP_KERNEL); - if (!lv_snap->lv_COW_table_page) - goto out_free_kiovec; + ret = lvm_snapshot_alloc_hash_table(lv_snap); + if (ret) goto out_free_both_kiovecs; - out: - return err; +out: + return ret; + +out_free_both_kiovecs: + unmap_kiobuf(lv_snap->lv_COW_table_iobuf); + free_kiovec(1, &lv_snap->lv_COW_table_iobuf); + lv_snap->lv_COW_table_iobuf = NULL; - out_free_kiovec: +out_free_kiovec: unmap_kiobuf(lv_snap->lv_iobuf); free_kiovec(1, &lv_snap->lv_iobuf); + lv_snap->lv_iobuf = NULL; vfree(lv_snap->lv_snapshot_hash_table); lv_snap->lv_snapshot_hash_table = NULL; goto out; @@ -580,9 +600,125 @@ free_kiovec(1, &lv->lv_iobuf); lv->lv_iobuf = NULL; } - if (lv->lv_COW_table_page) + if (lv->lv_COW_table_iobuf) { - free_page((ulong)lv->lv_COW_table_page); - lv->lv_COW_table_page = NULL; + kiobuf_wait_for_io(lv->lv_COW_table_iobuf); + unmap_kiobuf(lv->lv_COW_table_iobuf); + free_kiovec(1, &lv->lv_COW_table_iobuf); + lv->lv_COW_table_iobuf = NULL; + } +} + + +static int _write_COW_table_block(vg_t *vg, lv_t *lv_snap, + int idx, const char **reason) { + int blksize_snap; + int end_of_table; + int idx_COW_table; + uint pvn; + ulong snap_pe_start, COW_table_sector_offset, + COW_entries_per_pe, COW_chunks_per_pe, COW_entries_per_block; + ulong blocks[1]; + kdev_t snap_phys_dev; + lv_block_exception_t *be; + struct kiobuf *COW_table_iobuf = lv_snap->lv_COW_table_iobuf; + lv_COW_table_disk_t * lv_COW_table = + ( lv_COW_table_disk_t *) page_address(lv_snap->lv_COW_table_iobuf->maplist[0]); + + 
COW_chunks_per_pe = LVM_GET_COW_TABLE_CHUNKS_PER_PE(vg, lv_snap); + COW_entries_per_pe = LVM_GET_COW_TABLE_ENTRIES_PER_PE(vg, lv_snap); + + /* get physical addresse of destination chunk */ + snap_phys_dev = lv_snap->lv_block_exception[idx].rdev_new; + snap_pe_start = lv_snap->lv_block_exception[idx - (idx % COW_entries_per_pe)].rsector_new - lv_snap->lv_chunk_size; + + blksize_snap = lvm_sectsize(snap_phys_dev); + + COW_entries_per_block = blksize_snap / sizeof(lv_COW_table_disk_t); + idx_COW_table = idx % COW_entries_per_pe % COW_entries_per_block; + + if ( idx_COW_table == 0) memset(lv_COW_table, 0, blksize_snap); + + /* sector offset into the on disk COW table */ + COW_table_sector_offset = (idx % COW_entries_per_pe) / (SECTOR_SIZE / sizeof(lv_COW_table_disk_t)); + + /* COW table block to write next */ + blocks[0] = (snap_pe_start + COW_table_sector_offset) >> (blksize_snap >> 10); + + /* store new COW_table entry */ + be = lv_snap->lv_block_exception + idx; + if(_pv_get_number(vg, be->rdev_org, &pvn)) + goto fail_pv_get_number; + + lv_COW_table[idx_COW_table].pv_org_number = cpu_to_le64(pvn); + lv_COW_table[idx_COW_table].pv_org_rsector = + cpu_to_le64(be->rsector_org); + if(_pv_get_number(vg, snap_phys_dev, &pvn)) + goto fail_pv_get_number; + + lv_COW_table[idx_COW_table].pv_snap_number = cpu_to_le64(pvn); + lv_COW_table[idx_COW_table].pv_snap_rsector = + cpu_to_le64(be->rsector_new); + + COW_table_iobuf->length = blksize_snap; + /* COW_table_iobuf->nr_pages = 1; */ + + if (__brw_kiovec(WRITE, 1, &COW_table_iobuf, snap_phys_dev, + blocks, blksize_snap, lv_snap) != blksize_snap) + goto fail_raw_write; + + /* initialization of next COW exception table block with zeroes */ + end_of_table = idx % COW_entries_per_pe == COW_entries_per_pe - 1; + if (idx_COW_table % COW_entries_per_block == COW_entries_per_block - 1 || end_of_table) + { + /* don't go beyond the end */ + if (idx + 1 >= lv_snap->lv_remap_end) goto out; + + memset(lv_COW_table, 0, blksize_snap); + + if (end_of_table) + { + idx++; + snap_phys_dev = lv_snap->lv_block_exception[idx].rdev_new; + snap_pe_start = lv_snap->lv_block_exception[idx - (idx % COW_entries_per_pe)].rsector_new - lv_snap->lv_chunk_size; + blksize_snap = lvm_sectsize(snap_phys_dev); + blocks[0] = snap_pe_start >> (blksize_snap >> 10); + } else blocks[0]++; + + if (__brw_kiovec(WRITE, 1, &COW_table_iobuf, snap_phys_dev, + blocks, blksize_snap, lv_snap) != + blksize_snap) + goto fail_raw_write; + } + +out: + return 0; + +fail_raw_write: + *reason = "write error"; + return 1; + +fail_pv_get_number: + *reason = "_pv_get_number failed"; + return 1; +} + +/* + * FIXME_1.2 + * This function is a bit of a hack; we need to ensure that the + * snapshot is never made active again, because it will surely be + * corrupt. At the moment we do not have access to the LVM metadata + * from within the kernel. So we set the first exception to point to + * sector 1 (which will always be within the metadata, and as such + * invalid). User land tools will check for this when they are asked + * to activate the snapshot and prevent this from happening. 
+ */ + +static void _disable_snapshot(vg_t *vg, lv_t *lv) { + const char *err; + lv->lv_block_exception[0].rsector_org = LVM_SNAPSHOT_DROPPED_SECTOR; + if(_write_COW_table_block(vg, lv, 0, &err) < 0) { + printk(KERN_ERR "%s -- couldn't disable snapshot: %s\n", + lvm_name, err); } } --- linux/drivers/md/lvm-fs.c.orig Tue Nov 13 08:46:52 2001 +++ linux/drivers/md/lvm-fs.c Tue Nov 13 08:46:52 2001 @@ -0,0 +1,619 @@ +/* + * kernel/lvm-fs.c + * + * Copyright (C) 2001 Sistina Software + * + * January-April 2001 + * + * LVM driver is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * LVM driver is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU CC; see the file COPYING. If not, write to + * the Free Software Foundation, 59 Temple Place - Suite 330, + * Boston, MA 02111-1307, USA. + * + */ + +/* + * Changelog + * + * 11/01/2001 - First version (Joe Thornber) + * 21/03/2001 - added display of stripes and stripe size (HM) + * 04/10/2001 - corrected devfs_register() call in lvm_init_fs() + * 11/04/2001 - don't devfs_register("lvm") as user-space always does it + * 10/05/2001 - show more of PV name in /proc/lvm/global + * + */ + +#include +#include + +#include +#include +#include + +#include +#include +#include +#include + +#include "lvm-internal.h" + + +static int _proc_read_vg(char *page, char **start, off_t off, + int count, int *eof, void *data); +static int _proc_read_lv(char *page, char **start, off_t off, + int count, int *eof, void *data); +static int _proc_read_pv(char *page, char **start, off_t off, + int count, int *eof, void *data); +static int _proc_read_global(char *page, char **start, off_t off, + int count, int *eof, void *data); + +static int _vg_info(vg_t *vg_ptr, char *buf); +static int _lv_info(vg_t *vg_ptr, lv_t *lv_ptr, char *buf); +static int _pv_info(pv_t *pv_ptr, char *buf); + +static void _show_uuid(const char *src, char *b, char *e); + +#if 0 +static devfs_handle_t lvm_devfs_handle; +#endif +static devfs_handle_t vg_devfs_handle[MAX_VG]; +static devfs_handle_t ch_devfs_handle[MAX_VG]; +static devfs_handle_t lv_devfs_handle[MAX_LV]; + +static struct proc_dir_entry *lvm_proc_dir = NULL; +static struct proc_dir_entry *lvm_proc_vg_subdir = NULL; + +/* inline functions */ + +/* public interface */ +void __init lvm_init_fs() { + struct proc_dir_entry *pde; + +/* User-space has already registered this */ +#if 0 + lvm_devfs_handle = devfs_register( + 0 , "lvm", 0, LVM_CHAR_MAJOR, 0, + S_IFCHR | S_IRUSR | S_IWUSR | S_IRGRP, + &lvm_chr_fops, NULL); +#endif + lvm_proc_dir = create_proc_entry(LVM_DIR, S_IFDIR, &proc_root); + if (lvm_proc_dir) { + lvm_proc_vg_subdir = create_proc_entry(LVM_VG_SUBDIR, S_IFDIR, + lvm_proc_dir); + pde = create_proc_entry(LVM_GLOBAL, S_IFREG, lvm_proc_dir); + if ( pde != NULL) pde->read_proc = _proc_read_global; + } +} + +void lvm_fin_fs() { +#if 0 + devfs_unregister (lvm_devfs_handle); +#endif + remove_proc_entry(LVM_GLOBAL, lvm_proc_dir); + remove_proc_entry(LVM_VG_SUBDIR, lvm_proc_dir); + remove_proc_entry(LVM_DIR, &proc_root); +} + +void lvm_fs_create_vg(vg_t *vg_ptr) { + struct proc_dir_entry *pde; + + 
vg_devfs_handle[vg_ptr->vg_number] = + devfs_mk_dir(0, vg_ptr->vg_name, NULL); + + ch_devfs_handle[vg_ptr->vg_number] = devfs_register( + vg_devfs_handle[vg_ptr->vg_number] , "group", + DEVFS_FL_DEFAULT, LVM_CHAR_MAJOR, vg_ptr->vg_number, + S_IFCHR | S_IRUSR | S_IWUSR | S_IRGRP, + &lvm_chr_fops, NULL); + + vg_ptr->vg_dir_pde = create_proc_entry(vg_ptr->vg_name, S_IFDIR, + lvm_proc_vg_subdir); + + if((pde = create_proc_entry("group", S_IFREG, vg_ptr->vg_dir_pde))) { + pde->read_proc = _proc_read_vg; + pde->data = vg_ptr; + } + + vg_ptr->lv_subdir_pde = + create_proc_entry(LVM_LV_SUBDIR, S_IFDIR, vg_ptr->vg_dir_pde); + + vg_ptr->pv_subdir_pde = + create_proc_entry(LVM_PV_SUBDIR, S_IFDIR, vg_ptr->vg_dir_pde); +} + +void lvm_fs_remove_vg(vg_t *vg_ptr) { + int i; + + devfs_unregister(ch_devfs_handle[vg_ptr->vg_number]); + devfs_unregister(vg_devfs_handle[vg_ptr->vg_number]); + + /* remove lv's */ + for(i = 0; i < vg_ptr->lv_max; i++) + if(vg_ptr->lv[i]) lvm_fs_remove_lv(vg_ptr, vg_ptr->lv[i]); + + /* remove pv's */ + for(i = 0; i < vg_ptr->pv_max; i++) + if(vg_ptr->pv[i]) lvm_fs_remove_pv(vg_ptr, vg_ptr->pv[i]); + + if(vg_ptr->vg_dir_pde) { + remove_proc_entry(LVM_LV_SUBDIR, vg_ptr->vg_dir_pde); + vg_ptr->lv_subdir_pde = NULL; + + remove_proc_entry(LVM_PV_SUBDIR, vg_ptr->vg_dir_pde); + vg_ptr->pv_subdir_pde = NULL; + + remove_proc_entry("group", vg_ptr->vg_dir_pde); + vg_ptr->vg_dir_pde = NULL; + + remove_proc_entry(vg_ptr->vg_name, lvm_proc_vg_subdir); + } +} + + +static inline const char *_basename(const char *str) { + const char *name = strrchr(str, '/'); + name = name ? name + 1 : str; + return name; +} + +devfs_handle_t lvm_fs_create_lv(vg_t *vg_ptr, lv_t *lv) { + struct proc_dir_entry *pde; + const char *name = _basename(lv->lv_name); + + lv_devfs_handle[MINOR(lv->lv_dev)] = devfs_register( + vg_devfs_handle[vg_ptr->vg_number], name, + DEVFS_FL_DEFAULT, LVM_BLK_MAJOR, MINOR(lv->lv_dev), + S_IFBLK | S_IRUSR | S_IWUSR | S_IRGRP, + &lvm_blk_dops, NULL); + + if(vg_ptr->lv_subdir_pde && + (pde = create_proc_entry(name, S_IFREG, vg_ptr->lv_subdir_pde))) { + pde->read_proc = _proc_read_lv; + pde->data = lv; + } + return lv_devfs_handle[MINOR(lv->lv_dev)]; +} + +void lvm_fs_remove_lv(vg_t *vg_ptr, lv_t *lv) { + devfs_unregister(lv_devfs_handle[MINOR(lv->lv_dev)]); + + if(vg_ptr->lv_subdir_pde) { + const char *name = _basename(lv->lv_name); + remove_proc_entry(name, vg_ptr->lv_subdir_pde); + } +} + + +static inline void _make_pv_name(const char *src, char *b, char *e) { + int offset = strlen(LVM_DIR_PREFIX); + if(strncmp(src, LVM_DIR_PREFIX, offset)) + offset = 0; + + e--; + src += offset; + while(*src && (b != e)) { + *b++ = (*src == '/') ? 
'_' : *src; + src++; + } + *b = '\0'; +} + +void lvm_fs_create_pv(vg_t *vg_ptr, pv_t *pv) { + struct proc_dir_entry *pde; + char name[NAME_LEN]; + + if(!vg_ptr->pv_subdir_pde) + return; + + _make_pv_name(pv->pv_name, name, name + sizeof(name)); + if((pde = create_proc_entry(name, S_IFREG, vg_ptr->pv_subdir_pde))) { + pde->read_proc = _proc_read_pv; + pde->data = pv; + } +} + +void lvm_fs_remove_pv(vg_t *vg_ptr, pv_t *pv) { + char name[NAME_LEN]; + + if(!vg_ptr->pv_subdir_pde) + return; + + _make_pv_name(pv->pv_name, name, name + sizeof(name)); + remove_proc_entry(name, vg_ptr->pv_subdir_pde); +} + + +static int _proc_read_vg(char *page, char **start, off_t off, + int count, int *eof, void *data) { + int sz = 0; + vg_t *vg_ptr = data; + char uuid[NAME_LEN]; + + sz += sprintf(page + sz, "name: %s\n", vg_ptr->vg_name); + sz += sprintf(page + sz, "size: %u\n", + vg_ptr->pe_total * vg_ptr->pe_size / 2); + sz += sprintf(page + sz, "access: %u\n", vg_ptr->vg_access); + sz += sprintf(page + sz, "status: %u\n", vg_ptr->vg_status); + sz += sprintf(page + sz, "number: %u\n", vg_ptr->vg_number); + sz += sprintf(page + sz, "LV max: %u\n", vg_ptr->lv_max); + sz += sprintf(page + sz, "LV current: %u\n", vg_ptr->lv_cur); + sz += sprintf(page + sz, "LV open: %u\n", vg_ptr->lv_open); + sz += sprintf(page + sz, "PV max: %u\n", vg_ptr->pv_max); + sz += sprintf(page + sz, "PV current: %u\n", vg_ptr->pv_cur); + sz += sprintf(page + sz, "PV active: %u\n", vg_ptr->pv_act); + sz += sprintf(page + sz, "PE size: %u\n", vg_ptr->pe_size / 2); + sz += sprintf(page + sz, "PE total: %u\n", vg_ptr->pe_total); + sz += sprintf(page + sz, "PE allocated: %u\n", vg_ptr->pe_allocated); + + _show_uuid(vg_ptr->vg_uuid, uuid, uuid + sizeof(uuid)); + sz += sprintf(page + sz, "uuid: %s\n", uuid); + + return sz; +} + +static int _proc_read_lv(char *page, char **start, off_t off, + int count, int *eof, void *data) { + int sz = 0; + lv_t *lv = data; + + sz += sprintf(page + sz, "name: %s\n", lv->lv_name); + sz += sprintf(page + sz, "size: %u\n", lv->lv_size); + sz += sprintf(page + sz, "access: %u\n", lv->lv_access); + sz += sprintf(page + sz, "status: %u\n", lv->lv_status); + sz += sprintf(page + sz, "number: %u\n", lv->lv_number); + sz += sprintf(page + sz, "open: %u\n", lv->lv_open); + sz += sprintf(page + sz, "allocation: %u\n", lv->lv_allocation); + if(lv->lv_stripes > 1) { + sz += sprintf(page + sz, "stripes: %u\n", + lv->lv_stripes); + sz += sprintf(page + sz, "stripesize: %u\n", + lv->lv_stripesize); + } + sz += sprintf(page + sz, "device: %02u:%02u\n", + MAJOR(lv->lv_dev), MINOR(lv->lv_dev)); + + return sz; +} + +static int _proc_read_pv(char *page, char **start, off_t off, + int count, int *eof, void *data) { + int sz = 0; + pv_t *pv = data; + char uuid[NAME_LEN]; + + sz += sprintf(page + sz, "name: %s\n", pv->pv_name); + sz += sprintf(page + sz, "size: %u\n", pv->pv_size); + sz += sprintf(page + sz, "status: %u\n", pv->pv_status); + sz += sprintf(page + sz, "number: %u\n", pv->pv_number); + sz += sprintf(page + sz, "allocatable: %u\n", pv->pv_allocatable); + sz += sprintf(page + sz, "LV current: %u\n", pv->lv_cur); + sz += sprintf(page + sz, "PE size: %u\n", pv->pe_size / 2); + sz += sprintf(page + sz, "PE total: %u\n", pv->pe_total); + sz += sprintf(page + sz, "PE allocated: %u\n", pv->pe_allocated); + sz += sprintf(page + sz, "device: %02u:%02u\n", + MAJOR(pv->pv_dev), MINOR(pv->pv_dev)); + + _show_uuid(pv->pv_uuid, uuid, uuid + sizeof(uuid)); + sz += sprintf(page + sz, "uuid: %s\n", uuid); + + return sz; +} + +static int 
_proc_read_global(char *page, char **start, off_t pos, int count, + int *eof, void *data) { + +#define LVM_PROC_BUF ( i == 0 ? dummy_buf : &buf[sz]) + + int c, i, l, p, v, vg_counter, pv_counter, lv_counter, lv_open_counter, + lv_open_total, pe_t_bytes, hash_table_bytes, lv_block_exception_t_bytes, seconds; + static off_t sz; + off_t sz_last; + static char *buf = NULL; + static char dummy_buf[160]; /* sized for 2 lines */ + vg_t *vg_ptr; + lv_t *lv_ptr; + pv_t *pv_ptr; + + +#ifdef DEBUG_LVM_PROC_GET_INFO + printk(KERN_DEBUG + "%s - lvm_proc_get_global_info CALLED pos: %lu count: %d\n", + lvm_name, pos, count); +#endif + + if(pos != 0 && buf != NULL) + goto out; + + sz_last = vg_counter = pv_counter = lv_counter = lv_open_counter = \ + lv_open_total = pe_t_bytes = hash_table_bytes = \ + lv_block_exception_t_bytes = 0; + + /* get some statistics */ + for (v = 0; v < ABS_MAX_VG; v++) { + if ((vg_ptr = vg[v]) != NULL) { + vg_counter++; + pv_counter += vg_ptr->pv_cur; + lv_counter += vg_ptr->lv_cur; + if (vg_ptr->lv_cur > 0) { + for (l = 0; l < vg[v]->lv_max; l++) { + if ((lv_ptr = vg_ptr->lv[l]) != NULL) { + pe_t_bytes += lv_ptr->lv_allocated_le; + hash_table_bytes += lv_ptr->lv_snapshot_hash_table_size; + if (lv_ptr->lv_block_exception != NULL) + lv_block_exception_t_bytes += lv_ptr->lv_remap_end; + if (lv_ptr->lv_open > 0) { + lv_open_counter++; + lv_open_total += lv_ptr->lv_open; + } + } + } + } + } + } + + pe_t_bytes *= sizeof(pe_t); + lv_block_exception_t_bytes *= sizeof(lv_block_exception_t); + + if (buf != NULL) { + P_KFREE("%s -- vfree %d\n", lvm_name, __LINE__); + lock_kernel(); + vfree(buf); + unlock_kernel(); + buf = NULL; + } + /* 2 times: first to get size to allocate buffer, + 2nd to fill the malloced buffer */ + for (i = 0; i < 2; i++) { + sz = 0; + sz += sprintf(LVM_PROC_BUF, + "LVM " +#ifdef MODULE + "module" +#else + "driver" +#endif + " %s\n\n" + "Total: %d VG%s %d PV%s %d LV%s ", + lvm_version, + vg_counter, vg_counter == 1 ? "" : "s", + pv_counter, pv_counter == 1 ? "" : "s", + lv_counter, lv_counter == 1 ? "" : "s"); + sz += sprintf(LVM_PROC_BUF, + "(%d LV%s open", + lv_open_counter, + lv_open_counter == 1 ? "" : "s"); + if (lv_open_total > 0) + sz += sprintf(LVM_PROC_BUF, + " %d times)\n", + lv_open_total); + else + sz += sprintf(LVM_PROC_BUF, ")"); + sz += sprintf(LVM_PROC_BUF, + "\nGlobal: %lu bytes malloced IOP version: %d ", + vg_counter * sizeof(vg_t) + + pv_counter * sizeof(pv_t) + + lv_counter * sizeof(lv_t) + + pe_t_bytes + hash_table_bytes + lv_block_exception_t_bytes + sz_last, + lvm_iop_version); + + seconds = CURRENT_TIME - loadtime; + if (seconds < 0) + loadtime = CURRENT_TIME + seconds; + if (seconds / 86400 > 0) { + sz += sprintf(LVM_PROC_BUF, "%d day%s ", + seconds / 86400, + seconds / 86400 == 0 || + seconds / 86400 > 1 ? "s" : ""); + } + sz += sprintf(LVM_PROC_BUF, "%d:%02d:%02d active\n", + (seconds % 86400) / 3600, + (seconds % 3600) / 60, + seconds % 60); + + if (vg_counter > 0) { + for (v = 0; v < ABS_MAX_VG; v++) { + /* volume group */ + if ((vg_ptr = vg[v]) != NULL) { + sz += _vg_info(vg_ptr, LVM_PROC_BUF); + + /* physical volumes */ + sz += sprintf(LVM_PROC_BUF, + "\n PV%s ", + vg_ptr->pv_cur == 1 ? ": " : "s:"); + c = 0; + for (p = 0; p < vg_ptr->pv_max; p++) { + if ((pv_ptr = vg_ptr->pv[p]) != NULL) { + sz += _pv_info(pv_ptr, LVM_PROC_BUF); + + c++; + if (c < vg_ptr->pv_cur) + sz += sprintf(LVM_PROC_BUF, + "\n "); + } + } + + /* logical volumes */ + sz += sprintf(LVM_PROC_BUF, + "\n LV%s ", + vg_ptr->lv_cur == 1 ? 
": " : "s:"); + c = 0; + for (l = 0; l < vg_ptr->lv_max; l++) { + if ((lv_ptr = vg_ptr->lv[l]) != NULL) { + sz += _lv_info(vg_ptr, lv_ptr, LVM_PROC_BUF); + c++; + if (c < vg_ptr->lv_cur) + sz += sprintf(LVM_PROC_BUF, + "\n "); + } + } + if (vg_ptr->lv_cur == 0) sz += sprintf(LVM_PROC_BUF, "none"); + sz += sprintf(LVM_PROC_BUF, "\n"); + } + } + } + if (buf == NULL) { + lock_kernel(); + buf = vmalloc(sz); + unlock_kernel(); + if (buf == NULL) { + sz = 0; + return sprintf(page, "%s - vmalloc error at line %d\n", + lvm_name, __LINE__); + } + } + sz_last = sz; + } + + out: + if (pos > sz - 1) { + lock_kernel(); + vfree(buf); + unlock_kernel(); + buf = NULL; + return 0; + } + *start = &buf[pos]; + if (sz - pos < count) + return sz - pos; + else + return count; + +#undef LVM_PROC_BUF +} + +/* + * provide VG info for proc filesystem use (global) + */ +static int _vg_info(vg_t *vg_ptr, char *buf) { + int sz = 0; + char inactive_flag = ' '; + + if (!(vg_ptr->vg_status & VG_ACTIVE)) inactive_flag = 'I'; + sz = sprintf(buf, + "\nVG: %c%s [%d PV, %d LV/%d open] " + " PE Size: %d KB\n" + " Usage [KB/PE]: %d /%d total " + "%d /%d used %d /%d free", + inactive_flag, + vg_ptr->vg_name, + vg_ptr->pv_cur, + vg_ptr->lv_cur, + vg_ptr->lv_open, + vg_ptr->pe_size >> 1, + vg_ptr->pe_size * vg_ptr->pe_total >> 1, + vg_ptr->pe_total, + vg_ptr->pe_allocated * vg_ptr->pe_size >> 1, + vg_ptr->pe_allocated, + (vg_ptr->pe_total - vg_ptr->pe_allocated) * + vg_ptr->pe_size >> 1, + vg_ptr->pe_total - vg_ptr->pe_allocated); + return sz; +} + + +/* + * provide LV info for proc filesystem use (global) + */ +static int _lv_info(vg_t *vg_ptr, lv_t *lv_ptr, char *buf) { + int sz = 0; + char inactive_flag = 'A', allocation_flag = ' ', + stripes_flag = ' ', rw_flag = ' ', *basename; + + if (!(lv_ptr->lv_status & LV_ACTIVE)) + inactive_flag = 'I'; + rw_flag = 'R'; + if (lv_ptr->lv_access & LV_WRITE) + rw_flag = 'W'; + allocation_flag = 'D'; + if (lv_ptr->lv_allocation & LV_CONTIGUOUS) + allocation_flag = 'C'; + stripes_flag = 'L'; + if (lv_ptr->lv_stripes > 1) + stripes_flag = 'S'; + sz += sprintf(buf+sz, + "[%c%c%c%c", + inactive_flag, + rw_flag, + allocation_flag, + stripes_flag); + if (lv_ptr->lv_stripes > 1) + sz += sprintf(buf+sz, "%-2d", + lv_ptr->lv_stripes); + else + sz += sprintf(buf+sz, " "); + + /* FIXME: use _basename */ + basename = strrchr(lv_ptr->lv_name, '/'); + if ( basename == 0) basename = lv_ptr->lv_name; + else basename++; + sz += sprintf(buf+sz, "] %-25s", basename); + if (strlen(basename) > 25) + sz += sprintf(buf+sz, + "\n "); + sz += sprintf(buf+sz, "%9d /%-6d ", + lv_ptr->lv_size >> 1, + lv_ptr->lv_size / vg_ptr->pe_size); + + if (lv_ptr->lv_open == 0) + sz += sprintf(buf+sz, "close"); + else + sz += sprintf(buf+sz, "%dx open", + lv_ptr->lv_open); + + return sz; +} + + +/* + * provide PV info for proc filesystem use (global) + */ +static int _pv_info(pv_t *pv, char *buf) { + int sz = 0; + char inactive_flag = 'A', allocation_flag = ' '; + char *pv_name = NULL; + + if (!(pv->pv_status & PV_ACTIVE)) + inactive_flag = 'I'; + allocation_flag = 'A'; + if (!(pv->pv_allocatable & PV_ALLOCATABLE)) + allocation_flag = 'N'; + pv_name = strchr(pv->pv_name+1,'/'); + if ( pv_name == 0) pv_name = pv->pv_name; + else pv_name++; + sz = sprintf(buf, + "[%c%c] %-21s %8d /%-6d " + "%8d /%-6d %8d /%-6d", + inactive_flag, + allocation_flag, + pv_name, + pv->pe_total * pv->pe_size >> 1, + pv->pe_total, + pv->pe_allocated * pv->pe_size >> 1, + pv->pe_allocated, + (pv->pe_total - pv->pe_allocated) * + pv->pe_size >> 1, + 
pv->pe_total - pv->pe_allocated); + return sz; +} + +static void _show_uuid(const char *src, char *b, char *e) { + int i; + + e--; + for(i = 0; *src && (b != e); i++) { + if(i && !(i & 0x3)) + *b++ = '-'; + *b++ = *src++; + } + *b = '\0'; +} diff -ruN -X /home/joe/packages/dontdiff linux_2.4.1/drivers/md/lvm-snap.h linux/drivers/md/lvm-snap.h --- linux_2.4.1/drivers/md/lvm-snap.h Fri Feb 16 14:51:26 2001 +++ linux/drivers/md/lvm-snap.h Thu Jan 1 01:00:00 1970 @@ -1,47 +0,0 @@ -/* - * kernel/lvm-snap.h - * - * Copyright (C) 2001 Sistina Software - * - * - * LVM driver is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * LVM driver is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU CC; see the file COPYING. If not, write to - * the Free Software Foundation, 59 Temple Place - Suite 330, - * Boston, MA 02111-1307, USA. - * - */ - -/* - * Changelog - * - * 05/01/2001:Joe Thornber - Factored this file out of lvm.c - * - */ - -#ifndef LVM_SNAP_H -#define LVM_SNAP_H - -/* external snapshot calls */ -extern inline int lvm_get_blksize(kdev_t); -extern int lvm_snapshot_alloc(lv_t *); -extern void lvm_snapshot_fill_COW_page(vg_t *, lv_t *); -extern int lvm_snapshot_COW(kdev_t, ulong, ulong, ulong, lv_t *); -extern int lvm_snapshot_remap_block(kdev_t *, ulong *, ulong, lv_t *); -extern void lvm_snapshot_release(lv_t *); -extern int lvm_write_COW_table_block(vg_t *, lv_t *); -extern inline void lvm_hash_link(lv_block_exception_t *, - kdev_t, ulong, lv_t *); -extern int lvm_snapshot_alloc_hash_table(lv_t *); -extern void lvm_drop_snapshot(lv_t *, const char *); - -#endif
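
The _write_COW_table_block() path above stores each exception as four little-endian 64-bit fields (origin PV number and sector, snapshot PV number and sector), and _disable_snapshot() marks a dropped snapshot by pointing the first exception's origin sector at sector 1. The user-space sketch below is illustrative only and not part of the patch: the struct is a hypothetical mirror of lv_COW_table_disk_t, DROPPED_SECTOR is assumed to be 1 purely on the strength of the _disable_snapshot() comment, and the endian helpers are glibc's <endian.h> functions rather than the kernel's cpu_to_le64()/le64_to_cpu().

/*
 * Illustrative sketch only -- not part of the patch.  Decodes one
 * hypothetical on-disk COW exception entry and shows how a user-space
 * tool could detect the "dropped snapshot" marker described above.
 */
#include <stdint.h>
#include <stdio.h>
#include <endian.h>			/* le64toh()/htole64() on glibc */

struct cow_entry_le {			/* assumed mirror of lv_COW_table_disk_t */
	uint64_t pv_org_number;
	uint64_t pv_org_rsector;
	uint64_t pv_snap_number;
	uint64_t pv_snap_rsector;
};

#define DROPPED_SECTOR 1		/* assumed value of LVM_SNAPSHOT_DROPPED_SECTOR */

static void print_entry(const struct cow_entry_le *e)
{
	printf("org PV %llu sector %llu -> snap PV %llu sector %llu\n",
	       (unsigned long long) le64toh(e->pv_org_number),
	       (unsigned long long) le64toh(e->pv_org_rsector),
	       (unsigned long long) le64toh(e->pv_snap_number),
	       (unsigned long long) le64toh(e->pv_snap_rsector));
}

/* user space could refuse to activate a snapshot whose first
 * exception carries the dropped-snapshot marker */
static int snapshot_was_dropped(const struct cow_entry_le *table)
{
	return le64toh(table[0].pv_org_rsector) == DROPPED_SECTOR;
}

int main(void)
{
	struct cow_entry_le e = {
		htole64(1), htole64(2048), htole64(2), htole64(4096)
	};

	print_entry(&e);
	printf("dropped: %d\n", snapshot_was_dropped(&e));
	return 0;
}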
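
The /proc entries created by lvm-fs.c derive their names from device paths and UUIDs: _make_pv_name() drops a leading LVM_DIR_PREFIX ("/dev/") and maps '/' to '_', while _show_uuid() inserts a '-' after every four characters. The stand-alone sketch below re-renders those two rules in user space; it is purely illustrative, and the buffer sizes and sample strings are invented for the example.

/*
 * Illustrative sketch only -- not part of the patch.  Mirrors the
 * naming rules of _make_pv_name() and _show_uuid() from lvm-fs.c.
 */
#include <stdio.h>
#include <string.h>

static void make_pv_name(const char *src, char *b, char *e)
{
	size_t off = strlen("/dev/");		/* LVM_DIR_PREFIX */

	if (strncmp(src, "/dev/", off))
		off = 0;
	src += off;
	e--;
	while (*src && b != e) {
		*b++ = (*src == '/') ? '_' : *src;
		src++;
	}
	*b = '\0';
}

static void show_uuid(const char *src, char *b, char *e)
{
	int i;

	e--;
	for (i = 0; *src && b != e; i++) {
		if (i && !(i & 0x3))
			*b++ = '-';
		*b++ = *src++;
	}
	*b = '\0';
}

int main(void)
{
	char name[64], uuid[64];

	make_pv_name("/dev/ide/host0/bus0/target0/lun0/part1",
		     name, name + sizeof(name));
	show_uuid("0123456789abcdef", uuid, uuid + sizeof(uuid));
	/* prints ide_host0_bus0_target0_lun0_part1 and 0123-4567-89ab-cdef */
	printf("%s\n%s\n", name, uuid);
	return 0;
}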