1 --- linux/include/linux/lvm.h.orig Mon Sep 17 22:25:26 2001
2 +++ linux/include/linux/lvm.h Tue Nov 13 09:46:51 2001
7 - * Copyright (C) 1997 - 2000 Heinz Mauelshagen, Sistina Software
8 + * Copyright (C) 1997 - 2001 Heinz Mauelshagen, Sistina Software
10 * February-November 1997
12 * January-March,July,September,October,Dezember 1999
13 * January,February,July,November 2000
15 + * January-March,June,July 2001
17 * lvm is free software; you can redistribute it and/or modify
18 * it under the terms of the GNU General Public License as published by
19 * the Free Software Foundation; either version 2, or (at your option)
23 * lvm is distributed in the hope that it will be useful,
24 * but WITHOUT ANY WARRANTY; without even the implied warranty of
25 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
26 * GNU General Public License for more details.
29 * You should have received a copy of the GNU General Public License
30 * along with GNU CC; see the file COPYING. If not, write to
31 * the Free Software Foundation, 59 Temple Place - Suite 330,
32 - * Boston, MA 02111-1307, USA.
33 + * Boston, MA 02111-1307, USA.
38 * 08/12/1999 - changed LVM_LV_SIZE_MAX macro to reflect current 1TB limit
39 * 01/01/2000 - extended lv_v2 core structure by wait_queue member
40 * 12/02/2000 - integrated Andrea Arcagnelli's snapshot work
41 - * 18/02/2000 - seperated user and kernel space parts by
42 + * 18/02/2000 - separated user and kernel space parts by
43 * #ifdef them with __KERNEL__
44 * 08/03/2000 - implemented cluster/shared bits for vg_access
45 * 26/06/2000 - implemented snapshot persistency and resizing support
47 * 12/11/2000 - removed unneeded timestamp definitions
48 * 24/12/2000 - removed LVM_TO_{CORE,DISK}*, use cpu_{from, to}_le*
49 * instead - Christoph Hellwig
50 + * 22/01/2001 - Change ulong to uint32_t
51 + * 14/02/2001 - changed LVM_SNAPSHOT_MIN_CHUNK to 1 page
52 + * 20/02/2001 - incremented IOP version to 11 because of incompatible
53 + * change in VG activation (in order to support devfs better)
54 + * 01/03/2001 - Revert to IOP10 and add VG_CREATE_OLD call for compatibility
55 + * 08/03/2001 - new lv_t (in core) version number 5: changed page member
56 + * to (struct kiobuf *) to use for COW exception table io
57 + * 26/03/2001 - changed lv_v4 to lv_v5 in structure definition (HM)
58 + * 21/06/2001 - changed BLOCK_SIZE back to 1024 for non S/390
59 + * 22/06/2001 - added Andreas Dilger's PE on 4k boundary alignment enhancements
60 + * 19/07/2001 - added rwsem compatibility macros for 2.2 kernels
65 #ifndef _LVM_H_INCLUDE
66 #define _LVM_H_INCLUDE
68 -#define _LVM_KERNEL_H_VERSION "LVM 0.9.1_beta2 (18/01/2001)"
69 +#define LVM_RELEASE_NAME "1.0.1-rc4"
70 +#define LVM_RELEASE_DATE "03/10/2001"
72 +#define _LVM_KERNEL_H_VERSION "LVM "LVM_RELEASE_NAME" ("LVM_RELEASE_DATE")"
74 -#include <linux/config.h>
75 #include <linux/version.h>
81 #define DEBUG_VG_CREATE
82 - #define DEBUG_LVM_BLK_OPEN
83 + #define DEBUG_DEVICE
86 -#endif /* #ifdef __KERNEL__ */
88 #include <linux/kdev_t.h>
89 #include <linux/list.h>
92 +#include <linux/kdev_t.h>
93 +#include <linux/list.h>
95 +#endif /* #ifndef __KERNEL__ */
97 #include <asm/types.h>
98 #include <linux/major.h>
100 #include <asm/semaphore.h>
101 #endif /* #ifdef __KERNEL__ */
104 #include <asm/page.h>
106 #if !defined ( LVM_BLK_MAJOR) || !defined ( LVM_CHAR_MAJOR)
111 -#ifdef CONFIG_ARCH_S390
112 +#ifdef CONFIG_ARCH_S390
113 #define BLOCK_SIZE 4096
115 #define BLOCK_SIZE 1024
116 @@ -127,24 +146,11 @@
117 #define SECTOR_SIZE 512
120 -#define LVM_STRUCT_VERSION 1 /* structure version */
121 +/* structure version */
122 +#define LVM_STRUCT_VERSION 1
124 #define LVM_DIR_PREFIX "/dev/"
126 -/* set the default structure version */
127 -#if ( LVM_STRUCT_VERSION == 1)
128 -#define pv_t pv_v2_t
129 -#define lv_t lv_v4_t
130 -#define vg_t vg_v3_t
131 -#define pv_disk_t pv_disk_v2_t
132 -#define lv_disk_t lv_disk_v3_t
133 -#define vg_disk_t vg_disk_v2_t
134 -#define lv_block_exception_t lv_block_exception_v1_t
135 -#define lv_COW_table_disk_t lv_COW_table_disk_v1_t
141 * i/o protocol version
143 @@ -218,40 +224,11 @@
146 /* DONT TOUCH THESE !!! */
147 -/* base of PV structure in disk partition */
148 -#define LVM_PV_DISK_BASE 0L
150 -/* size reserved for PV structure on disk */
151 -#define LVM_PV_DISK_SIZE 1024L
153 -/* base of VG structure in disk partition */
154 -#define LVM_VG_DISK_BASE LVM_PV_DISK_SIZE
156 -/* size reserved for VG structure */
157 -#define LVM_VG_DISK_SIZE ( 9 * 512L)
159 -/* size reserved for timekeeping */
160 -#define LVM_TIMESTAMP_DISK_BASE ( LVM_VG_DISK_BASE + LVM_VG_DISK_SIZE)
161 -#define LVM_TIMESTAMP_DISK_SIZE 512L /* reserved for timekeeping */
163 -/* name list of physical volumes on disk */
164 -#define LVM_PV_UUIDLIST_DISK_BASE ( LVM_TIMESTAMP_DISK_BASE + \
165 - LVM_TIMESTAMP_DISK_SIZE)
167 -/* now for the dynamically calculated parts of the VGDA */
168 -#define LVM_LV_DISK_OFFSET(a, b) ( (a)->lv_on_disk.base + \
169 - sizeof ( lv_disk_t) * b)
170 -#define LVM_DISK_SIZE(pv) ( (pv)->pe_on_disk.base + \
171 - (pv)->pe_on_disk.size)
172 -#define LVM_PE_DISK_OFFSET(pe, pv) ( pe * pv->pe_size + \
173 - ( LVM_DISK_SIZE ( pv) / SECTOR_SIZE))
174 -#define LVM_PE_ON_DISK_BASE(pv) \
176 - pv->pe_on_disk.base = pv->lv_on_disk.base + pv->lv_on_disk.size; \
177 - if ( ( rest = pv->pe_on_disk.base % SECTOR_SIZE) != 0) \
178 - pv->pe_on_disk.base += ( SECTOR_SIZE - rest); \
180 -/* END default disk spaces and offsets for PVs */
187 #define LVM_MAX_SIZE ( 1024LU * 1024 / SECTOR_SIZE * 1024 * 1024) /* 1TB[sectors] */
188 #define LVM_MAX_MIRRORS 2 /* future use */
189 #define LVM_MIN_READ_AHEAD 0 /* minimum read ahead sectors */
190 -#define LVM_DEFAULT_READ_AHEAD 1024 /* default read ahead sectors for 512k scsi segments */
191 +#define LVM_DEFAULT_READ_AHEAD 1024 /* sectors for 512k scsi segments */
192 #define LVM_MAX_READ_AHEAD 10000 /* maximum read ahead sectors */
193 #define LVM_MAX_LV_IO_TIMEOUT 60 /* seconds I/O timeout (future use) */
194 #define LVM_PARTITION 0xfe /* LVM partition id */
195 @@ -296,28 +273,15 @@
196 #define LVM_SNAPSHOT_MIN_CHUNK (PAGE_SIZE/1024) /* 4 or 8 KB */
203 -#define LVM_GET_COW_TABLE_CHUNKS_PER_PE(vg, lv) ( \
204 - vg->pe_size / lv->lv_chunk_size)
206 -#define LVM_GET_COW_TABLE_ENTRIES_PER_PE(vg, lv) ( \
208 - int COW_table_entries_per_PE; \
209 - int COW_table_chunks_per_PE; \
211 - COW_table_entries_per_PE = LVM_GET_COW_TABLE_CHUNKS_PER_PE(vg, lv); \
212 - COW_table_chunks_per_PE = ( COW_table_entries_per_PE * sizeof(lv_COW_table_disk_t) / SECTOR_SIZE + lv->lv_chunk_size - 1) / lv->lv_chunk_size; \
213 - COW_table_entries_per_PE - COW_table_chunks_per_PE;})
218 + * FIXME: the last parameter to _IO{W,R,WR} is a data type. The macro will
219 + * expand this using sizeof(), so putting "1" there is misleading
220 + * because sizeof(1) = sizeof(int) = sizeof(2) = 4 on a 32-bit machine!
223 -#define VG_CREATE _IOW ( 0xfe, 0x00, 1)
224 +#define VG_CREATE_OLD _IOW ( 0xfe, 0x00, 1)
225 #define VG_REMOVE _IOW ( 0xfe, 0x01, 1)
227 #define VG_EXTEND _IOW ( 0xfe, 0x03, 1)
229 #define VG_SET_EXTENDABLE _IOW ( 0xfe, 0x08, 1)
230 #define VG_RENAME _IOW ( 0xfe, 0x09, 1)
232 +/* Since 0.9beta6 */
233 +#define VG_CREATE _IOW ( 0xfe, 0x0a, 1)
236 #define LV_CREATE _IOW ( 0xfe, 0x20, 1)
240 /* lock the logical volume manager */
241 +#if LVM_DRIVER_IOP_VERSION > 11
242 +#define LVM_LOCK_LVM _IO ( 0xfe, 0x9A)
244 +/* This is actually the same as _IO ( 0xff, 0x00), oops. Remove for IOP 12+ */
245 #define LVM_LOCK_LVM _IO ( 0xfe, 0x100)
251 #define PV_ALLOCATABLE 0x02 /* pv_allocatable */
255 +#define LVM_SNAPSHOT_DROPPED_SECTOR 1
258 * Structure definitions core/disk follow
260 @@ -424,21 +398,21 @@
261 #define UUID_LEN 32 /* don't change!!! */
263 /* copy on write tables in disk format */
265 +typedef struct lv_COW_table_disk_v1 {
266 uint64_t pv_org_number;
267 uint64_t pv_org_rsector;
268 uint64_t pv_snap_number;
269 uint64_t pv_snap_rsector;
270 -} lv_COW_table_disk_v1_t;
271 +} lv_COW_table_disk_t;
273 /* remap physical sector/rdev pairs including hash */
275 +typedef struct lv_block_exception_v1 {
276 struct list_head hash;
281 -} lv_block_exception_v1_t;
282 + uint32_t rsector_org;
284 + uint32_t rsector_new;
286 +} lv_block_exception_t;
288 /* disk stored pe information */
290 @@ -454,37 +428,11 @@
294 - * Structure Physical Volume (PV) Version 1
295 + * physical volume structures
300 - char id[2]; /* Identifier */
301 - unsigned short version; /* HM lvm version */
302 - lvm_disk_data_t pv_on_disk;
303 - lvm_disk_data_t vg_on_disk;
304 - lvm_disk_data_t pv_namelist_on_disk;
305 - lvm_disk_data_t lv_on_disk;
306 - lvm_disk_data_t pe_on_disk;
307 - char pv_name[NAME_LEN];
308 - char vg_name[NAME_LEN];
309 - char system_id[NAME_LEN]; /* for vgexport/vgimport */
313 - uint pv_allocatable;
314 - uint pv_size; /* HM */
319 - uint pe_stale; /* for future use */
320 - pe_disk_t *pe; /* HM */
321 - struct inode *inode; /* HM */
326 +typedef struct pv_v2 {
327 char id[2]; /* Identifier */
328 unsigned short version; /* HM lvm version */
329 lvm_disk_data_t pv_on_disk;
330 @@ -506,36 +454,17 @@
332 uint pe_stale; /* for future use */
333 pe_disk_t *pe; /* HM */
334 - struct inode *inode; /* HM */
335 + struct block_device *bd;
336 char pv_uuid[UUID_LEN+1];
340 + uint32_t pe_start; /* in sectors */
346 - uint8_t id[2]; /* Identifier */
347 - uint16_t version; /* HM lvm version */
348 - lvm_disk_data_t pv_on_disk;
349 - lvm_disk_data_t vg_on_disk;
350 - lvm_disk_data_t pv_namelist_on_disk;
351 - lvm_disk_data_t lv_on_disk;
352 - lvm_disk_data_t pe_on_disk;
353 - uint8_t pv_name[NAME_LEN];
354 - uint8_t vg_name[NAME_LEN];
355 - uint8_t system_id[NAME_LEN]; /* for vgexport/vgimport */
357 - uint32_t pv_number;
358 - uint32_t pv_status;
359 - uint32_t pv_allocatable;
360 - uint32_t pv_size; /* HM */
364 - uint32_t pe_allocated;
369 +typedef struct pv_disk_v2 {
370 uint8_t id[2]; /* Identifier */
371 uint16_t version; /* HM lvm version */
372 lvm_disk_data_t pv_on_disk;
376 uint32_t pe_allocated;
379 + /* new in struct version 2 */
380 + uint32_t pe_start; /* in sectors */
386 @@ -565,17 +498,17 @@
387 /* core PE information */
390 - ulong pe; /* to be changed if > 2TB */
393 + uint32_t pe; /* to be changed if > 2TB */
399 char lv_name[NAME_LEN];
408 typedef struct lv_bmap {
413 -typedef struct lv_v4 {
414 +typedef struct lv_v5 {
415 char lv_name[NAME_LEN];
416 char vg_name[NAME_LEN];
421 /* delta to version 1 starts here */
422 - struct lv_v4 *lv_snapshot_org;
423 - struct lv_v4 *lv_snapshot_prev;
424 - struct lv_v4 *lv_snapshot_next;
425 + struct lv_v5 *lv_snapshot_org;
426 + struct lv_v5 *lv_snapshot_prev;
427 + struct lv_v5 *lv_snapshot_next;
428 lv_block_exception_t *lv_block_exception;
431 @@ -621,23 +554,23 @@
432 uint lv_snapshot_minor;
434 struct kiobuf *lv_iobuf;
435 - struct semaphore lv_snapshot_sem;
436 + struct kiobuf *lv_COW_table_iobuf;
437 + struct rw_semaphore lv_lock;
438 struct list_head *lv_snapshot_hash_table;
439 - ulong lv_snapshot_hash_table_size;
440 - ulong lv_snapshot_hash_mask;
441 - struct page *lv_COW_table_page;
442 + uint32_t lv_snapshot_hash_table_size;
443 + uint32_t lv_snapshot_hash_mask;
444 wait_queue_head_t lv_snapshot_wait;
445 int lv_snapshot_use_rate;
449 uint lv_allocated_snapshot_le;
458 +typedef struct lv_disk_v3 {
459 uint8_t lv_name[NAME_LEN];
460 uint8_t vg_name[NAME_LEN];
462 @@ -659,36 +592,14 @@
463 uint32_t lv_allocation;
464 uint32_t lv_io_timeout; /* for future use */
465 uint32_t lv_read_ahead; /* HM */
470 * Structure Volume Group (VG) Version 1
475 - char vg_name[NAME_LEN]; /* volume group name */
476 - uint vg_number; /* volume group number */
477 - uint vg_access; /* read/write */
478 - uint vg_status; /* active or not */
479 - uint lv_max; /* maximum logical volumes */
480 - uint lv_cur; /* current logical volumes */
481 - uint lv_open; /* open logical volumes */
482 - uint pv_max; /* maximum physical volumes */
483 - uint pv_cur; /* current physical volumes FU */
484 - uint pv_act; /* active physical volumes */
485 - uint dummy; /* was obsolete max_pe_per_pv */
486 - uint vgda; /* volume group descriptor arrays FU */
487 - uint pe_size; /* physical extent size in sectors */
488 - uint pe_total; /* total of physical extents */
489 - uint pe_allocated; /* allocated physical extents */
490 - uint pvg_total; /* physical volume groups FU */
491 - struct proc_dir_entry *proc;
492 - pv_t *pv[ABS_MAX_PV + 1]; /* physical volume struct pointers */
493 - lv_t *lv[ABS_MAX_LV + 1]; /* logical volume struct pointers */
497 +typedef struct vg_v3 {
498 char vg_name[NAME_LEN]; /* volume group name */
499 uint vg_number; /* volume group number */
500 uint vg_access; /* read/write */
501 @@ -716,30 +627,11 @@
511 - uint8_t vg_name[NAME_LEN]; /* volume group name */
512 - uint32_t vg_number; /* volume group number */
513 - uint32_t vg_access; /* read/write */
514 - uint32_t vg_status; /* active or not */
515 - uint32_t lv_max; /* maximum logical volumes */
516 - uint32_t lv_cur; /* current logical volumes */
517 - uint32_t lv_open; /* open logical volumes */
518 - uint32_t pv_max; /* maximum physical volumes */
519 - uint32_t pv_cur; /* current physical volumes FU */
520 - uint32_t pv_act; /* active physical volumes */
522 - uint32_t vgda; /* volume group descriptor arrays FU */
523 - uint32_t pe_size; /* physical extent size in sectors */
524 - uint32_t pe_total; /* total of physical extents */
525 - uint32_t pe_allocated; /* allocated physical extents */
526 - uint32_t pvg_total; /* physical volume groups FU */
530 +typedef struct vg_disk_v2 {
531 uint8_t vg_uuid[UUID_LEN]; /* volume group UUID */
532 uint8_t vg_name_dummy[NAME_LEN-UUID_LEN]; /* rest of v1 VG name */
533 uint32_t vg_number; /* volume group number */
535 uint32_t pe_total; /* total of physical extents */
536 uint32_t pe_allocated; /* allocated physical extents */
537 uint32_t pvg_total; /* physical volume groups FU */
548 + uint32_t pv_offset;
554 /* Request structure LV_STATUS_BYINDEX */
559 /* Transfer size because user space and kernel space differ */
562 /* Request structure LV_STATUS_BYDEV... */
567 } lv_status_bydev_req_t;
572 } lv_snapshot_use_rate_req_t;
576 +/* useful inlines */
577 +static inline ulong round_up(ulong n, ulong size) {
579 + return (n + size) & ~size;
582 +static inline ulong div_up(ulong n, ulong size) {
583 + return round_up(n, size) / size;
586 +/* FIXME: nasty capital letters */
587 +static int inline LVM_GET_COW_TABLE_CHUNKS_PER_PE(vg_t *vg, lv_t *lv) {
588 + return vg->pe_size / lv->lv_chunk_size;
591 +static int inline LVM_GET_COW_TABLE_ENTRIES_PER_PE(vg_t *vg, lv_t *lv) {
592 + ulong chunks = vg->pe_size / lv->lv_chunk_size;
593 + ulong entry_size = sizeof(lv_COW_table_disk_t);
594 + ulong chunk_size = lv->lv_chunk_size * SECTOR_SIZE;
595 + ulong entries = (vg->pe_size * SECTOR_SIZE) /
596 + (entry_size + chunk_size);
601 + for(; entries; entries--)
602 + if((div_up(entries * entry_size, chunk_size) + entries) <=
610 #endif /* #ifndef _LVM_H_INCLUDE */
612 --- linux/drivers/md/lvm.c.orig Thu Oct 25 22:58:35 2001
613 +++ linux/drivers/md/lvm.c Tue Nov 13 09:46:52 2001
618 - * Copyright (C) 1997 - 2000 Heinz Mauelshagen, Sistina Software
619 + * Copyright (C) 1997 - 2001 Heinz Mauelshagen, Sistina Software
621 * February-November 1997
622 * April-May,July-August,November 1998
623 * January-March,May,July,September,October 1999
624 * January,February,July,September-November 2000
626 + * January-April 2001
629 * LVM driver is free software; you can redistribute it and/or modify
631 * support for free (eg. longer) logical volume names
632 * 12/05/1998 - added spin_locks (thanks to Pascal van Dam
633 * <pascal@ramoth.xs4all.nl>)
634 - * 25/05/1998 - fixed handling of locked PEs in lvm_map() and lvm_chr_ioctl()
635 + * 25/05/1998 - fixed handling of locked PEs in lvm_map() and
637 * 26/05/1998 - reactivated verify_area by access_ok
638 * 07/06/1998 - used vmalloc/vfree instead of kmalloc/kfree to go
639 * beyond 128/256 KB max allocation limit per call
641 * 14/02/2000 - support for 2.3.43
642 * - integrated Andrea Arcagneli's snapshot code
643 * 25/06/2000 - james (chip) , IKKHAYD! roffl
644 - * 26/06/2000 - enhanced lv_extend_reduce for snapshot logical volume support
645 + * 26/06/2000 - enhanced lv_extend_reduce for snapshot logical volume
647 * 06/09/2000 - added devfs support
648 * 07/09/2000 - changed IOP version to 9
649 * - started to add new char ioctl LV_STATUS_BYDEV_T to support
650 @@ -148,28 +150,87 @@
651 * procfs is always supported now. (JT)
652 * 12/01/2001 - avoided flushing logical volume in case of shrinking
653 * because of unecessary overhead in case of heavy updates
654 - * 05/04/2001 - lvm_map bugs: don't use b_blocknr/b_dev in lvm_map, it
655 - * destroys stacking devices. call b_end_io on failed maps.
657 + * 25/01/2001 - Allow RO open of an inactive LV so it can be reactivated.
658 + * 31/01/2001 - removed blk_init_queue/blk_cleanup_queue; queueing will be
659 + * handled by the proper devices.
660 + * - If you try and BMAP a snapshot you now get an -EPERM
661 + * 01/01/2001 - lvm_map() now calls buffer_IO_error on error for 2.4
662 + * - factored __remap_snapshot out of lvm_map
663 + * 12/02/2001 - move devfs code to create VG before LVs
664 + * 13/02/2001 - allow VG_CREATE on /dev/lvm
665 + * 14/02/2001 - removed modversions.h
666 + * - tidied device defines for blk.h
667 + * - tidied debug statements
668 + * - bug: vg[] member not set back to NULL if activation fails
669 + * - more lvm_map tidying
670 + * 15/02/2001 - register /dev/lvm with devfs correctly (major/minor
672 + * 19/02/2001 - preallocated buffer_heads for rawio when using
674 + * 28/02/2001 - introduced the P_DEV macro and changed some internal
675 + * functions to be static [AD]
676 + * 28/02/2001 - factored lvm_get_snapshot_use_rate out of blk_ioctl [AD]
677 + * - fixed user address accessing bug in lvm_do_lv_create()
678 + * where the check for an existing LV takes place right at
680 + * 01/03/2001 - Add VG_CREATE_OLD for IOP 10 compatibility
681 + * 02/03/2001 - Don't destroy usermode pointers in lv_t structures during
683 + * and remove redundant lv_t variables from same.
684 + * - avoid compilation of lvm_dummy_device_request in case of
685 + * Linux >= 2.3.0 to avoid a warning
686 + * - added lvm_name argument to printk in buffer allocation
687 + * in order to avoid a warning
688 + * 04/03/2001 - moved linux/version.h above first use of KERNEL_VERSION
690 + * 05/03/2001 - restore copying pe_t array in lvm_do_lv_status_byname. For
691 + * lvdisplay -v (PC)
692 + * - restore copying pe_t array in lvm_do_lv_status_byindex (HM)
693 + * - added copying pe_t array in lvm_do_lv_status_bydev (HM)
694 + * - enhanced lvm_do_lv_status_by{name,index,dev} to be capable
695 + * to copy the lv_block_exception_t array to userspace (HM)
696 + * 08/03/2001 - initialize new lv_ptr->lv_COW_table_iobuf for snapshots;
697 + * removed obsolete lv_ptr->lv_COW_table_page initialization
698 + * - factored lvm_do_pv_flush out of lvm_chr_ioctl (HM)
699 + * 09/03/2001 - Added _lock_open_count to ensure we only drop the lock
700 + * when the locking process closes.
701 + * 05/04/2001 - Defer writes to an extent that is being moved [JT]
702 + * 05/04/2001 - use b_rdev and b_rsector rather than b_dev and b_blocknr in
703 + * lvm_map() in order to make stacking devices more happy (HM)
704 + * 11/04/2001 - cleaned up the pvmove queue code. I no longer retain the
705 + * rw flag, instead WRITEA's are just dropped [JT]
706 + * 30/04/2001 - added KERNEL_VERSION > 2.4.3 get_hardsect_size() rather
707 + * than get_hardblocksize() call
708 + * 03/05/2001 - Use copy_to/from_user to preserve pointers in
709 + * lvm_do_status_by*
710 + * 11/05/2001 - avoid accesses to inactive snapshot data in
711 + * __update_hardsectsize() and lvm_do_lv_extend_reduce() (JW)
712 + * 28/05/2001 - implemented missing BLKSSZGET ioctl
713 + * 05/06/2001 - Move _pe_lock out of fast path for lvm_map when no PEs
714 + * locked. Make buffer queue flush not need locking.
715 + * Fix lvm_user_bmap() to set b_rsector for new lvm_map(). [AED]
716 + * 30/06/2001 - Speed up __update_hardsectsize() by checking if PVs have
717 + * the same hardsectsize (very likely) before scanning all LEs
718 + * in the LV each time. [AED]
719 + * 12/10/2001 - Use add/del_gendisk() routines in 2.4.10+
720 + * 01/11/2001 - Backport read_ahead change from Linus kernel [AED]
724 +#include <linux/version.h>
726 -static char *lvm_version = "LVM version 0.9.1_beta2 by Heinz Mauelshagen (18/01/2001)\n";
727 -static char *lvm_short_version = "version 0.9.1_beta2 (18/01/2001)";
729 -#define MAJOR_NR LVM_BLK_MAJOR
730 -#define DEVICE_OFF(device)
731 +#define MAJOR_NR LVM_BLK_MAJOR
732 +#define DEVICE_OFF(device)
733 +#define LOCAL_END_REQUEST
735 /* lvm_do_lv_create calls fsync_dev_lockfs()/unlockfs() */
736 /* #define LVM_VFS_ENHANCEMENT */
738 #include <linux/config.h>
739 -#include <linux/version.h>
740 #include <linux/module.h>
742 #include <linux/kernel.h>
743 #include <linux/vmalloc.h>
745 #include <linux/slab.h>
746 #include <linux/init.h>
749 #include <linux/blkdev.h>
750 #include <linux/genhd.h>
751 #include <linux/locks.h>
754 +#include <linux/devfs_fs_kernel.h>
755 #include <linux/smp_lock.h>
756 #include <asm/ioctl.h>
757 #include <asm/segment.h>
759 #include <linux/errno.h>
760 #include <linux/lvm.h>
762 -#include "lvm-snap.h"
763 +#include "lvm-internal.h"
765 #define LVM_CORRECT_READ_AHEAD(a) \
768 # define WRITEA WRITE
773 -#define P_IOCTL(fmt, args...) printk(KERN_DEBUG "lvm ioctl: " fmt, ## args)
775 -#define P_IOCTL(fmt, args...)
779 -#define P_MAP(fmt, args...) printk(KERN_DEBUG "lvm map: " fmt, ## args)
781 -#define P_MAP(fmt, args...)
785 -#define P_KFREE(fmt, args...) printk(KERN_DEBUG "lvm kfree: " fmt, ## args)
787 -#define P_KFREE(fmt, args...)
791 * External function prototypes
792 @@ -236,27 +282,14 @@
793 static int lvm_blk_ioctl(struct inode *, struct file *, uint, ulong);
794 static int lvm_blk_open(struct inode *, struct file *);
796 -static int lvm_chr_open(struct inode *, struct file *);
798 -static int lvm_chr_close(struct inode *, struct file *);
799 static int lvm_blk_close(struct inode *, struct file *);
800 +static int lvm_get_snapshot_use_rate(lv_t *lv_ptr, void *arg);
801 static int lvm_user_bmap(struct inode *, struct lv_bmap *);
803 +static int lvm_chr_open(struct inode *, struct file *);
804 +static int lvm_chr_close(struct inode *, struct file *);
805 static int lvm_chr_ioctl(struct inode *, struct file *, uint, ulong);
807 -int lvm_proc_read_vg_info(char *, char **, off_t, int, int *, void *);
808 -int lvm_proc_read_lv_info(char *, char **, off_t, int, int *, void *);
809 -int lvm_proc_read_pv_info(char *, char **, off_t, int, int *, void *);
810 -static int lvm_proc_get_global_info(char *, char **, off_t, int, int *, void *);
812 -void lvm_do_create_devfs_entry_of_vg ( vg_t *);
814 -void lvm_do_create_proc_entry_of_vg ( vg_t *);
815 -void lvm_do_remove_proc_entry_of_vg ( vg_t *);
816 -void lvm_do_create_proc_entry_of_lv ( vg_t *, lv_t *);
817 -void lvm_do_remove_proc_entry_of_lv ( vg_t *, lv_t *);
818 -void lvm_do_create_proc_entry_of_pv ( vg_t *, pv_t *);
819 -void lvm_do_remove_proc_entry_of_pv ( vg_t *, pv_t *);
821 /* End external function prototypes */
823 @@ -288,34 +321,41 @@
825 static int lvm_do_pv_change(vg_t*, void*);
826 static int lvm_do_pv_status(vg_t *, void *);
827 +static int lvm_do_pv_flush(void *);
829 -static int lvm_do_vg_create(int, void *);
830 +static int lvm_do_vg_create(void *, int minor);
831 static int lvm_do_vg_extend(vg_t *, void *);
832 static int lvm_do_vg_reduce(vg_t *, void *);
833 static int lvm_do_vg_rename(vg_t *, void *);
834 static int lvm_do_vg_remove(int);
835 static void lvm_geninit(struct gendisk *);
836 -static char *lvm_show_uuid ( char *);
837 +static void __update_hardsectsize(lv_t *lv);
840 +static void _queue_io(struct buffer_head *bh, int rw);
841 +static struct buffer_head *_dequeue_io(void);
842 +static void _flush_io(struct buffer_head *bh);
844 +static int _open_pv(pv_t *pv);
845 +static void _close_pv(pv_t *pv);
847 +static unsigned long _sectors_to_k(unsigned long sect);
850 void lvm_hd_name(char *, int);
852 /* END Internal function prototypes */
855 -/* volume group descriptor area pointers */
856 -static vg_t *vg[ABS_MAX_VG];
858 +char *lvm_version = "LVM version "LVM_RELEASE_NAME"("LVM_RELEASE_DATE")";
859 +ushort lvm_iop_version = LVM_DRIVER_IOP_VERSION;
861 +const char *const lvm_name = LVM_NAME;
863 -static devfs_handle_t lvm_devfs_handle;
864 -static devfs_handle_t vg_devfs_handle[MAX_VG];
865 -static devfs_handle_t ch_devfs_handle[MAX_VG];
866 -static devfs_handle_t lv_devfs_handle[MAX_LV];
868 -static pv_t *pvp = NULL;
869 -static lv_t *lvp = NULL;
870 -static pe_t *pep = NULL;
871 -static pe_t *pep1 = NULL;
872 -static char *basename = NULL;
874 +/* volume group descriptor area pointers */
875 +vg_t *vg[ABS_MAX_VG];
877 /* map from block minor number to VG and LV numbers */
881 /* Request structures (lvm_chr_ioctl()) */
882 static pv_change_req_t pv_change_req;
883 -static pv_flush_req_t pv_flush_req;
884 static pv_status_req_t pv_status_req;
885 -static pe_lock_req_t pe_lock_req;
886 +volatile static pe_lock_req_t pe_lock_req;
887 static le_remap_req_t le_remap_req;
888 static lv_req_t lv_req;
890 @@ -339,36 +378,29 @@
892 static char pv_name[NAME_LEN];
893 /* static char rootvg[NAME_LEN] = { 0, }; */
894 -const char *const lvm_name = LVM_NAME;
896 -static int loadtime = 0;
897 +static int _lock_open_count = 0;
898 static uint vg_count = 0;
899 static long lvm_chr_open_count = 0;
900 -static ushort lvm_iop_version = LVM_DRIVER_IOP_VERSION;
901 static DECLARE_WAIT_QUEUE_HEAD(lvm_wait);
902 -static DECLARE_WAIT_QUEUE_HEAD(lvm_map_wait);
904 static spinlock_t lvm_lock = SPIN_LOCK_UNLOCKED;
905 static spinlock_t lvm_snapshot_lock = SPIN_LOCK_UNLOCKED;
907 -static struct proc_dir_entry *lvm_proc_dir = NULL;
908 -static struct proc_dir_entry *lvm_proc_vg_subdir = NULL;
909 -struct proc_dir_entry *pde = NULL;
910 +static struct buffer_head *_pe_requests;
911 +static DECLARE_RWSEM(_pe_lock);
913 -static struct file_operations lvm_chr_fops =
915 - owner: THIS_MODULE,
917 +struct file_operations lvm_chr_fops = {
919 release: lvm_chr_close,
920 ioctl: lvm_chr_ioctl,
924 /* block device operations structure needed for 2.3.38? and above */
925 -static struct block_device_operations lvm_blk_dops =
926 +struct block_device_operations lvm_blk_dops =
928 - owner: THIS_MODULE,
929 - open: lvm_blk_open,
930 + open: lvm_blk_open,
931 release: lvm_blk_close,
932 ioctl: lvm_blk_ioctl,
934 @@ -376,10 +408,10 @@
936 /* gendisk structures */
937 static struct hd_struct lvm_hd_struct[MAX_LV];
938 -static int lvm_blocksizes[MAX_LV] =
940 -static int lvm_size[MAX_LV] =
942 +static int lvm_blocksizes[MAX_LV];
943 +static int lvm_hardsectsizes[MAX_LV];
944 +static int lvm_size[MAX_LV];
946 static struct gendisk lvm_gendisk =
949 @@ -391,38 +423,33 @@
955 * Driver initialization...
959 - if (register_chrdev(LVM_CHAR_MAJOR, lvm_name, &lvm_chr_fops) < 0) {
960 - printk(KERN_ERR "%s -- register_chrdev failed\n", lvm_name);
961 + if (devfs_register_chrdev(LVM_CHAR_MAJOR,
962 + lvm_name, &lvm_chr_fops) < 0) {
963 + printk(KERN_ERR "%s -- devfs_register_chrdev failed\n",
967 - if (register_blkdev(MAJOR_NR, lvm_name, &lvm_blk_dops) < 0)
968 + if (devfs_register_blkdev(MAJOR_NR, lvm_name, &lvm_blk_dops) < 0)
970 - printk("%s -- register_blkdev failed\n", lvm_name);
971 - if (unregister_chrdev(LVM_CHAR_MAJOR, lvm_name) < 0)
972 - printk(KERN_ERR "%s -- unregister_chrdev failed\n", lvm_name);
973 + printk("%s -- devfs_register_blkdev failed\n", lvm_name);
974 + if (devfs_unregister_chrdev(LVM_CHAR_MAJOR, lvm_name) < 0)
976 + "%s -- devfs_unregister_chrdev failed\n",
981 - lvm_devfs_handle = devfs_register(
982 - 0 , "lvm", 0, 0, LVM_CHAR_MAJOR,
983 - S_IFCHR | S_IRUSR | S_IWUSR | S_IRGRP,
984 - &lvm_chr_fops, NULL);
986 - lvm_proc_dir = create_proc_entry (LVM_DIR, S_IFDIR, &proc_root);
987 - if (lvm_proc_dir != NULL) {
988 - lvm_proc_vg_subdir = create_proc_entry (LVM_VG_SUBDIR, S_IFDIR, lvm_proc_dir);
989 - pde = create_proc_entry(LVM_GLOBAL, S_IFREG, lvm_proc_dir);
990 - if ( pde != NULL) pde->read_proc = &lvm_proc_get_global_info;
995 lvm_geninit(&lvm_gendisk);
997 + /* insert our gendisk at the corresponding major */
998 add_gendisk(&lvm_gendisk);
1001 @@ -433,65 +460,64 @@
1002 blk_queue_make_request(BLK_DEFAULT_QUEUE(MAJOR_NR), lvm_make_request_fn);
1005 + /* initialise the pe lock */
1006 + pe_lock_req.lock = UNLOCK_PE;
1008 /* optional read root VGDA */
1010 if ( *rootvg != 0) vg_read_with_pv_and_lv ( rootvg, &vg);
1017 + printk(KERN_INFO "%s module loaded\n", lvm_version);
1020 + printk(KERN_INFO "%s\n", lvm_version);
1022 - " successfully initialized\n",
1023 - lvm_version, lvm_name);
1033 static void lvm_cleanup(void)
1035 - devfs_unregister (lvm_devfs_handle);
1036 + if (devfs_unregister_chrdev(LVM_CHAR_MAJOR, lvm_name) < 0)
1037 + printk(KERN_ERR "%s -- devfs_unregister_chrdev failed\n",
1039 + if (devfs_unregister_blkdev(MAJOR_NR, lvm_name) < 0)
1040 + printk(KERN_ERR "%s -- devfs_unregister_blkdev failed\n",
1043 - if (unregister_chrdev(LVM_CHAR_MAJOR, lvm_name) < 0) {
1044 - printk(KERN_ERR "%s -- unregister_chrdev failed\n", lvm_name);
1046 - if (unregister_blkdev(MAJOR_NR, lvm_name) < 0) {
1047 - printk(KERN_ERR "%s -- unregister_blkdev failed\n", lvm_name);
1051 + /* delete our gendisk from chain */
1052 del_gendisk(&lvm_gendisk);
1054 blk_size[MAJOR_NR] = NULL;
1055 blksize_size[MAJOR_NR] = NULL;
1056 hardsect_size[MAJOR_NR] = NULL;
1058 - remove_proc_entry(LVM_GLOBAL, lvm_proc_dir);
1059 - remove_proc_entry(LVM_VG_SUBDIR, lvm_proc_dir);
1060 - remove_proc_entry(LVM_DIR, &proc_root);
1063 /* reference from linux/drivers/block/genhd.c */
1064 lvm_hd_name_ptr = NULL;
1067 + /* unregister with procfs and devfs */
1071 printk(KERN_INFO "%s -- Module successfully deactivated\n", lvm_name);
1075 } /* lvm_cleanup() */
1079 * support function to initialize lvm variables
1081 -void __init lvm_init_vars(void)
1082 +static void __init lvm_init_vars(void)
1087 lvm_lock = lvm_snapshot_lock = SPIN_LOCK_UNLOCKED;
1089 pe_lock_req.lock = UNLOCK_PE;
1090 - pe_lock_req.data.lv_dev = \
1091 - pe_lock_req.data.pv_dev = \
1092 + pe_lock_req.data.lv_dev = 0;
1093 + pe_lock_req.data.pv_dev = 0;
1094 pe_lock_req.data.pv_offset = 0;
1096 /* Initialize VG pointers */
1097 @@ -524,19 +550,18 @@
1099 ********************************************************************/
1101 +#define MODE_TO_STR(mode) (mode) & FMODE_READ ? "READ" : "", \
1102 + (mode) & FMODE_WRITE ? "WRITE" : ""
1105 * character device open routine
1107 -static int lvm_chr_open(struct inode *inode,
1108 - struct file *file)
1109 +static int lvm_chr_open(struct inode *inode, struct file *file)
1111 int minor = MINOR(inode->i_rdev);
1115 - "%s -- lvm_chr_open MINOR: %d VG#: %d mode: 0x%X lock: %d\n",
1116 - lvm_name, minor, VG_CHR(minor), file->f_mode, lock);
1118 + P_DEV("chr_open MINOR: %d VG#: %d mode: %s%s lock: %d\n",
1119 + minor, VG_CHR(minor), MODE_TO_STR(file->f_mode), lock);
1121 /* super user validation */
1122 if (!capable(CAP_SYS_ADMIN)) return -EACCES;
1123 @@ -544,8 +569,15 @@
1124 /* Group special file open */
1125 if (VG_CHR(minor) > MAX_VG) return -ENXIO;
1127 + spin_lock(&lvm_lock);
1128 + if(lock == current->pid)
1129 + _lock_open_count++;
1130 + spin_unlock(&lvm_lock);
1132 lvm_chr_open_count++;
1134 + MOD_INC_USE_COUNT;
1137 } /* lvm_chr_open() */
1140 /* otherwise cc will complain about unused variables */
1143 - P_IOCTL("%s -- lvm_chr_ioctl: command: 0x%X MINOR: %d "
1144 - "VG#: %d mode: 0x%X\n",
1145 - lvm_name, command, minor, VG_CHR(minor), file->f_mode);
1146 + P_IOCTL("chr MINOR: %d command: 0x%X arg: %p VG#: %d mode: %s%s\n",
1147 + minor, command, arg, VG_CHR(minor), MODE_TO_STR(file->f_mode));
1149 #ifdef LVM_TOTAL_RESET
1150 if (lvm_reset_spindown > 0) return -EACCES;
1151 @@ -619,9 +650,13 @@
1152 physical volume (move's done in user space's pvmove) */
1153 return lvm_do_pe_lock_unlock(vg_ptr,arg);
1156 + case VG_CREATE_OLD:
1158 - return lvm_do_vg_create(minor, arg);
1159 + return lvm_do_vg_create(arg, minor);
1162 + /* create a VGDA, assume VG number is filled in */
1163 + return lvm_do_vg_create(arg, -1);
1166 /* extend a volume group */
1170 case VG_STATUS_GET_NAMELIST:
1171 - /* get volume group count */
1172 + /* get volume group names */
1173 for (l = v = 0; v < ABS_MAX_VG; v++) {
1174 if (vg[v] != NULL) {
1175 if (copy_to_user(arg + l * NAME_LEN,
1179 case LV_STATUS_BYDEV:
1180 + /* get status of a logical volume by device */
1181 return lvm_do_lv_status_bydev(vg_ptr, arg);
1184 @@ -742,18 +778,12 @@
1187 /* physical volume buffer flush/invalidate */
1188 - if (copy_from_user(&pv_flush_req, arg,
1189 - sizeof(pv_flush_req)) != 0)
1192 - fsync_dev(pv_flush_req.pv_dev);
1193 - invalidate_buffers(pv_flush_req.pv_dev);
1195 + return lvm_do_pv_flush(arg);
1200 - "%s -- lvm_chr_ioctl: unknown command %x\n",
1201 + "%s -- lvm_chr_ioctl: unknown command 0x%x\n",
1205 @@ -767,11 +797,8 @@
1207 static int lvm_chr_close(struct inode *inode, struct file *file)
1210 - int minor = MINOR(inode->i_rdev);
1212 - "%s -- lvm_chr_close VG#: %d\n", lvm_name, VG_CHR(minor));
1214 + P_DEV("chr_close MINOR: %d VG#: %d\n",
1215 + MINOR(inode->i_rdev), VG_CHR(MINOR(inode->i_rdev)));
1217 #ifdef LVM_TOTAL_RESET
1218 if (lvm_reset_spindown > 0) {
1219 @@ -781,10 +808,19 @@
1222 if (lvm_chr_open_count > 0) lvm_chr_open_count--;
1223 - if (lock == current->pid) {
1224 - lock = 0; /* release lock */
1225 - wake_up_interruptible(&lvm_wait);
1227 + spin_lock(&lvm_lock);
1228 + if(lock == current->pid) {
1229 + if(!_lock_open_count) {
1230 + P_DEV("chr_close: unlocking LVM for pid %d\n", lock);
1232 + wake_up_interruptible(&lvm_wait);
1234 + _lock_open_count--;
1236 + spin_unlock(&lvm_lock);
1238 + MOD_DEC_USE_COUNT;
1241 } /* lvm_chr_close() */
1242 @@ -806,11 +842,8 @@
1244 vg_t *vg_ptr = vg[VG_BLK(minor)];
1246 -#ifdef DEBUG_LVM_BLK_OPEN
1248 - "%s -- lvm_blk_open MINOR: %d VG#: %d LV#: %d mode: 0x%X\n",
1249 - lvm_name, minor, VG_BLK(minor), LV_BLK(minor), file->f_mode);
1251 + P_DEV("blk_open MINOR: %d VG#: %d LV#: %d mode: %s%s\n",
1252 + minor, VG_BLK(minor), LV_BLK(minor), MODE_TO_STR(file->f_mode));
1254 #ifdef LVM_TOTAL_RESET
1255 if (lvm_reset_spindown > 0)
1256 @@ -827,8 +860,12 @@
1257 if (lv_ptr->lv_status & LV_SPINDOWN) return -EPERM;
1259 /* Check inactive LV and open for read/write */
1260 - if (!(lv_ptr->lv_status & LV_ACTIVE))
1262 + /* We need to be able to "read" an inactive LV
1263 + to re-activate it again */
1264 + if ((file->f_mode & FMODE_WRITE) &&
1265 + (!(lv_ptr->lv_status & LV_ACTIVE)))
1268 if (!(lv_ptr->lv_access & LV_WRITE) &&
1269 (file->f_mode & FMODE_WRITE))
1271 @@ -838,12 +875,9 @@
1272 if (lv_ptr->lv_open == 0) vg_ptr->lv_open++;
1275 -#ifdef DEBUG_LVM_BLK_OPEN
1277 - "%s -- lvm_blk_open MINOR: %d VG#: %d LV#: %d size: %d\n",
1278 - lvm_name, minor, VG_BLK(minor), LV_BLK(minor),
1281 + MOD_INC_USE_COUNT;
1283 + P_DEV("blk_open OK, LV size %d\n", lv_ptr->lv_size);
1287 @@ -863,31 +897,34 @@
1288 void *arg = (void *) a;
1289 struct hd_geometry *hd = (struct hd_geometry *) a;
1291 - P_IOCTL("%s -- lvm_blk_ioctl MINOR: %d command: 0x%X arg: %X "
1292 - "VG#: %dl LV#: %d\n",
1293 - lvm_name, minor, command, (ulong) arg,
1294 - VG_BLK(minor), LV_BLK(minor));
1295 + P_IOCTL("blk MINOR: %d command: 0x%X arg: %p VG#: %d LV#: %d "
1296 + "mode: %s%s\n", minor, command, arg, VG_BLK(minor),
1297 + LV_BLK(minor), MODE_TO_STR(file->f_mode));
1301 + /* get block device sector size as needed e.g. by fdisk */
1302 + return put_user(lvm_sectsize(inode->i_rdev), (int *) arg);
1305 /* return device size */
1306 - P_IOCTL("%s -- lvm_blk_ioctl -- BLKGETSIZE: %u\n",
1307 - lvm_name, lv_ptr->lv_size);
1308 + P_IOCTL("BLKGETSIZE: %u\n", lv_ptr->lv_size);
1309 if (put_user(lv_ptr->lv_size, (unsigned long *)arg))
1313 +#ifdef BLKGETSIZE64
1315 if (put_user((u64)lv_ptr->lv_size << 9, (u64 *)arg))
1322 /* flush buffer cache */
1323 if (!capable(CAP_SYS_ADMIN)) return -EACCES;
1325 - P_IOCTL("%s -- lvm_blk_ioctl -- BLKFLSBUF\n", lvm_name);
1326 + P_IOCTL("BLKFLSBUF\n");
1328 fsync_dev(inode->i_rdev);
1329 invalidate_buffers(inode->i_rdev);
1331 /* set read ahead for block device */
1332 if (!capable(CAP_SYS_ADMIN)) return -EACCES;
1334 - P_IOCTL("%s -- lvm_blk_ioctl -- BLKRASET: %d sectors for %02X:%02X\n",
1335 - lvm_name, (long) arg, MAJOR(inode->i_rdev), minor);
1336 + P_IOCTL("BLKRASET: %ld sectors for %s\n",
1337 + (long) arg, kdevname(inode->i_rdev));
1339 if ((long) arg < LVM_MIN_READ_AHEAD ||
1340 (long) arg > LVM_MAX_READ_AHEAD)
1344 /* get current read ahead setting */
1345 - P_IOCTL("%s -- lvm_blk_ioctl -- BLKRAGET\n", lvm_name);
1346 + P_IOCTL("BLKRAGET %d\n", lv_ptr->lv_read_ahead);
1347 if (put_user(lv_ptr->lv_read_ahead, (long *)arg))
1350 @@ -937,10 +974,10 @@
1351 copy_to_user((long *) &hd->start, &start,
1352 sizeof(start)) != 0)
1356 - P_IOCTL("%s -- lvm_blk_ioctl -- cylinders: %d\n",
1357 - lvm_name, lv_ptr->lv_size / heads / sectors);
1358 + P_IOCTL("%s -- lvm_blk_ioctl -- cylinders: %d\n",
1359 + lvm_name, cylinders);
1364 @@ -979,40 +1016,11 @@
1367 case LV_SNAPSHOT_USE_RATE:
1368 - if (!(lv_ptr->lv_access & LV_SNAPSHOT)) return -EPERM;
1370 - lv_snapshot_use_rate_req_t lv_snapshot_use_rate_req;
1372 - if (copy_from_user(&lv_snapshot_use_rate_req, arg,
1373 - sizeof(lv_snapshot_use_rate_req_t)))
1375 - if (lv_snapshot_use_rate_req.rate < 0 ||
1376 - lv_snapshot_use_rate_req.rate > 100) return -EFAULT;
1378 - switch (lv_snapshot_use_rate_req.block)
1381 - lv_ptr->lv_snapshot_use_rate = lv_snapshot_use_rate_req.rate;
1382 - if (lv_ptr->lv_remap_ptr * 100 / lv_ptr->lv_remap_end < lv_ptr->lv_snapshot_use_rate)
1383 - interruptible_sleep_on (&lv_ptr->lv_snapshot_wait);
1392 - lv_snapshot_use_rate_req.rate = lv_ptr->lv_remap_ptr * 100 / lv_ptr->lv_remap_end;
1393 - if (copy_to_user(arg, &lv_snapshot_use_rate_req,
1394 - sizeof(lv_snapshot_use_rate_req_t)))
1398 + return lvm_get_snapshot_use_rate(lv_ptr, arg);
1402 - "%s -- lvm_blk_ioctl: unknown command %d\n",
1403 + "%s -- lvm_blk_ioctl: unknown command 0x%x\n",
1407 @@ -1030,18 +1038,49 @@
1408 vg_t *vg_ptr = vg[VG_BLK(minor)];
1409 lv_t *lv_ptr = vg_ptr->lv[LV_BLK(minor)];
1413 - "%s -- lvm_blk_close MINOR: %d VG#: %d LV#: %d\n",
1414 - lvm_name, minor, VG_BLK(minor), LV_BLK(minor));
1416 + P_DEV("blk_close MINOR: %d VG#: %d LV#: %d\n",
1417 + minor, VG_BLK(minor), LV_BLK(minor));
1419 if (lv_ptr->lv_open == 1) vg_ptr->lv_open--;
1422 + MOD_DEC_USE_COUNT;
1425 } /* lvm_blk_close() */
1427 +static int lvm_get_snapshot_use_rate(lv_t *lv, void *arg)
1429 + lv_snapshot_use_rate_req_t lv_rate_req;
1431 + if (!(lv->lv_access & LV_SNAPSHOT))
1434 + if (copy_from_user(&lv_rate_req, arg, sizeof(lv_rate_req)))
1437 + if (lv_rate_req.rate < 0 || lv_rate_req.rate > 100)
1440 + switch (lv_rate_req.block) {
1442 + lv->lv_snapshot_use_rate = lv_rate_req.rate;
1443 + if (lv->lv_remap_ptr * 100 / lv->lv_remap_end <
1444 + lv->lv_snapshot_use_rate)
1445 + interruptible_sleep_on(&lv->lv_snapshot_wait);
1454 + lv_rate_req.rate = lv->lv_remap_ptr * 100 / lv->lv_remap_end;
1456 + return copy_to_user(arg, &lv_rate_req,
1457 + sizeof(lv_rate_req)) ? -EFAULT : 0;
1460 static int lvm_user_bmap(struct inode *inode, struct lv_bmap *user_result)
1462 @@ -1056,6 +1095,7 @@
1463 bh.b_blocknr = block;
1464 bh.b_dev = bh.b_rdev = inode->i_rdev;
1465 bh.b_size = lvm_get_blksize(bh.b_dev);
1466 + bh.b_rsector = block * (bh.b_size >> 9);
1467 if ((err=lvm_map(&bh, READ)) < 0) {
1468 printk("lvm map failed: %d\n", err);
1470 @@ -1063,562 +1103,206 @@
1472 return put_user(kdev_t_to_nr(bh.b_rdev), &user_result->lv_dev) ||
1473 put_user(bh.b_rsector/(bh.b_size>>9), &user_result->lv_block) ?
1480 - * provide VG info for proc filesystem use (global)
1481 + * block device support function for /usr/src/linux/drivers/block/ll_rw_blk.c
1482 + * (see init_module/lvm_init)
1484 -int lvm_vg_info(vg_t *vg_ptr, char *buf) {
1486 - char inactive_flag = ' ';
1488 - if (!(vg_ptr->vg_status & VG_ACTIVE)) inactive_flag = 'I';
1490 - "\nVG: %c%s [%d PV, %d LV/%d open] "
1491 - " PE Size: %d KB\n"
1492 - " Usage [KB/PE]: %d /%d total "
1493 - "%d /%d used %d /%d free",
1499 - vg_ptr->pe_size >> 1,
1500 - vg_ptr->pe_size * vg_ptr->pe_total >> 1,
1502 - vg_ptr->pe_allocated * vg_ptr->pe_size >> 1,
1503 - vg_ptr->pe_allocated,
1504 - (vg_ptr->pe_total - vg_ptr->pe_allocated) *
1505 - vg_ptr->pe_size >> 1,
1506 - vg_ptr->pe_total - vg_ptr->pe_allocated);
1509 +static void __remap_snapshot(kdev_t rdev, ulong rsector,
1510 + ulong pe_start, lv_t *lv, vg_t *vg) {
1512 + /* copy a chunk from the origin to a snapshot device */
1513 + down_write(&lv->lv_lock);
1516 - * provide LV info for proc filesystem use (global)
1518 -int lvm_lv_info(vg_t *vg_ptr, lv_t *lv_ptr, char *buf) {
1520 - char inactive_flag = 'A', allocation_flag = ' ',
1521 - stripes_flag = ' ', rw_flag = ' ';
1523 - if (!(lv_ptr->lv_status & LV_ACTIVE))
1524 - inactive_flag = 'I';
1526 - if (lv_ptr->lv_access & LV_WRITE)
1528 - allocation_flag = 'D';
1529 - if (lv_ptr->lv_allocation & LV_CONTIGUOUS)
1530 - allocation_flag = 'C';
1531 - stripes_flag = 'L';
1532 - if (lv_ptr->lv_stripes > 1)
1533 - stripes_flag = 'S';
1534 - sz += sprintf(buf+sz,
1540 - if (lv_ptr->lv_stripes > 1)
1541 - sz += sprintf(buf+sz, "%-2d",
1542 - lv_ptr->lv_stripes);
1544 - sz += sprintf(buf+sz, " ");
1545 - basename = strrchr(lv_ptr->lv_name, '/');
1546 - if ( basename == 0) basename = lv_ptr->lv_name;
1548 - sz += sprintf(buf+sz, "] %-25s", basename);
1549 - if (strlen(basename) > 25)
1550 - sz += sprintf(buf+sz,
1552 - sz += sprintf(buf+sz, "%9d /%-6d ",
1553 - lv_ptr->lv_size >> 1,
1554 - lv_ptr->lv_size / vg_ptr->pe_size);
1555 + /* we must redo lvm_snapshot_remap_block in order to avoid a
1556 + race condition in the gap where no lock was held */
1557 + if (!lvm_snapshot_remap_block(&rdev, &rsector, pe_start, lv) &&
1558 + !lvm_snapshot_COW(rdev, rsector, pe_start, rsector, vg, lv))
1559 + lvm_write_COW_table_block(vg, lv);
1561 - if (lv_ptr->lv_open == 0)
1562 - sz += sprintf(buf+sz, "close");
1564 - sz += sprintf(buf+sz, "%dx open",
1568 + up_write(&lv->lv_lock);
1573 - * provide PV info for proc filesystem use (global)
1575 -int lvm_pv_info(pv_t *pv_ptr, char *buf) {
1577 - char inactive_flag = 'A', allocation_flag = ' ';
1578 - char *pv_name = NULL;
1580 - if (!(pv_ptr->pv_status & PV_ACTIVE))
1581 - inactive_flag = 'I';
1582 - allocation_flag = 'A';
1583 - if (!(pv_ptr->pv_allocatable & PV_ALLOCATABLE))
1584 - allocation_flag = 'N';
1585 - pv_name = strrchr(pv_ptr->pv_name+1,'/');
1586 - if ( pv_name == 0) pv_name = pv_ptr->pv_name;
1589 - "[%c%c] %-21s %8d /%-6d "
1590 - "%8d /%-6d %8d /%-6d",
1594 - pv_ptr->pe_total *
1595 - pv_ptr->pe_size >> 1,
1597 - pv_ptr->pe_allocated *
1598 - pv_ptr->pe_size >> 1,
1599 - pv_ptr->pe_allocated,
1600 - (pv_ptr->pe_total -
1601 - pv_ptr->pe_allocated) *
1602 - pv_ptr->pe_size >> 1,
1603 - pv_ptr->pe_total -
1604 - pv_ptr->pe_allocated);
1606 +static inline void _remap_snapshot(kdev_t rdev, ulong rsector,
1607 + ulong pe_start, lv_t *lv, vg_t *vg) {
1610 + /* check to see if this chunk is already in the snapshot */
1611 + down_read(&lv->lv_lock);
1612 + r = lvm_snapshot_remap_block(&rdev, &rsector, pe_start, lv);
1613 + up_read(&lv->lv_lock);
1616 + /* we haven't yet copied this block to the snapshot */
1617 + __remap_snapshot(rdev, rsector, pe_start, lv, vg);
1622 - * Support functions /proc-Filesystem
1623 + * extents destined for a pe that is on the move should be deferred
1625 +static inline int _should_defer(kdev_t pv, ulong sector, uint32_t pe_size) {
1626 + return ((pe_lock_req.lock == LOCK_PE) &&
1627 + (pv == pe_lock_req.data.pv_dev) &&
1628 + (sector >= pe_lock_req.data.pv_offset) &&
1629 + (sector < (pe_lock_req.data.pv_offset + pe_size)));
1632 -#define LVM_PROC_BUF ( i == 0 ? dummy_buf : &buf[sz])
1635 - * provide global LVM information
1637 -static int lvm_proc_get_global_info(char *page, char **start, off_t pos, int count, int *eof, void *data)
1638 +static inline int _defer_extent(struct buffer_head *bh, int rw,
1639 + kdev_t pv, ulong sector, uint32_t pe_size)
1641 - int c, i, l, p, v, vg_counter, pv_counter, lv_counter, lv_open_counter,
1642 - lv_open_total, pe_t_bytes, hash_table_bytes, lv_block_exception_t_bytes, seconds;
1645 - static char *buf = NULL;
1646 - static char dummy_buf[160]; /* sized for 2 lines */
1652 -#ifdef DEBUG_LVM_PROC_GET_INFO
1654 - "%s - lvm_proc_get_global_info CALLED pos: %lu count: %d whence: %d\n",
1655 - lvm_name, pos, count, whence);
1658 - MOD_INC_USE_COUNT;
1660 - if (pos == 0 || buf == NULL) {
1661 - sz_last = vg_counter = pv_counter = lv_counter = lv_open_counter = \
1662 - lv_open_total = pe_t_bytes = hash_table_bytes = \
1663 - lv_block_exception_t_bytes = 0;
1665 - /* search for activity */
1666 - for (v = 0; v < ABS_MAX_VG; v++) {
1667 - if ((vg_ptr = vg[v]) != NULL) {
1669 - pv_counter += vg_ptr->pv_cur;
1670 - lv_counter += vg_ptr->lv_cur;
1671 - if (vg_ptr->lv_cur > 0) {
1672 - for (l = 0; l < vg[v]->lv_max; l++) {
1673 - if ((lv_ptr = vg_ptr->lv[l]) != NULL) {
1674 - pe_t_bytes += lv_ptr->lv_allocated_le;
1675 - hash_table_bytes += lv_ptr->lv_snapshot_hash_table_size;
1676 - if (lv_ptr->lv_block_exception != NULL)
1677 - lv_block_exception_t_bytes += lv_ptr->lv_remap_end;
1678 - if (lv_ptr->lv_open > 0) {
1679 - lv_open_counter++;
1680 - lv_open_total += lv_ptr->lv_open;
1687 - pe_t_bytes *= sizeof(pe_t);
1688 - lv_block_exception_t_bytes *= sizeof(lv_block_exception_t);
1690 - if (buf != NULL) {
1691 - P_KFREE("%s -- vfree %d\n", lvm_name, __LINE__);
1697 - /* 2 times: first to get size to allocate buffer,
1698 - 2nd to fill the malloced buffer */
1699 - for (i = 0; i < 2; i++) {
1701 - sz += sprintf(LVM_PROC_BUF,
1709 - "Total: %d VG%s %d PV%s %d LV%s ",
1710 - lvm_short_version,
1711 - vg_counter, vg_counter == 1 ? "" : "s",
1712 - pv_counter, pv_counter == 1 ? "" : "s",
1713 - lv_counter, lv_counter == 1 ? "" : "s");
1714 - sz += sprintf(LVM_PROC_BUF,
1717 - lv_open_counter == 1 ? "" : "s");
1718 - if (lv_open_total > 0)
1719 - sz += sprintf(LVM_PROC_BUF,
1723 - sz += sprintf(LVM_PROC_BUF, ")");
1724 - sz += sprintf(LVM_PROC_BUF,
1725 - "\nGlobal: %lu bytes malloced IOP version: %d ",
1726 - vg_counter * sizeof(vg_t) +
1727 - pv_counter * sizeof(pv_t) +
1728 - lv_counter * sizeof(lv_t) +
1729 - pe_t_bytes + hash_table_bytes + lv_block_exception_t_bytes + sz_last,
1732 - seconds = CURRENT_TIME - loadtime;
1734 - loadtime = CURRENT_TIME + seconds;
1735 - if (seconds / 86400 > 0) {
1736 - sz += sprintf(LVM_PROC_BUF, "%d day%s ",
1738 - seconds / 86400 == 0 ||
1739 - seconds / 86400 > 1 ? "s" : "");
1741 - sz += sprintf(LVM_PROC_BUF, "%d:%02d:%02d active\n",
1742 - (seconds % 86400) / 3600,
1743 - (seconds % 3600) / 60,
1746 - if (vg_counter > 0) {
1747 - for (v = 0; v < ABS_MAX_VG; v++) {
1748 - /* volume group */
1749 - if ((vg_ptr = vg[v]) != NULL) {
1750 - sz += lvm_vg_info(vg_ptr, LVM_PROC_BUF);
1752 - /* physical volumes */
1753 - sz += sprintf(LVM_PROC_BUF,
1755 - vg_ptr->pv_cur == 1 ? ": " : "s:");
1757 - for (p = 0; p < vg_ptr->pv_max; p++) {
1758 - if ((pv_ptr = vg_ptr->pv[p]) != NULL) {
1759 - sz += lvm_pv_info(pv_ptr, LVM_PROC_BUF);
1762 - if (c < vg_ptr->pv_cur)
1763 - sz += sprintf(LVM_PROC_BUF,
1768 - /* logical volumes */
1769 - sz += sprintf(LVM_PROC_BUF,
1771 - vg_ptr->lv_cur == 1 ? ": " : "s:");
1773 - for (l = 0; l < vg_ptr->lv_max; l++) {
1774 - if ((lv_ptr = vg_ptr->lv[l]) != NULL) {
1775 - sz += lvm_lv_info(vg_ptr, lv_ptr, LVM_PROC_BUF);
1777 - if (c < vg_ptr->lv_cur)
1778 - sz += sprintf(LVM_PROC_BUF,
1782 - if (vg_ptr->lv_cur == 0) sz += sprintf(LVM_PROC_BUF, "none");
1783 - sz += sprintf(LVM_PROC_BUF, "\n");
1787 - if (buf == NULL) {
1789 - buf = vmalloc(sz);
1791 - if (buf == NULL) {
1793 - MOD_DEC_USE_COUNT;
1794 - return sprintf(page, "%s - vmalloc error at line %d\n",
1795 - lvm_name, __LINE__);
1799 + if (pe_lock_req.lock == LOCK_PE) {
1800 + down_read(&_pe_lock);
1801 + if (_should_defer(pv, sector, pe_size)) {
1802 + up_read(&_pe_lock);
1803 + down_write(&_pe_lock);
1804 + if (_should_defer(pv, sector, pe_size))
1805 + _queue_io(bh, rw);
1806 + up_write(&_pe_lock);
1809 + up_read(&_pe_lock);
1811 - MOD_DEC_USE_COUNT;
1812 - if (pos > sz - 1) {
1819 - *start = &buf[pos];
1820 - if (sz - pos < count)
1824 -} /* lvm_proc_get_global_info() */
1828 - * provide VG information
1830 -int lvm_proc_read_vg_info(char *page, char **start, off_t off,
1831 - int count, int *eof, void *data) {
1835 - sz += sprintf ( page+sz, "name: %s\n", vg->vg_name);
1836 - sz += sprintf ( page+sz, "size: %u\n",
1837 - vg->pe_total * vg->pe_size / 2);
1838 - sz += sprintf ( page+sz, "access: %u\n", vg->vg_access);
1839 - sz += sprintf ( page+sz, "status: %u\n", vg->vg_status);
1840 - sz += sprintf ( page+sz, "number: %u\n", vg->vg_number);
1841 - sz += sprintf ( page+sz, "LV max: %u\n", vg->lv_max);
1842 - sz += sprintf ( page+sz, "LV current: %u\n", vg->lv_cur);
1843 - sz += sprintf ( page+sz, "LV open: %u\n", vg->lv_open);
1844 - sz += sprintf ( page+sz, "PV max: %u\n", vg->pv_max);
1845 - sz += sprintf ( page+sz, "PV current: %u\n", vg->pv_cur);
1846 - sz += sprintf ( page+sz, "PV active: %u\n", vg->pv_act);
1847 - sz += sprintf ( page+sz, "PE size: %u\n", vg->pe_size / 2);
1848 - sz += sprintf ( page+sz, "PE total: %u\n", vg->pe_total);
1849 - sz += sprintf ( page+sz, "PE allocated: %u\n", vg->pe_allocated);
1850 - sz += sprintf ( page+sz, "uuid: %s\n", lvm_show_uuid(vg->vg_uuid));
1857 - * provide LV information
1859 -int lvm_proc_read_lv_info(char *page, char **start, off_t off,
1860 - int count, int *eof, void *data) {
1864 - sz += sprintf ( page+sz, "name: %s\n", lv->lv_name);
1865 - sz += sprintf ( page+sz, "size: %u\n", lv->lv_size);
1866 - sz += sprintf ( page+sz, "access: %u\n", lv->lv_access);
1867 - sz += sprintf ( page+sz, "status: %u\n", lv->lv_status);
1868 - sz += sprintf ( page+sz, "number: %u\n", lv->lv_number);
1869 - sz += sprintf ( page+sz, "open: %u\n", lv->lv_open);
1870 - sz += sprintf ( page+sz, "allocation: %u\n", lv->lv_allocation);
1871 - sz += sprintf ( page+sz, "device: %02u:%02u\n",
1872 - MAJOR(lv->lv_dev), MINOR(lv->lv_dev));
1879 - * provide PV information
1881 -int lvm_proc_read_pv_info(char *page, char **start, off_t off,
1882 - int count, int *eof, void *data) {
1886 - sz += sprintf ( page+sz, "name: %s\n", pv->pv_name);
1887 - sz += sprintf ( page+sz, "size: %u\n", pv->pv_size);
1888 - sz += sprintf ( page+sz, "status: %u\n", pv->pv_status);
1889 - sz += sprintf ( page+sz, "number: %u\n", pv->pv_number);
1890 - sz += sprintf ( page+sz, "allocatable: %u\n", pv->pv_allocatable);
1891 - sz += sprintf ( page+sz, "LV current: %u\n", pv->lv_cur);
1892 - sz += sprintf ( page+sz, "PE size: %u\n", pv->pe_size / 2);
1893 - sz += sprintf ( page+sz, "PE total: %u\n", pv->pe_total);
1894 - sz += sprintf ( page+sz, "PE allocated: %u\n", pv->pe_allocated);
1895 - sz += sprintf ( page+sz, "device: %02u:%02u\n",
1896 - MAJOR(pv->pv_dev), MINOR(pv->pv_dev));
1897 - sz += sprintf ( page+sz, "uuid: %s\n", lvm_show_uuid(pv->pv_uuid));
1906 - * block device support function for /usr/src/linux/drivers/block/ll_rw_blk.c
1907 - * (see init_module/lvm_init)
1909 static int lvm_map(struct buffer_head *bh, int rw)
1911 int minor = MINOR(bh->b_rdev);
1915 ulong size = bh->b_size >> 9;
1916 - ulong rsector_tmp = bh->b_rsector;
1917 - ulong rsector_sav;
1918 - kdev_t rdev_tmp = bh->b_rdev;
1920 + ulong rsector_org = bh->b_rsector;
1921 + ulong rsector_map;
1923 vg_t *vg_this = vg[VG_BLK(minor)];
1924 lv_t *lv = vg_this->lv[LV_BLK(minor)];
1927 + down_read(&lv->lv_lock);
1928 if (!(lv->lv_status & LV_ACTIVE)) {
1930 "%s - lvm_map: ll_rw_blk for inactive LV %s\n",
1931 lvm_name, lv->lv_name);
1936 if ((rw == WRITE || rw == WRITEA) &&
1937 !(lv->lv_access & LV_WRITE)) {
1939 - "%s - lvm_map: ll_rw_blk write for readonly LV %s\n",
1940 + "%s - lvm_map: ll_rw_blk write for readonly LV %s\n",
1941 lvm_name, lv->lv_name);
1946 - P_MAP("%s - lvm_map minor:%d *rdev: %02d:%02d *rsector: %lu "
1948 + P_MAP("%s - lvm_map minor: %d *rdev: %s *rsector: %lu size:%lu\n",
1952 - rsector_tmp, size);
1953 + kdevname(bh->b_rdev),
1954 + rsector_org, size);
1956 - if (rsector_tmp + size > lv->lv_size) {
1957 + if (rsector_org + size > lv->lv_size) {
1959 "%s - lvm_map access beyond end of device; *rsector: "
1960 "%lu or size: %lu wrong for minor: %2d\n",
1961 - lvm_name, rsector_tmp, size, minor);
1963 + lvm_name, rsector_org, size, minor);
1966 - rsector_sav = rsector_tmp;
1967 - rdev_sav = rdev_tmp;
1970 - /* linear mapping */
1971 - if (lv->lv_stripes < 2) {
1973 + if (lv->lv_stripes < 2) { /* linear mapping */
1975 - index = rsector_tmp / vg_this->pe_size;
1976 + index = rsector_org / vg_this->pe_size;
1977 pe_start = lv->lv_current_pe[index].pe;
1978 - rsector_tmp = lv->lv_current_pe[index].pe +
1979 - (rsector_tmp % vg_this->pe_size);
1980 - rdev_tmp = lv->lv_current_pe[index].dev;
1982 - P_MAP("lv_current_pe[%ld].pe: %ld rdev: %02d:%02d "
1985 - lv->lv_current_pe[index].pe,
1989 + rsector_map = lv->lv_current_pe[index].pe +
1990 + (rsector_org % vg_this->pe_size);
1991 + rdev_map = lv->lv_current_pe[index].dev;
1993 + P_MAP("lv_current_pe[%ld].pe: %d rdev: %s rsector:%ld\n",
1994 + index, lv->lv_current_pe[index].pe,
1995 + kdevname(rdev_map), rsector_map);
1997 - /* striped mapping */
1999 + } else { /* striped mapping */
2001 ulong stripe_length;
2003 stripe_length = vg_this->pe_size * lv->lv_stripes;
2004 - stripe_index = (rsector_tmp % stripe_length) / lv->lv_stripesize;
2005 - index = rsector_tmp / stripe_length +
2006 - (stripe_index % lv->lv_stripes) *
2007 - (lv->lv_allocated_le / lv->lv_stripes);
2008 + stripe_index = (rsector_org % stripe_length) /
2009 + lv->lv_stripesize;
2010 + index = rsector_org / stripe_length +
2011 + (stripe_index % lv->lv_stripes) *
2012 + (lv->lv_allocated_le / lv->lv_stripes);
2013 pe_start = lv->lv_current_pe[index].pe;
2014 - rsector_tmp = lv->lv_current_pe[index].pe +
2015 - (rsector_tmp % stripe_length) -
2016 - (stripe_index % lv->lv_stripes) * lv->lv_stripesize -
2017 - stripe_index / lv->lv_stripes *
2018 - (lv->lv_stripes - 1) * lv->lv_stripesize;
2019 - rdev_tmp = lv->lv_current_pe[index].dev;
2022 - P_MAP("lv_current_pe[%ld].pe: %ld rdev: %02d:%02d rsector:%ld\n"
2023 - "stripe_length: %ld stripe_index: %ld\n",
2025 - lv->lv_current_pe[index].pe,
2031 + rsector_map = lv->lv_current_pe[index].pe +
2032 + (rsector_org % stripe_length) -
2033 + (stripe_index % lv->lv_stripes) * lv->lv_stripesize -
2034 + stripe_index / lv->lv_stripes *
2035 + (lv->lv_stripes - 1) * lv->lv_stripesize;
2036 + rdev_map = lv->lv_current_pe[index].dev;
2038 + P_MAP("lv_current_pe[%ld].pe: %d rdev: %s rsector:%ld\n"
2039 + "stripe_length: %ld stripe_index: %ld\n",
2040 + index, lv->lv_current_pe[index].pe, kdevname(rdev_map),
2041 + rsector_map, stripe_length, stripe_index);
2045 + * Queue writes to physical extents on the move until move completes.
2046 + * Don't get _pe_lock until there is a reasonable expectation that
2047 + * we need to queue this request, because this is in the fast path.
2049 + if (rw == WRITE || rw == WRITEA) {
2050 + if(_defer_extent(bh, rw, rdev_map,
2051 + rsector_map, vg_this->pe_size)) {
2053 - /* handle physical extents on the move */
2054 - if (pe_lock_req.lock == LOCK_PE) {
2055 - if (rdev_tmp == pe_lock_req.data.pv_dev &&
2056 - rsector_tmp >= pe_lock_req.data.pv_offset &&
2057 - rsector_tmp < (pe_lock_req.data.pv_offset +
2058 - vg_this->pe_size)) {
2059 - sleep_on(&lvm_map_wait);
2060 - rsector_tmp = rsector_sav;
2061 - rdev_tmp = rdev_sav;
2062 - goto lvm_second_remap;
2063 + up_read(&lv->lv_lock);
2068 - if (rw == WRITE || rw == WRITEA)
2069 - lv->lv_current_pe[index].writes++;
2071 - lv->lv_current_pe[index].reads++;
2073 + lv->lv_current_pe[index].writes++; /* statistic */
2075 + lv->lv_current_pe[index].reads++; /* statistic */
2077 /* snapshot volume exception handling on physical device address base */
2078 - if (lv->lv_access & (LV_SNAPSHOT|LV_SNAPSHOT_ORG)) {
2079 - /* original logical volume */
2080 - if (lv->lv_access & LV_SNAPSHOT_ORG) {
2081 - /* Serializes the access to the lv_snapshot_next list */
2082 - down(&lv->lv_snapshot_sem);
2083 - if (rw == WRITE || rw == WRITEA)
2087 - /* start with first snapshot and loop thrugh all of them */
2088 - for (lv_ptr = lv->lv_snapshot_next;
2090 - lv_ptr = lv_ptr->lv_snapshot_next) {
2091 - /* Check for inactive snapshot */
2092 - if (!(lv_ptr->lv_status & LV_ACTIVE)) continue;
2093 - /* Serializes the COW with the accesses to the snapshot device */
2094 - down(&lv_ptr->lv_snapshot_sem);
2095 - /* do we still have exception storage for this snapshot free? */
2096 - if (lv_ptr->lv_block_exception != NULL) {
2097 - rdev_sav = rdev_tmp;
2098 - rsector_sav = rsector_tmp;
2099 - if (!lvm_snapshot_remap_block(&rdev_tmp,
2103 - /* create a new mapping */
2104 - if (!(ret = lvm_snapshot_COW(rdev_tmp,
2109 - ret = lvm_write_COW_table_block(vg_this,
2112 - rdev_tmp = rdev_sav;
2113 - rsector_tmp = rsector_sav;
2115 - up(&lv_ptr->lv_snapshot_sem);
2118 - up(&lv->lv_snapshot_sem);
2120 - /* remap snapshot logical volume */
2121 - down(&lv->lv_snapshot_sem);
2122 - if (lv->lv_block_exception != NULL)
2123 - lvm_snapshot_remap_block(&rdev_tmp, &rsector_tmp, pe_start, lv);
2124 - up(&lv->lv_snapshot_sem);
2125 + if (!(lv->lv_access & (LV_SNAPSHOT|LV_SNAPSHOT_ORG)))
2128 + if (lv->lv_access & LV_SNAPSHOT) { /* remap snapshot */
2129 + if (lvm_snapshot_remap_block(&rdev_map, &rsector_map,
2130 + pe_start, lv) < 0)
2133 + } else if (rw == WRITE || rw == WRITEA) { /* snapshot origin */
2136 + /* start with first snapshot and loop through all of
2138 + for (snap = lv->lv_snapshot_next; snap;
2139 + snap = snap->lv_snapshot_next) {
2140 + /* Check for inactive snapshot */
2141 + if (!(snap->lv_status & LV_ACTIVE))
2144 + /* Serializes the COW with the accesses to the
2145 + snapshot device */
2146 + _remap_snapshot(rdev_map, rsector_map,
2147 + pe_start, snap, vg_this);
2150 - bh->b_rdev = rdev_tmp;
2151 - bh->b_rsector = rsector_tmp;
2155 + bh->b_rdev = rdev_map;
2156 + bh->b_rsector = rsector_map;
2157 + up_read(&lv->lv_lock);
2161 + buffer_IO_error(bh);
2162 + up_read(&lv->lv_lock);
2167 @@ -1646,6 +1330,8 @@
2174 * make request function
2176 @@ -1653,11 +1339,7 @@
2178 struct buffer_head *bh)
2180 - if (lvm_map(bh, rw) >= 0)
2183 - buffer_IO_error(bh);
2185 + return (lvm_map(bh, rw) <= 0) ? 0 : 1;
2189 @@ -1674,8 +1356,7 @@
2191 spin_lock(&lvm_lock);
2192 if (lock != 0 && lock != current->pid) {
2193 - P_IOCTL("lvm_do_lock_lvm: %s is locked by pid %d ...\n",
2195 + P_DEV("lvm_do_lock_lvm: locked by pid %d ...\n", lock);
2196 spin_unlock(&lvm_lock);
2197 interruptible_sleep_on(&lvm_wait);
2198 if (current->sigpending != 0)
2199 @@ -1687,6 +1368,7 @@
2200 goto lock_try_again;
2202 lock = current->pid;
2203 + P_DEV("lvm_do_lock_lvm: locking LVM for pid %d\n", lock);
2204 spin_unlock(&lvm_lock);
2206 } /* lvm_do_lock_lvm */
2207 @@ -1697,33 +1379,60 @@
2209 static int lvm_do_pe_lock_unlock(vg_t *vg_ptr, void *arg)
2211 + pe_lock_req_t new_lock;
2212 + struct buffer_head *bh;
2215 if (vg_ptr == NULL) return -ENXIO;
2216 - if (copy_from_user(&pe_lock_req, arg,
2217 - sizeof(pe_lock_req_t)) != 0) return -EFAULT;
2218 + if (copy_from_user(&new_lock, arg, sizeof(new_lock)) != 0)
2221 - switch (pe_lock_req.lock) {
2222 + switch (new_lock.lock) {
2224 for (p = 0; p < vg_ptr->pv_max; p++) {
2225 if (vg_ptr->pv[p] != NULL &&
2226 - pe_lock_req.data.pv_dev ==
2227 - vg_ptr->pv[p]->pv_dev)
2228 + new_lock.data.pv_dev == vg_ptr->pv[p]->pv_dev)
2231 if (p == vg_ptr->pv_max) return -ENXIO;
2233 - pe_lock_req.lock = UNLOCK_PE;
 2235 +	 * this sync relieves memory pressure to lessen the
 2236 +	 * likelihood of pvmove being paged out - resulting in
2239 + * This method of doing a pvmove is broken
2241 fsync_dev(pe_lock_req.data.lv_dev);
2243 + down_write(&_pe_lock);
2244 + if (pe_lock_req.lock == LOCK_PE) {
2245 + up_write(&_pe_lock);
2249 + /* Should we do to_kdev_t() on the pv_dev and lv_dev??? */
2250 pe_lock_req.lock = LOCK_PE;
2251 + pe_lock_req.data.lv_dev = new_lock.data.lv_dev;
2252 + pe_lock_req.data.pv_dev = new_lock.data.pv_dev;
2253 + pe_lock_req.data.pv_offset = new_lock.data.pv_offset;
2254 + up_write(&_pe_lock);
2256 + /* some requests may have got through since the fsync */
2257 + fsync_dev(pe_lock_req.data.pv_dev);
2261 + down_write(&_pe_lock);
2262 pe_lock_req.lock = UNLOCK_PE;
2263 - pe_lock_req.data.lv_dev = \
2264 - pe_lock_req.data.pv_dev = \
2265 + pe_lock_req.data.lv_dev = 0;
2266 + pe_lock_req.data.pv_dev = 0;
2267 pe_lock_req.data.pv_offset = 0;
2268 - wake_up(&lvm_map_wait);
2269 + bh = _dequeue_io();
2270 + up_write(&_pe_lock);
2272 + /* handle all deferred io for this PE */
2277 @@ -1760,6 +1469,8 @@
2278 le_remap_req.new_dev;
2279 lv_ptr->lv_current_pe[le].pe =
2280 le_remap_req.new_pe;
2282 + __update_hardsectsize(lv_ptr);
2286 @@ -1773,7 +1484,7 @@
2288 * character device support function VGDA create
2290 -int lvm_do_vg_create(int minor, void *arg)
2291 +static int lvm_do_vg_create(void *arg, int minor)
2294 ulong l, ls = 0, p, size;
2295 @@ -1781,8 +1492,6 @@
2299 - if (vg[VG_CHR(minor)] != NULL) return -EPERM;
2301 if ((vg_ptr = kmalloc(sizeof(vg_t),GFP_KERNEL)) == NULL) {
2303 "%s -- VG_CREATE: kmalloc error VG at line %d\n",
2304 @@ -1791,35 +1500,51 @@
2306 /* get the volume group structure */
2307 if (copy_from_user(vg_ptr, arg, sizeof(vg_t)) != 0) {
2308 + P_IOCTL("lvm_do_vg_create ERROR: copy VG ptr %p (%d bytes)\n",
2309 + arg, sizeof(vg_t));
2314 + /* VG_CREATE now uses minor number in VG structure */
2315 + if (minor == -1) minor = vg_ptr->vg_number;
2318 + if (vg[VG_CHR(minor)] != NULL) {
2319 + P_IOCTL("lvm_do_vg_create ERROR: VG %d in use\n", minor);
2324 /* we are not that active so far... */
2325 vg_ptr->vg_status &= ~VG_ACTIVE;
2326 - vg[VG_CHR(minor)] = vg_ptr;
2327 - vg[VG_CHR(minor)]->pe_allocated = 0;
2328 + vg_ptr->pe_allocated = 0;
2330 if (vg_ptr->pv_max > ABS_MAX_PV) {
2332 "%s -- Can't activate VG: ABS_MAX_PV too small\n",
2335 - vg[VG_CHR(minor)] = NULL;
2339 if (vg_ptr->lv_max > ABS_MAX_LV) {
2341 "%s -- Can't activate VG: ABS_MAX_LV too small for %u\n",
2342 lvm_name, vg_ptr->lv_max);
2348 + /* create devfs and procfs entries */
2349 + lvm_fs_create_vg(vg_ptr);
2351 + vg[VG_CHR(minor)] = vg_ptr;
2353 /* get the physical volume structures */
2354 vg_ptr->pv_act = vg_ptr->pv_cur = 0;
2355 for (p = 0; p < vg_ptr->pv_max; p++) {
2357 /* user space address */
2358 if ((pvp = vg_ptr->pv[p]) != NULL) {
2359 ret = lvm_do_pv_create(pvp, vg_ptr, p);
2360 @@ -1843,9 +1568,12 @@
2361 /* get the logical volume structures */
2363 for (l = 0; l < vg_ptr->lv_max; l++) {
2365 /* user space address */
2366 if ((lvp = vg_ptr->lv[l]) != NULL) {
2367 if (copy_from_user(&lv, lvp, sizeof(lv_t)) != 0) {
2368 + P_IOCTL("ERROR: copying LV ptr %p (%d bytes)\n",
2369 + lvp, sizeof(lv_t));
2370 lvm_do_vg_remove(minor);
2373 @@ -1864,12 +1592,10 @@
2377 - lvm_do_create_devfs_entry_of_vg ( vg_ptr);
2379 /* Second path to correct snapshot logical volumes which are not
2380 in place during first path above */
2381 for (l = 0; l < ls; l++) {
2382 - lvp = snap_lv_ptr[l];
2383 + lv_t *lvp = snap_lv_ptr[l];
2384 if (copy_from_user(&lv, lvp, sizeof(lv_t)) != 0) {
2385 lvm_do_vg_remove(minor);
2387 @@ -1880,8 +1606,6 @@
2391 - lvm_do_create_proc_entry_of_vg ( vg_ptr);
2396 @@ -1913,7 +1637,6 @@
2397 if ( ret != 0) return ret;
2398 pv_ptr = vg_ptr->pv[p];
2399 vg_ptr->pe_total += pv_ptr->pe_total;
2400 - lvm_do_create_proc_entry_of_pv(vg_ptr, pv_ptr);
2404 @@ -1963,10 +1686,13 @@
2405 lv_t *lv_ptr = NULL;
2406 pv_t *pv_ptr = NULL;
2408 + /* If the VG doesn't exist in the kernel then just exit */
2409 + if (!vg_ptr) return 0;
2411 if (copy_from_user(vg_name, arg, sizeof(vg_name)) != 0)
2414 - lvm_do_remove_proc_entry_of_vg ( vg_ptr);
2415 + lvm_fs_remove_vg(vg_ptr);
2417 strncpy ( vg_ptr->vg_name, vg_name, sizeof ( vg_name)-1);
2418 for ( l = 0; l < vg_ptr->lv_max; l++)
2419 @@ -1988,7 +1714,7 @@
2420 strncpy(pv_ptr->vg_name, vg_name, NAME_LEN);
2423 - lvm_do_create_proc_entry_of_vg ( vg_ptr);
2424 + lvm_fs_create_vg(vg_ptr);
2427 } /* lvm_do_vg_rename */
2428 @@ -2015,6 +1741,9 @@
2429 /* let's go inactive */
2430 vg_ptr->vg_status &= ~VG_ACTIVE;
2432 + /* remove from procfs and devfs */
2433 + lvm_fs_remove_vg(vg_ptr);
2436 /* first free snapshot logical volumes */
2437 for (i = 0; i < vg_ptr->lv_max; i++) {
2438 @@ -2042,11 +1771,6 @@
2442 - devfs_unregister (ch_devfs_handle[vg_ptr->vg_number]);
2443 - devfs_unregister (vg_devfs_handle[vg_ptr->vg_number]);
2445 - lvm_do_remove_proc_entry_of_vg ( vg_ptr);
2447 P_KFREE("%s -- kfree %d\n", lvm_name, __LINE__);
2449 vg[VG_CHR(minor)] = NULL;
2450 @@ -2063,66 +1787,138 @@
2451 * character device support function physical volume create
2453 static int lvm_do_pv_create(pv_t *pvp, vg_t *vg_ptr, ulong p) {
2454 - pv_t *pv_ptr = NULL;
2458 - pv_ptr = vg_ptr->pv[p] = kmalloc(sizeof(pv_t),GFP_KERNEL);
2459 - if (pv_ptr == NULL) {
2460 + pv = kmalloc(sizeof(pv_t),GFP_KERNEL);
2463 - "%s -- VG_CREATE: kmalloc error PV at line %d\n",
2464 + "%s -- PV_CREATE: kmalloc error PV at line %d\n",
2465 lvm_name, __LINE__);
2468 - if (copy_from_user(pv_ptr, pvp, sizeof(pv_t)) != 0) {
2470 + memset(pv, 0, sizeof(*pv));
2472 + if (copy_from_user(pv, pvp, sizeof(pv_t)) != 0) {
2473 + P_IOCTL("lvm_do_pv_create ERROR: copy PV ptr %p (%d bytes)\n",
2474 + pvp, sizeof(pv_t));
2479 + if ((err = _open_pv(pv))) {
2484 /* We don't need the PE list
2485 in kernel space as with LVs pe_t list (see below) */
2486 - pv_ptr->pe = NULL;
2487 - pv_ptr->pe_allocated = 0;
2488 - pv_ptr->pv_status = PV_ACTIVE;
2490 + pv->pe_allocated = 0;
2491 + pv->pv_status = PV_ACTIVE;
2494 + lvm_fs_create_pv(vg_ptr, pv);
2496 + vg_ptr->pv[p] = pv;
2498 } /* lvm_do_pv_create() */
2502 - * character device support function physical volume create
2503 + * character device support function physical volume remove
2505 static int lvm_do_pv_remove(vg_t *vg_ptr, ulong p) {
2506 - pv_t *pv_ptr = vg_ptr->pv[p];
2507 + pv_t *pv = vg_ptr->pv[p];
2509 - lvm_do_remove_proc_entry_of_pv ( vg_ptr, pv_ptr);
2510 - vg_ptr->pe_total -= pv_ptr->pe_total;
2511 + lvm_fs_remove_pv(vg_ptr, pv);
2513 + vg_ptr->pe_total -= pv->pe_total;
2516 -#ifdef LVM_GET_INODE
2517 - lvm_clear_inode(pv_ptr->inode);
2524 vg_ptr->pv[p] = NULL;
2530 +static void __update_hardsectsize(lv_t *lv)
2532 + int max_hardsectsize = 0, hardsectsize = 0;
2535 + /* Check PVs first to see if they all have same sector size */
2536 + for (p = 0; p < lv->vg->pv_cur; p++) {
2537 + pv_t *pv = lv->vg->pv[p];
2538 + if (pv && (hardsectsize = lvm_sectsize(pv->pv_dev))) {
2539 + if (max_hardsectsize == 0)
2540 + max_hardsectsize = hardsectsize;
2541 + else if (hardsectsize != max_hardsectsize) {
2542 + P_DEV("%s PV[%d] (%s) sector size %d, not %d\n",
2543 + lv->lv_name, p, kdevname(pv->pv_dev),
2544 + hardsectsize, max_hardsectsize);
2550 + /* PVs have different block size, need to check each LE sector size */
2551 + if (hardsectsize != max_hardsectsize) {
2553 + for (le = 0; le < lv->lv_allocated_le; le++) {
2554 + hardsectsize = lvm_sectsize(lv->lv_current_pe[le].dev);
2555 + if (hardsectsize > max_hardsectsize) {
2556 + P_DEV("%s LE[%d] (%s) blocksize %d not %d\n",
2558 + kdevname(lv->lv_current_pe[le].dev),
2559 + hardsectsize, max_hardsectsize);
2560 + max_hardsectsize = hardsectsize;
2564 + /* only perform this operation on active snapshots */
2565 + if ((lv->lv_access & LV_SNAPSHOT) &&
2566 + (lv->lv_status & LV_ACTIVE)) {
2568 + for (e = 0; e < lv->lv_remap_end; e++) {
2569 + hardsectsize = lvm_sectsize(lv->lv_block_exception[e].rdev_new);
2570 + if (hardsectsize > max_hardsectsize)
2571 + max_hardsectsize = hardsectsize;
2576 + if (max_hardsectsize == 0)
2577 + max_hardsectsize = SECTOR_SIZE;
2578 + P_DEV("hardblocksize for LV %s is %d\n",
2579 + kdevname(lv->lv_dev), max_hardsectsize);
2580 + lvm_hardsectsizes[MINOR(lv->lv_dev)] = max_hardsectsize;
2584 * character device support function logical volume create
2586 static int lvm_do_lv_create(int minor, char *lv_name, lv_t *lv)
2588 - int e, ret, l, le, l_new, p, size;
2589 + int e, ret, l, le, l_new, p, size, activate = 1;
2590 ulong lv_status_save;
2591 lv_block_exception_t *lvbe = lv->lv_block_exception;
2592 vg_t *vg_ptr = vg[VG_CHR(minor)];
2593 lv_t *lv_ptr = NULL;
2596 - if ((pep = lv->lv_current_pe) == NULL) return -EINVAL;
2597 - if (lv->lv_chunk_size > LVM_SNAPSHOT_MAX_CHUNK)
2598 + if (!(pep = lv->lv_current_pe))
2601 - for (l = 0; l < vg_ptr->lv_max; l++) {
2602 + if (_sectors_to_k(lv->lv_chunk_size) > LVM_SNAPSHOT_MAX_CHUNK)
2605 + for (l = 0; l < vg_ptr->lv_cur; l++) {
2606 if (vg_ptr->lv[l] != NULL &&
2607 strcmp(vg_ptr->lv[l]->lv_name, lv_name) == 0)
2609 @@ -2151,23 +1947,26 @@
2611 lv_status_save = lv_ptr->lv_status;
2612 lv_ptr->lv_status &= ~LV_ACTIVE;
2613 - lv_ptr->lv_snapshot_org = \
2614 - lv_ptr->lv_snapshot_prev = \
2615 + lv_ptr->lv_snapshot_org = NULL;
2616 + lv_ptr->lv_snapshot_prev = NULL;
2617 lv_ptr->lv_snapshot_next = NULL;
2618 lv_ptr->lv_block_exception = NULL;
2619 lv_ptr->lv_iobuf = NULL;
2620 + lv_ptr->lv_COW_table_iobuf = NULL;
2621 lv_ptr->lv_snapshot_hash_table = NULL;
2622 lv_ptr->lv_snapshot_hash_table_size = 0;
2623 lv_ptr->lv_snapshot_hash_mask = 0;
2624 - lv_ptr->lv_COW_table_page = NULL;
2625 - init_MUTEX(&lv_ptr->lv_snapshot_sem);
2626 + init_rwsem(&lv_ptr->lv_lock);
2628 lv_ptr->lv_snapshot_use_rate = 0;
2630 vg_ptr->lv[l] = lv_ptr;
2632 /* get the PE structures from user space if this
2633 - is no snapshot logical volume */
2634 + is not a snapshot logical volume */
2635 if (!(lv_ptr->lv_access & LV_SNAPSHOT)) {
2636 size = lv_ptr->lv_allocated_le * sizeof(pe_t);
2638 if ((lv_ptr->lv_current_pe = vmalloc(size)) == NULL) {
2640 "%s -- LV_CREATE: vmalloc error LV_CURRENT_PE of %d Byte "
2641 @@ -2179,6 +1978,8 @@
2644 if (copy_from_user(lv_ptr->lv_current_pe, pep, size)) {
2645 + P_IOCTL("ERROR: copying PE ptr %p (%d bytes)\n",
2646 + pep, sizeof(size));
2647 vfree(lv_ptr->lv_current_pe);
2649 vg_ptr->lv[l] = NULL;
2650 @@ -2200,6 +2001,15 @@
2651 vg_ptr->lv[LV_BLK(lv_ptr->lv_snapshot_minor)];
2652 if (lv_ptr->lv_snapshot_org != NULL) {
2653 size = lv_ptr->lv_remap_end * sizeof(lv_block_exception_t);
2656 + printk(KERN_WARNING
2657 + "%s -- zero length exception table requested\n",
2663 if ((lv_ptr->lv_block_exception = vmalloc(size)) == NULL) {
2665 "%s -- lvm_do_lv_create: vmalloc error LV_BLOCK_EXCEPTION "
2666 @@ -2217,6 +2027,16 @@
2667 vg_ptr->lv[l] = NULL;
2671 + if(lv_ptr->lv_block_exception[0].rsector_org ==
2672 + LVM_SNAPSHOT_DROPPED_SECTOR)
2674 + printk(KERN_WARNING
2675 + "%s -- lvm_do_lv_create: snapshot has been dropped and will not be activated\n",
2680 /* point to the original logical volume */
2681 lv_ptr = lv_ptr->lv_snapshot_org;
2683 @@ -2250,10 +2070,13 @@
2684 lv_ptr->lv_block_exception[e].rsector_org, lv_ptr);
2685 /* need to fill the COW exception table data
2686 into the page for disk i/o */
2687 - lvm_snapshot_fill_COW_page(vg_ptr, lv_ptr);
2688 + if(lvm_snapshot_fill_COW_page(vg_ptr, lv_ptr)) {
2690 + vg_ptr->lv[l] = NULL;
2693 init_waitqueue_head(&lv_ptr->lv_snapshot_wait);
2695 - vfree(lv_ptr->lv_block_exception);
2697 vg_ptr->lv[l] = NULL;
2699 @@ -2274,22 +2097,9 @@
2700 LVM_CORRECT_READ_AHEAD(lv_ptr->lv_read_ahead);
2702 lv_ptr->lv_status = lv_status_save;
2703 + lv_ptr->vg = vg_ptr;
2706 - char *lv_tmp, *lv_buf = lv->lv_name;
2708 - strtok(lv->lv_name, "/"); /* /dev */
2709 - while((lv_tmp = strtok(NULL, "/")) != NULL)
2712 - lv_devfs_handle[lv->lv_number] = devfs_register(
2713 - vg_devfs_handle[vg_ptr->vg_number], lv_buf,
2714 - DEVFS_FL_DEFAULT, LVM_BLK_MAJOR, lv->lv_number,
2715 - S_IFBLK | S_IRUSR | S_IWUSR | S_IRGRP,
2716 - &lvm_blk_dops, NULL);
2719 - lvm_do_create_proc_entry_of_lv ( vg_ptr, lv_ptr);
2720 + __update_hardsectsize(lv_ptr);
2722 /* optionally add our new snapshot LV */
2723 if (lv_ptr->lv_access & LV_SNAPSHOT) {
2724 @@ -2302,19 +2112,24 @@
2725 fsync_dev_lockfs(org->lv_dev);
2728 - down(&org->lv_snapshot_sem);
2729 + down_write(&org->lv_lock);
2730 org->lv_access |= LV_SNAPSHOT_ORG;
2731 lv_ptr->lv_access &= ~LV_SNAPSHOT_ORG; /* this can only hide an userspace bug */
2734 /* Link in the list of snapshot volumes */
2735 for (last = org; last->lv_snapshot_next; last = last->lv_snapshot_next);
2736 lv_ptr->lv_snapshot_prev = last;
2737 last->lv_snapshot_next = lv_ptr;
2738 - up(&org->lv_snapshot_sem);
2739 + up_write(&org->lv_lock);
2742 /* activate the logical volume */
2743 - lv_ptr->lv_status |= LV_ACTIVE;
2745 + lv_ptr->lv_status |= LV_ACTIVE;
2747 + lv_ptr->lv_status &= ~LV_ACTIVE;
2749 if ( lv_ptr->lv_access & LV_WRITE)
2750 set_device_ro(lv_ptr->lv_dev, 0);
2752 @@ -2322,13 +2137,12 @@
2754 #ifdef LVM_VFS_ENHANCEMENT
2755 /* VFS function call to unlock the filesystem */
2756 - if (lv_ptr->lv_access & LV_SNAPSHOT) {
2757 + if (lv_ptr->lv_access & LV_SNAPSHOT)
2758 unlockfs(lv_ptr->lv_snapshot_org->lv_dev);
2762 - lv_ptr->vg = vg_ptr;
2764 + lvm_gendisk.part[MINOR(lv_ptr->lv_dev)].de =
2765 + lvm_fs_create_lv(vg_ptr, lv_ptr);
2767 } /* lvm_do_lv_create() */
2769 @@ -2366,13 +2180,15 @@
2770 lv_ptr->lv_snapshot_next != NULL)
2773 + lvm_fs_remove_lv(vg_ptr, lv_ptr);
2775 if (lv_ptr->lv_access & LV_SNAPSHOT) {
2777 * Atomically make the the snapshot invisible
2778 * to the original lv before playing with it.
2780 lv_t * org = lv_ptr->lv_snapshot_org;
2781 - down(&org->lv_snapshot_sem);
2782 + down_write(&org->lv_lock);
2784 /* remove this snapshot logical volume from the chain */
2785 lv_ptr->lv_snapshot_prev->lv_snapshot_next = lv_ptr->lv_snapshot_next;
2786 @@ -2380,11 +2196,13 @@
2787 lv_ptr->lv_snapshot_next->lv_snapshot_prev =
2788 lv_ptr->lv_snapshot_prev;
2790 - up(&org->lv_snapshot_sem);
2792 /* no more snapshots? */
2793 - if (!org->lv_snapshot_next)
2794 + if (!org->lv_snapshot_next) {
2795 org->lv_access &= ~LV_SNAPSHOT_ORG;
2797 + up_write(&org->lv_lock);
2799 lvm_snapshot_release(lv_ptr);
2801 /* Update the VG PE(s) used by snapshot reserve space. */
2802 @@ -2404,6 +2222,7 @@
2803 /* reset generic hd */
2804 lvm_gendisk.part[MINOR(lv_ptr->lv_dev)].start_sect = -1;
2805 lvm_gendisk.part[MINOR(lv_ptr->lv_dev)].nr_sects = 0;
2806 + lvm_gendisk.part[MINOR(lv_ptr->lv_dev)].de = 0;
2807 lvm_size[MINOR(lv_ptr->lv_dev)] = 0;
2809 /* reset VG/LV mapping */
2810 @@ -2427,10 +2246,6 @@
2811 vfree(lv_ptr->lv_current_pe);
2814 - devfs_unregister(lv_devfs_handle[lv_ptr->lv_number]);
2816 - lvm_do_remove_proc_entry_of_lv ( vg_ptr, lv_ptr);
2818 P_KFREE("%s -- kfree %d\n", lvm_name, __LINE__);
2820 vg_ptr->lv[l] = NULL;
2821 @@ -2440,204 +2255,215 @@
2825 - * character device support function logical volume extend / reduce
2826 + * logical volume extend / reduce
2828 -static int lvm_do_lv_extend_reduce(int minor, char *lv_name, lv_t *lv)
2830 - ulong end, l, le, p, size, old_allocated_le;
2831 - vg_t *vg_ptr = vg[VG_CHR(minor)];
2834 +static int __extend_reduce_snapshot(vg_t *vg_ptr, lv_t *old_lv, lv_t *new_lv) {
2836 + lv_block_exception_t *lvbe;
2838 - if ((pep = lv->lv_current_pe) == NULL) return -EINVAL;
2839 + if (!new_lv->lv_block_exception)
2842 - for (l = 0; l < vg_ptr->lv_max; l++) {
2843 - if (vg_ptr->lv[l] != NULL &&
2844 - strcmp(vg_ptr->lv[l]->lv_name, lv_name) == 0)
2846 + size = new_lv->lv_remap_end * sizeof(lv_block_exception_t);
2847 + if ((lvbe = vmalloc(size)) == NULL) {
2849 + "%s -- lvm_do_lv_extend_reduce: vmalloc "
2850 + "error LV_BLOCK_EXCEPTION of %lu Byte at line %d\n",
2851 + lvm_name, size, __LINE__);
2854 - if (l == vg_ptr->lv_max) return -ENXIO;
2855 - lv_ptr = vg_ptr->lv[l];
2857 - /* check for active snapshot */
2858 - if (lv->lv_access & LV_SNAPSHOT)
2861 - lv_block_exception_t *lvbe, *lvbe_old;
2862 - struct list_head * lvs_hash_table_old;
2864 - if (lv->lv_block_exception == NULL) return -ENXIO;
2865 - size = lv->lv_remap_end * sizeof ( lv_block_exception_t);
2866 - if ((lvbe = vmalloc(size)) == NULL)
2869 - "%s -- lvm_do_lv_extend_reduce: vmalloc error LV_BLOCK_EXCEPTION "
2870 - "of %lu Byte at line %d\n",
2871 - lvm_name, size, __LINE__);
2874 - if (lv->lv_remap_end > lv_ptr->lv_remap_end)
2876 - if (copy_from_user(lvbe, lv->lv_block_exception, size))
2883 - lvbe_old = lv_ptr->lv_block_exception;
2884 - lvs_hash_table_old = lv_ptr->lv_snapshot_hash_table;
2886 - /* we need to play on the safe side here... */
2887 - down(&lv_ptr->lv_snapshot_org->lv_snapshot_sem);
2888 - if (lv_ptr->lv_block_exception == NULL ||
2889 - lv_ptr->lv_remap_ptr > lv_ptr->lv_remap_end)
2891 - up(&lv_ptr->lv_snapshot_org->lv_snapshot_sem);
2896 - lv_ptr->lv_block_exception,
2897 - (lv->lv_remap_end > lv_ptr->lv_remap_end ?
2898 - lv_ptr->lv_remap_ptr : lv->lv_remap_end) * sizeof(lv_block_exception_t));
2900 - lv_ptr->lv_block_exception = lvbe;
2901 - lv_ptr->lv_remap_end = lv->lv_remap_end;
2902 - if (lvm_snapshot_alloc_hash_table(lv_ptr) != 0)
2904 - lvm_drop_snapshot(lv_ptr, "no memory for hash table");
2905 - up(&lv_ptr->lv_snapshot_org->lv_snapshot_sem);
2907 - vfree(lvs_hash_table_old);
2911 - for (e = 0; e < lv_ptr->lv_remap_ptr; e++)
2912 - lvm_hash_link (lv_ptr->lv_block_exception + e,
2913 - lv_ptr->lv_block_exception[e].rdev_org,
2914 - lv_ptr->lv_block_exception[e].rsector_org, lv_ptr);
2916 - up(&lv_ptr->lv_snapshot_org->lv_snapshot_sem);
2919 - vfree(lvs_hash_table_old);
2920 + if ((new_lv->lv_remap_end > old_lv->lv_remap_end) &&
2921 + (copy_from_user(lvbe, new_lv->lv_block_exception, size))) {
2925 + new_lv->lv_block_exception = lvbe;
2928 + if (lvm_snapshot_alloc_hash_table(new_lv)) {
2929 + vfree(new_lv->lv_block_exception);
2936 - /* we drop in here in case it is an original logical volume */
2937 - if ((pe = vmalloc(size = lv->lv_current_le * sizeof(pe_t))) == NULL) {
2938 +static int __extend_reduce(vg_t *vg_ptr, lv_t *old_lv, lv_t *new_lv) {
2939 + ulong size, l, p, end;
2942 + /* allocate space for new pe structures */
2943 + size = new_lv->lv_current_le * sizeof(pe_t);
2944 + if ((pe = vmalloc(size)) == NULL) {
2946 - "%s -- lvm_do_lv_extend_reduce: vmalloc error LV_CURRENT_PE "
2947 - "of %lu Byte at line %d\n",
2948 + "%s -- lvm_do_lv_extend_reduce: "
2949 + "vmalloc error LV_CURRENT_PE of %lu Byte at line %d\n",
2950 lvm_name, size, __LINE__);
2954 /* get the PE structures from user space */
2955 - if (copy_from_user(pe, pep, size)) {
2956 + if (copy_from_user(pe, new_lv->lv_current_pe, size)) {
2957 + if(old_lv->lv_access & LV_SNAPSHOT)
2958 + vfree(new_lv->lv_snapshot_hash_table);
2963 + new_lv->lv_current_pe = pe;
2965 /* reduce allocation counters on PV(s) */
2966 - for (le = 0; le < lv_ptr->lv_allocated_le; le++) {
2967 + for (l = 0; l < old_lv->lv_allocated_le; l++) {
2968 vg_ptr->pe_allocated--;
2969 for (p = 0; p < vg_ptr->pv_cur; p++) {
2970 if (vg_ptr->pv[p]->pv_dev ==
2971 - lv_ptr->lv_current_pe[le].dev) {
2972 + old_lv->lv_current_pe[l].dev) {
2973 vg_ptr->pv[p]->pe_allocated--;
2980 - /* save pointer to "old" lv/pe pointer array */
2981 - pep1 = lv_ptr->lv_current_pe;
2982 - end = lv_ptr->lv_current_le;
2984 - /* save open counter... */
2985 - lv->lv_open = lv_ptr->lv_open;
2986 - lv->lv_snapshot_prev = lv_ptr->lv_snapshot_prev;
2987 - lv->lv_snapshot_next = lv_ptr->lv_snapshot_next;
2988 - lv->lv_snapshot_org = lv_ptr->lv_snapshot_org;
2990 - lv->lv_current_pe = pe;
2992 - /* save # of old allocated logical extents */
2993 - old_allocated_le = lv_ptr->lv_allocated_le;
2995 - /* copy preloaded LV */
2996 - memcpy((char *) lv_ptr, (char *) lv, sizeof(lv_t));
2998 - lvm_gendisk.part[MINOR(lv_ptr->lv_dev)].start_sect = 0;
2999 - lvm_gendisk.part[MINOR(lv_ptr->lv_dev)].nr_sects = lv_ptr->lv_size;
3000 - lvm_size[MINOR(lv_ptr->lv_dev)] = lv_ptr->lv_size >> 1;
3001 - /* vg_lv_map array doesn't have to be changed here */
3003 - LVM_CORRECT_READ_AHEAD(lv_ptr->lv_read_ahead);
3004 + /* extend the PE count in PVs */
3005 + for (l = 0; l < new_lv->lv_allocated_le; l++) {
3006 + vg_ptr->pe_allocated++;
3007 + for (p = 0; p < vg_ptr->pv_cur; p++) {
3008 + if (vg_ptr->pv[p]->pv_dev ==
3009 + new_lv->lv_current_pe[l].dev) {
3010 + vg_ptr->pv[p]->pe_allocated++;
3016 /* save availiable i/o statistic data */
3017 - /* linear logical volume */
3018 - if (lv_ptr->lv_stripes < 2) {
3019 - /* Check what last LE shall be used */
3020 - if (end > lv_ptr->lv_current_le) end = lv_ptr->lv_current_le;
3021 - for (le = 0; le < end; le++) {
3022 - lv_ptr->lv_current_pe[le].reads += pep1[le].reads;
3023 - lv_ptr->lv_current_pe[le].writes += pep1[le].writes;
3024 + if (old_lv->lv_stripes < 2) { /* linear logical volume */
3025 + end = min(old_lv->lv_current_le, new_lv->lv_current_le);
3026 + for (l = 0; l < end; l++) {
3027 + new_lv->lv_current_pe[l].reads +=
3028 + old_lv->lv_current_pe[l].reads;
3030 + new_lv->lv_current_pe[l].writes +=
3031 + old_lv->lv_current_pe[l].writes;
3033 - /* striped logical volume */
3036 + } else { /* striped logical volume */
3037 uint i, j, source, dest, end, old_stripe_size, new_stripe_size;
3039 - old_stripe_size = old_allocated_le / lv_ptr->lv_stripes;
3040 - new_stripe_size = lv_ptr->lv_allocated_le / lv_ptr->lv_stripes;
3041 - end = old_stripe_size;
3042 - if (end > new_stripe_size) end = new_stripe_size;
3043 - for (i = source = dest = 0;
3044 - i < lv_ptr->lv_stripes; i++) {
3045 + old_stripe_size = old_lv->lv_allocated_le / old_lv->lv_stripes;
3046 + new_stripe_size = new_lv->lv_allocated_le / new_lv->lv_stripes;
3047 + end = min(old_stripe_size, new_stripe_size);
3049 + for (i = source = dest = 0; i < new_lv->lv_stripes; i++) {
3050 for (j = 0; j < end; j++) {
3051 - lv_ptr->lv_current_pe[dest + j].reads +=
3052 - pep1[source + j].reads;
3053 - lv_ptr->lv_current_pe[dest + j].writes +=
3054 - pep1[source + j].writes;
3055 + new_lv->lv_current_pe[dest + j].reads +=
3056 + old_lv->lv_current_pe[source + j].reads;
3057 + new_lv->lv_current_pe[dest + j].writes +=
3058 + old_lv->lv_current_pe[source + j].writes;
3060 source += old_stripe_size;
3061 dest += new_stripe_size;
3065 - /* extend the PE count in PVs */
3066 - for (le = 0; le < lv_ptr->lv_allocated_le; le++) {
3067 - vg_ptr->pe_allocated++;
3068 - for (p = 0; p < vg_ptr->pv_cur; p++) {
3069 - if (vg_ptr->pv[p]->pv_dev ==
3070 - lv_ptr->lv_current_pe[le].dev) {
3071 - vg_ptr->pv[p]->pe_allocated++;
3079 +static int lvm_do_lv_extend_reduce(int minor, char *lv_name, lv_t *new_lv)
3083 + vg_t *vg_ptr = vg[VG_CHR(minor)];
3089 + if ((pe = new_lv->lv_current_pe) == NULL)
3092 - if (lv->lv_access & LV_SNAPSHOT_ORG)
3094 - /* Correct the snapshot size information */
3095 - while ((lv_ptr = lv_ptr->lv_snapshot_next) != NULL)
3097 - lv_ptr->lv_current_pe = lv_ptr->lv_snapshot_org->lv_current_pe;
3098 - lv_ptr->lv_allocated_le = lv_ptr->lv_snapshot_org->lv_allocated_le;
3099 - lv_ptr->lv_current_le = lv_ptr->lv_snapshot_org->lv_current_le;
3100 - lv_ptr->lv_size = lv_ptr->lv_snapshot_org->lv_size;
3101 - lvm_gendisk.part[MINOR(lv_ptr->lv_dev)].nr_sects = lv_ptr->lv_size;
3102 - lvm_size[MINOR(lv_ptr->lv_dev)] = lv_ptr->lv_size >> 1;
3103 + for (l = 0; l < vg_ptr->lv_max; l++)
3104 + if (vg_ptr->lv[l] && !strcmp(vg_ptr->lv[l]->lv_name, lv_name))
3107 + if (l == vg_ptr->lv_max)
3110 + old_lv = vg_ptr->lv[l];
3112 + if (old_lv->lv_access & LV_SNAPSHOT) {
3113 + /* only perform this operation on active snapshots */
3114 + if (old_lv->lv_status & LV_ACTIVE)
3115 + r = __extend_reduce_snapshot(vg_ptr, old_lv, new_lv);
3120 + r = __extend_reduce(vg_ptr, old_lv, new_lv);
3125 + /* copy relevant fields */
3126 + down_write(&old_lv->lv_lock);
3128 + if(new_lv->lv_access & LV_SNAPSHOT) {
3129 + size = (new_lv->lv_remap_end > old_lv->lv_remap_end) ?
3130 + old_lv->lv_remap_ptr : new_lv->lv_remap_end;
3131 + size *= sizeof(lv_block_exception_t);
3132 + memcpy(new_lv->lv_block_exception,
3133 + old_lv->lv_block_exception, size);
3135 + old_lv->lv_remap_end = new_lv->lv_remap_end;
3136 + old_lv->lv_block_exception = new_lv->lv_block_exception;
3137 + old_lv->lv_snapshot_hash_table =
3138 + new_lv->lv_snapshot_hash_table;
3139 + old_lv->lv_snapshot_hash_table_size =
3140 + new_lv->lv_snapshot_hash_table_size;
3141 + old_lv->lv_snapshot_hash_mask =
3142 + new_lv->lv_snapshot_hash_mask;
3144 + for (e = 0; e < new_lv->lv_remap_ptr; e++)
3145 + lvm_hash_link(new_lv->lv_block_exception + e,
3146 + new_lv->lv_block_exception[e].rdev_org,
3147 + new_lv->lv_block_exception[e].rsector_org,
3152 + vfree(old_lv->lv_current_pe);
3153 + vfree(old_lv->lv_snapshot_hash_table);
3155 + old_lv->lv_size = new_lv->lv_size;
3156 + old_lv->lv_allocated_le = new_lv->lv_allocated_le;
3157 + old_lv->lv_current_le = new_lv->lv_current_le;
3158 + old_lv->lv_current_pe = new_lv->lv_current_pe;
3159 + lvm_gendisk.part[MINOR(old_lv->lv_dev)].nr_sects =
3161 + lvm_size[MINOR(old_lv->lv_dev)] = old_lv->lv_size >> 1;
3163 + if (old_lv->lv_access & LV_SNAPSHOT_ORG) {
3165 + for(snap = old_lv->lv_snapshot_next; snap;
3166 + snap = snap->lv_snapshot_next) {
3167 + down_write(&snap->lv_lock);
3168 + snap->lv_current_pe = old_lv->lv_current_pe;
3169 + snap->lv_allocated_le =
3170 + old_lv->lv_allocated_le;
3171 + snap->lv_current_le = old_lv->lv_current_le;
3172 + snap->lv_size = old_lv->lv_size;
3174 + lvm_gendisk.part[MINOR(snap->lv_dev)].nr_sects
3175 + = old_lv->lv_size;
3176 + lvm_size[MINOR(snap->lv_dev)] =
3177 + old_lv->lv_size >> 1;
3178 + __update_hardsectsize(snap);
3179 + up_write(&snap->lv_lock);
3184 + __update_hardsectsize(old_lv);
3185 + up_write(&old_lv->lv_lock);
3188 } /* lvm_do_lv_extend_reduce() */
3190 @@ -2648,10 +2474,10 @@
3191 static int lvm_do_lv_status_byname(vg_t *vg_ptr, void *arg)
3197 lv_status_byname_req_t lv_status_byname_req;
3202 if (vg_ptr == NULL) return -ENXIO;
3203 if (copy_from_user(&lv_status_byname_req, arg,
3204 @@ -2659,28 +2485,30 @@
3207 if (lv_status_byname_req.lv == NULL) return -EINVAL;
3208 - if (copy_from_user(&lv, lv_status_byname_req.lv,
3209 - sizeof(lv_t)) != 0)
3212 for (l = 0; l < vg_ptr->lv_max; l++) {
3213 - lv_ptr = vg_ptr->lv[l];
3214 - if (lv_ptr != NULL &&
3215 + if ((lv_ptr = vg_ptr->lv[l]) != NULL &&
3216 strcmp(lv_ptr->lv_name,
3217 - lv_status_byname_req.lv_name) == 0) {
3218 - if (copy_to_user(lv_status_byname_req.lv,
3219 + lv_status_byname_req.lv_name) == 0) {
3220 + /* Save usermode pointers */
3221 + if (copy_from_user(&saved_ptr1, &lv_status_byname_req.lv->lv_current_pe, sizeof(void*)) != 0)
3223 + if (copy_from_user(&saved_ptr2, &lv_status_byname_req.lv->lv_block_exception, sizeof(void*)) != 0)
3225 + if (copy_to_user(lv_status_byname_req.lv,
3230 - if (lv.lv_current_pe != NULL) {
3231 - size = lv_ptr->lv_allocated_le *
3233 - if (copy_to_user(lv.lv_current_pe,
3234 + if (saved_ptr1 != NULL) {
3235 + if (copy_to_user(saved_ptr1,
3236 lv_ptr->lv_current_pe,
3238 + lv_ptr->lv_allocated_le *
3239 + sizeof(pe_t)) != 0)
3242 + /* Restore usermode pointers */
3243 + if (copy_to_user(&lv_status_byname_req.lv->lv_current_pe, &saved_ptr1, sizeof(void*)) != 0)
3248 @@ -2693,34 +2521,41 @@
3250 static int lvm_do_lv_status_byindex(vg_t *vg_ptr,void *arg)
3255 lv_status_byindex_req_t lv_status_byindex_req;
3260 if (vg_ptr == NULL) return -ENXIO;
3261 if (copy_from_user(&lv_status_byindex_req, arg,
3262 sizeof(lv_status_byindex_req)) != 0)
3265 - if ((lvp = lv_status_byindex_req.lv) == NULL)
3266 + if (lv_status_byindex_req.lv == NULL)
3268 if ( ( lv_ptr = vg_ptr->lv[lv_status_byindex_req.lv_index]) == NULL)
3271 - if (copy_from_user(&lv, lvp, sizeof(lv_t)) != 0)
3273 + /* Save usermode pointers */
3274 + if (copy_from_user(&saved_ptr1, &lv_status_byindex_req.lv->lv_current_pe, sizeof(void*)) != 0)
3276 + if (copy_from_user(&saved_ptr2, &lv_status_byindex_req.lv->lv_block_exception, sizeof(void*)) != 0)
3279 - if (copy_to_user(lvp, lv_ptr, sizeof(lv_t)) != 0)
3280 + if (copy_to_user(lv_status_byindex_req.lv, lv_ptr, sizeof(lv_t)) != 0)
3283 - if (lv.lv_current_pe != NULL) {
3284 - size = lv_ptr->lv_allocated_le * sizeof(pe_t);
3285 - if (copy_to_user(lv.lv_current_pe,
3286 - lv_ptr->lv_current_pe,
3288 + if (saved_ptr1 != NULL) {
3289 + if (copy_to_user(saved_ptr1,
3290 + lv_ptr->lv_current_pe,
3291 + lv_ptr->lv_allocated_le *
3292 + sizeof(pe_t)) != 0)
3296 + /* Restore usermode pointers */
3297 + if (copy_to_user(&lv_status_byindex_req.lv->lv_current_pe, &saved_ptr1, sizeof(void *)) != 0)
3301 } /* lvm_do_lv_status_byindex() */
3303 @@ -2731,6 +2566,9 @@
3304 static int lvm_do_lv_status_bydev(vg_t * vg_ptr, void * arg) {
3306 lv_status_bydev_req_t lv_status_bydev_req;
3311 if (vg_ptr == NULL) return -ENXIO;
3312 if (copy_from_user(&lv_status_bydev_req, arg,
3313 @@ -2743,10 +2581,26 @@
3316 if ( l == vg_ptr->lv_max) return -ENXIO;
3317 + lv_ptr = vg_ptr->lv[l];
3319 - if (copy_to_user(lv_status_bydev_req.lv,
3320 - vg_ptr->lv[l], sizeof(lv_t)) != 0)
3321 + /* Save usermode pointers */
3322 + if (copy_from_user(&saved_ptr1, &lv_status_bydev_req.lv->lv_current_pe, sizeof(void*)) != 0)
3324 + if (copy_from_user(&saved_ptr2, &lv_status_bydev_req.lv->lv_block_exception, sizeof(void*)) != 0)
3327 + if (copy_to_user(lv_status_bydev_req.lv, lv_ptr, sizeof(lv_t)) != 0)
3329 + if (saved_ptr1 != NULL) {
3330 + if (copy_to_user(saved_ptr1,
3331 + lv_ptr->lv_current_pe,
3332 + lv_ptr->lv_allocated_le *
3333 + sizeof(pe_t)) != 0)
3336 + /* Restore usermode pointers */
3337 + if (copy_to_user(&lv_status_bydev_req.lv->lv_current_pe, &saved_ptr1, sizeof(void *)) != 0)
3341 } /* lvm_do_lv_status_bydev() */
3342 @@ -2766,11 +2620,9 @@
3343 if ( (lv_ptr = vg_ptr->lv[l]) == NULL) continue;
3344 if (lv_ptr->lv_dev == lv->lv_dev)
3346 - lvm_do_remove_proc_entry_of_lv ( vg_ptr, lv_ptr);
3347 - strncpy(lv_ptr->lv_name,
3350 - lvm_do_create_proc_entry_of_lv ( vg_ptr, lv_ptr);
3351 + lvm_fs_remove_lv(vg_ptr, lv_ptr);
3352 + strncpy(lv_ptr->lv_name, lv_req->lv_name, NAME_LEN);
3353 + lvm_fs_create_lv(vg_ptr, lv_ptr);
3357 @@ -2787,9 +2639,7 @@
3361 -#ifdef LVM_GET_INODE
3362 - struct inode *inode_sav;
3364 + struct block_device *bd;
3366 if (vg_ptr == NULL) return -ENXIO;
3367 if (copy_from_user(&pv_change_req, arg,
3368 @@ -2801,20 +2651,17 @@
3369 if (pv_ptr != NULL &&
3370 strcmp(pv_ptr->pv_name,
3371 pv_change_req.pv_name) == 0) {
3372 -#ifdef LVM_GET_INODE
3373 - inode_sav = pv_ptr->inode;
3377 if (copy_from_user(pv_ptr,
3383 /* We don't need the PE list
3384 in kernel space as with LVs pe_t list */
3386 -#ifdef LVM_GET_INODE
3387 - pv_ptr->inode = inode_sav;
3392 @@ -2850,160 +2697,27 @@
3393 } /* lvm_do_pv_status() */
3398 - * create a devfs entry for a volume group
3399 + * character device support function flush and invalidate all buffers of a PV
3401 -void lvm_do_create_devfs_entry_of_vg ( vg_t *vg_ptr) {
3402 - vg_devfs_handle[vg_ptr->vg_number] = devfs_mk_dir(0, vg_ptr->vg_name, NULL);
3403 - ch_devfs_handle[vg_ptr->vg_number] = devfs_register(
3404 - vg_devfs_handle[vg_ptr->vg_number] , "group",
3405 - DEVFS_FL_DEFAULT, LVM_CHAR_MAJOR, vg_ptr->vg_number,
3406 - S_IFCHR | S_IRUSR | S_IWUSR | S_IRGRP,
3407 - &lvm_chr_fops, NULL);
3412 - * create a /proc entry for a logical volume
3414 -void lvm_do_create_proc_entry_of_lv ( vg_t *vg_ptr, lv_t *lv_ptr) {
3417 - if ( vg_ptr->lv_subdir_pde != NULL) {
3418 - basename = strrchr(lv_ptr->lv_name, '/');
3419 - if (basename == NULL) basename = lv_ptr->lv_name;
3421 - pde = create_proc_entry(basename, S_IFREG,
3422 - vg_ptr->lv_subdir_pde);
3423 - if ( pde != NULL) {
3424 - pde->read_proc = lvm_proc_read_lv_info;
3425 - pde->data = lv_ptr;
3432 - * remove a /proc entry for a logical volume
3434 -void lvm_do_remove_proc_entry_of_lv ( vg_t *vg_ptr, lv_t *lv_ptr) {
3437 - if ( vg_ptr->lv_subdir_pde != NULL) {
3438 - basename = strrchr(lv_ptr->lv_name, '/');
3439 - if (basename == NULL) basename = lv_ptr->lv_name;
3441 - remove_proc_entry(basename, vg_ptr->lv_subdir_pde);
3447 - * create a /proc entry for a physical volume
3449 -void lvm_do_create_proc_entry_of_pv ( vg_t *vg_ptr, pv_t *pv_ptr) {
3452 - char buffer[NAME_LEN];
3454 - basename = pv_ptr->pv_name;
3455 - if (strncmp(basename, "/dev/", 5) == 0) offset = 5;
3456 - strncpy(buffer, basename + offset, sizeof(buffer));
3457 - basename = buffer;
3458 - while ( ( basename = strchr ( basename, '/')) != NULL) *basename = '_';
3459 - pde = create_proc_entry(buffer, S_IFREG, vg_ptr->pv_subdir_pde);
3460 - if ( pde != NULL) {
3461 - pde->read_proc = lvm_proc_read_pv_info;
3462 - pde->data = pv_ptr;
3468 - * remove a /proc entry for a physical volume
3470 -void lvm_do_remove_proc_entry_of_pv ( vg_t *vg_ptr, pv_t *pv_ptr) {
3473 - basename = strrchr(pv_ptr->pv_name, '/');
3474 - if ( vg_ptr->pv_subdir_pde != NULL) {
3475 - basename = strrchr(pv_ptr->pv_name, '/');
3476 - if (basename == NULL) basename = pv_ptr->pv_name;
3478 - remove_proc_entry(basename, vg_ptr->pv_subdir_pde);
3484 - * create a /proc entry for a volume group
3486 -void lvm_do_create_proc_entry_of_vg ( vg_t *vg_ptr) {
3490 +static int lvm_do_pv_flush(void *arg)
3492 + pv_flush_req_t pv_flush_req;
3494 - pde = create_proc_entry(vg_ptr->vg_name, S_IFDIR,
3495 - lvm_proc_vg_subdir);
3496 - if ( pde != NULL) {
3497 - vg_ptr->vg_dir_pde = pde;
3498 - pde = create_proc_entry("group", S_IFREG,
3499 - vg_ptr->vg_dir_pde);
3500 - if ( pde != NULL) {
3501 - pde->read_proc = lvm_proc_read_vg_info;
3502 - pde->data = vg_ptr;
3504 - pde = create_proc_entry(LVM_LV_SUBDIR, S_IFDIR,
3505 - vg_ptr->vg_dir_pde);
3506 - if ( pde != NULL) {
3507 - vg_ptr->lv_subdir_pde = pde;
3508 - for ( l = 0; l < vg_ptr->lv_max; l++) {
3509 - if ( ( lv_ptr = vg_ptr->lv[l]) == NULL) continue;
3510 - lvm_do_create_proc_entry_of_lv ( vg_ptr, lv_ptr);
3513 - pde = create_proc_entry(LVM_PV_SUBDIR, S_IFDIR,
3514 - vg_ptr->vg_dir_pde);
3515 - if ( pde != NULL) {
3516 - vg_ptr->pv_subdir_pde = pde;
3517 - for ( p = 0; p < vg_ptr->pv_max; p++) {
3518 - if ( ( pv_ptr = vg_ptr->pv[p]) == NULL) continue;
3519 - lvm_do_create_proc_entry_of_pv ( vg_ptr, pv_ptr);
3524 + if (copy_from_user(&pv_flush_req, arg, sizeof(pv_flush_req)) != 0)
3528 - * remove a /proc entry for a volume group
3530 -void lvm_do_remove_proc_entry_of_vg ( vg_t *vg_ptr) {
3534 + fsync_dev(pv_flush_req.pv_dev);
3535 + invalidate_buffers(pv_flush_req.pv_dev);
3537 - for ( l = 0; l < vg_ptr->lv_max; l++) {
3538 - if ( ( lv_ptr = vg_ptr->lv[l]) == NULL) continue;
3539 - lvm_do_remove_proc_entry_of_lv ( vg_ptr, vg_ptr->lv[l]);
3541 - for ( p = 0; p < vg_ptr->pv_max; p++) {
3542 - if ( ( pv_ptr = vg_ptr->pv[p]) == NULL) continue;
3543 - lvm_do_remove_proc_entry_of_pv ( vg_ptr, vg_ptr->pv[p]);
3545 - if ( vg_ptr->vg_dir_pde != NULL) {
3546 - remove_proc_entry(LVM_LV_SUBDIR, vg_ptr->vg_dir_pde);
3547 - remove_proc_entry(LVM_PV_SUBDIR, vg_ptr->vg_dir_pde);
3548 - remove_proc_entry("group", vg_ptr->vg_dir_pde);
3549 - remove_proc_entry(vg_ptr->vg_name, lvm_proc_vg_subdir);
3556 * support function initialize gendisk variables
3558 -void __init lvm_geninit(struct gendisk *lvm_gdisk)
3559 +static void __init lvm_geninit(struct gendisk *lvm_gdisk)
3563 @@ -3019,36 +2733,94 @@
3565 blk_size[MAJOR_NR] = lvm_size;
3566 blksize_size[MAJOR_NR] = lvm_blocksizes;
3567 - hardsect_size[MAJOR_NR] = lvm_blocksizes;
3568 + hardsect_size[MAJOR_NR] = lvm_hardsectsizes;
3571 } /* lvm_gen_init() */
3575 +/* Must have down_write(_pe_lock) when we enqueue buffers */
3576 +static void _queue_io(struct buffer_head *bh, int rw) {
3577 + if (bh->b_reqnext) BUG();
3578 + bh->b_reqnext = _pe_requests;
3579 + _pe_requests = bh;
3582 +/* Must have down_write(_pe_lock) when we dequeue buffers */
3583 +static struct buffer_head *_dequeue_io(void)
3585 + struct buffer_head *bh = _pe_requests;
3586 + _pe_requests = NULL;
3591 - * return a pointer to a '-' padded uuid
3592 + * We do not need to hold _pe_lock to flush buffers. bh should be taken from
3593 + * _pe_requests under down_write(_pe_lock), and then _pe_requests can be set
3594 + * NULL and we drop _pe_lock. Any new buffers deferred at this time will be
3595 + * added to a new list, and the old buffers can have their I/O restarted
3598 + * If, for some reason, the same PE is locked again before all of these writes
3599 + * have finished, then these buffers will just be re-queued (i.e. no danger).
3601 -static char *lvm_show_uuid ( char *uuidstr) {
3603 - static char uuid[NAME_LEN] = { 0, };
3604 +static void _flush_io(struct buffer_head *bh)
3607 + struct buffer_head *next = bh->b_reqnext;
3608 + bh->b_reqnext = NULL;
3609 + /* resubmit this buffer head */
3610 + generic_make_request(WRITE, bh);
3615 - memset ( uuid, 0, NAME_LEN);
3618 - memcpy ( uuid, uuidstr, i);
3621 + * we must open the pv's before we use them
3623 +static int _open_pv(pv_t *pv) {
3625 + struct block_device *bd;
3627 + if (!(bd = bdget(kdev_t_to_nr(pv->pv_dev))))
3630 - for ( j = 0; j < 6; j++) {
3632 - memcpy ( &uuid[i], uuidstr, 4);
3635 + err = blkdev_get(bd, FMODE_READ|FMODE_WRITE, 0, BDEV_FILE);
3641 - memcpy ( &uuid[i], uuidstr, 2 );
3646 +static void _close_pv(pv_t *pv) {
3647 + if(!pv || !pv->bd)
3651 + blkdev_put(pv->bd, BDEV_FILE);
3657 +static unsigned long _sectors_to_k(unsigned long sect)
3659 + if(SECTOR_SIZE > 1024) {
3660 + return sect * (SECTOR_SIZE / 1024);
3663 + return sect / (1024 / SECTOR_SIZE);
3666 +MODULE_AUTHOR("Heinz Mauelshagen, Sistina Software");
3667 +MODULE_DESCRIPTION("Logical Volume Manager");
3668 +#ifdef MODULE_LICENSE
3669 +MODULE_LICENSE("GPL");
3672 module_init(lvm_init);
3673 module_exit(lvm_cleanup);
3674 --- linux/drivers/md/lvm-internal.h.orig Tue Nov 13 08:46:52 2001
3675 +++ linux/drivers/md/lvm-internal.h Tue Nov 13 08:46:52 2001
3679 + * kernel/lvm_internal.h
3681 + * Copyright (C) 2001 Sistina Software
3684 + * LVM driver is free software; you can redistribute it and/or modify
3685 + * it under the terms of the GNU General Public License as published by
3686 + * the Free Software Foundation; either version 2, or (at your option)
3687 + * any later version.
3689 + * LVM driver is distributed in the hope that it will be useful,
3690 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
3691 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
3692 + * GNU General Public License for more details.
3694 + * You should have received a copy of the GNU General Public License
3695 + * along with GNU CC; see the file COPYING. If not, write to
3696 + * the Free Software Foundation, 59 Temple Place - Suite 330,
3697 + * Boston, MA 02111-1307, USA.
3704 + * 05/01/2001 - Factored this file out of lvm.c (Joe Thornber)
3705 + * 11/01/2001 - Renamed lvm_internal and added declarations
3706 + * for lvm_fs.c stuff
3710 +#ifndef LVM_INTERNAL_H
3711 +#define LVM_INTERNAL_H
3713 +#include <linux/lvm.h>
3715 +#define _LVM_INTERNAL_H_VERSION "LVM "LVM_RELEASE_NAME" ("LVM_RELEASE_DATE")"
3717 +/* global variables, defined in lvm.c */
3718 +extern char *lvm_version;
3719 +extern ushort lvm_iop_version;
3720 +extern int loadtime;
3721 +extern const char *const lvm_name;
3724 +extern uint vg_count;
3726 +extern struct file_operations lvm_chr_fops;
3728 +extern struct block_device_operations lvm_blk_dops;
3730 +#define lvm_sectsize(dev) get_hardsect_size(dev)
3732 +/* 2.4.8 had no global min/max macros, and 2.4.9's were flawed */
3736 +#define P_IOCTL(fmt, args...) printk(KERN_DEBUG "lvm ioctl: " fmt, ## args)
3738 +#define P_IOCTL(fmt, args...)
3742 +#define P_MAP(fmt, args...) printk(KERN_DEBUG "lvm map: " fmt, ## args)
3744 +#define P_MAP(fmt, args...)
3748 +#define P_KFREE(fmt, args...) printk(KERN_DEBUG "lvm kfree: " fmt, ## args)
3750 +#define P_KFREE(fmt, args...)
3753 +#ifdef DEBUG_DEVICE
3754 +#define P_DEV(fmt, args...) printk(KERN_DEBUG "lvm device: " fmt, ## args)
3756 +#define P_DEV(fmt, args...)
3761 +int lvm_get_blksize(kdev_t);
3762 +int lvm_snapshot_alloc(lv_t *);
3763 +int lvm_snapshot_fill_COW_page(vg_t *, lv_t *);
3764 +int lvm_snapshot_COW(kdev_t, ulong, ulong, ulong, vg_t *vg, lv_t *);
3765 +int lvm_snapshot_remap_block(kdev_t *, ulong *, ulong, lv_t *);
3766 +void lvm_snapshot_release(lv_t *);
3767 +int lvm_write_COW_table_block(vg_t *, lv_t *);
3768 +void lvm_hash_link(lv_block_exception_t *, kdev_t, ulong, lv_t *);
3769 +int lvm_snapshot_alloc_hash_table(lv_t *);
3770 +void lvm_drop_snapshot(vg_t *vg, lv_t *, const char *);
3774 +void lvm_init_fs(void);
3775 +void lvm_fin_fs(void);
3777 +void lvm_fs_create_vg(vg_t *vg_ptr);
3778 +void lvm_fs_remove_vg(vg_t *vg_ptr);
3779 +devfs_handle_t lvm_fs_create_lv(vg_t *vg_ptr, lv_t *lv);
3780 +void lvm_fs_remove_lv(vg_t *vg_ptr, lv_t *lv);
3781 +void lvm_fs_create_pv(vg_t *vg_ptr, pv_t *pv);
3782 +void lvm_fs_remove_pv(vg_t *vg_ptr, pv_t *pv);
3785 --- linux/drivers/md/lvm-snap.c.orig Mon Sep 10 17:00:55 2001
3786 +++ linux/drivers/md/lvm-snap.c Tue Nov 13 09:46:52 2001
3790 * Copyright (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE
3791 - * Heinz Mauelshagen, Sistina Software (persistent snapshots)
3792 + * 2000 - 2001 Heinz Mauelshagen, Sistina Software
3794 * LVM snapshot driver is free software; you can redistribute it and/or modify
3795 * it under the terms of the GNU General Public License as published by
3796 * the Free Software Foundation; either version 2, or (at your option)
3797 * any later version.
3800 * LVM snapshot driver is distributed in the hope that it will be useful,
3801 * but WITHOUT ANY WARRANTY; without even the implied warranty of
3802 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
3803 * GNU General Public License for more details.
3806 * You should have received a copy of the GNU General Public License
3807 * along with GNU CC; see the file COPYING. If not, write to
3808 * the Free Software Foundation, 59 Temple Place - Suite 330,
3809 - * Boston, MA 02111-1307, USA.
3810 + * Boston, MA 02111-1307, USA.
3816 * 05/07/2000 - implemented persistent snapshot support
3817 * 23/11/2000 - used cpu_to_le64 rather than my own macro
3818 + * 25/01/2001 - Put LockPage back in
3819 + * 01/02/2001 - A dropped snapshot is now set as inactive
3820 + * 14/02/2001 - tidied debug statements
3821 + * 19/02/2001 - changed rawio calls to pass in preallocated buffer_heads
3822 + * 26/02/2001 - introduced __brw_kiovec to remove a lot of conditional
3824 + * 07/03/2001 - fixed COW exception table not persistent on 2.2 (HM)
3825 + * 12/03/2001 - lvm_pv_get_number changes:
3826 + * o made it static
3827 + * o renamed it to _pv_get_number
3828 + * o pv number is returned in new uint * arg
3829 + * o -1 returned on error
3830 + * lvm_snapshot_fill_COW_table has a return value too.
3831 + * 15/10/2001 - fix snapshot alignment problem [CM]
3832 + * - fix snapshot full oops (always check lv_block_exception) [CM]
3837 #include <linux/types.h>
3838 #include <linux/iobuf.h>
3839 #include <linux/lvm.h>
3840 +#include <linux/devfs_fs_kernel.h>
3843 +#include "lvm-internal.h"
3845 -#include "lvm-snap.h"
3846 +static char *lvm_snap_version __attribute__ ((unused)) = "LVM "LVM_RELEASE_NAME" snapshot code ("LVM_RELEASE_DATE")\n";
3848 -static char *lvm_snap_version __attribute__ ((unused)) = "LVM 0.9.1_beta2 snapshot code (18/01/2001)\n";
3850 extern const char *const lvm_name;
3851 extern int lvm_blocksizes[];
3853 void lvm_snapshot_release(lv_t *);
3855 -uint lvm_pv_get_number(vg_t * vg, kdev_t rdev)
3856 +static int _write_COW_table_block(vg_t *vg, lv_t *lv, int idx,
3857 + const char **reason);
3858 +static void _disable_snapshot(vg_t *vg, lv_t *lv);
3861 +static inline int __brw_kiovec(int rw, int nr, struct kiobuf *iovec[],
3862 + kdev_t dev, unsigned long b[], int size,
3864 + return brw_kiovec(rw, nr, iovec, dev, b, size);
3868 +static int _pv_get_number(vg_t * vg, kdev_t rdev, uint *pvn)
3871 + for (p = 0; p < vg->pv_max; p++) {
3872 + if (vg->pv[p] == NULL)
3875 - for ( p = 0; p < vg->pv_max; p++)
3877 - if ( vg->pv[p] == NULL) continue;
3878 - if ( vg->pv[p]->pv_dev == rdev) break;
3879 + if (vg->pv[p]->pv_dev == rdev)
3883 - return vg->pv[p]->pv_number;
3884 + if (p >= vg->pv_max) {
3885 + /* bad news, the snapshot COW table is probably corrupt */
3887 + "%s -- _pv_get_number failed for rdev = %u\n",
3892 + *pvn = vg->pv[p]->pv_number;
3897 @@ -105,10 +144,20 @@
3898 unsigned long mask = lv->lv_snapshot_hash_mask;
3899 int chunk_size = lv->lv_chunk_size;
3903 hash_table = &hash_table[hashfn(org_dev, org_start, mask, chunk_size)];
3904 list_add(&exception->hash, hash_table);
3908 + * Determine if we already have a snapshot chunk for this block.
3909 + * Return: 1 if the chunk already exists
3910 + * 0 if we need to COW this block and allocate a new chunk
3911 + * -1 if the snapshot was disabled because it ran out of space
3913 + * We need to be holding at least a read lock on lv->lv_lock.
3915 int lvm_snapshot_remap_block(kdev_t * org_dev, unsigned long * org_sector,
3916 unsigned long pe_start, lv_t * lv)
3919 int chunk_size = lv->lv_chunk_size;
3920 lv_block_exception_t * exception;
3922 + if (!lv->lv_block_exception)
3925 pe_off = pe_start % chunk_size;
3926 pe_adjustment = (*org_sector-pe_off) % chunk_size;
3927 __org_start = *org_sector - pe_adjustment;
3932 -void lvm_drop_snapshot(lv_t * lv_snap, const char * reason)
3933 +void lvm_drop_snapshot(vg_t *vg, lv_t *lv_snap, const char *reason)
3938 or error on this snapshot --> release it */
3939 invalidate_buffers(lv_snap->lv_dev);
3941 + /* wipe the snapshot since it's inconsistent now */
3942 + _disable_snapshot(vg, lv_snap);
3944 for (i = last_dev = 0; i < lv_snap->lv_remap_ptr; i++) {
3945 if ( lv_snap->lv_block_exception[i].rdev_new != last_dev) {
3946 last_dev = lv_snap->lv_block_exception[i].rdev_new;
3947 @@ -150,26 +205,33 @@
3950 lvm_snapshot_release(lv_snap);
3951 + lv_snap->lv_status &= ~LV_ACTIVE;
3954 - "%s -- giving up to snapshot %s on %s due %s\n",
3955 + "%s -- giving up to snapshot %s on %s: %s\n",
3956 lvm_name, lv_snap->lv_snapshot_org->lv_name, lv_snap->lv_name,
3960 -static inline void lvm_snapshot_prepare_blocks(unsigned long * blocks,
3961 +static inline int lvm_snapshot_prepare_blocks(unsigned long *blocks,
3962 unsigned long start,
3966 int i, sectors_per_block, nr_blocks;
3968 - sectors_per_block = blocksize >> 9;
3969 + sectors_per_block = blocksize / SECTOR_SIZE;
3971 + if (start & (sectors_per_block - 1))
3974 nr_blocks = nr_sectors / sectors_per_block;
3975 start /= sectors_per_block;
3977 for (i = 0; i < nr_blocks; i++)
3978 blocks[i] = start++;
3983 inline int lvm_get_blksize(kdev_t dev)
3984 @@ -209,128 +271,61 @@
3988 -void lvm_snapshot_fill_COW_page(vg_t * vg, lv_t * lv_snap)
3989 +int lvm_snapshot_fill_COW_page(vg_t * vg, lv_t * lv_snap)
3991 - int id = 0, is = lv_snap->lv_remap_ptr;
3992 - ulong blksize_snap;
3993 - lv_COW_table_disk_t * lv_COW_table =
3994 - ( lv_COW_table_disk_t *) page_address(lv_snap->lv_COW_table_page);
3995 + int id = 0, is = lv_snap->lv_remap_ptr;
3996 + ulong blksize_snap;
3997 + lv_COW_table_disk_t * lv_COW_table = (lv_COW_table_disk_t *)
3998 + page_address(lv_snap->lv_COW_table_iobuf->maplist[0]);
4003 - if (is == 0) return;
4005 - blksize_snap = lvm_get_blksize(lv_snap->lv_block_exception[is].rdev_new);
4006 - is -= is % (blksize_snap / sizeof(lv_COW_table_disk_t));
4008 + lvm_get_blksize(lv_snap->lv_block_exception[is].rdev_new);
4009 + is -= is % (blksize_snap / sizeof(lv_COW_table_disk_t));
4011 memset(lv_COW_table, 0, blksize_snap);
4012 for ( ; is < lv_snap->lv_remap_ptr; is++, id++) {
4013 /* store new COW_table entry */
4014 - lv_COW_table[id].pv_org_number = cpu_to_le64(lvm_pv_get_number(vg, lv_snap->lv_block_exception[is].rdev_org));
4015 - lv_COW_table[id].pv_org_rsector = cpu_to_le64(lv_snap->lv_block_exception[is].rsector_org);
4016 - lv_COW_table[id].pv_snap_number = cpu_to_le64(lvm_pv_get_number(vg, lv_snap->lv_block_exception[is].rdev_new));
4017 - lv_COW_table[id].pv_snap_rsector = cpu_to_le64(lv_snap->lv_block_exception[is].rsector_new);
4020 + lv_block_exception_t *be = lv_snap->lv_block_exception + is;
4023 + if (_pv_get_number(vg, be->rdev_org, &pvn))
4027 - * writes a COW exception table sector to disk (HM)
4031 -int lvm_write_COW_table_block(vg_t * vg, lv_t * lv_snap)
4035 - int idx = lv_snap->lv_remap_ptr, idx_COW_table;
4038 - ulong snap_pe_start, COW_table_sector_offset,
4039 - COW_entries_per_pe, COW_chunks_per_pe, COW_entries_per_block;
4040 - const char * reason;
4041 - kdev_t snap_phys_dev;
4042 - struct kiobuf * iobuf = lv_snap->lv_iobuf;
4043 - struct page * page_tmp;
4044 - lv_COW_table_disk_t * lv_COW_table =
4045 - ( lv_COW_table_disk_t *) page_address(lv_snap->lv_COW_table_page);
4046 + lv_COW_table[id].pv_org_number = cpu_to_le64(pvn);
4047 + lv_COW_table[id].pv_org_rsector = cpu_to_le64(be->rsector_org);
4050 + if (_pv_get_number(vg, be->rdev_new, &pvn))
4053 - COW_chunks_per_pe = LVM_GET_COW_TABLE_CHUNKS_PER_PE(vg, lv_snap);
4054 - COW_entries_per_pe = LVM_GET_COW_TABLE_ENTRIES_PER_PE(vg, lv_snap);
4056 - /* get physical addresse of destination chunk */
4057 - snap_phys_dev = lv_snap->lv_block_exception[idx].rdev_new;
4058 - snap_pe_start = lv_snap->lv_block_exception[idx - (idx % COW_entries_per_pe)].rsector_new - lv_snap->lv_chunk_size;
4060 - blksize_snap = lvm_get_blksize(snap_phys_dev);
4062 - COW_entries_per_block = blksize_snap / sizeof(lv_COW_table_disk_t);
4063 - idx_COW_table = idx % COW_entries_per_pe % COW_entries_per_block;
4065 - if ( idx_COW_table == 0) memset(lv_COW_table, 0, blksize_snap);
4067 - /* sector offset into the on disk COW table */
4068 - COW_table_sector_offset = (idx % COW_entries_per_pe) / (SECTOR_SIZE / sizeof(lv_COW_table_disk_t));
4070 - /* COW table block to write next */
4071 - iobuf->blocks[0] = (snap_pe_start + COW_table_sector_offset) >> (blksize_snap >> 10);
4073 - /* store new COW_table entry */
4074 - lv_COW_table[idx_COW_table].pv_org_number = cpu_to_le64(lvm_pv_get_number(vg, lv_snap->lv_block_exception[idx].rdev_org));
4075 - lv_COW_table[idx_COW_table].pv_org_rsector = cpu_to_le64(lv_snap->lv_block_exception[idx].rsector_org);
4076 - lv_COW_table[idx_COW_table].pv_snap_number = cpu_to_le64(lvm_pv_get_number(vg, snap_phys_dev));
4077 - lv_COW_table[idx_COW_table].pv_snap_rsector = cpu_to_le64(lv_snap->lv_block_exception[idx].rsector_new);
4079 - length_tmp = iobuf->length;
4080 - iobuf->length = blksize_snap;
4081 - page_tmp = iobuf->maplist[0];
4082 - iobuf->maplist[0] = lv_snap->lv_COW_table_page;
4083 - nr_pages_tmp = iobuf->nr_pages;
4084 - iobuf->nr_pages = 1;
4086 - if (brw_kiovec(WRITE, 1, &iobuf, snap_phys_dev,
4087 - iobuf->blocks, blksize_snap) != blksize_snap)
4088 - goto fail_raw_write;
4091 - /* initialization of next COW exception table block with zeroes */
4092 - end_of_table = idx % COW_entries_per_pe == COW_entries_per_pe - 1;
4093 - if (idx_COW_table % COW_entries_per_block == COW_entries_per_block - 1 || end_of_table)
4095 - /* don't go beyond the end */
4096 - if (idx + 1 >= lv_snap->lv_remap_end) goto good_out;
4098 - memset(lv_COW_table, 0, blksize_snap);
4103 - snap_phys_dev = lv_snap->lv_block_exception[idx].rdev_new;
4104 - snap_pe_start = lv_snap->lv_block_exception[idx - (idx % COW_entries_per_pe)].rsector_new - lv_snap->lv_chunk_size;
4105 - blksize_snap = lvm_get_blksize(snap_phys_dev);
4106 - iobuf->blocks[0] = snap_pe_start >> (blksize_snap >> 10);
4107 - } else iobuf->blocks[0]++;
4109 - if (brw_kiovec(WRITE, 1, &iobuf, snap_phys_dev,
4110 - iobuf->blocks, blksize_snap) != blksize_snap)
4111 - goto fail_raw_write;
4112 + lv_COW_table[id].pv_snap_number = cpu_to_le64(pvn);
4113 + lv_COW_table[id].pv_snap_rsector = cpu_to_le64(be->rsector_new);
4118 - iobuf->length = length_tmp;
4119 - iobuf->maplist[0] = page_tmp;
4120 - iobuf->nr_pages = nr_pages_tmp;
4125 - lvm_drop_snapshot(lv_snap, reason);
4128 + printk(KERN_ERR "%s -- lvm_snapshot_fill_COW_page failed", lvm_name);
4133 - reason = "write error";
4137 + * writes a COW exception table sector to disk (HM)
4139 + * We need to hold a write lock on lv_snap->lv_lock.
4141 +int lvm_write_COW_table_block(vg_t * vg, lv_t *lv_snap)
4145 + if((r = _write_COW_table_block(vg, lv_snap,
4146 + lv_snap->lv_remap_ptr - 1, &err)))
4147 + lvm_drop_snapshot(vg, lv_snap, err);
4152 @@ -340,17 +335,23 @@
4153 * if there is no exception storage space free any longer --> release snapshot.
4155 * this routine gets called for each _first_ write to a physical chunk.
4157 + * We need to hold a write lock on lv_snap->lv_lock. It is assumed that
4158 + * lv->lv_block_exception is non-NULL (checked by lvm_snapshot_remap_block())
4159 + * when this function is called.
4161 int lvm_snapshot_COW(kdev_t org_phys_dev,
4162 unsigned long org_phys_sector,
4163 unsigned long org_pe_start,
4164 unsigned long org_virt_sector,
4166 + vg_t *vg, lv_t* lv_snap)
4168 const char * reason;
4169 unsigned long org_start, snap_start, snap_phys_dev, virt_start, pe_off;
4170 + unsigned long phys_start;
4171 int idx = lv_snap->lv_remap_ptr, chunk_size = lv_snap->lv_chunk_size;
4172 struct kiobuf * iobuf;
4173 + unsigned long blocks[KIO_MAX_SECTORS];
4174 int blksize_snap, blksize_org, min_blksize, max_blksize;
4175 int max_sectors, nr_sectors;
4177 @@ -370,13 +371,11 @@
4178 #ifdef DEBUG_SNAPSHOT
4181 - "org %02d:%02d faulting %lu start %lu, "
4182 - "snap %02d:%02d start %lu, "
4183 + "org %s faulting %lu start %lu, snap %s start %lu, "
4184 "size %d, pe_start %lu pe_off %lu, virt_sec %lu\n",
4186 - MAJOR(org_phys_dev), MINOR(org_phys_dev), org_phys_sector,
4188 - MAJOR(snap_phys_dev), MINOR(snap_phys_dev), snap_start,
4189 + kdevname(org_phys_dev), org_phys_sector, org_start,
4190 + kdevname(snap_phys_dev), snap_start,
4192 org_pe_start, pe_off,
4196 iobuf = lv_snap->lv_iobuf;
4198 - blksize_org = lvm_get_blksize(org_phys_dev);
4199 - blksize_snap = lvm_get_blksize(snap_phys_dev);
4200 + blksize_org = lvm_sectsize(org_phys_dev);
4201 + blksize_snap = lvm_sectsize(snap_phys_dev);
4202 max_blksize = max(blksize_org, blksize_snap);
4203 min_blksize = min(blksize_org, blksize_snap);
4204 max_sectors = KIO_MAX_SECTORS * (min_blksize>>9);
4206 if (chunk_size % (max_blksize>>9))
4209 + /* Don't change org_start, we need it to fill in the exception table */
4210 + phys_start = org_start;
4214 nr_sectors = min(chunk_size, max_sectors);
4215 @@ -400,17 +402,24 @@
4217 iobuf->length = nr_sectors << 9;
4219 - lvm_snapshot_prepare_blocks(iobuf->blocks, org_start,
4220 - nr_sectors, blksize_org);
4221 - if (brw_kiovec(READ, 1, &iobuf, org_phys_dev,
4222 - iobuf->blocks, blksize_org) != (nr_sectors<<9))
4223 + if (!lvm_snapshot_prepare_blocks(blocks, phys_start,
4224 + nr_sectors, blksize_org))
4225 + goto fail_prepare;
4227 + if (__brw_kiovec(READ, 1, &iobuf, org_phys_dev, blocks,
4228 + blksize_org, lv_snap) != (nr_sectors<<9))
4231 - lvm_snapshot_prepare_blocks(iobuf->blocks, snap_start,
4232 - nr_sectors, blksize_snap);
4233 - if (brw_kiovec(WRITE, 1, &iobuf, snap_phys_dev,
4234 - iobuf->blocks, blksize_snap) != (nr_sectors<<9))
4235 + if (!lvm_snapshot_prepare_blocks(blocks, snap_start,
4236 + nr_sectors, blksize_snap))
4237 + goto fail_prepare;
4239 + if (__brw_kiovec(WRITE, 1, &iobuf, snap_phys_dev, blocks,
4240 + blksize_snap, lv_snap) != (nr_sectors<<9))
4241 goto fail_raw_write;
4243 + phys_start += nr_sectors;
4244 + snap_start += nr_sectors;
4247 #ifdef DEBUG_SNAPSHOT
4248 @@ -434,52 +443,57 @@
4253 - lvm_drop_snapshot(lv_snap, reason);
4255 + lvm_drop_snapshot(vg, lv_snap, reason);
4258 - fail_out_of_space:
4260 reason = "out of space";
4264 reason = "read error";
4268 reason = "write error";
4272 reason = "blocksize error";
4276 + reason = "couldn't prepare kiovec blocks "
4277 + "(start probably isn't block aligned)";
4281 int lvm_snapshot_alloc_iobuf_pages(struct kiobuf * iobuf, int sectors)
4283 int bytes, nr_pages, err, i;
4285 - bytes = sectors << 9;
4286 + bytes = sectors * SECTOR_SIZE;
4287 nr_pages = (bytes + ~PAGE_MASK) >> PAGE_SHIFT;
4288 err = expand_kiobuf(iobuf, nr_pages);
4291 + if (err) goto out;
4294 - iobuf->locked = 0;
4295 + iobuf->locked = 1;
4296 iobuf->nr_pages = 0;
4297 for (i = 0; i < nr_pages; i++)
4301 page = alloc_page(GFP_KERNEL);
4304 + if (!page) goto out;
4306 iobuf->maplist[i] = page;
4319 @@ -521,40 +535,46 @@
4321 INIT_LIST_HEAD(hash+buckets);
4328 int lvm_snapshot_alloc(lv_t * lv_snap)
4330 - int err, blocksize, max_sectors;
4331 + int ret, max_sectors;
4333 - err = alloc_kiovec(1, &lv_snap->lv_iobuf);
4336 + /* allocate kiovec to do chunk io */
4337 + ret = alloc_kiovec(1, &lv_snap->lv_iobuf);
4338 + if (ret) goto out;
4340 - blocksize = lvm_blocksizes[MINOR(lv_snap->lv_dev)];
4341 max_sectors = KIO_MAX_SECTORS << (PAGE_SHIFT-9);
4343 - err = lvm_snapshot_alloc_iobuf_pages(lv_snap->lv_iobuf, max_sectors);
4345 - goto out_free_kiovec;
4346 + ret = lvm_snapshot_alloc_iobuf_pages(lv_snap->lv_iobuf, max_sectors);
4347 + if (ret) goto out_free_kiovec;
4349 - err = lvm_snapshot_alloc_hash_table(lv_snap);
4351 - goto out_free_kiovec;
4352 + /* allocate kiovec to do exception table io */
4353 + ret = alloc_kiovec(1, &lv_snap->lv_COW_table_iobuf);
4354 + if (ret) goto out_free_kiovec;
4356 + ret = lvm_snapshot_alloc_iobuf_pages(lv_snap->lv_COW_table_iobuf,
4357 + PAGE_SIZE/SECTOR_SIZE);
4358 + if (ret) goto out_free_both_kiovecs;
4360 - lv_snap->lv_COW_table_page = alloc_page(GFP_KERNEL);
4361 - if (!lv_snap->lv_COW_table_page)
4362 - goto out_free_kiovec;
4363 + ret = lvm_snapshot_alloc_hash_table(lv_snap);
4364 + if (ret) goto out_free_both_kiovecs;
4371 +out_free_both_kiovecs:
4372 + unmap_kiobuf(lv_snap->lv_COW_table_iobuf);
4373 + free_kiovec(1, &lv_snap->lv_COW_table_iobuf);
4374 + lv_snap->lv_COW_table_iobuf = NULL;
4378 unmap_kiobuf(lv_snap->lv_iobuf);
4379 free_kiovec(1, &lv_snap->lv_iobuf);
4380 + lv_snap->lv_iobuf = NULL;
4381 vfree(lv_snap->lv_snapshot_hash_table);
4382 lv_snap->lv_snapshot_hash_table = NULL;
4384 @@ -580,9 +600,125 @@
4385 free_kiovec(1, &lv->lv_iobuf);
4386 lv->lv_iobuf = NULL;
4388 - if (lv->lv_COW_table_page)
4389 + if (lv->lv_COW_table_iobuf)
4391 - free_page((ulong)lv->lv_COW_table_page);
4392 - lv->lv_COW_table_page = NULL;
4393 + kiobuf_wait_for_io(lv->lv_COW_table_iobuf);
4394 + unmap_kiobuf(lv->lv_COW_table_iobuf);
4395 + free_kiovec(1, &lv->lv_COW_table_iobuf);
4396 + lv->lv_COW_table_iobuf = NULL;
4401 +static int _write_COW_table_block(vg_t *vg, lv_t *lv_snap,
4402 + int idx, const char **reason) {
4405 + int idx_COW_table;
4407 + ulong snap_pe_start, COW_table_sector_offset,
4408 + COW_entries_per_pe, COW_chunks_per_pe, COW_entries_per_block;
4410 + kdev_t snap_phys_dev;
4411 + lv_block_exception_t *be;
4412 + struct kiobuf *COW_table_iobuf = lv_snap->lv_COW_table_iobuf;
4413 + lv_COW_table_disk_t * lv_COW_table =
4414 + ( lv_COW_table_disk_t *) page_address(lv_snap->lv_COW_table_iobuf->maplist[0]);
4416 + COW_chunks_per_pe = LVM_GET_COW_TABLE_CHUNKS_PER_PE(vg, lv_snap);
4417 + COW_entries_per_pe = LVM_GET_COW_TABLE_ENTRIES_PER_PE(vg, lv_snap);
4419 + /* get physical address of destination chunk */
4420 + snap_phys_dev = lv_snap->lv_block_exception[idx].rdev_new;
4421 + snap_pe_start = lv_snap->lv_block_exception[idx - (idx % COW_entries_per_pe)].rsector_new - lv_snap->lv_chunk_size;
4423 + blksize_snap = lvm_sectsize(snap_phys_dev);
4425 + COW_entries_per_block = blksize_snap / sizeof(lv_COW_table_disk_t);
4426 + idx_COW_table = idx % COW_entries_per_pe % COW_entries_per_block;
4428 + if ( idx_COW_table == 0) memset(lv_COW_table, 0, blksize_snap);
4430 + /* sector offset into the on disk COW table */
4431 + COW_table_sector_offset = (idx % COW_entries_per_pe) / (SECTOR_SIZE / sizeof(lv_COW_table_disk_t));
4433 + /* COW table block to write next */
4434 + blocks[0] = (snap_pe_start + COW_table_sector_offset) >> (blksize_snap >> 10);
4436 + /* store new COW_table entry */
4437 + be = lv_snap->lv_block_exception + idx;
4438 + if(_pv_get_number(vg, be->rdev_org, &pvn))
4439 + goto fail_pv_get_number;
4441 + lv_COW_table[idx_COW_table].pv_org_number = cpu_to_le64(pvn);
4442 + lv_COW_table[idx_COW_table].pv_org_rsector =
4443 + cpu_to_le64(be->rsector_org);
4444 + if(_pv_get_number(vg, snap_phys_dev, &pvn))
4445 + goto fail_pv_get_number;
4447 + lv_COW_table[idx_COW_table].pv_snap_number = cpu_to_le64(pvn);
4448 + lv_COW_table[idx_COW_table].pv_snap_rsector =
4449 + cpu_to_le64(be->rsector_new);
4451 + COW_table_iobuf->length = blksize_snap;
4452 + /* COW_table_iobuf->nr_pages = 1; */
4454 + if (__brw_kiovec(WRITE, 1, &COW_table_iobuf, snap_phys_dev,
4455 + blocks, blksize_snap, lv_snap) != blksize_snap)
4456 + goto fail_raw_write;
4458 + /* initialization of next COW exception table block with zeroes */
4459 + end_of_table = idx % COW_entries_per_pe == COW_entries_per_pe - 1;
4460 + if (idx_COW_table % COW_entries_per_block == COW_entries_per_block - 1 || end_of_table)
4462 + /* don't go beyond the end */
4463 + if (idx + 1 >= lv_snap->lv_remap_end) goto out;
4465 + memset(lv_COW_table, 0, blksize_snap);
4470 + snap_phys_dev = lv_snap->lv_block_exception[idx].rdev_new;
4471 + snap_pe_start = lv_snap->lv_block_exception[idx - (idx % COW_entries_per_pe)].rsector_new - lv_snap->lv_chunk_size;
4472 + blksize_snap = lvm_sectsize(snap_phys_dev);
4473 + blocks[0] = snap_pe_start >> (blksize_snap >> 10);
4474 + } else blocks[0]++;
4476 + if (__brw_kiovec(WRITE, 1, &COW_table_iobuf, snap_phys_dev,
4477 + blocks, blksize_snap, lv_snap) !=
4479 + goto fail_raw_write;
4486 + *reason = "write error";
4489 +fail_pv_get_number:
4490 + *reason = "_pv_get_number failed";
4496 + * This function is a bit of a hack; we need to ensure that the
4497 + * snapshot is never made active again, because it will surely be
4498 + * corrupt. At the moment we do not have access to the LVM metadata
4499 + * from within the kernel. So we set the first exception to point to
4500 + * sector 1 (which will always be within the metadata, and as such
4501 + * invalid). User land tools will check for this when they are asked
4502 + * to activate the snapshot and prevent this from happening.
4505 +static void _disable_snapshot(vg_t *vg, lv_t *lv) {
4507 + lv->lv_block_exception[0].rsector_org = LVM_SNAPSHOT_DROPPED_SECTOR;
4508 + if(_write_COW_table_block(vg, lv, 0, &err) < 0) {
4509 + printk(KERN_ERR "%s -- couldn't disable snapshot: %s\n",
4513 --- linux/drivers/md/lvm-fs.c.orig Tue Nov 13 08:46:52 2001
4514 +++ linux/drivers/md/lvm-fs.c Tue Nov 13 08:46:52 2001
4519 + * Copyright (C) 2001 Sistina Software
4521 + * January-April 2001
4523 + * LVM driver is free software; you can redistribute it and/or modify
4524 + * it under the terms of the GNU General Public License as published by
4525 + * the Free Software Foundation; either version 2, or (at your option)
4526 + * any later version.
4528 + * LVM driver is distributed in the hope that it will be useful,
4529 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
4530 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
4531 + * GNU General Public License for more details.
4533 + * You should have received a copy of the GNU General Public License
4534 + * along with GNU CC; see the file COPYING. If not, write to
4535 + * the Free Software Foundation, 59 Temple Place - Suite 330,
4536 + * Boston, MA 02111-1307, USA.
4543 + * 11/01/2001 - First version (Joe Thornber)
4544 + * 21/03/2001 - added display of stripes and stripe size (HM)
4545 + * 04/10/2001 - corrected devfs_register() call in lvm_init_fs()
4546 + * 11/04/2001 - don't devfs_register("lvm") as user-space always does it
4547 + * 10/05/2001 - show more of PV name in /proc/lvm/global
4551 +#include <linux/config.h>
4552 +#include <linux/version.h>
4554 +#include <linux/kernel.h>
4555 +#include <linux/vmalloc.h>
4556 +#include <linux/smp_lock.h>
4558 +#include <linux/devfs_fs_kernel.h>
4559 +#include <linux/proc_fs.h>
4560 +#include <linux/init.h>
4561 +#include <linux/lvm.h>
4563 +#include "lvm-internal.h"
4566 +static int _proc_read_vg(char *page, char **start, off_t off,
4567 + int count, int *eof, void *data);
4568 +static int _proc_read_lv(char *page, char **start, off_t off,
4569 + int count, int *eof, void *data);
4570 +static int _proc_read_pv(char *page, char **start, off_t off,
4571 + int count, int *eof, void *data);
4572 +static int _proc_read_global(char *page, char **start, off_t off,
4573 + int count, int *eof, void *data);
4575 +static int _vg_info(vg_t *vg_ptr, char *buf);
4576 +static int _lv_info(vg_t *vg_ptr, lv_t *lv_ptr, char *buf);
4577 +static int _pv_info(pv_t *pv_ptr, char *buf);
4579 +static void _show_uuid(const char *src, char *b, char *e);
4582 +static devfs_handle_t lvm_devfs_handle;
4584 +static devfs_handle_t vg_devfs_handle[MAX_VG];
4585 +static devfs_handle_t ch_devfs_handle[MAX_VG];
4586 +static devfs_handle_t lv_devfs_handle[MAX_LV];
4588 +static struct proc_dir_entry *lvm_proc_dir = NULL;
4589 +static struct proc_dir_entry *lvm_proc_vg_subdir = NULL;
4591 +/* inline functions */
4593 +/* public interface */
4594 +void __init lvm_init_fs() {
4595 + struct proc_dir_entry *pde;
4597 +/* User-space has already registered this */
4599 + lvm_devfs_handle = devfs_register(
4600 + 0 , "lvm", 0, LVM_CHAR_MAJOR, 0,
4601 + S_IFCHR | S_IRUSR | S_IWUSR | S_IRGRP,
4602 + &lvm_chr_fops, NULL);
4604 + lvm_proc_dir = create_proc_entry(LVM_DIR, S_IFDIR, &proc_root);
4605 + if (lvm_proc_dir) {
4606 + lvm_proc_vg_subdir = create_proc_entry(LVM_VG_SUBDIR, S_IFDIR,
4608 + pde = create_proc_entry(LVM_GLOBAL, S_IFREG, lvm_proc_dir);
4609 + if ( pde != NULL) pde->read_proc = _proc_read_global;
4613 +void lvm_fin_fs() {
4615 + devfs_unregister (lvm_devfs_handle);
4617 + remove_proc_entry(LVM_GLOBAL, lvm_proc_dir);
4618 + remove_proc_entry(LVM_VG_SUBDIR, lvm_proc_dir);
4619 + remove_proc_entry(LVM_DIR, &proc_root);
4622 +void lvm_fs_create_vg(vg_t *vg_ptr) {
4623 + struct proc_dir_entry *pde;
4625 + vg_devfs_handle[vg_ptr->vg_number] =
4626 + devfs_mk_dir(0, vg_ptr->vg_name, NULL);
4628 + ch_devfs_handle[vg_ptr->vg_number] = devfs_register(
4629 + vg_devfs_handle[vg_ptr->vg_number] , "group",
4630 + DEVFS_FL_DEFAULT, LVM_CHAR_MAJOR, vg_ptr->vg_number,
4631 + S_IFCHR | S_IRUSR | S_IWUSR | S_IRGRP,
4632 + &lvm_chr_fops, NULL);
4634 + vg_ptr->vg_dir_pde = create_proc_entry(vg_ptr->vg_name, S_IFDIR,
4635 + lvm_proc_vg_subdir);
4637 + if((pde = create_proc_entry("group", S_IFREG, vg_ptr->vg_dir_pde))) {
4638 + pde->read_proc = _proc_read_vg;
4639 + pde->data = vg_ptr;
4642 + vg_ptr->lv_subdir_pde =
4643 + create_proc_entry(LVM_LV_SUBDIR, S_IFDIR, vg_ptr->vg_dir_pde);
4645 + vg_ptr->pv_subdir_pde =
4646 + create_proc_entry(LVM_PV_SUBDIR, S_IFDIR, vg_ptr->vg_dir_pde);
4649 +void lvm_fs_remove_vg(vg_t *vg_ptr) {
4652 + devfs_unregister(ch_devfs_handle[vg_ptr->vg_number]);
4653 + devfs_unregister(vg_devfs_handle[vg_ptr->vg_number]);
4656 + for(i = 0; i < vg_ptr->lv_max; i++)
4657 + if(vg_ptr->lv[i]) lvm_fs_remove_lv(vg_ptr, vg_ptr->lv[i]);
4660 + for(i = 0; i < vg_ptr->pv_max; i++)
4661 + if(vg_ptr->pv[i]) lvm_fs_remove_pv(vg_ptr, vg_ptr->pv[i]);
4663 + if(vg_ptr->vg_dir_pde) {
4664 + remove_proc_entry(LVM_LV_SUBDIR, vg_ptr->vg_dir_pde);
4665 + vg_ptr->lv_subdir_pde = NULL;
4667 + remove_proc_entry(LVM_PV_SUBDIR, vg_ptr->vg_dir_pde);
4668 + vg_ptr->pv_subdir_pde = NULL;
4670 + remove_proc_entry("group", vg_ptr->vg_dir_pde);
4671 + vg_ptr->vg_dir_pde = NULL;
4673 + remove_proc_entry(vg_ptr->vg_name, lvm_proc_vg_subdir);
4678 +static inline const char *_basename(const char *str) {
4679 + const char *name = strrchr(str, '/');
4680 + name = name ? name + 1 : str;
4684 +devfs_handle_t lvm_fs_create_lv(vg_t *vg_ptr, lv_t *lv) {
4685 + struct proc_dir_entry *pde;
4686 + const char *name = _basename(lv->lv_name);
4688 + lv_devfs_handle[MINOR(lv->lv_dev)] = devfs_register(
4689 + vg_devfs_handle[vg_ptr->vg_number], name,
4690 + DEVFS_FL_DEFAULT, LVM_BLK_MAJOR, MINOR(lv->lv_dev),
4691 + S_IFBLK | S_IRUSR | S_IWUSR | S_IRGRP,
4692 + &lvm_blk_dops, NULL);
4694 + if(vg_ptr->lv_subdir_pde &&
4695 + (pde = create_proc_entry(name, S_IFREG, vg_ptr->lv_subdir_pde))) {
4696 + pde->read_proc = _proc_read_lv;
4699 + return lv_devfs_handle[MINOR(lv->lv_dev)];
4702 +void lvm_fs_remove_lv(vg_t *vg_ptr, lv_t *lv) {
4703 + devfs_unregister(lv_devfs_handle[MINOR(lv->lv_dev)]);
4705 + if(vg_ptr->lv_subdir_pde) {
4706 + const char *name = _basename(lv->lv_name);
4707 + remove_proc_entry(name, vg_ptr->lv_subdir_pde);
4712 +static inline void _make_pv_name(const char *src, char *b, char *e) {
4713 + int offset = strlen(LVM_DIR_PREFIX);
4714 + if(strncmp(src, LVM_DIR_PREFIX, offset))
4719 + while(*src && (b != e)) {
4720 + *b++ = (*src == '/') ? '_' : *src;
4726 +void lvm_fs_create_pv(vg_t *vg_ptr, pv_t *pv) {
4727 + struct proc_dir_entry *pde;
4728 + char name[NAME_LEN];
4730 + if(!vg_ptr->pv_subdir_pde)
4733 + _make_pv_name(pv->pv_name, name, name + sizeof(name));
4734 + if((pde = create_proc_entry(name, S_IFREG, vg_ptr->pv_subdir_pde))) {
4735 + pde->read_proc = _proc_read_pv;
4740 +void lvm_fs_remove_pv(vg_t *vg_ptr, pv_t *pv) {
4741 + char name[NAME_LEN];
4743 + if(!vg_ptr->pv_subdir_pde)
4746 + _make_pv_name(pv->pv_name, name, name + sizeof(name));
4747 + remove_proc_entry(name, vg_ptr->pv_subdir_pde);
4751 +static int _proc_read_vg(char *page, char **start, off_t off,
4752 + int count, int *eof, void *data) {
4754 + vg_t *vg_ptr = data;
4755 + char uuid[NAME_LEN];
4757 + sz += sprintf(page + sz, "name: %s\n", vg_ptr->vg_name);
4758 + sz += sprintf(page + sz, "size: %u\n",
4759 + vg_ptr->pe_total * vg_ptr->pe_size / 2);
4760 + sz += sprintf(page + sz, "access: %u\n", vg_ptr->vg_access);
4761 + sz += sprintf(page + sz, "status: %u\n", vg_ptr->vg_status);
4762 + sz += sprintf(page + sz, "number: %u\n", vg_ptr->vg_number);
4763 + sz += sprintf(page + sz, "LV max: %u\n", vg_ptr->lv_max);
4764 + sz += sprintf(page + sz, "LV current: %u\n", vg_ptr->lv_cur);
4765 + sz += sprintf(page + sz, "LV open: %u\n", vg_ptr->lv_open);
4766 + sz += sprintf(page + sz, "PV max: %u\n", vg_ptr->pv_max);
4767 + sz += sprintf(page + sz, "PV current: %u\n", vg_ptr->pv_cur);
4768 + sz += sprintf(page + sz, "PV active: %u\n", vg_ptr->pv_act);
4769 + sz += sprintf(page + sz, "PE size: %u\n", vg_ptr->pe_size / 2);
4770 + sz += sprintf(page + sz, "PE total: %u\n", vg_ptr->pe_total);
4771 + sz += sprintf(page + sz, "PE allocated: %u\n", vg_ptr->pe_allocated);
4773 + _show_uuid(vg_ptr->vg_uuid, uuid, uuid + sizeof(uuid));
4774 + sz += sprintf(page + sz, "uuid: %s\n", uuid);
4779 +static int _proc_read_lv(char *page, char **start, off_t off,
4780 + int count, int *eof, void *data) {
4784 + sz += sprintf(page + sz, "name: %s\n", lv->lv_name);
4785 + sz += sprintf(page + sz, "size: %u\n", lv->lv_size);
4786 + sz += sprintf(page + sz, "access: %u\n", lv->lv_access);
4787 + sz += sprintf(page + sz, "status: %u\n", lv->lv_status);
4788 + sz += sprintf(page + sz, "number: %u\n", lv->lv_number);
4789 + sz += sprintf(page + sz, "open: %u\n", lv->lv_open);
4790 + sz += sprintf(page + sz, "allocation: %u\n", lv->lv_allocation);
4791 + if(lv->lv_stripes > 1) {
4792 + sz += sprintf(page + sz, "stripes: %u\n",
4794 + sz += sprintf(page + sz, "stripesize: %u\n",
4795 + lv->lv_stripesize);
4797 + sz += sprintf(page + sz, "device: %02u:%02u\n",
4798 + MAJOR(lv->lv_dev), MINOR(lv->lv_dev));
4803 +static int _proc_read_pv(char *page, char **start, off_t off,
4804 + int count, int *eof, void *data) {
4807 + char uuid[NAME_LEN];
4809 + sz += sprintf(page + sz, "name: %s\n", pv->pv_name);
4810 + sz += sprintf(page + sz, "size: %u\n", pv->pv_size);
4811 + sz += sprintf(page + sz, "status: %u\n", pv->pv_status);
4812 + sz += sprintf(page + sz, "number: %u\n", pv->pv_number);
4813 + sz += sprintf(page + sz, "allocatable: %u\n", pv->pv_allocatable);
4814 + sz += sprintf(page + sz, "LV current: %u\n", pv->lv_cur);
4815 + sz += sprintf(page + sz, "PE size: %u\n", pv->pe_size / 2);
4816 + sz += sprintf(page + sz, "PE total: %u\n", pv->pe_total);
4817 + sz += sprintf(page + sz, "PE allocated: %u\n", pv->pe_allocated);
4818 + sz += sprintf(page + sz, "device: %02u:%02u\n",
4819 + MAJOR(pv->pv_dev), MINOR(pv->pv_dev));
4821 + _show_uuid(pv->pv_uuid, uuid, uuid + sizeof(uuid));
4822 + sz += sprintf(page + sz, "uuid: %s\n", uuid);
4827 +static int _proc_read_global(char *page, char **start, off_t pos, int count,
4828 + int *eof, void *data) {
4830 +#define LVM_PROC_BUF ( i == 0 ? dummy_buf : &buf[sz])
4832 + int c, i, l, p, v, vg_counter, pv_counter, lv_counter, lv_open_counter,
4833 + lv_open_total, pe_t_bytes, hash_table_bytes, lv_block_exception_t_bytes, seconds;
4836 + static char *buf = NULL;
4837 + static char dummy_buf[160]; /* sized for 2 lines */
4843 +#ifdef DEBUG_LVM_PROC_GET_INFO
4845 + "%s - lvm_proc_get_global_info CALLED pos: %lu count: %d\n",
4846 + lvm_name, pos, count);
4849 + if(pos != 0 && buf != NULL)
4852 + sz_last = vg_counter = pv_counter = lv_counter = lv_open_counter = \
4853 + lv_open_total = pe_t_bytes = hash_table_bytes = \
4854 + lv_block_exception_t_bytes = 0;
4856 + /* get some statistics */
4857 + for (v = 0; v < ABS_MAX_VG; v++) {
4858 + if ((vg_ptr = vg[v]) != NULL) {
4860 + pv_counter += vg_ptr->pv_cur;
4861 + lv_counter += vg_ptr->lv_cur;
4862 + if (vg_ptr->lv_cur > 0) {
4863 + for (l = 0; l < vg[v]->lv_max; l++) {
4864 + if ((lv_ptr = vg_ptr->lv[l]) != NULL) {
4865 + pe_t_bytes += lv_ptr->lv_allocated_le;
4866 + hash_table_bytes += lv_ptr->lv_snapshot_hash_table_size;
4867 + if (lv_ptr->lv_block_exception != NULL)
4868 + lv_block_exception_t_bytes += lv_ptr->lv_remap_end;
4869 + if (lv_ptr->lv_open > 0) {
4870 + lv_open_counter++;
4871 + lv_open_total += lv_ptr->lv_open;
4879 + pe_t_bytes *= sizeof(pe_t);
4880 + lv_block_exception_t_bytes *= sizeof(lv_block_exception_t);
4882 + if (buf != NULL) {
4883 + P_KFREE("%s -- vfree %d\n", lvm_name, __LINE__);
4889 + /* two passes: first to compute the buffer size to allocate,
4890 + second to fill the allocated buffer */
4891 + for (i = 0; i < 2; i++) {
4893 + sz += sprintf(LVM_PROC_BUF,
4901 + "Total: %d VG%s %d PV%s %d LV%s ",
4903 + vg_counter, vg_counter == 1 ? "" : "s",
4904 + pv_counter, pv_counter == 1 ? "" : "s",
4905 + lv_counter, lv_counter == 1 ? "" : "s");
4906 + sz += sprintf(LVM_PROC_BUF,
4909 + lv_open_counter == 1 ? "" : "s");
4910 + if (lv_open_total > 0)
4911 + sz += sprintf(LVM_PROC_BUF,
4915 + sz += sprintf(LVM_PROC_BUF, ")");
4916 + sz += sprintf(LVM_PROC_BUF,
4917 + "\nGlobal: %lu bytes malloced IOP version: %d ",
4918 + vg_counter * sizeof(vg_t) +
4919 + pv_counter * sizeof(pv_t) +
4920 + lv_counter * sizeof(lv_t) +
4921 + pe_t_bytes + hash_table_bytes + lv_block_exception_t_bytes + sz_last,
4924 + seconds = CURRENT_TIME - loadtime;
4926 + loadtime = CURRENT_TIME + seconds;
4927 + if (seconds / 86400 > 0) {
4928 + sz += sprintf(LVM_PROC_BUF, "%d day%s ",
4930 + seconds / 86400 == 0 ||
4931 + seconds / 86400 > 1 ? "s" : "");
4933 + sz += sprintf(LVM_PROC_BUF, "%d:%02d:%02d active\n",
4934 + (seconds % 86400) / 3600,
4935 + (seconds % 3600) / 60,
4938 + if (vg_counter > 0) {
4939 + for (v = 0; v < ABS_MAX_VG; v++) {
4940 + /* volume group */
4941 + if ((vg_ptr = vg[v]) != NULL) {
4942 + sz += _vg_info(vg_ptr, LVM_PROC_BUF);
4944 + /* physical volumes */
4945 + sz += sprintf(LVM_PROC_BUF,
4947 + vg_ptr->pv_cur == 1 ? ": " : "s:");
4949 + for (p = 0; p < vg_ptr->pv_max; p++) {
4950 + if ((pv_ptr = vg_ptr->pv[p]) != NULL) {
4951 + sz += _pv_info(pv_ptr, LVM_PROC_BUF);
4954 + if (c < vg_ptr->pv_cur)
4955 + sz += sprintf(LVM_PROC_BUF,
4960 + /* logical volumes */
4961 + sz += sprintf(LVM_PROC_BUF,
4963 + vg_ptr->lv_cur == 1 ? ": " : "s:");
4965 + for (l = 0; l < vg_ptr->lv_max; l++) {
4966 + if ((lv_ptr = vg_ptr->lv[l]) != NULL) {
4967 + sz += _lv_info(vg_ptr, lv_ptr, LVM_PROC_BUF);
4969 + if (c < vg_ptr->lv_cur)
4970 + sz += sprintf(LVM_PROC_BUF,
4974 + if (vg_ptr->lv_cur == 0) sz += sprintf(LVM_PROC_BUF, "none");
4975 + sz += sprintf(LVM_PROC_BUF, "\n");
4979 + if (buf == NULL) {
4981 + buf = vmalloc(sz);
4983 + if (buf == NULL) {
4985 + return sprintf(page, "%s - vmalloc error at line %d\n",
4986 + lvm_name, __LINE__);
4993 + if (pos > sz - 1) {
5000 + *start = &buf[pos];
5001 + if (sz - pos < count)
5006 +#undef LVM_PROC_BUF
5010 + * provide VG info for proc filesystem use (global)
5012 +static int _vg_info(vg_t *vg_ptr, char *buf) {
5014 + char inactive_flag = ' ';
5016 + if (!(vg_ptr->vg_status & VG_ACTIVE)) inactive_flag = 'I';
5018 + "\nVG: %c%s [%d PV, %d LV/%d open] "
5019 + " PE Size: %d KB\n"
5020 + " Usage [KB/PE]: %d /%d total "
5021 + "%d /%d used %d /%d free",
5027 + vg_ptr->pe_size >> 1,
5028 + vg_ptr->pe_size * vg_ptr->pe_total >> 1,
5030 + vg_ptr->pe_allocated * vg_ptr->pe_size >> 1,
5031 + vg_ptr->pe_allocated,
5032 + (vg_ptr->pe_total - vg_ptr->pe_allocated) *
5033 + vg_ptr->pe_size >> 1,
5034 + vg_ptr->pe_total - vg_ptr->pe_allocated);
5040 + * provide LV info for proc filesystem use (global)
5042 +static int _lv_info(vg_t *vg_ptr, lv_t *lv_ptr, char *buf) {
5044 + char inactive_flag = 'A', allocation_flag = ' ',
5045 + stripes_flag = ' ', rw_flag = ' ', *basename;
5047 + if (!(lv_ptr->lv_status & LV_ACTIVE))
5048 + inactive_flag = 'I';
5050 + if (lv_ptr->lv_access & LV_WRITE)
5052 + allocation_flag = 'D';
5053 + if (lv_ptr->lv_allocation & LV_CONTIGUOUS)
5054 + allocation_flag = 'C';
5055 + stripes_flag = 'L';
5056 + if (lv_ptr->lv_stripes > 1)
5057 + stripes_flag = 'S';
5058 + sz += sprintf(buf+sz,
5064 + if (lv_ptr->lv_stripes > 1)
5065 + sz += sprintf(buf+sz, "%-2d",
5066 + lv_ptr->lv_stripes);
5068 + sz += sprintf(buf+sz, " ");
5070 + /* FIXME: use _basename */
5071 + basename = strrchr(lv_ptr->lv_name, '/');
5072 + if ( basename == 0) basename = lv_ptr->lv_name;
5074 + sz += sprintf(buf+sz, "] %-25s", basename);
5075 + if (strlen(basename) > 25)
5076 + sz += sprintf(buf+sz,
5078 + sz += sprintf(buf+sz, "%9d /%-6d ",
5079 + lv_ptr->lv_size >> 1,
5080 + lv_ptr->lv_size / vg_ptr->pe_size);
5082 + if (lv_ptr->lv_open == 0)
5083 + sz += sprintf(buf+sz, "close");
5085 + sz += sprintf(buf+sz, "%dx open",
5093 + * provide PV info for proc filesystem use (global)
5095 +static int _pv_info(pv_t *pv, char *buf) {
5097 + char inactive_flag = 'A', allocation_flag = ' ';
5098 + char *pv_name = NULL;
5100 + if (!(pv->pv_status & PV_ACTIVE))
5101 + inactive_flag = 'I';
5102 + allocation_flag = 'A';
5103 + if (!(pv->pv_allocatable & PV_ALLOCATABLE))
5104 + allocation_flag = 'N';
5105 + pv_name = strchr(pv->pv_name+1,'/');
5106 + if ( pv_name == 0) pv_name = pv->pv_name;
5109 + "[%c%c] %-21s %8d /%-6d "
5110 + "%8d /%-6d %8d /%-6d",
5114 + pv->pe_total * pv->pe_size >> 1,
5116 + pv->pe_allocated * pv->pe_size >> 1,
5118 + (pv->pe_total - pv->pe_allocated) *
5120 + pv->pe_total - pv->pe_allocated);
5124 +static void _show_uuid(const char *src, char *b, char *e) {
5128 + for(i = 0; *src && (b != e); i++) {
5129 + if(i && !(i & 0x3))
5135 diff -ruN -X /home/joe/packages/dontdiff linux_2.4.1/drivers/md/lvm-snap.h linux/drivers/md/lvm-snap.h
5136 --- linux_2.4.1/drivers/md/lvm-snap.h Fri Feb 16 14:51:26 2001
5137 +++ linux/drivers/md/lvm-snap.h Thu Jan 1 01:00:00 1970
5140 - * kernel/lvm-snap.h
5142 - * Copyright (C) 2001 Sistina Software
5145 - * LVM driver is free software; you can redistribute it and/or modify
5146 - * it under the terms of the GNU General Public License as published by
5147 - * the Free Software Foundation; either version 2, or (at your option)
5148 - * any later version.
5150 - * LVM driver is distributed in the hope that it will be useful,
5151 - * but WITHOUT ANY WARRANTY; without even the implied warranty of
5152 - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
5153 - * GNU General Public License for more details.
5155 - * You should have received a copy of the GNU General Public License
5156 - * along with GNU CC; see the file COPYING. If not, write to
5157 - * the Free Software Foundation, 59 Temple Place - Suite 330,
5158 - * Boston, MA 02111-1307, USA.
5165 - * 05/01/2001:Joe Thornber - Factored this file out of lvm.c
5172 -/* external snapshot calls */
5173 -extern inline int lvm_get_blksize(kdev_t);
5174 -extern int lvm_snapshot_alloc(lv_t *);
5175 -extern void lvm_snapshot_fill_COW_page(vg_t *, lv_t *);
5176 -extern int lvm_snapshot_COW(kdev_t, ulong, ulong, ulong, lv_t *);
5177 -extern int lvm_snapshot_remap_block(kdev_t *, ulong *, ulong, lv_t *);
5178 -extern void lvm_snapshot_release(lv_t *);
5179 -extern int lvm_write_COW_table_block(vg_t *, lv_t *);
5180 -extern inline void lvm_hash_link(lv_block_exception_t *,
5181 - kdev_t, ulong, lv_t *);
5182 -extern int lvm_snapshot_alloc_hash_table(lv_t *);
5183 -extern void lvm_drop_snapshot(lv_t *, const char *);