1 --- linux/include/linux/lvm.h.orig Sat Oct 20 20:50:33 2001
2 +++ linux/include/linux/lvm.h Sat Oct 20 20:55:23 2001
5 * January-March,July,September,October,Dezember 1999
6 * January,February,July,November 2000
8 + * January-March,June,July 2001
10 * lvm is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
13 * 01/03/2001 - Revert to IOP10 and add VG_CREATE_OLD call for compatibility
14 * 08/03/2001 - new lv_t (in core) version number 5: changed page member
15 * to (struct kiobuf *) to use for COW exception table io
16 + * 26/03/2001 - changed lv_v4 to lv_v5 in structure definition (HM)
17 + * 21/06/2001 - changed BLOCK_SIZE back to 1024 for non S/390
18 + * 22/06/2001 - added Andreas Dilger's PE on 4k boundary alignment enhancements
19 + * 19/07/2001 - added rwsem compatibility macros for 2.2 kernels
24 #ifndef _LVM_H_INCLUDE
25 #define _LVM_H_INCLUDE
27 -#define LVM_RELEASE_NAME "0.9.1_beta6"
28 -#define LVM_RELEASE_DATE "12/03/2001"
29 +#define LVM_RELEASE_NAME "1.0.1-rc4"
30 +#define LVM_RELEASE_DATE "03/10/2001"
32 #define _LVM_KERNEL_H_VERSION "LVM "LVM_RELEASE_NAME" ("LVM_RELEASE_DATE")"
38 -#endif /* #ifdef __KERNEL__ */
42 #include <linux/kdev_t.h>
43 #include <linux/list.h>
47 #include <linux/kdev_t.h>
48 #include <linux/list.h>
50 #endif /* #ifndef __KERNEL__ */
52 #include <asm/types.h>
54 #include <asm/semaphore.h>
55 #endif /* #ifdef __KERNEL__ */
60 #if !defined ( LVM_BLK_MAJOR) || !defined ( LVM_CHAR_MAJOR)
62 #ifdef CONFIG_ARCH_S390
63 #define BLOCK_SIZE 4096
65 -#define BLOCK_SIZE 512
66 +#define BLOCK_SIZE 1024
70 #define SECTOR_SIZE 512
73 -#define LVM_STRUCT_VERSION 1 /* structure version */
74 +/* structure version */
75 +#define LVM_STRUCT_VERSION 1
77 #define LVM_DIR_PREFIX "/dev/"
80 -#define min(a,b) (((a)<(b))?(a):(b))
83 -#define max(a,b) (((a)>(b))?(a):(b))
86 -/* set the default structure version */
87 -#if ( LVM_STRUCT_VERSION == 1)
91 -#define pv_disk_t pv_disk_v2_t
92 -#define lv_disk_t lv_disk_v3_t
93 -#define vg_disk_t vg_disk_v2_t
94 -#define lv_block_exception_t lv_block_exception_v1_t
95 -#define lv_COW_table_disk_t lv_COW_table_disk_v1_t
101 * i/o protocol version
103 @@ -241,40 +224,11 @@
106 /* DONT TOUCH THESE !!! */
107 -/* base of PV structure in disk partition */
108 -#define LVM_PV_DISK_BASE 0L
110 -/* size reserved for PV structure on disk */
111 -#define LVM_PV_DISK_SIZE 1024L
113 -/* base of VG structure in disk partition */
114 -#define LVM_VG_DISK_BASE LVM_PV_DISK_SIZE
116 -/* size reserved for VG structure */
117 -#define LVM_VG_DISK_SIZE ( 9 * 512L)
119 -/* size reserved for timekeeping */
120 -#define LVM_TIMESTAMP_DISK_BASE ( LVM_VG_DISK_BASE + LVM_VG_DISK_SIZE)
121 -#define LVM_TIMESTAMP_DISK_SIZE 512L /* reserved for timekeeping */
123 -/* name list of physical volumes on disk */
124 -#define LVM_PV_UUIDLIST_DISK_BASE ( LVM_TIMESTAMP_DISK_BASE + \
125 - LVM_TIMESTAMP_DISK_SIZE)
127 -/* now for the dynamically calculated parts of the VGDA */
128 -#define LVM_LV_DISK_OFFSET(a, b) ( (a)->lv_on_disk.base + \
129 - sizeof ( lv_disk_t) * b)
130 -#define LVM_DISK_SIZE(pv) ( (pv)->pe_on_disk.base + \
131 - (pv)->pe_on_disk.size)
132 -#define LVM_PE_DISK_OFFSET(pe, pv) ( pe * pv->pe_size + \
133 - ( LVM_DISK_SIZE ( pv) / SECTOR_SIZE))
134 -#define LVM_PE_ON_DISK_BASE(pv) \
136 - pv->pe_on_disk.base = pv->lv_on_disk.base + pv->lv_on_disk.size; \
137 - if ( ( rest = pv->pe_on_disk.base % SECTOR_SIZE) != 0) \
138 - pv->pe_on_disk.base += ( SECTOR_SIZE - rest); \
140 -/* END default disk spaces and offsets for PVs */
146 @@ -318,25 +272,12 @@
147 #define LVM_SNAPSHOT_MIN_CHUNK (PAGE_SIZE/1024) /* 4 or 8 KB */
154 -#define LVM_GET_COW_TABLE_CHUNKS_PER_PE(vg, lv) ( \
155 - vg->pe_size / lv->lv_chunk_size)
157 -#define LVM_GET_COW_TABLE_ENTRIES_PER_PE(vg, lv) ( \
159 - int COW_table_entries_per_PE; \
160 - int COW_table_chunks_per_PE; \
162 - COW_table_entries_per_PE = LVM_GET_COW_TABLE_CHUNKS_PER_PE(vg, lv); \
163 - COW_table_chunks_per_PE = ( COW_table_entries_per_PE * sizeof(lv_COW_table_disk_t) / SECTOR_SIZE + lv->lv_chunk_size - 1) / lv->lv_chunk_size; \
164 - COW_table_entries_per_PE - COW_table_chunks_per_PE;})
169 + * FIXME: the last parameter to _IO{W,R,WR} is a data type. The macro will
170 + * expand this using sizeof(), so putting "1" there is misleading
171 + * because sizeof(1) = sizeof(int) = sizeof(2) = 4 on a 32-bit machine!
174 #define VG_CREATE_OLD _IOW ( 0xfe, 0x00, 1)
178 /* lock the logical volume manager */
179 +#if LVM_DRIVER_IOP_VERSION > 11
180 +#define LVM_LOCK_LVM _IO ( 0xfe, 0x9A)
182 +/* This is actually the same as _IO ( 0xff, 0x00), oops. Remove for IOP 12+ */
183 #define LVM_LOCK_LVM _IO ( 0xfe, 0x100)
188 @@ -451,21 +397,21 @@
189 #define UUID_LEN 32 /* don't change!!! */
191 /* copy on write tables in disk format */
193 +typedef struct lv_COW_table_disk_v1 {
194 uint64_t pv_org_number;
195 uint64_t pv_org_rsector;
196 uint64_t pv_snap_number;
197 uint64_t pv_snap_rsector;
198 -} lv_COW_table_disk_v1_t;
199 +} lv_COW_table_disk_t;
201 /* remap physical sector/rdev pairs including hash */
203 +typedef struct lv_block_exception_v1 {
204 struct list_head hash;
205 uint32_t rsector_org;
207 uint32_t rsector_new;
209 -} lv_block_exception_v1_t;
210 +} lv_block_exception_t;
212 /* disk stored pe information */
214 @@ -481,37 +427,11 @@
218 - * Structure Physical Volume (PV) Version 1
219 + * physical volume structures
224 - char id[2]; /* Identifier */
225 - unsigned short version; /* HM lvm version */
226 - lvm_disk_data_t pv_on_disk;
227 - lvm_disk_data_t vg_on_disk;
228 - lvm_disk_data_t pv_namelist_on_disk;
229 - lvm_disk_data_t lv_on_disk;
230 - lvm_disk_data_t pe_on_disk;
231 - char pv_name[NAME_LEN];
232 - char vg_name[NAME_LEN];
233 - char system_id[NAME_LEN]; /* for vgexport/vgimport */
237 - uint pv_allocatable;
238 - uint pv_size; /* HM */
243 - uint pe_stale; /* for future use */
244 - pe_disk_t *pe; /* HM */
245 - struct inode *inode; /* HM */
250 +typedef struct pv_v2 {
251 char id[2]; /* Identifier */
252 unsigned short version; /* HM lvm version */
253 lvm_disk_data_t pv_on_disk;
254 @@ -533,36 +453,17 @@
256 uint pe_stale; /* for future use */
257 pe_disk_t *pe; /* HM */
258 - struct inode *inode; /* HM */
259 + struct block_device *bd;
260 char pv_uuid[UUID_LEN+1];
264 + uint32_t pe_start; /* in sectors */
270 - uint8_t id[2]; /* Identifier */
271 - uint16_t version; /* HM lvm version */
272 - lvm_disk_data_t pv_on_disk;
273 - lvm_disk_data_t vg_on_disk;
274 - lvm_disk_data_t pv_namelist_on_disk;
275 - lvm_disk_data_t lv_on_disk;
276 - lvm_disk_data_t pe_on_disk;
277 - uint8_t pv_name[NAME_LEN];
278 - uint8_t vg_name[NAME_LEN];
279 - uint8_t system_id[NAME_LEN]; /* for vgexport/vgimport */
281 - uint32_t pv_number;
282 - uint32_t pv_status;
283 - uint32_t pv_allocatable;
284 - uint32_t pv_size; /* HM */
288 - uint32_t pe_allocated;
293 +typedef struct pv_disk_v2 {
294 uint8_t id[2]; /* Identifier */
295 uint16_t version; /* HM lvm version */
296 lvm_disk_data_t pv_on_disk;
300 uint32_t pe_allocated;
303 + /* new in struct version 2 */
304 + uint32_t pe_start; /* in sectors */
313 typedef struct lv_bmap {
323 -typedef struct lv_v4 {
324 +typedef struct lv_v5 {
325 char lv_name[NAME_LEN];
326 char vg_name[NAME_LEN];
331 /* delta to version 1 starts here */
332 - struct lv_v4 *lv_snapshot_org;
333 - struct lv_v4 *lv_snapshot_prev;
334 - struct lv_v4 *lv_snapshot_next;
335 + struct lv_v5 *lv_snapshot_org;
336 + struct lv_v5 *lv_snapshot_prev;
337 + struct lv_v5 *lv_snapshot_next;
338 lv_block_exception_t *lv_block_exception;
341 @@ -649,22 +554,22 @@
343 struct kiobuf *lv_iobuf;
344 struct kiobuf *lv_COW_table_iobuf;
345 - struct semaphore lv_snapshot_sem;
346 + struct rw_semaphore lv_lock;
347 struct list_head *lv_snapshot_hash_table;
348 uint32_t lv_snapshot_hash_table_size;
349 uint32_t lv_snapshot_hash_mask;
350 wait_queue_head_t lv_snapshot_wait;
351 int lv_snapshot_use_rate;
355 uint lv_allocated_snapshot_le;
364 +typedef struct lv_disk_v3 {
365 uint8_t lv_name[NAME_LEN];
366 uint8_t vg_name[NAME_LEN];
368 @@ -686,36 +591,14 @@
369 uint32_t lv_allocation;
370 uint32_t lv_io_timeout; /* for future use */
371 uint32_t lv_read_ahead; /* HM */
376 * Structure Volume Group (VG) Version 1
381 - char vg_name[NAME_LEN]; /* volume group name */
382 - uint vg_number; /* volume group number */
383 - uint vg_access; /* read/write */
384 - uint vg_status; /* active or not */
385 - uint lv_max; /* maximum logical volumes */
386 - uint lv_cur; /* current logical volumes */
387 - uint lv_open; /* open logical volumes */
388 - uint pv_max; /* maximum physical volumes */
389 - uint pv_cur; /* current physical volumes FU */
390 - uint pv_act; /* active physical volumes */
391 - uint dummy; /* was obsolete max_pe_per_pv */
392 - uint vgda; /* volume group descriptor arrays FU */
393 - uint pe_size; /* physical extent size in sectors */
394 - uint pe_total; /* total of physical extents */
395 - uint pe_allocated; /* allocated physical extents */
396 - uint pvg_total; /* physical volume groups FU */
397 - struct proc_dir_entry *proc;
398 - pv_t *pv[ABS_MAX_PV + 1]; /* physical volume struct pointers */
399 - lv_t *lv[ABS_MAX_LV + 1]; /* logical volume struct pointers */
403 +typedef struct vg_v3 {
404 char vg_name[NAME_LEN]; /* volume group name */
405 uint vg_number; /* volume group number */
406 uint vg_access; /* read/write */
407 @@ -743,30 +626,11 @@
417 - uint8_t vg_name[NAME_LEN]; /* volume group name */
418 - uint32_t vg_number; /* volume group number */
419 - uint32_t vg_access; /* read/write */
420 - uint32_t vg_status; /* active or not */
421 - uint32_t lv_max; /* maximum logical volumes */
422 - uint32_t lv_cur; /* current logical volumes */
423 - uint32_t lv_open; /* open logical volumes */
424 - uint32_t pv_max; /* maximum physical volumes */
425 - uint32_t pv_cur; /* current physical volumes FU */
426 - uint32_t pv_act; /* active physical volumes */
428 - uint32_t vgda; /* volume group descriptor arrays FU */
429 - uint32_t pe_size; /* physical extent size in sectors */
430 - uint32_t pe_total; /* total of physical extents */
431 - uint32_t pe_allocated; /* allocated physical extents */
432 - uint32_t pvg_total; /* physical volume groups FU */
436 +typedef struct vg_disk_v2 {
437 uint8_t vg_uuid[UUID_LEN]; /* volume group UUID */
438 uint8_t vg_name_dummy[NAME_LEN-UUID_LEN]; /* rest of v1 VG name */
439 uint32_t vg_number; /* volume group number */
441 uint32_t pe_total; /* total of physical extents */
442 uint32_t pe_allocated; /* allocated physical extents */
443 uint32_t pvg_total; /* physical volume groups FU */
451 } lv_snapshot_use_rate_req_t;
455 +/* useful inlines */
456 +static inline ulong round_up(ulong n, ulong size) {
458 + return (n + size) & ~size;
461 +static inline ulong div_up(ulong n, ulong size) {
462 + return round_up(n, size) / size;
465 +/* FIXME: nasty capital letters */
466 +static int inline LVM_GET_COW_TABLE_CHUNKS_PER_PE(vg_t *vg, lv_t *lv) {
467 + return vg->pe_size / lv->lv_chunk_size;
470 +static int inline LVM_GET_COW_TABLE_ENTRIES_PER_PE(vg_t *vg, lv_t *lv) {
471 + ulong chunks = vg->pe_size / lv->lv_chunk_size;
472 + ulong entry_size = sizeof(lv_COW_table_disk_t);
473 + ulong chunk_size = lv->lv_chunk_size * SECTOR_SIZE;
474 + ulong entries = (vg->pe_size * SECTOR_SIZE) /
475 + (entry_size + chunk_size);
480 + for(; entries; entries--)
481 + if((div_up(entries * entry_size, chunk_size) + entries) <=
489 #endif /* #ifndef _LVM_H_INCLUDE */
491 --- linux/drivers/md/lvm.c.orig Sat Oct 20 20:50:29 2001
492 +++ linux/drivers/md/lvm.c Sat Oct 20 20:36:55 2001
494 * April-May,July-August,November 1998
495 * January-March,May,July,September,October 1999
496 * January,February,July,September-November 2000
497 - * January,February,March 2001
498 + * January-April 2001
501 * LVM driver is free software; you can redistribute it and/or modify
503 * only other update ioctls are blocked now
504 * - fixed pv->pe to NULL for pv_status
505 * - using lv_req structure in lvm_chr_ioctl() now
506 - * - fixed NULL ptr reference bug in lvm_do_lv_extendreduce()
507 + * - fixed NULL ptr reference bug in lvm_do_lv_extend_reduce()
508 * caused by uncontiguous PV array in lvm_chr_ioctl(VG_REDUCE)
509 * 09/02/1999 - changed BLKRASET and BLKRAGET in lvm_chr_ioctl() to
510 * handle lgoical volume private read ahead sector
511 @@ -194,11 +194,25 @@
512 * - factored lvm_do_pv_flush out of lvm_chr_ioctl (HM)
513 * 09/03/2001 - Added _lock_open_count to ensure we only drop the lock
514 * when the locking process closes.
515 - * 05/04/2001 - lvm_map bugs: don't use b_blocknr/b_dev in lvm_map, it
516 - * destroys stacking devices. call b_end_io on failed maps.
518 - * 30/04/2001 - replace get_hardblock_size() with get_hardsect_size() for
520 + * 05/04/2001 - Defer writes to an extent that is being moved [JT]
521 + * 05/04/2001 - use b_rdev and b_rsector rather than b_dev and b_blocknr in
522 + * lvm_map() in order to make stacking devices more happy (HM)
523 + * 11/04/2001 - cleaned up the pvmove queue code. I no longer retain the
524 + * rw flag, instead WRITEA's are just dropped [JT]
525 + * 30/04/2001 - added KERNEL_VERSION > 2.4.3 get_hardsect_size() rather
526 + * than get_hardblocksize() call
527 + * 03/05/2001 - Use copy_to/from_user to preserve pointers in
528 + * lvm_do_status_by*
529 + * 11/05/2001 - avoid accesses to inactive snapshot data in
530 + * __update_hardsectsize() and lvm_do_lv_extend_reduce() (JW)
531 + * 28/05/2001 - implemented missing BLKSSZGET ioctl
532 + * 05/06/2001 - Move _pe_lock out of fast path for lvm_map when no PEs
533 + * locked. Make buffer queue flush not need locking.
534 + * Fix lvm_user_bmap() to set b_rsector for new lvm_map(). [AED]
535 + * 30/06/2001 - Speed up __update_hardsectsize() by checking if PVs have
536 + * the same hardsectsize (very likely) before scanning all LEs
537 + * in the LV each time. [AED]
538 + * 12/10/2001 - Use add/del_gendisk() routines in 2.4.10+
543 #include <linux/locks.h>
546 +#include <linux/devfs_fs_kernel.h>
547 #include <linux/smp_lock.h>
548 #include <asm/ioctl.h>
549 #include <asm/segment.h>
551 static void __update_hardsectsize(lv_t *lv);
554 +static void _queue_io(struct buffer_head *bh, int rw);
555 +static struct buffer_head *_dequeue_io(void);
556 +static void _flush_io(struct buffer_head *bh);
558 +static int _open_pv(pv_t *pv);
559 +static void _close_pv(pv_t *pv);
561 +static unsigned long _sectors_to_k(unsigned long sect);
564 void lvm_hd_name(char *, int);
570 -char *lvm_version = "LVM version "LVM_RELEASE_NAME" by Heinz Mauelshagen "
571 - "("LVM_RELEASE_DATE")\n";
572 -char *lvm_short_version = "version "LVM_RELEASE_NAME" ("LVM_RELEASE_DATE")";
573 +char *lvm_version = "LVM version "LVM_RELEASE_NAME"("LVM_RELEASE_DATE")";
574 ushort lvm_iop_version = LVM_DRIVER_IOP_VERSION;
576 const char *const lvm_name = LVM_NAME;
578 /* volume group descriptor area pointers */
579 vg_t *vg[ABS_MAX_VG];
581 -static pv_t *pvp = NULL;
582 -static lv_t *lvp = NULL;
583 -static pe_t *pep = NULL;
586 /* map from block minor number to VG and LV numbers */
590 /* Request structures (lvm_chr_ioctl()) */
591 static pv_change_req_t pv_change_req;
592 static pv_status_req_t pv_status_req;
593 -static pe_lock_req_t pe_lock_req;
594 +volatile static pe_lock_req_t pe_lock_req;
595 static le_remap_req_t le_remap_req;
596 static lv_req_t lv_req;
598 @@ -361,11 +378,14 @@
599 static uint vg_count = 0;
600 static long lvm_chr_open_count = 0;
601 static DECLARE_WAIT_QUEUE_HEAD(lvm_wait);
602 -static DECLARE_WAIT_QUEUE_HEAD(lvm_map_wait);
604 static spinlock_t lvm_lock = SPIN_LOCK_UNLOCKED;
605 static spinlock_t lvm_snapshot_lock = SPIN_LOCK_UNLOCKED;
607 +static struct buffer_head *_pe_requests;
608 +static DECLARE_RWSEM(_pe_lock);
611 struct file_operations lvm_chr_fops = {
613 release: lvm_chr_close,
615 /* block device operations structure needed for 2.3.38? and above */
616 struct block_device_operations lvm_blk_dops =
618 - open: lvm_blk_open,
619 + open: lvm_blk_open,
620 release: lvm_blk_close,
621 ioctl: lvm_blk_ioctl,
623 @@ -402,22 +422,58 @@
624 NULL, /* pointer to next gendisk struct (internal) */
627 +static void add_gendisk(struct gendisk *gp)
629 + struct gendisk *gendisk_ptr = NULL;
631 + if (gendisk_head != NULL) {
632 + gendisk_ptr = gendisk_head;
633 + while (gendisk_ptr->next != NULL &&
634 + gendisk_ptr->major > lvm_gendisk.major) {
635 + gendisk_ptr = gendisk_ptr->next;
637 + lvm_gendisk.next = gendisk_ptr->next;
638 + gendisk_ptr->next = &lvm_gendisk;
640 + gendisk_head = &lvm_gendisk;
641 + lvm_gendisk.next = NULL;
645 +static void del_gendisk(struct gendisk *gp)
647 + struct gendisk *gendisk_ptr = NULL, *gendisk_ptr_prev = NULL;
649 + gendisk_ptr = gendisk_ptr_prev = gendisk_head;
650 + while (gendisk_ptr != NULL) {
651 + if (gendisk_ptr == &lvm_gendisk)
653 + gendisk_ptr_prev = gendisk_ptr;
654 + gendisk_ptr = gendisk_ptr->next;
657 + if (gendisk_ptr == &lvm_gendisk)
658 + gendisk_ptr_prev->next = gendisk_ptr->next;
662 * Driver initialization...
666 - struct gendisk *gendisk_ptr = NULL;
668 - if (register_chrdev(LVM_CHAR_MAJOR, lvm_name, &lvm_chr_fops) < 0) {
669 - printk(KERN_ERR "%s -- register_chrdev failed\n", lvm_name);
670 + if (devfs_register_chrdev(LVM_CHAR_MAJOR,
671 + lvm_name, &lvm_chr_fops) < 0) {
672 + printk(KERN_ERR "%s -- devfs_register_chrdev failed\n",
676 - if (register_blkdev(MAJOR_NR, lvm_name, &lvm_blk_dops) < 0)
677 + if (devfs_register_blkdev(MAJOR_NR, lvm_name, &lvm_blk_dops) < 0)
679 - printk("%s -- register_blkdev failed\n", lvm_name);
680 - if (unregister_chrdev(LVM_CHAR_MAJOR, lvm_name) < 0)
681 - printk(KERN_ERR "%s -- unregister_chrdev failed\n", lvm_name);
682 + printk("%s -- devfs_register_blkdev failed\n", lvm_name);
683 + if (devfs_unregister_chrdev(LVM_CHAR_MAJOR, lvm_name) < 0)
685 + "%s -- devfs_unregister_chrdev failed\n",
691 lvm_geninit(&lvm_gendisk);
693 /* insert our gendisk at the corresponding major */
694 - if (gendisk_head != NULL) {
695 - gendisk_ptr = gendisk_head;
696 - while (gendisk_ptr->next != NULL &&
697 - gendisk_ptr->major > lvm_gendisk.major) {
698 - gendisk_ptr = gendisk_ptr->next;
700 - lvm_gendisk.next = gendisk_ptr->next;
701 - gendisk_ptr->next = &lvm_gendisk;
703 - gendisk_head = &lvm_gendisk;
704 - lvm_gendisk.next = NULL;
706 + add_gendisk(&lvm_gendisk);
709 /* reference from drivers/block/genhd.c */
710 @@ -447,20 +492,19 @@
711 blk_queue_make_request(BLK_DEFAULT_QUEUE(MAJOR_NR), lvm_make_request_fn);
714 + /* initialise the pe lock */
715 + pe_lock_req.lock = UNLOCK_PE;
717 /* optional read root VGDA */
719 if ( *rootvg != 0) vg_read_with_pv_and_lv ( rootvg, &vg);
726 + printk(KERN_INFO "%s module loaded\n", lvm_version);
729 + printk(KERN_INFO "%s\n", lvm_version);
731 - " successfully initialized\n",
732 - lvm_version, lvm_name);
736 @@ -471,27 +515,17 @@
738 static void lvm_cleanup(void)
740 - struct gendisk *gendisk_ptr = NULL, *gendisk_ptr_prev = NULL;
742 - if (unregister_chrdev(LVM_CHAR_MAJOR, lvm_name) < 0) {
743 - printk(KERN_ERR "%s -- unregister_chrdev failed\n", lvm_name);
745 - if (unregister_blkdev(MAJOR_NR, lvm_name) < 0) {
746 - printk(KERN_ERR "%s -- unregister_blkdev failed\n", lvm_name);
748 + if (devfs_unregister_chrdev(LVM_CHAR_MAJOR, lvm_name) < 0)
749 + printk(KERN_ERR "%s -- devfs_unregister_chrdev failed\n",
751 + if (devfs_unregister_blkdev(MAJOR_NR, lvm_name) < 0)
752 + printk(KERN_ERR "%s -- devfs_unregister_blkdev failed\n",
757 - gendisk_ptr = gendisk_ptr_prev = gendisk_head;
758 - while (gendisk_ptr != NULL) {
759 - if (gendisk_ptr == &lvm_gendisk)
761 - gendisk_ptr_prev = gendisk_ptr;
762 - gendisk_ptr = gendisk_ptr->next;
764 /* delete our gendisk from chain */
765 - if (gendisk_ptr == &lvm_gendisk)
766 - gendisk_ptr_prev->next = gendisk_ptr->next;
767 + del_gendisk(&lvm_gendisk);
769 blk_size[MAJOR_NR] = NULL;
770 blksize_size[MAJOR_NR] = NULL;
772 /* unregister with procfs and devfs */
776 printk(KERN_INFO "%s -- Module successfully deactivated\n", lvm_name);
780 } /* lvm_cleanup() */
782 lvm_lock = lvm_snapshot_lock = SPIN_LOCK_UNLOCKED;
784 pe_lock_req.lock = UNLOCK_PE;
785 - pe_lock_req.data.lv_dev = \
786 - pe_lock_req.data.pv_dev = \
787 + pe_lock_req.data.lv_dev = 0;
788 + pe_lock_req.data.pv_dev = 0;
789 pe_lock_req.data.pv_offset = 0;
791 /* Initialize VG pointers */
794 ********************************************************************/
796 +#define MODE_TO_STR(mode) (mode) & FMODE_READ ? "READ" : "", \
797 + (mode) & FMODE_WRITE ? "WRITE" : ""
800 * character device open routine
804 int minor = MINOR(inode->i_rdev);
806 - P_DEV("%s -- lvm_chr_open MINOR: %d VG#: %d mode: 0x%X lock: %d\n",
807 - lvm_name, minor, VG_CHR(minor), file->f_mode, lock);
808 + P_DEV("chr_open MINOR: %d VG#: %d mode: %s%s lock: %d\n",
809 + minor, VG_CHR(minor), MODE_TO_STR(file->f_mode), lock);
811 /* super user validation */
812 if (!capable(CAP_SYS_ADMIN)) return -EACCES;
814 /* otherwise cc will complain about unused variables */
817 - P_IOCTL("%s -- lvm_chr_ioctl: command: 0x%X MINOR: %d "
818 - "VG#: %d mode: 0x%X\n",
819 - lvm_name, command, minor, VG_CHR(minor), file->f_mode);
820 + P_IOCTL("chr MINOR: %d command: 0x%X arg: %p VG#: %d mode: %s%s\n",
821 + minor, command, arg, VG_CHR(minor), MODE_TO_STR(file->f_mode));
823 #ifdef LVM_TOTAL_RESET
824 if (lvm_reset_spindown > 0) return -EACCES;
825 @@ -789,13 +827,10 @@
827 * character device close routine
829 -int lvm_chr_close(struct inode *inode, struct file *file)
830 +static int lvm_chr_close(struct inode *inode, struct file *file)
833 - int minor = MINOR(inode->i_rdev);
835 - "%s -- lvm_chr_close VG#: %d\n", lvm_name, VG_CHR(minor));
837 + P_DEV("chr_close MINOR: %d VG#: %d\n",
838 + MINOR(inode->i_rdev), VG_CHR(MINOR(inode->i_rdev)));
840 #ifdef LVM_TOTAL_RESET
841 if (lvm_reset_spindown > 0) {
843 spin_lock(&lvm_lock);
844 if(lock == current->pid) {
845 if(!_lock_open_count) {
846 + P_DEV("chr_close: unlocking LVM for pid %d\n", lock);
848 wake_up_interruptible(&lvm_wait);
852 vg_t *vg_ptr = vg[VG_BLK(minor)];
854 - P_DEV("%s -- lvm_blk_open MINOR: %d VG#: %d LV#: %d mode: 0x%X\n",
855 - lvm_name, minor, VG_BLK(minor), LV_BLK(minor), file->f_mode);
856 + P_DEV("blk_open MINOR: %d VG#: %d LV#: %d mode: %s%s\n",
857 + minor, VG_BLK(minor), LV_BLK(minor), MODE_TO_STR(file->f_mode));
859 #ifdef LVM_TOTAL_RESET
860 if (lvm_reset_spindown > 0)
865 - P_DEV("%s -- OPEN OK, LV size %d\n", lvm_name, lv_ptr->lv_size);
866 + P_DEV("blk_open OK, LV size %d\n", lv_ptr->lv_size);
870 @@ -893,16 +929,18 @@
871 void *arg = (void *) a;
872 struct hd_geometry *hd = (struct hd_geometry *) a;
874 - P_IOCTL("%s -- lvm_blk_ioctl MINOR: %d command: 0x%X arg: %lX "
875 - "VG#: %dl LV#: %d\n",
876 - lvm_name, minor, command, (ulong) arg,
877 - VG_BLK(minor), LV_BLK(minor));
878 + P_IOCTL("blk MINOR: %d command: 0x%X arg: %p VG#: %d LV#: %d "
879 + "mode: %s%s\n", minor, command, arg, VG_BLK(minor),
880 + LV_BLK(minor), MODE_TO_STR(file->f_mode));
884 + /* get block device sector size as needed e.g. by fdisk */
885 + return put_user(lvm_sectsize(inode->i_rdev), (int *) arg);
888 /* return device size */
889 - P_IOCTL("%s -- lvm_blk_ioctl -- BLKGETSIZE: %u\n",
890 - lvm_name, lv_ptr->lv_size);
891 + P_IOCTL("BLKGETSIZE: %u\n", lv_ptr->lv_size);
892 if (put_user(lv_ptr->lv_size, (long *)arg))
896 /* flush buffer cache */
897 if (!capable(CAP_SYS_ADMIN)) return -EACCES;
899 - P_IOCTL("%s -- lvm_blk_ioctl -- BLKFLSBUF\n", lvm_name);
900 + P_IOCTL("BLKFLSBUF\n");
902 fsync_dev(inode->i_rdev);
903 invalidate_buffers(inode->i_rdev);
905 /* set read ahead for block device */
906 if (!capable(CAP_SYS_ADMIN)) return -EACCES;
908 - P_IOCTL("%s -- lvm_blk_ioctl -- BLKRASET: %ld sectors for %s\n",
909 - lvm_name, (long) arg, kdevname(inode->i_rdev));
910 + P_IOCTL("BLKRASET: %ld sectors for %s\n",
911 + (long) arg, kdevname(inode->i_rdev));
913 if ((long) arg < LVM_MIN_READ_AHEAD ||
914 (long) arg > LVM_MAX_READ_AHEAD)
915 @@ -935,14 +973,11 @@
918 /* get current read ahead setting */
919 - P_IOCTL("%s -- lvm_blk_ioctl -- BLKRAGET\n", lvm_name);
920 + P_IOCTL("BLKRAGET %d\n", lv_ptr->lv_read_ahead);
921 if (put_user(lv_ptr->lv_read_ahead, (long *)arg))
927 - return blk_ioctl (inode->i_rdev, command, a);
930 /* get disk geometry */
931 @@ -991,13 +1026,12 @@
935 - /* turn logical block into (dev_t, block). non privileged. */
936 + /* turn logical block into (dev_t, block). non privileged. */
937 /* don't bmap a snapshot, since the mapping can change */
938 - if (lv_ptr->lv_access & LV_SNAPSHOT)
939 + if(lv_ptr->lv_access & LV_SNAPSHOT)
942 return lvm_user_bmap(inode, (struct lv_bmap *) arg);
945 case LV_SET_ALLOCATION:
946 /* set allocation flags of a logical volume */
947 @@ -1028,13 +1062,9 @@
948 vg_t *vg_ptr = vg[VG_BLK(minor)];
949 lv_t *lv_ptr = vg_ptr->lv[LV_BLK(minor)];
953 - "%s -- lvm_blk_close MINOR: %d VG#: %d LV#: %d\n",
954 - lvm_name, minor, VG_BLK(minor), LV_BLK(minor));
956 + P_DEV("blk_close MINOR: %d VG#: %d LV#: %d\n",
957 + minor, VG_BLK(minor), LV_BLK(minor));
959 - sync_dev(inode->i_rdev);
960 if (lv_ptr->lv_open == 1) vg_ptr->lv_open--;
963 @@ -1086,17 +1116,17 @@
966 memset(&bh,0,sizeof bh);
967 - bh.b_rsector = block;
968 - bh.b_dev = bh.b_rdev = inode->i_dev;
969 + bh.b_blocknr = block;
970 + bh.b_dev = bh.b_rdev = inode->i_rdev;
971 bh.b_size = lvm_get_blksize(bh.b_dev);
972 + bh.b_rsector = block * (bh.b_size >> 9);
973 if ((err=lvm_map(&bh, READ)) < 0) {
974 printk("lvm map failed: %d\n", err);
978 - return put_user(kdev_t_to_nr(bh.b_rdev), &user_result->lv_dev) ||
979 - put_user(bh.b_rsector/(bh.b_size>>9), &user_result->lv_block) ?
981 + return (put_user(kdev_t_to_nr(bh.b_rdev), &user_result->lv_dev) ||
982 + put_user(bh.b_rsector/(bh.b_size>>9), &user_result->lv_block));
986 @@ -1104,16 +1134,68 @@
987 * block device support function for /usr/src/linux/drivers/block/ll_rw_blk.c
988 * (see init_module/lvm_init)
990 -static inline void __remap_snapshot(kdev_t rdev, ulong rsector,
991 +static void __remap_snapshot(kdev_t rdev, ulong rsector,
992 ulong pe_start, lv_t *lv, vg_t *vg) {
994 + /* copy a chunk from the origin to a snapshot device */
995 + down_write(&lv->lv_lock);
997 + /* we must redo lvm_snapshot_remap_block in order to avoid a
998 + race condition in the gap where no lock was held */
999 if (!lvm_snapshot_remap_block(&rdev, &rsector, pe_start, lv) &&
1000 !lvm_snapshot_COW(rdev, rsector, pe_start, rsector, vg, lv))
1001 lvm_write_COW_table_block(vg, lv);
1003 + up_write(&lv->lv_lock);
1006 +static inline void _remap_snapshot(kdev_t rdev, ulong rsector,
1007 + ulong pe_start, lv_t *lv, vg_t *vg) {
1010 + /* check to see if this chunk is already in the snapshot */
1011 + down_read(&lv->lv_lock);
1012 + r = lvm_snapshot_remap_block(&rdev, &rsector, pe_start, lv);
1013 + up_read(&lv->lv_lock);
1016 + /* we haven't yet copied this block to the snapshot */
1017 + __remap_snapshot(rdev, rsector, pe_start, lv, vg);
1022 + * extents destined for a pe that is on the move should be deferred
1024 +static inline int _should_defer(kdev_t pv, ulong sector, uint32_t pe_size) {
1025 + return ((pe_lock_req.lock == LOCK_PE) &&
1026 + (pv == pe_lock_req.data.pv_dev) &&
1027 + (sector >= pe_lock_req.data.pv_offset) &&
1028 + (sector < (pe_lock_req.data.pv_offset + pe_size)));
1031 +static inline int _defer_extent(struct buffer_head *bh, int rw,
1032 + kdev_t pv, ulong sector, uint32_t pe_size)
1034 + if (pe_lock_req.lock == LOCK_PE) {
1035 + down_read(&_pe_lock);
1036 + if (_should_defer(pv, sector, pe_size)) {
1037 + up_read(&_pe_lock);
1038 + down_write(&_pe_lock);
1039 + if (_should_defer(pv, sector, pe_size))
1040 + _queue_io(bh, rw);
1041 + up_write(&_pe_lock);
1044 + up_read(&_pe_lock);
1050 static int lvm_map(struct buffer_head *bh, int rw)
1052 - int minor = MINOR(bh->b_dev);
1053 + int minor = MINOR(bh->b_rdev);
1056 ulong size = bh->b_size >> 9;
1057 @@ -1124,7 +1206,7 @@
1058 lv_t *lv = vg_this->lv[LV_BLK(minor)];
1061 - down(&lv->lv_snapshot_sem);
1062 + down_read(&lv->lv_lock);
1063 if (!(lv->lv_status & LV_ACTIVE)) {
1065 "%s - lvm_map: ll_rw_blk for inactive LV %s\n",
1066 @@ -1142,7 +1224,7 @@
1068 P_MAP("%s - lvm_map minor: %d *rdev: %s *rsector: %lu size:%lu\n",
1070 - kdevname(bh->b_dev),
1071 + kdevname(bh->b_rdev),
1074 if (rsector_org + size > lv->lv_size) {
1075 @@ -1153,7 +1235,7 @@
1081 if (lv->lv_stripes < 2) { /* linear mapping */
1083 index = rsector_org / vg_this->pe_size;
1084 @@ -1190,36 +1272,33 @@
1085 rsector_map, stripe_length, stripe_index);
1088 - /* handle physical extents on the move */
1089 - if (pe_lock_req.lock == LOCK_PE) {
1090 - if (rdev_map == pe_lock_req.data.pv_dev &&
1091 - rsector_map >= pe_lock_req.data.pv_offset &&
1092 - rsector_map < (pe_lock_req.data.pv_offset +
1093 - vg_this->pe_size)) {
1094 - sleep_on(&lvm_map_wait);
1097 + * Queue writes to physical extents on the move until move completes.
1098 + * Don't get _pe_lock until there is a reasonable expectation that
1099 + * we need to queue this request, because this is in the fast path.
1101 + if (rw == WRITE || rw == WRITEA) {
1102 + if(_defer_extent(bh, rw, rdev_map,
1103 + rsector_map, vg_this->pe_size)) {
1105 + up_read(&lv->lv_lock);
1111 - if (rw == WRITE || rw == WRITEA)
1112 - lv->lv_current_pe[index].writes++;
1114 - lv->lv_current_pe[index].reads++;
1115 + lv->lv_current_pe[index].writes++; /* statistic */
1117 + lv->lv_current_pe[index].reads++; /* statistic */
1119 - /* snapshot volume exception handling on physical device
1121 + /* snapshot volume exception handling on physical device address base */
1122 if (!(lv->lv_access & (LV_SNAPSHOT|LV_SNAPSHOT_ORG)))
1125 if (lv->lv_access & LV_SNAPSHOT) { /* remap snapshot */
1126 - if (lv->lv_block_exception)
1127 - lvm_snapshot_remap_block(&rdev_map, &rsector_map,
1130 + if (lvm_snapshot_remap_block(&rdev_map, &rsector_map,
1131 + pe_start, lv) < 0)
1134 - } else if(rw == WRITE || rw == WRITEA) { /* snapshot origin */
1135 + } else if (rw == WRITE || rw == WRITEA) { /* snapshot origin */
1138 /* start with first snapshot and loop through all of
1139 @@ -1232,22 +1311,20 @@
1141 /* Serializes the COW with the accesses to the
1143 - down(&snap->lv_snapshot_sem);
1144 - __remap_snapshot(rdev_map, rsector_map,
1145 + _remap_snapshot(rdev_map, rsector_map,
1146 pe_start, snap, vg_this);
1147 - up(&snap->lv_snapshot_sem);
1152 bh->b_rdev = rdev_map;
1153 bh->b_rsector = rsector_map;
1154 - up(&lv->lv_snapshot_sem);
1155 + up_read(&lv->lv_lock);
1159 buffer_IO_error(bh);
1160 - up(&lv->lv_snapshot_sem);
1161 + up_read(&lv->lv_lock);
1165 @@ -1281,15 +1358,10 @@
1167 * make request function
1169 -static int lvm_make_request_fn(request_queue_t *q,
1171 - struct buffer_head *bh)
1173 - if (lvm_map(bh, rw) >= 0)
1176 - buffer_IO_error(bh);
1178 +static int lvm_make_request_fn(request_queue_t *q,
1180 + struct buffer_head *bh) {
1181 + return (lvm_map(bh, rw) <= 0) ? 0 : 1;
1185 @@ -1306,8 +1378,7 @@
1187 spin_lock(&lvm_lock);
1188 if (lock != 0 && lock != current->pid) {
1189 - P_IOCTL("lvm_do_lock_lvm: %s is locked by pid %d ...\n",
1191 + P_DEV("lvm_do_lock_lvm: locked by pid %d ...\n", lock);
1192 spin_unlock(&lvm_lock);
1193 interruptible_sleep_on(&lvm_wait);
1194 if (current->sigpending != 0)
1195 @@ -1319,6 +1390,7 @@
1196 goto lock_try_again;
1198 lock = current->pid;
1199 + P_DEV("lvm_do_lock_lvm: locking LVM for pid %d\n", lock);
1200 spin_unlock(&lvm_lock);
1202 } /* lvm_do_lock_lvm */
1203 @@ -1329,33 +1401,60 @@
1205 static int lvm_do_pe_lock_unlock(vg_t *vg_ptr, void *arg)
1207 + pe_lock_req_t new_lock;
1208 + struct buffer_head *bh;
1211 if (vg_ptr == NULL) return -ENXIO;
1212 - if (copy_from_user(&pe_lock_req, arg,
1213 - sizeof(pe_lock_req_t)) != 0) return -EFAULT;
1214 + if (copy_from_user(&new_lock, arg, sizeof(new_lock)) != 0)
1217 - switch (pe_lock_req.lock) {
1218 + switch (new_lock.lock) {
1220 for (p = 0; p < vg_ptr->pv_max; p++) {
1221 if (vg_ptr->pv[p] != NULL &&
1222 - pe_lock_req.data.pv_dev ==
1223 - vg_ptr->pv[p]->pv_dev)
1224 + new_lock.data.pv_dev == vg_ptr->pv[p]->pv_dev)
1227 if (p == vg_ptr->pv_max) return -ENXIO;
1229 - pe_lock_req.lock = UNLOCK_PE;
1231 + * this sync relieves memory pressure to lessen the
1232 + * likelihood of pvmove being paged out - resulting in
1235 + * This method of doing a pvmove is broken
1237 fsync_dev(pe_lock_req.data.lv_dev);
1239 + down_write(&_pe_lock);
1240 + if (pe_lock_req.lock == LOCK_PE) {
1241 + up_write(&_pe_lock);
1245 + /* Should we do to_kdev_t() on the pv_dev and lv_dev??? */
1246 pe_lock_req.lock = LOCK_PE;
1247 + pe_lock_req.data.lv_dev = new_lock.data.lv_dev;
1248 + pe_lock_req.data.pv_dev = new_lock.data.pv_dev;
1249 + pe_lock_req.data.pv_offset = new_lock.data.pv_offset;
1250 + up_write(&_pe_lock);
1252 + /* some requests may have got through since the fsync */
1253 + fsync_dev(pe_lock_req.data.pv_dev);
1257 + down_write(&_pe_lock);
1258 pe_lock_req.lock = UNLOCK_PE;
1259 - pe_lock_req.data.lv_dev = \
1260 - pe_lock_req.data.pv_dev = \
1261 + pe_lock_req.data.lv_dev = 0;
1262 + pe_lock_req.data.pv_dev = 0;
1263 pe_lock_req.data.pv_offset = 0;
1264 - wake_up(&lvm_map_wait);
1265 + bh = _dequeue_io();
1266 + up_write(&_pe_lock);
1268 + /* handle all deferred io for this PE */
1273 @@ -1423,6 +1522,8 @@
1275 /* get the volume group structure */
1276 if (copy_from_user(vg_ptr, arg, sizeof(vg_t)) != 0) {
1277 + P_IOCTL("lvm_do_vg_create ERROR: copy VG ptr %p (%d bytes)\n",
1278 + arg, sizeof(vg_t));
1282 @@ -1432,8 +1533,9 @@
1285 if (vg[VG_CHR(minor)] != NULL) {
1288 + P_IOCTL("lvm_do_vg_create ERROR: VG %d in use\n", minor);
1293 /* we are not that active so far... */
1294 @@ -1464,6 +1566,7 @@
1295 /* get the physical volume structures */
1296 vg_ptr->pv_act = vg_ptr->pv_cur = 0;
1297 for (p = 0; p < vg_ptr->pv_max; p++) {
1299 /* user space address */
1300 if ((pvp = vg_ptr->pv[p]) != NULL) {
1301 ret = lvm_do_pv_create(pvp, vg_ptr, p);
1302 @@ -1487,9 +1590,12 @@
1303 /* get the logical volume structures */
1305 for (l = 0; l < vg_ptr->lv_max; l++) {
1307 /* user space address */
1308 if ((lvp = vg_ptr->lv[l]) != NULL) {
1309 if (copy_from_user(&lv, lvp, sizeof(lv_t)) != 0) {
1310 + P_IOCTL("ERROR: copying LV ptr %p (%d bytes)\n",
1311 + lvp, sizeof(lv_t));
1312 lvm_do_vg_remove(minor);
1315 @@ -1511,7 +1617,7 @@
1316 /* Second path to correct snapshot logical volumes which are not
1317 in place during first path above */
1318 for (l = 0; l < ls; l++) {
1319 - lvp = snap_lv_ptr[l];
1320 + lv_t *lvp = snap_lv_ptr[l];
1321 if (copy_from_user(&lv, lvp, sizeof(lv_t)) != 0) {
1322 lvm_do_vg_remove(minor);
1324 @@ -1703,27 +1809,41 @@
1325 * character device support function physical volume create
1327 static int lvm_do_pv_create(pv_t *pvp, vg_t *vg_ptr, ulong p) {
1328 - pv_t *pv_ptr = NULL;
1332 - pv_ptr = vg_ptr->pv[p] = kmalloc(sizeof(pv_t),GFP_KERNEL);
1333 - if (pv_ptr == NULL) {
1334 + pv = kmalloc(sizeof(pv_t),GFP_KERNEL);
1337 "%s -- PV_CREATE: kmalloc error PV at line %d\n",
1338 lvm_name, __LINE__);
1341 - if (copy_from_user(pv_ptr, pvp, sizeof(pv_t)) != 0) {
1343 + memset(pv, 0, sizeof(*pv));
1345 + if (copy_from_user(pv, pvp, sizeof(pv_t)) != 0) {
1346 + P_IOCTL("lvm_do_pv_create ERROR: copy PV ptr %p (%d bytes)\n",
1347 + pvp, sizeof(pv_t));
1352 + if ((err = _open_pv(pv))) {
1357 /* We don't need the PE list
1358 in kernel space as with LVs pe_t list (see below) */
1359 - pv_ptr->pe = NULL;
1360 - pv_ptr->pe_allocated = 0;
1361 - pv_ptr->pv_status = PV_ACTIVE;
1363 + pv->pe_allocated = 0;
1364 + pv->pv_status = PV_ACTIVE;
1367 - lvm_fs_create_pv(vg_ptr, pv_ptr);
1368 + lvm_fs_create_pv(vg_ptr, pv);
1370 + vg_ptr->pv[p] = pv;
1372 } /* lvm_do_pv_create() */
1374 @@ -1732,47 +1852,73 @@
1375 * character device support function physical volume remove
1377 static int lvm_do_pv_remove(vg_t *vg_ptr, ulong p) {
1378 - pv_t *pv_ptr = vg_ptr->pv[p];
1379 + pv_t *pv = vg_ptr->pv[p];
1381 - lvm_fs_remove_pv(vg_ptr, pv_ptr);
1382 + lvm_fs_remove_pv(vg_ptr, pv);
1384 - vg_ptr->pe_total -= pv_ptr->pe_total;
1385 + vg_ptr->pe_total -= pv->pe_total;
1388 -#ifdef LVM_GET_INODE
1389 - lvm_clear_inode(pv_ptr->inode);
1396 vg_ptr->pv[p] = NULL;
1402 -static void __update_hardsectsize(lv_t *lv) {
1404 - int max_hardsectsize = 0, hardsectsize;
1406 - for (le = 0; le < lv->lv_allocated_le; le++) {
1407 - hardsectsize = get_hardsect_size(lv->lv_current_pe[le].dev);
1408 - if (hardsectsize == 0)
1409 - hardsectsize = 512;
1410 - if (hardsectsize > max_hardsectsize)
1411 - max_hardsectsize = hardsectsize;
1414 - if (lv->lv_access & LV_SNAPSHOT) {
1415 - for (e = 0; e < lv->lv_remap_end; e++) {
1417 - get_hardsect_size(
1418 - lv->lv_block_exception[e].rdev_new);
1419 - if (hardsectsize == 0)
1420 - hardsectsize = 512;
1421 - if (hardsectsize > max_hardsectsize)
1422 +static void __update_hardsectsize(lv_t *lv)
1424 + int max_hardsectsize = 0, hardsectsize = 0;
1427 + /* Check PVs first to see if they all have same sector size */
1428 + for (p = 0; p < lv->vg->pv_cur; p++) {
1429 + pv_t *pv = lv->vg->pv[p];
1430 + if (pv && (hardsectsize = lvm_sectsize(pv->pv_dev))) {
1431 + if (max_hardsectsize == 0)
1432 max_hardsectsize = hardsectsize;
1433 + else if (hardsectsize != max_hardsectsize) {
1434 + P_DEV("%s PV[%d] (%s) sector size %d, not %d\n",
1435 + lv->lv_name, p, kdevname(pv->pv_dev),
1436 + hardsectsize, max_hardsectsize);
1442 + /* PVs have different block size, need to check each LE sector size */
1443 + if (hardsectsize != max_hardsectsize) {
1445 + for (le = 0; le < lv->lv_allocated_le; le++) {
1446 + hardsectsize = lvm_sectsize(lv->lv_current_pe[le].dev);
1447 + if (hardsectsize > max_hardsectsize) {
1448 + P_DEV("%s LE[%d] (%s) blocksize %d not %d\n",
1450 + kdevname(lv->lv_current_pe[le].dev),
1451 + hardsectsize, max_hardsectsize);
1452 + max_hardsectsize = hardsectsize;
1456 + /* only perform this operation on active snapshots */
1457 + if ((lv->lv_access & LV_SNAPSHOT) &&
1458 + (lv->lv_status & LV_ACTIVE)) {
1460 + for (e = 0; e < lv->lv_remap_end; e++) {
1461 + hardsectsize = lvm_sectsize(lv->lv_block_exception[e].rdev_new);
1462 + if (hardsectsize > max_hardsectsize)
1463 + max_hardsectsize = hardsectsize;
1468 + if (max_hardsectsize == 0)
1469 + max_hardsectsize = SECTOR_SIZE;
1470 + P_DEV("hardblocksize for LV %s is %d\n",
1471 + kdevname(lv->lv_dev), max_hardsectsize);
1472 lvm_hardsectsizes[MINOR(lv->lv_dev)] = max_hardsectsize;
1475 @@ -1786,9 +1932,12 @@
1476 lv_block_exception_t *lvbe = lv->lv_block_exception;
1477 vg_t *vg_ptr = vg[VG_CHR(minor)];
1478 lv_t *lv_ptr = NULL;
1481 - if ((pep = lv->lv_current_pe) == NULL) return -EINVAL;
1482 - if (lv->lv_chunk_size > LVM_SNAPSHOT_MAX_CHUNK)
1483 + if (!(pep = lv->lv_current_pe))
1486 + if (_sectors_to_k(lv->lv_chunk_size) > LVM_SNAPSHOT_MAX_CHUNK)
1489 for (l = 0; l < vg_ptr->lv_cur; l++) {
1490 @@ -1820,8 +1969,8 @@
1492 lv_status_save = lv_ptr->lv_status;
1493 lv_ptr->lv_status &= ~LV_ACTIVE;
1494 - lv_ptr->lv_snapshot_org = \
1495 - lv_ptr->lv_snapshot_prev = \
1496 + lv_ptr->lv_snapshot_org = NULL;
1497 + lv_ptr->lv_snapshot_prev = NULL;
1498 lv_ptr->lv_snapshot_next = NULL;
1499 lv_ptr->lv_block_exception = NULL;
1500 lv_ptr->lv_iobuf = NULL;
1501 @@ -1829,7 +1978,8 @@
1502 lv_ptr->lv_snapshot_hash_table = NULL;
1503 lv_ptr->lv_snapshot_hash_table_size = 0;
1504 lv_ptr->lv_snapshot_hash_mask = 0;
1505 - init_MUTEX(&lv_ptr->lv_snapshot_sem);
1506 + init_rwsem(&lv_ptr->lv_lock);
1508 lv_ptr->lv_snapshot_use_rate = 0;
1510 vg_ptr->lv[l] = lv_ptr;
1511 @@ -1838,6 +1988,7 @@
1512 is not a snapshot logical volume */
1513 if (!(lv_ptr->lv_access & LV_SNAPSHOT)) {
1514 size = lv_ptr->lv_allocated_le * sizeof(pe_t);
1516 if ((lv_ptr->lv_current_pe = vmalloc(size)) == NULL) {
1518 "%s -- LV_CREATE: vmalloc error LV_CURRENT_PE of %d Byte "
1519 @@ -1849,6 +2000,8 @@
1522 if (copy_from_user(lv_ptr->lv_current_pe, pep, size)) {
1523 + P_IOCTL("ERROR: copying PE ptr %p (%d bytes)\n",
1524 + pep, sizeof(size));
1525 vfree(lv_ptr->lv_current_pe);
1527 vg_ptr->lv[l] = NULL;
1528 @@ -1870,6 +2023,15 @@
1529 vg_ptr->lv[LV_BLK(lv_ptr->lv_snapshot_minor)];
1530 if (lv_ptr->lv_snapshot_org != NULL) {
1531 size = lv_ptr->lv_remap_end * sizeof(lv_block_exception_t);
1534 + printk(KERN_WARNING
1535 + "%s -- zero length exception table requested\n",
1541 if ((lv_ptr->lv_block_exception = vmalloc(size)) == NULL) {
1543 "%s -- lvm_do_lv_create: vmalloc error LV_BLOCK_EXCEPTION "
1544 @@ -1957,6 +2119,7 @@
1545 LVM_CORRECT_READ_AHEAD(lv_ptr->lv_read_ahead);
1547 lv_ptr->lv_status = lv_status_save;
1548 + lv_ptr->vg = vg_ptr;
1550 __update_hardsectsize(lv_ptr);
1552 @@ -1971,7 +2134,7 @@
1553 fsync_dev_lockfs(org->lv_dev);
1556 - down(&org->lv_snapshot_sem);
1557 + down_write(&org->lv_lock);
1558 org->lv_access |= LV_SNAPSHOT_ORG;
1559 lv_ptr->lv_access &= ~LV_SNAPSHOT_ORG; /* this can only hide an userspace bug */
1561 @@ -1980,7 +2143,7 @@
1562 for (last = org; last->lv_snapshot_next; last = last->lv_snapshot_next);
1563 lv_ptr->lv_snapshot_prev = last;
1564 last->lv_snapshot_next = lv_ptr;
1565 - up(&org->lv_snapshot_sem);
1566 + up_write(&org->lv_lock);
1569 /* activate the logical volume */
1570 @@ -1996,14 +2159,12 @@
1572 #ifdef LVM_VFS_ENHANCEMENT
1573 /* VFS function call to unlock the filesystem */
1574 - if (lv_ptr->lv_access & LV_SNAPSHOT) {
1575 + if (lv_ptr->lv_access & LV_SNAPSHOT)
1576 unlockfs(lv_ptr->lv_snapshot_org->lv_dev);
1580 - lv_ptr->vg = vg_ptr;
1582 - lvm_fs_create_lv(vg_ptr, lv_ptr);
1583 + lvm_gendisk.part[MINOR(lv_ptr->lv_dev)].de =
1584 + lvm_fs_create_lv(vg_ptr, lv_ptr);
1586 } /* lvm_do_lv_create() */
1588 @@ -2049,7 +2210,7 @@
1589 * to the original lv before playing with it.
1591 lv_t * org = lv_ptr->lv_snapshot_org;
1592 - down(&org->lv_snapshot_sem);
1593 + down_write(&org->lv_lock);
1595 /* remove this snapshot logical volume from the chain */
1596 lv_ptr->lv_snapshot_prev->lv_snapshot_next = lv_ptr->lv_snapshot_next;
1597 @@ -2062,7 +2223,7 @@
1598 if (!org->lv_snapshot_next) {
1599 org->lv_access &= ~LV_SNAPSHOT_ORG;
1601 - up(&org->lv_snapshot_sem);
1602 + up_write(&org->lv_lock);
1604 lvm_snapshot_release(lv_ptr);
1606 @@ -2083,6 +2244,7 @@
1607 /* reset generic hd */
1608 lvm_gendisk.part[MINOR(lv_ptr->lv_dev)].start_sect = -1;
1609 lvm_gendisk.part[MINOR(lv_ptr->lv_dev)].nr_sects = 0;
1610 + lvm_gendisk.part[MINOR(lv_ptr->lv_dev)].de = 0;
1611 lvm_size[MINOR(lv_ptr->lv_dev)] = 0;
1613 /* reset VG/LV mapping */
1614 @@ -2214,8 +2376,7 @@
1615 new_stripe_size = new_lv->lv_allocated_le / new_lv->lv_stripes;
1616 end = min(old_stripe_size, new_stripe_size);
1618 - for (i = source = dest = 0;
1619 - i < new_lv->lv_stripes; i++) {
1620 + for (i = source = dest = 0; i < new_lv->lv_stripes; i++) {
1621 for (j = 0; j < end; j++) {
1622 new_lv->lv_current_pe[dest + j].reads +=
1623 old_lv->lv_current_pe[source + j].reads;
1624 @@ -2250,23 +2411,27 @@
1626 old_lv = vg_ptr->lv[l];
1628 - if (old_lv->lv_access & LV_SNAPSHOT)
1629 - r = __extend_reduce_snapshot(vg_ptr, old_lv, new_lv);
1631 + if (old_lv->lv_access & LV_SNAPSHOT) {
1632 + /* only perform this operation on active snapshots */
1633 + if (old_lv->lv_status & LV_ACTIVE)
1634 + r = __extend_reduce_snapshot(vg_ptr, old_lv, new_lv);
1639 r = __extend_reduce(vg_ptr, old_lv, new_lv);
1644 /* copy relevent fields */
1645 - down(&old_lv->lv_snapshot_sem);
1646 + down_write(&old_lv->lv_lock);
1648 if(new_lv->lv_access & LV_SNAPSHOT) {
1650 size = (new_lv->lv_remap_end > old_lv->lv_remap_end) ?
1651 old_lv->lv_remap_ptr : new_lv->lv_remap_end;
1652 size *= sizeof(lv_block_exception_t);
1653 - memcpy(new_lv->lv_block_exception,
1654 + memcpy(new_lv->lv_block_exception,
1655 old_lv->lv_block_exception, size);
1657 old_lv->lv_remap_end = new_lv->lv_remap_end;
1658 @@ -2281,7 +2446,7 @@
1659 for (e = 0; e < new_lv->lv_remap_ptr; e++)
1660 lvm_hash_link(new_lv->lv_block_exception + e,
1661 new_lv->lv_block_exception[e].rdev_org,
1662 - new_lv->lv_block_exception[e].rsector_org,
1663 + new_lv->lv_block_exception[e].rsector_org,
1667 @@ -2301,7 +2466,7 @@
1669 for(snap = old_lv->lv_snapshot_next; snap;
1670 snap = snap->lv_snapshot_next) {
1671 - down(&snap->lv_snapshot_sem);
1672 + down_write(&snap->lv_lock);
1673 snap->lv_current_pe = old_lv->lv_current_pe;
1674 snap->lv_allocated_le =
1675 old_lv->lv_allocated_le;
1676 @@ -2313,13 +2478,13 @@
1677 lvm_size[MINOR(snap->lv_dev)] =
1678 old_lv->lv_size >> 1;
1679 __update_hardsectsize(snap);
1680 - up(&snap->lv_snapshot_sem);
1681 + up_write(&snap->lv_lock);
1686 __update_hardsectsize(old_lv);
1687 - up(&old_lv->lv_snapshot_sem);
1688 + up_write(&old_lv->lv_lock);
1691 } /* lvm_do_lv_extend_reduce() */
1692 @@ -2348,8 +2513,10 @@
1693 strcmp(lv_ptr->lv_name,
1694 lv_status_byname_req.lv_name) == 0) {
1695 /* Save usermode pointers */
1696 - saved_ptr1 = lv_status_byname_req.lv->lv_current_pe;
1697 - saved_ptr2 = lv_status_byname_req.lv->lv_block_exception;
1698 + if (copy_from_user(&saved_ptr1, &lv_status_byname_req.lv->lv_current_pe, sizeof(void*)) != 0)
1700 + if (copy_from_user(&saved_ptr2, &lv_status_byname_req.lv->lv_block_exception, sizeof(void*)) != 0)
1702 if (copy_to_user(lv_status_byname_req.lv,
1705 @@ -2362,7 +2529,8 @@
1708 /* Restore usermode pointers */
1709 - lv_status_byname_req.lv->lv_current_pe = saved_ptr1;
1710 + if (copy_to_user(&lv_status_byname_req.lv->lv_current_pe, &saved_ptr1, sizeof(void*)) != 0)
1715 @@ -2391,8 +2559,11 @@
1718 /* Save usermode pointers */
1719 - saved_ptr1 = lv_status_byindex_req.lv->lv_current_pe;
1720 - saved_ptr2 = lv_status_byindex_req.lv->lv_block_exception;
1721 + if (copy_from_user(&saved_ptr1, &lv_status_byindex_req.lv->lv_current_pe, sizeof(void*)) != 0)
1723 + if (copy_from_user(&saved_ptr2, &lv_status_byindex_req.lv->lv_block_exception, sizeof(void*)) != 0)
1726 if (copy_to_user(lv_status_byindex_req.lv, lv_ptr, sizeof(lv_t)) != 0)
1728 if (saved_ptr1 != NULL) {
1729 @@ -2404,7 +2575,8 @@
1732 /* Restore usermode pointers */
1733 - lv_status_byindex_req.lv->lv_current_pe = saved_ptr1;
1734 + if (copy_to_user(&lv_status_byindex_req.lv->lv_current_pe, &saved_ptr1, sizeof(void *)) != 0)
1738 } /* lvm_do_lv_status_byindex() */
1739 @@ -2434,8 +2606,10 @@
1740 lv_ptr = vg_ptr->lv[l];
1742 /* Save usermode pointers */
1743 - saved_ptr1 = lv_status_bydev_req.lv->lv_current_pe;
1744 - saved_ptr2 = lv_status_bydev_req.lv->lv_block_exception;
1745 + if (copy_from_user(&saved_ptr1, &lv_status_bydev_req.lv->lv_current_pe, sizeof(void*)) != 0)
1747 + if (copy_from_user(&saved_ptr2, &lv_status_bydev_req.lv->lv_block_exception, sizeof(void*)) != 0)
1750 if (copy_to_user(lv_status_bydev_req.lv, lv_ptr, sizeof(lv_t)) != 0)
1752 @@ -2447,7 +2621,8 @@
1755 /* Restore usermode pointers */
1756 - lv_status_bydev_req.lv->lv_current_pe = saved_ptr1;
1757 + if (copy_to_user(&lv_status_bydev_req.lv->lv_current_pe, &saved_ptr1, sizeof(void *)) != 0)
1761 } /* lvm_do_lv_status_bydev() */
1762 @@ -2468,9 +2643,7 @@
1763 if (lv_ptr->lv_dev == lv->lv_dev)
1765 lvm_fs_remove_lv(vg_ptr, lv_ptr);
1766 - strncpy(lv_ptr->lv_name,
1769 + strncpy(lv_ptr->lv_name, lv_req->lv_name, NAME_LEN);
1770 lvm_fs_create_lv(vg_ptr, lv_ptr);
1773 @@ -2488,9 +2661,7 @@
1777 -#ifdef LVM_GET_INODE
1778 - struct inode *inode_sav;
1780 + struct block_device *bd;
1782 if (vg_ptr == NULL) return -ENXIO;
1783 if (copy_from_user(&pv_change_req, arg,
1784 @@ -2502,20 +2673,17 @@
1785 if (pv_ptr != NULL &&
1786 strcmp(pv_ptr->pv_name,
1787 pv_change_req.pv_name) == 0) {
1788 -#ifdef LVM_GET_INODE
1789 - inode_sav = pv_ptr->inode;
1793 if (copy_from_user(pv_ptr,
1799 /* We don't need the PE list
1800 in kernel space as with LVs pe_t list */
1802 -#ifdef LVM_GET_INODE
1803 - pv_ptr->inode = inode_sav;
1808 @@ -2558,8 +2726,7 @@
1810 pv_flush_req_t pv_flush_req;
1812 - if (copy_from_user(&pv_flush_req, arg,
1813 - sizeof(pv_flush_req)) != 0)
1814 + if (copy_from_user(&pv_flush_req, arg, sizeof(pv_flush_req)) != 0)
1817 fsync_dev(pv_flush_req.pv_dev);
1818 @@ -2594,5 +2761,82 @@
1819 } /* lvm_gen_init() */
1823 +/* Must have down_write(_pe_lock) when we enqueue buffers */
1824 +static void _queue_io(struct buffer_head *bh, int rw) {
1825 + if (bh->b_reqnext) BUG();
1826 + bh->b_reqnext = _pe_requests;
1827 + _pe_requests = bh;
1830 +/* Must have down_write(_pe_lock) when we dequeue buffers */
1831 +static struct buffer_head *_dequeue_io(void)
1833 + struct buffer_head *bh = _pe_requests;
1834 + _pe_requests = NULL;
1839 + * We do not need to hold _pe_lock to flush buffers. bh should be taken from
1840 + * _pe_requests under down_write(_pe_lock), and then _pe_requests can be set
1841 + * NULL and we drop _pe_lock. Any new buffers deferred at this time will be
1842 + * added to a new list, and the old buffers can have their I/O restarted
1845 + * If, for some reason, the same PE is locked again before all of these writes
1846 + * have finished, then these buffers will just be re-queued (i.e. no danger).
1848 +static void _flush_io(struct buffer_head *bh)
1851 + struct buffer_head *next = bh->b_reqnext;
1852 + bh->b_reqnext = NULL;
1853 + /* resubmit this buffer head */
1854 + generic_make_request(WRITE, bh);
1861 + * we must open the PVs before we use them
1863 +static int _open_pv(pv_t *pv) {
1865 + struct block_device *bd;
1867 + if (!(bd = bdget(kdev_t_to_nr(pv->pv_dev))))
1870 + err = blkdev_get(bd, FMODE_READ|FMODE_WRITE, 0, BDEV_FILE);
1880 +static void _close_pv(pv_t *pv) {
1881 + if(!pv || !pv->bd)
1884 + blkdev_put(pv->bd, BDEV_FILE);
1890 +static unsigned long _sectors_to_k(unsigned long sect)
1892 + if(SECTOR_SIZE > 1024) {
1893 + return sect * (SECTOR_SIZE / 1024);
1896 + return sect / (1024 / SECTOR_SIZE);
1899 module_init(lvm_init);
1900 module_exit(lvm_cleanup);
1901 --- linux/drivers/md/lvm-internal.h.orig Sat Oct 20 20:50:29 2001
1902 +++ linux/drivers/md/lvm-internal.h Sat Oct 20 20:36:55 2001
1906 * kernel/lvm_internal.h
1910 /* global variables, defined in lvm.c */
1911 extern char *lvm_version;
1912 -extern char *lvm_short_version;
1913 extern ushort lvm_iop_version;
1914 extern int loadtime;
1915 extern const char *const lvm_name;
1918 +extern uint vg_count;
1920 extern struct file_operations lvm_chr_fops;
1922 extern struct block_device_operations lvm_blk_dops;
1924 +#define lvm_sectsize(dev) get_hardsect_size(dev)
1926 +/* 2.4.8 had no global min/max macros, and 2.4.9's were flawed */
1929 +#define min(x,y) ({ \
1930 + const typeof(x) _x = (x); \
1931 + const typeof(y) _y = (y); \
1932 + (void) (&_x == &_y); \
1933 + _x < _y ? _x : _y; })
1936 +#define max(x,y) ({ \
1937 + const typeof(x) _x = (x); \
1938 + const typeof(y) _y = (y); \
1939 + (void) (&_x == &_y); \
1940 + _x > _y ? _x : _y; })
1947 void lvm_fs_create_vg(vg_t *vg_ptr);
1948 void lvm_fs_remove_vg(vg_t *vg_ptr);
1949 -void lvm_fs_create_lv(vg_t *vg_ptr, lv_t *lv);
1950 +devfs_handle_t lvm_fs_create_lv(vg_t *vg_ptr, lv_t *lv);
1951 void lvm_fs_remove_lv(vg_t *vg_ptr, lv_t *lv);
1952 void lvm_fs_create_pv(vg_t *vg_ptr, pv_t *pv);
1953 void lvm_fs_remove_pv(vg_t *vg_ptr, pv_t *pv);
1954 --- linux/drivers/md/lvm-snap.c.orig Sat Oct 20 20:50:29 2001
1955 +++ linux/drivers/md/lvm-snap.c Sat Oct 20 20:36:55 2001
1957 * o pv number is returned in new uint * arg
1958 * o -1 returned on error
1959 * lvm_snapshot_fill_COW_table has a return value too.
1960 + * 15/10/2001 - fix snapshot alignment problem [CM]
1961 + * - fix snapshot full oops (always check lv_block_exception) [CM]
1966 #include <linux/types.h>
1967 #include <linux/iobuf.h>
1968 #include <linux/lvm.h>
1969 +#include <linux/devfs_fs_kernel.h>
1972 #include "lvm-internal.h"
1973 @@ -140,10 +143,20 @@
1974 unsigned long mask = lv->lv_snapshot_hash_mask;
1975 int chunk_size = lv->lv_chunk_size;
1979 hash_table = &hash_table[hashfn(org_dev, org_start, mask, chunk_size)];
1980 list_add(&exception->hash, hash_table);
1984 + * Determine if we already have a snapshot chunk for this block.
1985 + * Return: 1 if the chunk already exists
1986 + * 0 if we need to COW this block and allocate a new chunk
1987 + * -1 if the snapshot was disabled because it ran out of space
1989 + * We need to be holding at least a read lock on lv->lv_lock.
1991 int lvm_snapshot_remap_block(kdev_t * org_dev, unsigned long * org_sector,
1992 unsigned long pe_start, lv_t * lv)
1995 int chunk_size = lv->lv_chunk_size;
1996 lv_block_exception_t * exception;
1998 + if (!lv->lv_block_exception)
2001 pe_off = pe_start % chunk_size;
2002 pe_adjustment = (*org_sector-pe_off) % chunk_size;
2003 __org_start = *org_sector - pe_adjustment;
2004 @@ -196,19 +212,25 @@
2008 -static inline void lvm_snapshot_prepare_blocks(unsigned long * blocks,
2009 +static inline int lvm_snapshot_prepare_blocks(unsigned long *blocks,
2010 unsigned long start,
2014 int i, sectors_per_block, nr_blocks;
2016 - sectors_per_block = blocksize >> 9;
2017 + sectors_per_block = blocksize / SECTOR_SIZE;
2019 + if(start & (sectors_per_block - 1))
2022 nr_blocks = nr_sectors / sectors_per_block;
2023 start /= sectors_per_block;
2025 for (i = 0; i < nr_blocks; i++)
2026 blocks[i] = start++;
2031 inline int lvm_get_blksize(kdev_t dev)
2035 * writes a COW exception table sector to disk (HM)
2037 + * We need to hold a write lock on lv_snap->lv_lock.
2039 int lvm_write_COW_table_block(vg_t * vg, lv_t *lv_snap)
2041 @@ -309,6 +333,10 @@
2042 * if there is no exception storage space free any longer --> release snapshot.
2044 * this routine gets called for each _first_ write to a physical chunk.
2046 + * We need to hold a write lock on lv_snap->lv_lock. It is assumed that
2047 + * lv->lv_block_exception is non-NULL (checked by lvm_snapshot_remap_block())
2048 + * when this function is called.
2050 int lvm_snapshot_COW(kdev_t org_phys_dev,
2051 unsigned long org_phys_sector,
2054 const char * reason;
2055 unsigned long org_start, snap_start, snap_phys_dev, virt_start, pe_off;
2056 + unsigned long phys_start;
2057 int idx = lv_snap->lv_remap_ptr, chunk_size = lv_snap->lv_chunk_size;
2058 struct kiobuf * iobuf;
2059 unsigned long blocks[KIO_MAX_SECTORS];
2062 iobuf = lv_snap->lv_iobuf;
2064 - blksize_org = lvm_get_blksize(org_phys_dev);
2065 - blksize_snap = lvm_get_blksize(snap_phys_dev);
2066 + blksize_org = lvm_sectsize(org_phys_dev);
2067 + blksize_snap = lvm_sectsize(snap_phys_dev);
2068 max_blksize = max(blksize_org, blksize_snap);
2069 min_blksize = min(blksize_org, blksize_snap);
2070 max_sectors = KIO_MAX_SECTORS * (min_blksize>>9);
2072 if (chunk_size % (max_blksize>>9))
2075 + /* Don't change org_start, we need it to fill in the exception table */
2076 + phys_start = org_start;
2080 nr_sectors = min(chunk_size, max_sectors);
2081 @@ -368,17 +400,24 @@
2083 iobuf->length = nr_sectors << 9;
2085 - lvm_snapshot_prepare_blocks(blocks, org_start,
2086 - nr_sectors, blksize_org);
2087 + if (!lvm_snapshot_prepare_blocks(blocks, phys_start,
2088 + nr_sectors, blksize_org))
2089 + goto fail_prepare;
2091 if (__brw_kiovec(READ, 1, &iobuf, org_phys_dev, blocks,
2092 blksize_org, lv_snap) != (nr_sectors<<9))
2095 - lvm_snapshot_prepare_blocks(blocks, snap_start,
2096 - nr_sectors, blksize_snap);
2097 + if (!lvm_snapshot_prepare_blocks(blocks, snap_start,
2098 + nr_sectors, blksize_snap))
2099 + goto fail_prepare;
2101 if (__brw_kiovec(WRITE, 1, &iobuf, snap_phys_dev, blocks,
2102 blksize_snap, lv_snap) != (nr_sectors<<9))
2103 goto fail_raw_write;
2105 + phys_start += nr_sectors;
2106 + snap_start += nr_sectors;
2109 #ifdef DEBUG_SNAPSHOT
2110 @@ -418,6 +457,11 @@
2112 reason = "blocksize error";
2116 + reason = "couldn't prepare kiovec blocks "
2117 + "(start probably isn't block aligned)";
2121 int lvm_snapshot_alloc_iobuf_pages(struct kiobuf * iobuf, int sectors)
2123 snap_phys_dev = lv_snap->lv_block_exception[idx].rdev_new;
2124 snap_pe_start = lv_snap->lv_block_exception[idx - (idx % COW_entries_per_pe)].rsector_new - lv_snap->lv_chunk_size;
2126 - blksize_snap = lvm_get_blksize(snap_phys_dev);
2127 + blksize_snap = lvm_sectsize(snap_phys_dev);
2129 COW_entries_per_block = blksize_snap / sizeof(lv_COW_table_disk_t);
2130 idx_COW_table = idx % COW_entries_per_pe % COW_entries_per_block;
2133 snap_phys_dev = lv_snap->lv_block_exception[idx].rdev_new;
2134 snap_pe_start = lv_snap->lv_block_exception[idx - (idx % COW_entries_per_pe)].rsector_new - lv_snap->lv_chunk_size;
2135 - blksize_snap = lvm_get_blksize(snap_phys_dev);
2136 + blksize_snap = lvm_sectsize(snap_phys_dev);
2137 blocks[0] = snap_pe_start >> (blksize_snap >> 10);
2140 --- linux/drivers/md/lvm-fs.c.orig Sat Oct 20 20:50:29 2001
2141 +++ linux/drivers/md/lvm-fs.c Sat Oct 20 20:36:55 2001
2144 * Copyright (C) 2001 Sistina Software
2146 - * January,February 2001
2147 + * January-April 2001
2149 * LVM driver is free software; you can redistribute it and/or modify
2150 * it under the terms of the GNU General Public License as published by
2154 * 11/01/2001 - First version (Joe Thornber)
2155 + * 21/03/2001 - added display of stripes and stripe size (HM)
2156 + * 04/10/2001 - corrected devfs_register() call in lvm_init_fs()
2157 + * 11/04/2001 - don't devfs_register("lvm") as user-space always does it
2158 + * 10/05/2001 - show more of PV name in /proc/lvm/global
2164 #include <linux/devfs_fs_kernel.h>
2165 #include <linux/proc_fs.h>
2166 +#include <linux/init.h>
2167 #include <linux/lvm.h>
2169 #include "lvm-internal.h"
2172 static void _show_uuid(const char *src, char *b, char *e);
2175 static devfs_handle_t lvm_devfs_handle;
2177 static devfs_handle_t vg_devfs_handle[MAX_VG];
2178 static devfs_handle_t ch_devfs_handle[MAX_VG];
2179 static devfs_handle_t lv_devfs_handle[MAX_LV];
2181 /* inline functions */
2183 /* public interface */
2184 -void lvm_init_fs() {
2185 +void __init lvm_init_fs() {
2186 struct proc_dir_entry *pde;
2188 +/* User-space has already registered this */
2190 lvm_devfs_handle = devfs_register(
2191 - 0 , "lvm", 0, 0, LVM_CHAR_MAJOR,
2192 + 0 , "lvm", 0, LVM_CHAR_MAJOR, 0,
2193 S_IFCHR | S_IRUSR | S_IWUSR | S_IRGRP,
2194 &lvm_chr_fops, NULL);
2197 lvm_proc_dir = create_proc_entry(LVM_DIR, S_IFDIR, &proc_root);
2199 lvm_proc_vg_subdir = create_proc_entry(LVM_VG_SUBDIR, S_IFDIR,
2205 devfs_unregister (lvm_devfs_handle);
2208 remove_proc_entry(LVM_GLOBAL, lvm_proc_dir);
2209 remove_proc_entry(LVM_VG_SUBDIR, lvm_proc_dir);
2210 remove_proc_entry(LVM_DIR, &proc_root);
2211 @@ -137,8 +147,14 @@
2213 if(vg_ptr->vg_dir_pde) {
2214 remove_proc_entry(LVM_LV_SUBDIR, vg_ptr->vg_dir_pde);
2215 + vg_ptr->lv_subdir_pde = NULL;
2217 remove_proc_entry(LVM_PV_SUBDIR, vg_ptr->vg_dir_pde);
2218 + vg_ptr->pv_subdir_pde = NULL;
2220 remove_proc_entry("group", vg_ptr->vg_dir_pde);
2221 + vg_ptr->vg_dir_pde = NULL;
2223 remove_proc_entry(vg_ptr->vg_name, lvm_proc_vg_subdir);
2230 -void lvm_fs_create_lv(vg_t *vg_ptr, lv_t *lv) {
2231 +devfs_handle_t lvm_fs_create_lv(vg_t *vg_ptr, lv_t *lv) {
2232 struct proc_dir_entry *pde;
2233 const char *name = _basename(lv->lv_name);
2236 pde->read_proc = _proc_read_lv;
2239 + return lv_devfs_handle[MINOR(lv->lv_dev)];
2242 void lvm_fs_remove_lv(vg_t *vg_ptr, lv_t *lv) {
2243 @@ -256,6 +273,12 @@
2244 sz += sprintf(page + sz, "number: %u\n", lv->lv_number);
2245 sz += sprintf(page + sz, "open: %u\n", lv->lv_open);
2246 sz += sprintf(page + sz, "allocation: %u\n", lv->lv_allocation);
2247 + if(lv->lv_stripes > 1) {
2248 + sz += sprintf(page + sz, "stripes: %u\n",
2250 + sz += sprintf(page + sz, "stripesize: %u\n",
2251 + lv->lv_stripesize);
2253 sz += sprintf(page + sz, "device: %02u:%02u\n",
2254 MAJOR(lv->lv_dev), MINOR(lv->lv_dev));
2258 #ifdef DEBUG_LVM_PROC_GET_INFO
2260 - "%s - lvm_proc_get_global_info CALLED pos: %lu count: %d whence: %d\n",
2261 - lvm_name, pos, count, whence);
2262 + "%s - lvm_proc_get_global_info CALLED pos: %lu count: %d\n",
2263 + lvm_name, pos, count);
2266 if(pos != 0 && buf != NULL)
2270 "Total: %d VG%s %d PV%s %d LV%s ",
2271 - lvm_short_version,
2273 vg_counter, vg_counter == 1 ? "" : "s",
2274 pv_counter, pv_counter == 1 ? "" : "s",
2275 lv_counter, lv_counter == 1 ? "" : "s");
2277 allocation_flag = 'A';
2278 if (!(pv->pv_allocatable & PV_ALLOCATABLE))
2279 allocation_flag = 'N';
2280 - pv_name = strrchr(pv->pv_name+1,'/');
2281 + pv_name = strchr(pv->pv_name+1,'/');
2282 if ( pv_name == 0) pv_name = pv->pv_name;
2285 diff -ruN -X /home/joe/packages/dontdiff linux_2.4.1/drivers/md/lvm-snap.h linux/drivers/md/lvm-snap.h
2286 --- linux_2.4.1/drivers/md/lvm-snap.h Fri Feb 16 14:51:26 2001
2287 +++ linux/drivers/md/lvm-snap.h Thu Jan 1 01:00:00 1970
2290 - * kernel/lvm-snap.h
2292 - * Copyright (C) 2001 Sistina Software
2295 - * LVM driver is free software; you can redistribute it and/or modify
2296 - * it under the terms of the GNU General Public License as published by
2297 - * the Free Software Foundation; either version 2, or (at your option)
2298 - * any later version.
2300 - * LVM driver is distributed in the hope that it will be useful,
2301 - * but WITHOUT ANY WARRANTY; without even the implied warranty of
2302 - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
2303 - * GNU General Public License for more details.
2305 - * You should have received a copy of the GNU General Public License
2306 - * along with GNU CC; see the file COPYING. If not, write to
2307 - * the Free Software Foundation, 59 Temple Place - Suite 330,
2308 - * Boston, MA 02111-1307, USA.
2315 - * 05/01/2001:Joe Thornber - Factored this file out of lvm.c
2322 -/* external snapshot calls */
2323 -extern inline int lvm_get_blksize(kdev_t);
2324 -extern int lvm_snapshot_alloc(lv_t *);
2325 -extern void lvm_snapshot_fill_COW_page(vg_t *, lv_t *);
2326 -extern int lvm_snapshot_COW(kdev_t, ulong, ulong, ulong, lv_t *);
2327 -extern int lvm_snapshot_remap_block(kdev_t *, ulong *, ulong, lv_t *);
2328 -extern void lvm_snapshot_release(lv_t *);
2329 -extern int lvm_write_COW_table_block(vg_t *, lv_t *);
2330 -extern inline void lvm_hash_link(lv_block_exception_t *,
2331 - kdev_t, ulong, lv_t *);
2332 -extern int lvm_snapshot_alloc_hash_table(lv_t *);
2333 -extern void lvm_drop_snapshot(lv_t *, const char *);