1 --- linux/include/linux/lvm.h.orig Tue Oct 23 13:55:20 2001
2 +++ linux/include/linux/lvm.h Tue Oct 23 13:55:51 2001
5 * January-March,July,September,October,Dezember 1999
6 * January,February,July,November 2000
8 + * January-March,June,July 2001
10 * lvm is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
13 * 01/03/2001 - Revert to IOP10 and add VG_CREATE_OLD call for compatibility
14 * 08/03/2001 - new lv_t (in core) version number 5: changed page member
15 * to (struct kiobuf *) to use for COW exception table io
16 + * 26/03/2001 - changed lv_v4 to lv_v5 in structure definition (HM)
17 + * 21/06/2001 - changed BLOCK_SIZE back to 1024 for non S/390
18 + * 22/06/2001 - added Andreas Dilger's PE on 4k boundary alignment enhancements
19 + * 19/07/2001 - added rwsem compatibility macros for 2.2 kernels
24 #ifndef _LVM_H_INCLUDE
25 #define _LVM_H_INCLUDE
27 -#define LVM_RELEASE_NAME "0.9.1_beta6"
28 -#define LVM_RELEASE_DATE "12/03/2001"
29 +#define LVM_RELEASE_NAME "1.0.1-rc4"
30 +#define LVM_RELEASE_DATE "03/10/2001"
32 #define _LVM_KERNEL_H_VERSION "LVM "LVM_RELEASE_NAME" ("LVM_RELEASE_DATE")"
38 -#endif /* #ifdef __KERNEL__ */
42 #include <linux/kdev_t.h>
43 #include <linux/list.h>
47 #include <linux/kdev_t.h>
48 #include <linux/list.h>
50 #endif /* #ifndef __KERNEL__ */
52 #include <asm/types.h>
54 #include <asm/semaphore.h>
55 #endif /* #ifdef __KERNEL__ */
60 #if !defined ( LVM_BLK_MAJOR) || !defined ( LVM_CHAR_MAJOR)
62 #ifdef CONFIG_ARCH_S390
63 #define BLOCK_SIZE 4096
65 -#define BLOCK_SIZE 512
66 +#define BLOCK_SIZE 1024
70 #define SECTOR_SIZE 512
73 -#define LVM_STRUCT_VERSION 1 /* structure version */
74 +/* structure version */
75 +#define LVM_STRUCT_VERSION 1
77 #define LVM_DIR_PREFIX "/dev/"
79 -/* set the default structure version */
80 -#if ( LVM_STRUCT_VERSION == 1)
84 -#define pv_disk_t pv_disk_v2_t
85 -#define lv_disk_t lv_disk_v3_t
86 -#define vg_disk_t vg_disk_v2_t
87 -#define lv_block_exception_t lv_block_exception_v1_t
88 -#define lv_COW_table_disk_t lv_COW_table_disk_v1_t
94 * i/o protocol version
99 /* DONT TOUCH THESE !!! */
100 -/* base of PV structure in disk partition */
101 -#define LVM_PV_DISK_BASE 0L
103 -/* size reserved for PV structure on disk */
104 -#define LVM_PV_DISK_SIZE 1024L
106 -/* base of VG structure in disk partition */
107 -#define LVM_VG_DISK_BASE LVM_PV_DISK_SIZE
109 -/* size reserved for VG structure */
110 -#define LVM_VG_DISK_SIZE ( 9 * 512L)
112 -/* size reserved for timekeeping */
113 -#define LVM_TIMESTAMP_DISK_BASE ( LVM_VG_DISK_BASE + LVM_VG_DISK_SIZE)
114 -#define LVM_TIMESTAMP_DISK_SIZE 512L /* reserved for timekeeping */
116 -/* name list of physical volumes on disk */
117 -#define LVM_PV_UUIDLIST_DISK_BASE ( LVM_TIMESTAMP_DISK_BASE + \
118 - LVM_TIMESTAMP_DISK_SIZE)
120 -/* now for the dynamically calculated parts of the VGDA */
121 -#define LVM_LV_DISK_OFFSET(a, b) ( (a)->lv_on_disk.base + \
122 - sizeof ( lv_disk_t) * b)
123 -#define LVM_DISK_SIZE(pv) ( (pv)->pe_on_disk.base + \
124 - (pv)->pe_on_disk.size)
125 -#define LVM_PE_DISK_OFFSET(pe, pv) ( pe * pv->pe_size + \
126 - ( LVM_DISK_SIZE ( pv) / SECTOR_SIZE))
127 -#define LVM_PE_ON_DISK_BASE(pv) \
129 - pv->pe_on_disk.base = pv->lv_on_disk.base + pv->lv_on_disk.size; \
130 - if ( ( rest = pv->pe_on_disk.base % SECTOR_SIZE) != 0) \
131 - pv->pe_on_disk.base += ( SECTOR_SIZE - rest); \
133 -/* END default disk spaces and offsets for PVs */
140 #define LVM_MAX_STRIPES 128 /* max # of stripes */
141 #define LVM_MAX_SIZE ( 1024LU * 1024 / SECTOR_SIZE * 1024 * 1024) /* 1TB[sectors] */
142 #define LVM_MAX_MIRRORS 2 /* future use */
143 -#define LVM_MIN_READ_AHEAD 0 /* minimum read ahead sectors */
144 -#define LVM_DEFAULT_READ_AHEAD 1024 /* default read ahead sectors for 512k scsi segments */
145 -#define LVM_MAX_READ_AHEAD 10000 /* maximum read ahead sectors */
146 +#define LVM_MIN_READ_AHEAD 2 /* minimum read ahead sectors */
147 +#define LVM_MAX_READ_AHEAD 120 /* maximum read ahead sectors */
148 #define LVM_MAX_LV_IO_TIMEOUT 60 /* seconds I/O timeout (future use) */
149 #define LVM_PARTITION 0xfe /* LVM partition id */
150 #define LVM_NEW_PARTITION 0x8e /* new LVM partition id (10/09/1999) */
151 @@ -312,25 +272,12 @@
152 #define LVM_SNAPSHOT_MIN_CHUNK (PAGE_SIZE/1024) /* 4 or 8 KB */
159 -#define LVM_GET_COW_TABLE_CHUNKS_PER_PE(vg, lv) ( \
160 - vg->pe_size / lv->lv_chunk_size)
162 -#define LVM_GET_COW_TABLE_ENTRIES_PER_PE(vg, lv) ( \
164 - int COW_table_entries_per_PE; \
165 - int COW_table_chunks_per_PE; \
167 - COW_table_entries_per_PE = LVM_GET_COW_TABLE_CHUNKS_PER_PE(vg, lv); \
168 - COW_table_chunks_per_PE = ( COW_table_entries_per_PE * sizeof(lv_COW_table_disk_t) / SECTOR_SIZE + lv->lv_chunk_size - 1) / lv->lv_chunk_size; \
169 - COW_table_entries_per_PE - COW_table_chunks_per_PE;})
174 + * FIXME: the last parameter to _IO{W,R,WR} is a data type. The macro will
175 + * expand this using sizeof(), so putting "1" there is misleading
176 + * because sizeof(1) = sizeof(int) = sizeof(2) = 4 on a 32-bit machine!
179 #define VG_CREATE_OLD _IOW ( 0xfe, 0x00, 1)
183 /* lock the logical volume manager */
184 +#if LVM_DRIVER_IOP_VERSION > 11
185 +#define LVM_LOCK_LVM _IO ( 0xfe, 0x9A)
187 +/* This is actually the same as _IO ( 0xff, 0x00), oops. Remove for IOP 12+ */
188 #define LVM_LOCK_LVM _IO ( 0xfe, 0x100)
193 @@ -445,21 +397,21 @@
194 #define UUID_LEN 32 /* don't change!!! */
196 /* copy on write tables in disk format */
198 +typedef struct lv_COW_table_disk_v1 {
199 uint64_t pv_org_number;
200 uint64_t pv_org_rsector;
201 uint64_t pv_snap_number;
202 uint64_t pv_snap_rsector;
203 -} lv_COW_table_disk_v1_t;
204 +} lv_COW_table_disk_t;
206 /* remap physical sector/rdev pairs including hash */
208 +typedef struct lv_block_exception_v1 {
209 struct list_head hash;
210 uint32_t rsector_org;
212 uint32_t rsector_new;
214 -} lv_block_exception_v1_t;
215 +} lv_block_exception_t;
217 /* disk stored pe information */
219 @@ -475,37 +427,11 @@
223 - * Structure Physical Volume (PV) Version 1
224 + * physical volume structures
229 - char id[2]; /* Identifier */
230 - unsigned short version; /* HM lvm version */
231 - lvm_disk_data_t pv_on_disk;
232 - lvm_disk_data_t vg_on_disk;
233 - lvm_disk_data_t pv_namelist_on_disk;
234 - lvm_disk_data_t lv_on_disk;
235 - lvm_disk_data_t pe_on_disk;
236 - char pv_name[NAME_LEN];
237 - char vg_name[NAME_LEN];
238 - char system_id[NAME_LEN]; /* for vgexport/vgimport */
242 - uint pv_allocatable;
243 - uint pv_size; /* HM */
248 - uint pe_stale; /* for future use */
249 - pe_disk_t *pe; /* HM */
250 - struct inode *inode; /* HM */
255 +typedef struct pv_v2 {
256 char id[2]; /* Identifier */
257 unsigned short version; /* HM lvm version */
258 lvm_disk_data_t pv_on_disk;
259 @@ -527,36 +453,17 @@
261 uint pe_stale; /* for future use */
262 pe_disk_t *pe; /* HM */
263 - struct inode *inode; /* HM */
264 + struct block_device *bd;
265 char pv_uuid[UUID_LEN+1];
269 + uint32_t pe_start; /* in sectors */
275 - uint8_t id[2]; /* Identifier */
276 - uint16_t version; /* HM lvm version */
277 - lvm_disk_data_t pv_on_disk;
278 - lvm_disk_data_t vg_on_disk;
279 - lvm_disk_data_t pv_namelist_on_disk;
280 - lvm_disk_data_t lv_on_disk;
281 - lvm_disk_data_t pe_on_disk;
282 - uint8_t pv_name[NAME_LEN];
283 - uint8_t vg_name[NAME_LEN];
284 - uint8_t system_id[NAME_LEN]; /* for vgexport/vgimport */
286 - uint32_t pv_number;
287 - uint32_t pv_status;
288 - uint32_t pv_allocatable;
289 - uint32_t pv_size; /* HM */
293 - uint32_t pe_allocated;
298 +typedef struct pv_disk_v2 {
299 uint8_t id[2]; /* Identifier */
300 uint16_t version; /* HM lvm version */
301 lvm_disk_data_t pv_on_disk;
305 uint32_t pe_allocated;
308 + /* new in struct version 2 */
309 + uint32_t pe_start; /* in sectors */
318 typedef struct lv_bmap {
328 -typedef struct lv_v4 {
329 +typedef struct lv_v5 {
330 char lv_name[NAME_LEN];
331 char vg_name[NAME_LEN];
336 /* delta to version 1 starts here */
337 - struct lv_v4 *lv_snapshot_org;
338 - struct lv_v4 *lv_snapshot_prev;
339 - struct lv_v4 *lv_snapshot_next;
340 + struct lv_v5 *lv_snapshot_org;
341 + struct lv_v5 *lv_snapshot_prev;
342 + struct lv_v5 *lv_snapshot_next;
343 lv_block_exception_t *lv_block_exception;
346 @@ -643,22 +554,22 @@
348 struct kiobuf *lv_iobuf;
349 struct kiobuf *lv_COW_table_iobuf;
350 - struct semaphore lv_snapshot_sem;
351 + struct rw_semaphore lv_lock;
352 struct list_head *lv_snapshot_hash_table;
353 uint32_t lv_snapshot_hash_table_size;
354 uint32_t lv_snapshot_hash_mask;
355 wait_queue_head_t lv_snapshot_wait;
356 int lv_snapshot_use_rate;
360 uint lv_allocated_snapshot_le;
369 +typedef struct lv_disk_v3 {
370 uint8_t lv_name[NAME_LEN];
371 uint8_t vg_name[NAME_LEN];
373 @@ -680,36 +591,14 @@
374 uint32_t lv_allocation;
375 uint32_t lv_io_timeout; /* for future use */
376 uint32_t lv_read_ahead; /* HM */
381 * Structure Volume Group (VG) Version 1
386 - char vg_name[NAME_LEN]; /* volume group name */
387 - uint vg_number; /* volume group number */
388 - uint vg_access; /* read/write */
389 - uint vg_status; /* active or not */
390 - uint lv_max; /* maximum logical volumes */
391 - uint lv_cur; /* current logical volumes */
392 - uint lv_open; /* open logical volumes */
393 - uint pv_max; /* maximum physical volumes */
394 - uint pv_cur; /* current physical volumes FU */
395 - uint pv_act; /* active physical volumes */
396 - uint dummy; /* was obsolete max_pe_per_pv */
397 - uint vgda; /* volume group descriptor arrays FU */
398 - uint pe_size; /* physical extent size in sectors */
399 - uint pe_total; /* total of physical extents */
400 - uint pe_allocated; /* allocated physical extents */
401 - uint pvg_total; /* physical volume groups FU */
402 - struct proc_dir_entry *proc;
403 - pv_t *pv[ABS_MAX_PV + 1]; /* physical volume struct pointers */
404 - lv_t *lv[ABS_MAX_LV + 1]; /* logical volume struct pointers */
408 +typedef struct vg_v3 {
409 char vg_name[NAME_LEN]; /* volume group name */
410 uint vg_number; /* volume group number */
411 uint vg_access; /* read/write */
412 @@ -737,30 +626,11 @@
422 - uint8_t vg_name[NAME_LEN]; /* volume group name */
423 - uint32_t vg_number; /* volume group number */
424 - uint32_t vg_access; /* read/write */
425 - uint32_t vg_status; /* active or not */
426 - uint32_t lv_max; /* maximum logical volumes */
427 - uint32_t lv_cur; /* current logical volumes */
428 - uint32_t lv_open; /* open logical volumes */
429 - uint32_t pv_max; /* maximum physical volumes */
430 - uint32_t pv_cur; /* current physical volumes FU */
431 - uint32_t pv_act; /* active physical volumes */
433 - uint32_t vgda; /* volume group descriptor arrays FU */
434 - uint32_t pe_size; /* physical extent size in sectors */
435 - uint32_t pe_total; /* total of physical extents */
436 - uint32_t pe_allocated; /* allocated physical extents */
437 - uint32_t pvg_total; /* physical volume groups FU */
441 +typedef struct vg_disk_v2 {
442 uint8_t vg_uuid[UUID_LEN]; /* volume group UUID */
443 uint8_t vg_name_dummy[NAME_LEN-UUID_LEN]; /* rest of v1 VG name */
444 uint32_t vg_number; /* volume group number */
446 uint32_t pe_total; /* total of physical extents */
447 uint32_t pe_allocated; /* allocated physical extents */
448 uint32_t pvg_total; /* physical volume groups FU */
456 } lv_snapshot_use_rate_req_t;
460 +/* useful inlines */
461 +static inline ulong round_up(ulong n, ulong size) {
463 + return (n + size) & ~size;
466 +static inline ulong div_up(ulong n, ulong size) {
467 + return round_up(n, size) / size;
470 +/* FIXME: nasty capital letters */
471 +static int inline LVM_GET_COW_TABLE_CHUNKS_PER_PE(vg_t *vg, lv_t *lv) {
472 + return vg->pe_size / lv->lv_chunk_size;
475 +static int inline LVM_GET_COW_TABLE_ENTRIES_PER_PE(vg_t *vg, lv_t *lv) {
476 + ulong chunks = vg->pe_size / lv->lv_chunk_size;
477 + ulong entry_size = sizeof(lv_COW_table_disk_t);
478 + ulong chunk_size = lv->lv_chunk_size * SECTOR_SIZE;
479 + ulong entries = (vg->pe_size * SECTOR_SIZE) /
480 + (entry_size + chunk_size);
485 + for(; entries; entries--)
486 + if((div_up(entries * entry_size, chunk_size) + entries) <=
494 #endif /* #ifndef _LVM_H_INCLUDE */
496 --- linux/drivers/md/lvm.c.orig Tue Oct 23 13:55:17 2001
497 +++ linux/drivers/md/lvm.c Tue Oct 23 13:55:51 2001
499 * April-May,July-August,November 1998
500 * January-March,May,July,September,October 1999
501 * January,February,July,September-November 2000
502 - * January,February,March 2001
503 + * January-April 2001
506 * LVM driver is free software; you can redistribute it and/or modify
508 * only other update ioctls are blocked now
509 * - fixed pv->pe to NULL for pv_status
510 * - using lv_req structure in lvm_chr_ioctl() now
511 - * - fixed NULL ptr reference bug in lvm_do_lv_extendreduce()
512 + * - fixed NULL ptr reference bug in lvm_do_lv_extend_reduce()
513 * caused by uncontiguous PV array in lvm_chr_ioctl(VG_REDUCE)
514 * 09/02/1999 - changed BLKRASET and BLKRAGET in lvm_chr_ioctl() to
515 * handle lgoical volume private read ahead sector
516 @@ -194,11 +194,25 @@
517 * - factored lvm_do_pv_flush out of lvm_chr_ioctl (HM)
518 * 09/03/2001 - Added _lock_open_count to ensure we only drop the lock
519 * when the locking process closes.
520 - * 05/04/2001 - lvm_map bugs: don't use b_blocknr/b_dev in lvm_map, it
521 - * destroys stacking devices. call b_end_io on failed maps.
523 - * 30/04/2001 - replace get_hardblock_size() with get_hardsect_size() for
525 + * 05/04/2001 - Defer writes to an extent that is being moved [JT]
526 + * 05/04/2001 - use b_rdev and b_rsector rather than b_dev and b_blocknr in
527 + * lvm_map() in order to make stacking devices more happy (HM)
528 + * 11/04/2001 - cleaned up the pvmove queue code. I no longer retain the
529 + * rw flag, instead WRITEA's are just dropped [JT]
530 + * 30/04/2001 - added KERNEL_VERSION > 2.4.3 get_hardsect_size() rather
531 + * than get_hardblocksize() call
532 + * 03/05/2001 - Use copy_to/from_user to preserve pointers in
533 + * lvm_do_status_by*
534 + * 11/05/2001 - avoid accesses to inactive snapshot data in
535 + * __update_hardsectsize() and lvm_do_lv_extend_reduce() (JW)
536 + * 28/05/2001 - implemented missing BLKSSZGET ioctl
537 + * 05/06/2001 - Move _pe_lock out of fast path for lvm_map when no PEs
538 + * locked. Make buffer queue flush not need locking.
539 + * Fix lvm_user_bmap() to set b_rsector for new lvm_map(). [AED]
540 + * 30/06/2001 - Speed up __update_hardsectsize() by checking if PVs have
541 + * the same hardsectsize (very likely) before scanning all LEs
542 + * in the LV each time. [AED]
543 + * 12/10/2001 - Use add/del_gendisk() routines in 2.4.10+
548 #include <linux/locks.h>
551 +#include <linux/devfs_fs_kernel.h>
552 #include <linux/smp_lock.h>
553 #include <asm/ioctl.h>
554 #include <asm/segment.h>
557 #include "lvm-internal.h"
559 -#define LVM_CORRECT_READ_AHEAD(a) \
561 - if ((a) < LVM_MIN_READ_AHEAD || \
562 - (a) > LVM_MAX_READ_AHEAD) \
563 - (a) = LVM_DEFAULT_READ_AHEAD; \
564 - read_ahead[MAJOR_NR] = (a); \
566 +#define LVM_CORRECT_READ_AHEAD( a) \
567 + if ( a < LVM_MIN_READ_AHEAD || \
568 + a > LVM_MAX_READ_AHEAD) a = LVM_MAX_READ_AHEAD;
571 # define WRITEA WRITE
573 static void __update_hardsectsize(lv_t *lv);
576 +static void _queue_io(struct buffer_head *bh, int rw);
577 +static struct buffer_head *_dequeue_io(void);
578 +static void _flush_io(struct buffer_head *bh);
580 +static int _open_pv(pv_t *pv);
581 +static void _close_pv(pv_t *pv);
583 +static unsigned long _sectors_to_k(unsigned long sect);
586 void lvm_hd_name(char *, int);
592 -char *lvm_version = "LVM version "LVM_RELEASE_NAME" by Heinz Mauelshagen "
593 - "("LVM_RELEASE_DATE")\n";
594 -char *lvm_short_version = "version "LVM_RELEASE_NAME" ("LVM_RELEASE_DATE")";
595 +char *lvm_version = "LVM version "LVM_RELEASE_NAME"("LVM_RELEASE_DATE")";
596 ushort lvm_iop_version = LVM_DRIVER_IOP_VERSION;
598 const char *const lvm_name = LVM_NAME;
600 /* volume group descriptor area pointers */
601 vg_t *vg[ABS_MAX_VG];
603 -static pv_t *pvp = NULL;
604 -static lv_t *lvp = NULL;
605 -static pe_t *pep = NULL;
608 /* map from block minor number to VG and LV numbers */
612 /* Request structures (lvm_chr_ioctl()) */
613 static pv_change_req_t pv_change_req;
614 static pv_status_req_t pv_status_req;
615 -static pe_lock_req_t pe_lock_req;
616 +volatile static pe_lock_req_t pe_lock_req;
617 static le_remap_req_t le_remap_req;
618 static lv_req_t lv_req;
620 @@ -365,11 +378,14 @@
621 static uint vg_count = 0;
622 static long lvm_chr_open_count = 0;
623 static DECLARE_WAIT_QUEUE_HEAD(lvm_wait);
624 -static DECLARE_WAIT_QUEUE_HEAD(lvm_map_wait);
626 static spinlock_t lvm_lock = SPIN_LOCK_UNLOCKED;
627 static spinlock_t lvm_snapshot_lock = SPIN_LOCK_UNLOCKED;
629 +static struct buffer_head *_pe_requests;
630 +static DECLARE_RWSEM(_pe_lock);
633 struct file_operations lvm_chr_fops = {
635 release: lvm_chr_close,
637 /* block device operations structure needed for 2.3.38? and above */
638 struct block_device_operations lvm_blk_dops =
640 - open: lvm_blk_open,
641 + open: lvm_blk_open,
642 release: lvm_blk_close,
643 ioctl: lvm_blk_ioctl,
645 @@ -393,29 +409,38 @@
647 static struct gendisk lvm_gendisk =
650 - major_name: LVM_NAME,
653 - part: lvm_hd_struct,
656 + MAJOR_NR, /* major # */
657 + LVM_NAME, /* name of major */
658 + 0, /* number of times minor is shifted
659 + to get real minor */
660 + 1, /* maximum partitions per device */
661 + lvm_hd_struct, /* partition table */
662 + lvm_size, /* device size in blocks, copied
664 + MAX_LV, /* number or real devices */
665 + NULL, /* internal */
666 + NULL, /* pointer to next gendisk struct (internal) */
671 * Driver initialization...
675 - if (register_chrdev(LVM_CHAR_MAJOR, lvm_name, &lvm_chr_fops) < 0) {
676 - printk(KERN_ERR "%s -- register_chrdev failed\n", lvm_name);
677 + if (devfs_register_chrdev(LVM_CHAR_MAJOR,
678 + lvm_name, &lvm_chr_fops) < 0) {
679 + printk(KERN_ERR "%s -- devfs_register_chrdev failed\n",
683 - if (register_blkdev(MAJOR_NR, lvm_name, &lvm_blk_dops) < 0)
684 + if (devfs_register_blkdev(MAJOR_NR, lvm_name, &lvm_blk_dops) < 0)
686 - printk("%s -- register_blkdev failed\n", lvm_name);
687 - if (unregister_chrdev(LVM_CHAR_MAJOR, lvm_name) < 0)
688 - printk(KERN_ERR "%s -- unregister_chrdev failed\n", lvm_name);
689 + printk("%s -- devfs_register_blkdev failed\n", lvm_name);
690 + if (devfs_unregister_chrdev(LVM_CHAR_MAJOR, lvm_name) < 0)
692 + "%s -- devfs_unregister_chrdev failed\n",
699 lvm_geninit(&lvm_gendisk);
701 + /* insert our gendisk at the corresponding major */
702 add_gendisk(&lvm_gendisk);
705 @@ -433,20 +459,19 @@
706 blk_queue_make_request(BLK_DEFAULT_QUEUE(MAJOR_NR), lvm_make_request_fn);
709 + /* initialise the pe lock */
710 + pe_lock_req.lock = UNLOCK_PE;
712 /* optional read root VGDA */
714 if ( *rootvg != 0) vg_read_with_pv_and_lv ( rootvg, &vg);
721 + printk(KERN_INFO "%s module loaded\n", lvm_version);
724 + printk(KERN_INFO "%s\n", lvm_version);
726 - " successfully initialized\n",
727 - lvm_version, lvm_name);
731 @@ -457,15 +482,16 @@
733 static void lvm_cleanup(void)
735 - if (unregister_chrdev(LVM_CHAR_MAJOR, lvm_name) < 0) {
736 - printk(KERN_ERR "%s -- unregister_chrdev failed\n", lvm_name);
738 - if (unregister_blkdev(MAJOR_NR, lvm_name) < 0) {
739 - printk(KERN_ERR "%s -- unregister_blkdev failed\n", lvm_name);
741 + if (devfs_unregister_chrdev(LVM_CHAR_MAJOR, lvm_name) < 0)
742 + printk(KERN_ERR "%s -- devfs_unregister_chrdev failed\n",
744 + if (devfs_unregister_blkdev(MAJOR_NR, lvm_name) < 0)
745 + printk(KERN_ERR "%s -- devfs_unregister_blkdev failed\n",
750 + /* delete our gendisk from chain */
751 del_gendisk(&lvm_gendisk);
753 blk_size[MAJOR_NR] = NULL;
755 /* unregister with procfs and devfs */
759 printk(KERN_INFO "%s -- Module successfully deactivated\n", lvm_name);
763 } /* lvm_cleanup() */
765 lvm_lock = lvm_snapshot_lock = SPIN_LOCK_UNLOCKED;
767 pe_lock_req.lock = UNLOCK_PE;
768 - pe_lock_req.data.lv_dev = \
769 - pe_lock_req.data.pv_dev = \
770 + pe_lock_req.data.lv_dev = 0;
771 + pe_lock_req.data.pv_dev = 0;
772 pe_lock_req.data.pv_offset = 0;
774 /* Initialize VG pointers */
777 ********************************************************************/
779 +#define MODE_TO_STR(mode) (mode) & FMODE_READ ? "READ" : "", \
780 + (mode) & FMODE_WRITE ? "WRITE" : ""
783 * character device open routine
787 int minor = MINOR(inode->i_rdev);
789 - P_DEV("%s -- lvm_chr_open MINOR: %d VG#: %d mode: 0x%X lock: %d\n",
790 - lvm_name, minor, VG_CHR(minor), file->f_mode, lock);
791 + P_DEV("chr_open MINOR: %d VG#: %d mode: %s%s lock: %d\n",
792 + minor, VG_CHR(minor), MODE_TO_STR(file->f_mode), lock);
794 /* super user validation */
795 if (!capable(CAP_SYS_ADMIN)) return -EACCES;
797 /* otherwise cc will complain about unused variables */
800 - P_IOCTL("%s -- lvm_chr_ioctl: command: 0x%X MINOR: %d "
801 - "VG#: %d mode: 0x%X\n",
802 - lvm_name, command, minor, VG_CHR(minor), file->f_mode);
803 + P_IOCTL("chr MINOR: %d command: 0x%X arg: %p VG#: %d mode: %s%s\n",
804 + minor, command, arg, VG_CHR(minor), MODE_TO_STR(file->f_mode));
806 #ifdef LVM_TOTAL_RESET
807 if (lvm_reset_spindown > 0) return -EACCES;
808 @@ -764,13 +794,10 @@
810 * character device close routine
812 -int lvm_chr_close(struct inode *inode, struct file *file)
813 +static int lvm_chr_close(struct inode *inode, struct file *file)
816 - int minor = MINOR(inode->i_rdev);
818 - "%s -- lvm_chr_close VG#: %d\n", lvm_name, VG_CHR(minor));
820 + P_DEV("chr_close MINOR: %d VG#: %d\n",
821 + MINOR(inode->i_rdev), VG_CHR(MINOR(inode->i_rdev)));
823 #ifdef LVM_TOTAL_RESET
824 if (lvm_reset_spindown > 0) {
826 spin_lock(&lvm_lock);
827 if(lock == current->pid) {
828 if(!_lock_open_count) {
829 + P_DEV("chr_close: unlocking LVM for pid %d\n", lock);
831 wake_up_interruptible(&lvm_wait);
835 vg_t *vg_ptr = vg[VG_BLK(minor)];
837 - P_DEV("%s -- lvm_blk_open MINOR: %d VG#: %d LV#: %d mode: 0x%X\n",
838 - lvm_name, minor, VG_BLK(minor), LV_BLK(minor), file->f_mode);
839 + P_DEV("blk_open MINOR: %d VG#: %d LV#: %d mode: %s%s\n",
840 + minor, VG_BLK(minor), LV_BLK(minor), MODE_TO_STR(file->f_mode));
842 #ifdef LVM_TOTAL_RESET
843 if (lvm_reset_spindown > 0)
848 - P_DEV("%s -- OPEN OK, LV size %d\n", lvm_name, lv_ptr->lv_size);
849 + P_DEV("blk_open OK, LV size %d\n", lv_ptr->lv_size);
853 @@ -868,22 +896,19 @@
854 void *arg = (void *) a;
855 struct hd_geometry *hd = (struct hd_geometry *) a;
857 - P_IOCTL("%s -- lvm_blk_ioctl MINOR: %d command: 0x%X arg: %lX "
858 - "VG#: %dl LV#: %d\n",
859 - lvm_name, minor, command, (ulong) arg,
860 - VG_BLK(minor), LV_BLK(minor));
861 + P_IOCTL("blk MINOR: %d command: 0x%X arg: %p VG#: %d LV#: %d "
862 + "mode: %s%s\n", minor, command, arg, VG_BLK(minor),
863 + LV_BLK(minor), MODE_TO_STR(file->f_mode));
867 + /* get block device sector size as needed e.g. by fdisk */
868 + return put_user(lvm_sectsize(inode->i_rdev), (int *) arg);
871 /* return device size */
872 - P_IOCTL("%s -- lvm_blk_ioctl -- BLKGETSIZE: %u\n",
873 - lvm_name, lv_ptr->lv_size);
874 - if (put_user(lv_ptr->lv_size, (unsigned long *)arg))
879 - if (put_user((u64)lv_ptr->lv_size << 9, (u64 *)arg))
880 + P_IOCTL("BLKGETSIZE: %u\n", lv_ptr->lv_size);
881 + if (put_user(lv_ptr->lv_size, (long *)arg))
886 /* flush buffer cache */
887 if (!capable(CAP_SYS_ADMIN)) return -EACCES;
889 - P_IOCTL("%s -- lvm_blk_ioctl -- BLKFLSBUF\n", lvm_name);
890 + P_IOCTL("BLKFLSBUF\n");
892 fsync_dev(inode->i_rdev);
893 invalidate_buffers(inode->i_rdev);
894 @@ -903,20 +928,19 @@
895 /* set read ahead for block device */
896 if (!capable(CAP_SYS_ADMIN)) return -EACCES;
898 - P_IOCTL("%s -- lvm_blk_ioctl -- BLKRASET: %ld sectors for %s\n",
899 - lvm_name, (long) arg, kdevname(inode->i_rdev));
900 + P_IOCTL("BLKRASET: %ld sectors for %s\n",
901 + (long) arg, kdevname(inode->i_rdev));
903 if ((long) arg < LVM_MIN_READ_AHEAD ||
904 (long) arg > LVM_MAX_READ_AHEAD)
906 lv_ptr->lv_read_ahead = (long) arg;
907 - read_ahead[MAJOR_NR] = lv_ptr->lv_read_ahead;
912 /* get current read ahead setting */
913 - P_IOCTL("%s -- lvm_blk_ioctl -- BLKRAGET\n", lvm_name);
914 + P_IOCTL("BLKRAGET %d\n", lv_ptr->lv_read_ahead);
915 if (put_user(lv_ptr->lv_read_ahead, (long *)arg))
918 @@ -969,13 +993,12 @@
922 - /* turn logical block into (dev_t, block). non privileged. */
923 + /* turn logical block into (dev_t, block). non privileged. */
924 /* don't bmap a snapshot, since the mapping can change */
925 - if (lv_ptr->lv_access & LV_SNAPSHOT)
926 + if(lv_ptr->lv_access & LV_SNAPSHOT)
929 return lvm_user_bmap(inode, (struct lv_bmap *) arg);
932 case LV_SET_ALLOCATION:
933 /* set allocation flags of a logical volume */
934 @@ -1006,11 +1029,8 @@
935 vg_t *vg_ptr = vg[VG_BLK(minor)];
936 lv_t *lv_ptr = vg_ptr->lv[LV_BLK(minor)];
940 - "%s -- lvm_blk_close MINOR: %d VG#: %d LV#: %d\n",
941 - lvm_name, minor, VG_BLK(minor), LV_BLK(minor));
943 + P_DEV("blk_close MINOR: %d VG#: %d LV#: %d\n",
944 + minor, VG_BLK(minor), LV_BLK(minor));
946 if (lv_ptr->lv_open == 1) vg_ptr->lv_open--;
948 @@ -1063,17 +1083,17 @@
951 memset(&bh,0,sizeof bh);
952 - bh.b_rsector = block;
953 - bh.b_dev = bh.b_rdev = inode->i_dev;
954 + bh.b_blocknr = block;
955 + bh.b_dev = bh.b_rdev = inode->i_rdev;
956 bh.b_size = lvm_get_blksize(bh.b_dev);
957 + bh.b_rsector = block * (bh.b_size >> 9);
958 if ((err=lvm_map(&bh, READ)) < 0) {
959 printk("lvm map failed: %d\n", err);
963 - return put_user(kdev_t_to_nr(bh.b_rdev), &user_result->lv_dev) ||
964 - put_user(bh.b_rsector/(bh.b_size>>9), &user_result->lv_block) ?
966 + return (put_user(kdev_t_to_nr(bh.b_rdev), &user_result->lv_dev) ||
967 + put_user(bh.b_rsector/(bh.b_size>>9), &user_result->lv_block));
971 @@ -1081,16 +1101,68 @@
972 * block device support function for /usr/src/linux/drivers/block/ll_rw_blk.c
973 * (see init_module/lvm_init)
975 -static inline void __remap_snapshot(kdev_t rdev, ulong rsector,
976 +static void __remap_snapshot(kdev_t rdev, ulong rsector,
977 ulong pe_start, lv_t *lv, vg_t *vg) {
979 + /* copy a chunk from the origin to a snapshot device */
980 + down_write(&lv->lv_lock);
982 + /* we must redo lvm_snapshot_remap_block in order to avoid a
983 + race condition in the gap where no lock was held */
984 if (!lvm_snapshot_remap_block(&rdev, &rsector, pe_start, lv) &&
985 !lvm_snapshot_COW(rdev, rsector, pe_start, rsector, vg, lv))
986 lvm_write_COW_table_block(vg, lv);
988 + up_write(&lv->lv_lock);
991 +static inline void _remap_snapshot(kdev_t rdev, ulong rsector,
992 + ulong pe_start, lv_t *lv, vg_t *vg) {
995 + /* check to see if this chunk is already in the snapshot */
996 + down_read(&lv->lv_lock);
997 + r = lvm_snapshot_remap_block(&rdev, &rsector, pe_start, lv);
998 + up_read(&lv->lv_lock);
1001 + /* we haven't yet copied this block to the snapshot */
1002 + __remap_snapshot(rdev, rsector, pe_start, lv, vg);
1007 + * extents destined for a pe that is on the move should be deferred
1009 +static inline int _should_defer(kdev_t pv, ulong sector, uint32_t pe_size) {
1010 + return ((pe_lock_req.lock == LOCK_PE) &&
1011 + (pv == pe_lock_req.data.pv_dev) &&
1012 + (sector >= pe_lock_req.data.pv_offset) &&
1013 + (sector < (pe_lock_req.data.pv_offset + pe_size)));
1016 +static inline int _defer_extent(struct buffer_head *bh, int rw,
1017 + kdev_t pv, ulong sector, uint32_t pe_size)
1019 + if (pe_lock_req.lock == LOCK_PE) {
1020 + down_read(&_pe_lock);
1021 + if (_should_defer(pv, sector, pe_size)) {
1022 + up_read(&_pe_lock);
1023 + down_write(&_pe_lock);
1024 + if (_should_defer(pv, sector, pe_size))
1025 + _queue_io(bh, rw);
1026 + up_write(&_pe_lock);
1029 + up_read(&_pe_lock);
1035 static int lvm_map(struct buffer_head *bh, int rw)
1037 - int minor = MINOR(bh->b_dev);
1038 + int minor = MINOR(bh->b_rdev);
1041 ulong size = bh->b_size >> 9;
1042 @@ -1101,7 +1173,7 @@
1043 lv_t *lv = vg_this->lv[LV_BLK(minor)];
1046 - down(&lv->lv_snapshot_sem);
1047 + down_read(&lv->lv_lock);
1048 if (!(lv->lv_status & LV_ACTIVE)) {
1050 "%s - lvm_map: ll_rw_blk for inactive LV %s\n",
1051 @@ -1119,7 +1191,7 @@
1053 P_MAP("%s - lvm_map minor: %d *rdev: %s *rsector: %lu size:%lu\n",
1055 - kdevname(bh->b_dev),
1056 + kdevname(bh->b_rdev),
1059 if (rsector_org + size > lv->lv_size) {
1060 @@ -1130,7 +1202,7 @@
1066 if (lv->lv_stripes < 2) { /* linear mapping */
1068 index = rsector_org / vg_this->pe_size;
1069 @@ -1167,36 +1239,33 @@
1070 rsector_map, stripe_length, stripe_index);
1073 - /* handle physical extents on the move */
1074 - if (pe_lock_req.lock == LOCK_PE) {
1075 - if (rdev_map == pe_lock_req.data.pv_dev &&
1076 - rsector_map >= pe_lock_req.data.pv_offset &&
1077 - rsector_map < (pe_lock_req.data.pv_offset +
1078 - vg_this->pe_size)) {
1079 - sleep_on(&lvm_map_wait);
1082 + * Queue writes to physical extents on the move until move completes.
1083 + * Don't get _pe_lock until there is a reasonable expectation that
1084 + * we need to queue this request, because this is in the fast path.
1086 + if (rw == WRITE || rw == WRITEA) {
1087 + if(_defer_extent(bh, rw, rdev_map,
1088 + rsector_map, vg_this->pe_size)) {
1090 + up_read(&lv->lv_lock);
1096 - if (rw == WRITE || rw == WRITEA)
1097 - lv->lv_current_pe[index].writes++;
1099 - lv->lv_current_pe[index].reads++;
1100 + lv->lv_current_pe[index].writes++; /* statistic */
1102 + lv->lv_current_pe[index].reads++; /* statistic */
1104 - /* snapshot volume exception handling on physical device
1106 + /* snapshot volume exception handling on physical device address base */
1107 if (!(lv->lv_access & (LV_SNAPSHOT|LV_SNAPSHOT_ORG)))
1110 if (lv->lv_access & LV_SNAPSHOT) { /* remap snapshot */
1111 - if (lv->lv_block_exception)
1112 - lvm_snapshot_remap_block(&rdev_map, &rsector_map,
1115 + if (lvm_snapshot_remap_block(&rdev_map, &rsector_map,
1116 + pe_start, lv) < 0)
1119 - } else if(rw == WRITE || rw == WRITEA) { /* snapshot origin */
1120 + } else if (rw == WRITE || rw == WRITEA) { /* snapshot origin */
1123 /* start with first snapshot and loop through all of
1124 @@ -1209,22 +1278,20 @@
1126 /* Serializes the COW with the accesses to the
1128 - down(&snap->lv_snapshot_sem);
1129 - __remap_snapshot(rdev_map, rsector_map,
1130 + _remap_snapshot(rdev_map, rsector_map,
1131 pe_start, snap, vg_this);
1132 - up(&snap->lv_snapshot_sem);
1137 bh->b_rdev = rdev_map;
1138 bh->b_rsector = rsector_map;
1139 - up(&lv->lv_snapshot_sem);
1140 + up_read(&lv->lv_lock);
1144 buffer_IO_error(bh);
1145 - up(&lv->lv_snapshot_sem);
1146 + up_read(&lv->lv_lock);
1150 @@ -1258,15 +1325,10 @@
1152 * make request function
1154 -static int lvm_make_request_fn(request_queue_t *q,
1156 - struct buffer_head *bh)
1158 - if (lvm_map(bh, rw) >= 0)
1161 - buffer_IO_error(bh);
1163 +static int lvm_make_request_fn(request_queue_t *q,
1165 + struct buffer_head *bh) {
1166 + return (lvm_map(bh, rw) <= 0) ? 0 : 1;
1170 @@ -1283,8 +1345,7 @@
1172 spin_lock(&lvm_lock);
1173 if (lock != 0 && lock != current->pid) {
1174 - P_IOCTL("lvm_do_lock_lvm: %s is locked by pid %d ...\n",
1176 + P_DEV("lvm_do_lock_lvm: locked by pid %d ...\n", lock);
1177 spin_unlock(&lvm_lock);
1178 interruptible_sleep_on(&lvm_wait);
1179 if (current->sigpending != 0)
1180 @@ -1296,6 +1357,7 @@
1181 goto lock_try_again;
1183 lock = current->pid;
1184 + P_DEV("lvm_do_lock_lvm: locking LVM for pid %d\n", lock);
1185 spin_unlock(&lvm_lock);
1187 } /* lvm_do_lock_lvm */
1188 @@ -1306,33 +1368,60 @@
1190 static int lvm_do_pe_lock_unlock(vg_t *vg_ptr, void *arg)
1192 + pe_lock_req_t new_lock;
1193 + struct buffer_head *bh;
1196 if (vg_ptr == NULL) return -ENXIO;
1197 - if (copy_from_user(&pe_lock_req, arg,
1198 - sizeof(pe_lock_req_t)) != 0) return -EFAULT;
1199 + if (copy_from_user(&new_lock, arg, sizeof(new_lock)) != 0)
1202 - switch (pe_lock_req.lock) {
1203 + switch (new_lock.lock) {
1205 for (p = 0; p < vg_ptr->pv_max; p++) {
1206 if (vg_ptr->pv[p] != NULL &&
1207 - pe_lock_req.data.pv_dev ==
1208 - vg_ptr->pv[p]->pv_dev)
1209 + new_lock.data.pv_dev == vg_ptr->pv[p]->pv_dev)
1212 if (p == vg_ptr->pv_max) return -ENXIO;
1214 - pe_lock_req.lock = UNLOCK_PE;
1216 + * this sync relieves memory pressure to lessen the
1217 + * likelihood of pvmove being paged out - resulting in
1220 + * This method of doing a pvmove is broken
1222 fsync_dev(pe_lock_req.data.lv_dev);
1224 + down_write(&_pe_lock);
1225 + if (pe_lock_req.lock == LOCK_PE) {
1226 + up_write(&_pe_lock);
1230 + /* Should we do to_kdev_t() on the pv_dev and lv_dev??? */
1231 pe_lock_req.lock = LOCK_PE;
1232 + pe_lock_req.data.lv_dev = new_lock.data.lv_dev;
1233 + pe_lock_req.data.pv_dev = new_lock.data.pv_dev;
1234 + pe_lock_req.data.pv_offset = new_lock.data.pv_offset;
1235 + up_write(&_pe_lock);
1237 + /* some requests may have got through since the fsync */
1238 + fsync_dev(pe_lock_req.data.pv_dev);
1242 + down_write(&_pe_lock);
1243 pe_lock_req.lock = UNLOCK_PE;
1244 - pe_lock_req.data.lv_dev = \
1245 - pe_lock_req.data.pv_dev = \
1246 + pe_lock_req.data.lv_dev = 0;
1247 + pe_lock_req.data.pv_dev = 0;
1248 pe_lock_req.data.pv_offset = 0;
1249 - wake_up(&lvm_map_wait);
1250 + bh = _dequeue_io();
1251 + up_write(&_pe_lock);
1253 + /* handle all deferred io for this PE */
1258 @@ -1400,6 +1489,8 @@
1260 /* get the volume group structure */
1261 if (copy_from_user(vg_ptr, arg, sizeof(vg_t)) != 0) {
1262 + P_IOCTL("lvm_do_vg_create ERROR: copy VG ptr %p (%d bytes)\n",
1263 + arg, sizeof(vg_t));
1267 @@ -1409,8 +1500,9 @@
1270 if (vg[VG_CHR(minor)] != NULL) {
1273 + P_IOCTL("lvm_do_vg_create ERROR: VG %d in use\n", minor);
1278 /* we are not that active so far... */
1279 @@ -1441,6 +1533,7 @@
1280 /* get the physical volume structures */
1281 vg_ptr->pv_act = vg_ptr->pv_cur = 0;
1282 for (p = 0; p < vg_ptr->pv_max; p++) {
1284 /* user space address */
1285 if ((pvp = vg_ptr->pv[p]) != NULL) {
1286 ret = lvm_do_pv_create(pvp, vg_ptr, p);
1287 @@ -1464,9 +1557,12 @@
1288 /* get the logical volume structures */
1290 for (l = 0; l < vg_ptr->lv_max; l++) {
1292 /* user space address */
1293 if ((lvp = vg_ptr->lv[l]) != NULL) {
1294 if (copy_from_user(&lv, lvp, sizeof(lv_t)) != 0) {
1295 + P_IOCTL("ERROR: copying LV ptr %p (%d bytes)\n",
1296 + lvp, sizeof(lv_t));
1297 lvm_do_vg_remove(minor);
1300 @@ -1488,7 +1584,7 @@
1301 /* Second path to correct snapshot logical volumes which are not
1302 in place during first path above */
1303 for (l = 0; l < ls; l++) {
1304 - lvp = snap_lv_ptr[l];
1305 + lv_t *lvp = snap_lv_ptr[l];
1306 if (copy_from_user(&lv, lvp, sizeof(lv_t)) != 0) {
1307 lvm_do_vg_remove(minor);
1309 @@ -1680,27 +1776,41 @@
1310 * character device support function physical volume create
1312 static int lvm_do_pv_create(pv_t *pvp, vg_t *vg_ptr, ulong p) {
1313 - pv_t *pv_ptr = NULL;
1317 - pv_ptr = vg_ptr->pv[p] = kmalloc(sizeof(pv_t),GFP_KERNEL);
1318 - if (pv_ptr == NULL) {
1319 + pv = kmalloc(sizeof(pv_t),GFP_KERNEL);
1322 "%s -- PV_CREATE: kmalloc error PV at line %d\n",
1323 lvm_name, __LINE__);
1326 - if (copy_from_user(pv_ptr, pvp, sizeof(pv_t)) != 0) {
1328 + memset(pv, 0, sizeof(*pv));
1330 + if (copy_from_user(pv, pvp, sizeof(pv_t)) != 0) {
1331 + P_IOCTL("lvm_do_pv_create ERROR: copy PV ptr %p (%d bytes)\n",
1332 + pvp, sizeof(pv_t));
1337 + if ((err = _open_pv(pv))) {
1342 /* We don't need the PE list
1343 in kernel space as with LVs pe_t list (see below) */
1344 - pv_ptr->pe = NULL;
1345 - pv_ptr->pe_allocated = 0;
1346 - pv_ptr->pv_status = PV_ACTIVE;
1348 + pv->pe_allocated = 0;
1349 + pv->pv_status = PV_ACTIVE;
1352 - lvm_fs_create_pv(vg_ptr, pv_ptr);
1353 + lvm_fs_create_pv(vg_ptr, pv);
1355 + vg_ptr->pv[p] = pv;
1357 } /* lvm_do_pv_create() */
1359 @@ -1709,47 +1819,73 @@
1360 * character device support function physical volume remove
1362 static int lvm_do_pv_remove(vg_t *vg_ptr, ulong p) {
1363 - pv_t *pv_ptr = vg_ptr->pv[p];
1364 + pv_t *pv = vg_ptr->pv[p];
1366 - lvm_fs_remove_pv(vg_ptr, pv_ptr);
1367 + lvm_fs_remove_pv(vg_ptr, pv);
1369 - vg_ptr->pe_total -= pv_ptr->pe_total;
1370 + vg_ptr->pe_total -= pv->pe_total;
1373 -#ifdef LVM_GET_INODE
1374 - lvm_clear_inode(pv_ptr->inode);
1381 vg_ptr->pv[p] = NULL;
1387 -static void __update_hardsectsize(lv_t *lv) {
1389 - int max_hardsectsize = 0, hardsectsize;
1391 - for (le = 0; le < lv->lv_allocated_le; le++) {
1392 - hardsectsize = get_hardsect_size(lv->lv_current_pe[le].dev);
1393 - if (hardsectsize == 0)
1394 - hardsectsize = 512;
1395 - if (hardsectsize > max_hardsectsize)
1396 - max_hardsectsize = hardsectsize;
1399 - if (lv->lv_access & LV_SNAPSHOT) {
1400 - for (e = 0; e < lv->lv_remap_end; e++) {
1402 - get_hardsect_size(
1403 - lv->lv_block_exception[e].rdev_new);
1404 - if (hardsectsize == 0)
1405 - hardsectsize = 512;
1406 - if (hardsectsize > max_hardsectsize)
1407 +static void __update_hardsectsize(lv_t *lv)
1409 + int max_hardsectsize = 0, hardsectsize = 0;
1412 + /* Check PVs first to see if they all have same sector size */
1413 + for (p = 0; p < lv->vg->pv_cur; p++) {
1414 + pv_t *pv = lv->vg->pv[p];
1415 + if (pv && (hardsectsize = lvm_sectsize(pv->pv_dev))) {
1416 + if (max_hardsectsize == 0)
1417 max_hardsectsize = hardsectsize;
1418 + else if (hardsectsize != max_hardsectsize) {
1419 + P_DEV("%s PV[%d] (%s) sector size %d, not %d\n",
1420 + lv->lv_name, p, kdevname(pv->pv_dev),
1421 + hardsectsize, max_hardsectsize);
1427 + /* PVs have different block size, need to check each LE sector size */
1428 + if (hardsectsize != max_hardsectsize) {
1430 + for (le = 0; le < lv->lv_allocated_le; le++) {
1431 + hardsectsize = lvm_sectsize(lv->lv_current_pe[le].dev);
1432 + if (hardsectsize > max_hardsectsize) {
1433 + P_DEV("%s LE[%d] (%s) blocksize %d not %d\n",
1435 + kdevname(lv->lv_current_pe[le].dev),
1436 + hardsectsize, max_hardsectsize);
1437 + max_hardsectsize = hardsectsize;
1441 + /* only perform this operation on active snapshots */
1442 + if ((lv->lv_access & LV_SNAPSHOT) &&
1443 + (lv->lv_status & LV_ACTIVE)) {
1445 + for (e = 0; e < lv->lv_remap_end; e++) {
1446 + hardsectsize = lvm_sectsize(lv->lv_block_exception[e].rdev_new);
1447 + if (hardsectsize > max_hardsectsize)
1448 + max_hardsectsize = hardsectsize;
1453 + if (max_hardsectsize == 0)
1454 + max_hardsectsize = SECTOR_SIZE;
1455 + P_DEV("hardblocksize for LV %s is %d\n",
1456 + kdevname(lv->lv_dev), max_hardsectsize);
1457 lvm_hardsectsizes[MINOR(lv->lv_dev)] = max_hardsectsize;
1460 @@ -1763,9 +1899,12 @@
1461 lv_block_exception_t *lvbe = lv->lv_block_exception;
1462 vg_t *vg_ptr = vg[VG_CHR(minor)];
1463 lv_t *lv_ptr = NULL;
1466 - if ((pep = lv->lv_current_pe) == NULL) return -EINVAL;
1467 - if (lv->lv_chunk_size > LVM_SNAPSHOT_MAX_CHUNK)
1468 + if (!(pep = lv->lv_current_pe))
1471 + if (_sectors_to_k(lv->lv_chunk_size) > LVM_SNAPSHOT_MAX_CHUNK)
1474 for (l = 0; l < vg_ptr->lv_cur; l++) {
1475 @@ -1797,8 +1936,8 @@
1477 lv_status_save = lv_ptr->lv_status;
1478 lv_ptr->lv_status &= ~LV_ACTIVE;
1479 - lv_ptr->lv_snapshot_org = \
1480 - lv_ptr->lv_snapshot_prev = \
1481 + lv_ptr->lv_snapshot_org = NULL;
1482 + lv_ptr->lv_snapshot_prev = NULL;
1483 lv_ptr->lv_snapshot_next = NULL;
1484 lv_ptr->lv_block_exception = NULL;
1485 lv_ptr->lv_iobuf = NULL;
1486 @@ -1806,7 +1945,8 @@
1487 lv_ptr->lv_snapshot_hash_table = NULL;
1488 lv_ptr->lv_snapshot_hash_table_size = 0;
1489 lv_ptr->lv_snapshot_hash_mask = 0;
1490 - init_MUTEX(&lv_ptr->lv_snapshot_sem);
1491 + init_rwsem(&lv_ptr->lv_lock);
1493 lv_ptr->lv_snapshot_use_rate = 0;
1495 vg_ptr->lv[l] = lv_ptr;
1496 @@ -1815,6 +1955,7 @@
1497 is not a snapshot logical volume */
1498 if (!(lv_ptr->lv_access & LV_SNAPSHOT)) {
1499 size = lv_ptr->lv_allocated_le * sizeof(pe_t);
1501 if ((lv_ptr->lv_current_pe = vmalloc(size)) == NULL) {
1503 "%s -- LV_CREATE: vmalloc error LV_CURRENT_PE of %d Byte "
1504 @@ -1826,6 +1967,8 @@
1507 if (copy_from_user(lv_ptr->lv_current_pe, pep, size)) {
1508 + P_IOCTL("ERROR: copying PE ptr %p (%d bytes)\n",
1509 + pep, sizeof(size));
1510 vfree(lv_ptr->lv_current_pe);
1512 vg_ptr->lv[l] = NULL;
1513 @@ -1847,6 +1990,15 @@
1514 vg_ptr->lv[LV_BLK(lv_ptr->lv_snapshot_minor)];
1515 if (lv_ptr->lv_snapshot_org != NULL) {
1516 size = lv_ptr->lv_remap_end * sizeof(lv_block_exception_t);
1519 + printk(KERN_WARNING
1520 + "%s -- zero length exception table requested\n",
1526 if ((lv_ptr->lv_block_exception = vmalloc(size)) == NULL) {
1528 "%s -- lvm_do_lv_create: vmalloc error LV_BLOCK_EXCEPTION "
1529 @@ -1934,6 +2086,7 @@
1530 LVM_CORRECT_READ_AHEAD(lv_ptr->lv_read_ahead);
1532 lv_ptr->lv_status = lv_status_save;
1533 + lv_ptr->vg = vg_ptr;
1535 __update_hardsectsize(lv_ptr);
1537 @@ -1948,7 +2101,7 @@
1538 fsync_dev_lockfs(org->lv_dev);
1541 - down(&org->lv_snapshot_sem);
1542 + down_write(&org->lv_lock);
1543 org->lv_access |= LV_SNAPSHOT_ORG;
1544 lv_ptr->lv_access &= ~LV_SNAPSHOT_ORG; /* this can only hide an userspace bug */
1546 @@ -1957,7 +2110,7 @@
1547 for (last = org; last->lv_snapshot_next; last = last->lv_snapshot_next);
1548 lv_ptr->lv_snapshot_prev = last;
1549 last->lv_snapshot_next = lv_ptr;
1550 - up(&org->lv_snapshot_sem);
1551 + up_write(&org->lv_lock);
1554 /* activate the logical volume */
1555 @@ -1973,14 +2126,12 @@
1557 #ifdef LVM_VFS_ENHANCEMENT
1558 /* VFS function call to unlock the filesystem */
1559 - if (lv_ptr->lv_access & LV_SNAPSHOT) {
1560 + if (lv_ptr->lv_access & LV_SNAPSHOT)
1561 unlockfs(lv_ptr->lv_snapshot_org->lv_dev);
1565 - lv_ptr->vg = vg_ptr;
1567 - lvm_fs_create_lv(vg_ptr, lv_ptr);
1568 + lvm_gendisk.part[MINOR(lv_ptr->lv_dev)].de =
1569 + lvm_fs_create_lv(vg_ptr, lv_ptr);
1571 } /* lvm_do_lv_create() */
1573 @@ -2026,7 +2177,7 @@
1574 * to the original lv before playing with it.
1576 lv_t * org = lv_ptr->lv_snapshot_org;
1577 - down(&org->lv_snapshot_sem);
1578 + down_write(&org->lv_lock);
1580 /* remove this snapshot logical volume from the chain */
1581 lv_ptr->lv_snapshot_prev->lv_snapshot_next = lv_ptr->lv_snapshot_next;
1582 @@ -2039,7 +2190,7 @@
1583 if (!org->lv_snapshot_next) {
1584 org->lv_access &= ~LV_SNAPSHOT_ORG;
1586 - up(&org->lv_snapshot_sem);
1587 + up_write(&org->lv_lock);
1589 lvm_snapshot_release(lv_ptr);
1591 @@ -2060,6 +2211,7 @@
1592 /* reset generic hd */
1593 lvm_gendisk.part[MINOR(lv_ptr->lv_dev)].start_sect = -1;
1594 lvm_gendisk.part[MINOR(lv_ptr->lv_dev)].nr_sects = 0;
1595 + lvm_gendisk.part[MINOR(lv_ptr->lv_dev)].de = 0;
1596 lvm_size[MINOR(lv_ptr->lv_dev)] = 0;
1598 /* reset VG/LV mapping */
1599 @@ -2191,8 +2343,7 @@
1600 new_stripe_size = new_lv->lv_allocated_le / new_lv->lv_stripes;
1601 end = min(old_stripe_size, new_stripe_size);
1603 - for (i = source = dest = 0;
1604 - i < new_lv->lv_stripes; i++) {
1605 + for (i = source = dest = 0; i < new_lv->lv_stripes; i++) {
1606 for (j = 0; j < end; j++) {
1607 new_lv->lv_current_pe[dest + j].reads +=
1608 old_lv->lv_current_pe[source + j].reads;
1609 @@ -2227,23 +2378,27 @@
1611 old_lv = vg_ptr->lv[l];
1613 - if (old_lv->lv_access & LV_SNAPSHOT)
1614 - r = __extend_reduce_snapshot(vg_ptr, old_lv, new_lv);
1616 + if (old_lv->lv_access & LV_SNAPSHOT) {
1617 + /* only perform this operation on active snapshots */
1618 + if (old_lv->lv_status & LV_ACTIVE)
1619 + r = __extend_reduce_snapshot(vg_ptr, old_lv, new_lv);
1624 r = __extend_reduce(vg_ptr, old_lv, new_lv);
1629 /* copy relevent fields */
1630 - down(&old_lv->lv_snapshot_sem);
1631 + down_write(&old_lv->lv_lock);
1633 if(new_lv->lv_access & LV_SNAPSHOT) {
1635 size = (new_lv->lv_remap_end > old_lv->lv_remap_end) ?
1636 old_lv->lv_remap_ptr : new_lv->lv_remap_end;
1637 size *= sizeof(lv_block_exception_t);
1638 - memcpy(new_lv->lv_block_exception,
1639 + memcpy(new_lv->lv_block_exception,
1640 old_lv->lv_block_exception, size);
1642 old_lv->lv_remap_end = new_lv->lv_remap_end;
1643 @@ -2258,7 +2413,7 @@
1644 for (e = 0; e < new_lv->lv_remap_ptr; e++)
1645 lvm_hash_link(new_lv->lv_block_exception + e,
1646 new_lv->lv_block_exception[e].rdev_org,
1647 - new_lv->lv_block_exception[e].rsector_org,
1648 + new_lv->lv_block_exception[e].rsector_org,
1652 @@ -2278,7 +2433,7 @@
1654 for(snap = old_lv->lv_snapshot_next; snap;
1655 snap = snap->lv_snapshot_next) {
1656 - down(&snap->lv_snapshot_sem);
1657 + down_write(&snap->lv_lock);
1658 snap->lv_current_pe = old_lv->lv_current_pe;
1659 snap->lv_allocated_le =
1660 old_lv->lv_allocated_le;
1661 @@ -2290,13 +2445,13 @@
1662 lvm_size[MINOR(snap->lv_dev)] =
1663 old_lv->lv_size >> 1;
1664 __update_hardsectsize(snap);
1665 - up(&snap->lv_snapshot_sem);
1666 + up_write(&snap->lv_lock);
1671 __update_hardsectsize(old_lv);
1672 - up(&old_lv->lv_snapshot_sem);
1673 + up_write(&old_lv->lv_lock);
1676 } /* lvm_do_lv_extend_reduce() */
1677 @@ -2325,8 +2480,10 @@
1678 strcmp(lv_ptr->lv_name,
1679 lv_status_byname_req.lv_name) == 0) {
1680 /* Save usermode pointers */
1681 - saved_ptr1 = lv_status_byname_req.lv->lv_current_pe;
1682 - saved_ptr2 = lv_status_byname_req.lv->lv_block_exception;
1683 + if (copy_from_user(&saved_ptr1, &lv_status_byname_req.lv->lv_current_pe, sizeof(void*)) != 0)
1685 + if (copy_from_user(&saved_ptr2, &lv_status_byname_req.lv->lv_block_exception, sizeof(void*)) != 0)
1687 if (copy_to_user(lv_status_byname_req.lv,
1690 @@ -2339,7 +2496,8 @@
1693 /* Restore usermode pointers */
1694 - lv_status_byname_req.lv->lv_current_pe = saved_ptr1;
1695 + if (copy_to_user(&lv_status_byname_req.lv->lv_current_pe, &saved_ptr1, sizeof(void*)) != 0)
1700 @@ -2368,8 +2526,11 @@
1703 /* Save usermode pointers */
1704 - saved_ptr1 = lv_status_byindex_req.lv->lv_current_pe;
1705 - saved_ptr2 = lv_status_byindex_req.lv->lv_block_exception;
1706 + if (copy_from_user(&saved_ptr1, &lv_status_byindex_req.lv->lv_current_pe, sizeof(void*)) != 0)
1708 + if (copy_from_user(&saved_ptr2, &lv_status_byindex_req.lv->lv_block_exception, sizeof(void*)) != 0)
1711 if (copy_to_user(lv_status_byindex_req.lv, lv_ptr, sizeof(lv_t)) != 0)
1713 if (saved_ptr1 != NULL) {
1714 @@ -2381,7 +2542,8 @@
1717 /* Restore usermode pointers */
1718 - lv_status_byindex_req.lv->lv_current_pe = saved_ptr1;
1719 + if (copy_to_user(&lv_status_byindex_req.lv->lv_current_pe, &saved_ptr1, sizeof(void *)) != 0)
1723 } /* lvm_do_lv_status_byindex() */
1724 @@ -2411,8 +2573,10 @@
1725 lv_ptr = vg_ptr->lv[l];
1727 /* Save usermode pointers */
1728 - saved_ptr1 = lv_status_bydev_req.lv->lv_current_pe;
1729 - saved_ptr2 = lv_status_bydev_req.lv->lv_block_exception;
1730 + if (copy_from_user(&saved_ptr1, &lv_status_bydev_req.lv->lv_current_pe, sizeof(void*)) != 0)
1732 + if (copy_from_user(&saved_ptr2, &lv_status_bydev_req.lv->lv_block_exception, sizeof(void*)) != 0)
1735 if (copy_to_user(lv_status_bydev_req.lv, lv_ptr, sizeof(lv_t)) != 0)
1737 @@ -2424,7 +2588,8 @@
1740 /* Restore usermode pointers */
1741 - lv_status_bydev_req.lv->lv_current_pe = saved_ptr1;
1742 + if (copy_to_user(&lv_status_bydev_req.lv->lv_current_pe, &saved_ptr1, sizeof(void *)) != 0)
1746 } /* lvm_do_lv_status_bydev() */
1747 @@ -2445,9 +2610,7 @@
1748 if (lv_ptr->lv_dev == lv->lv_dev)
1750 lvm_fs_remove_lv(vg_ptr, lv_ptr);
1751 - strncpy(lv_ptr->lv_name,
1754 + strncpy(lv_ptr->lv_name, lv_req->lv_name, NAME_LEN);
1755 lvm_fs_create_lv(vg_ptr, lv_ptr);
1758 @@ -2465,9 +2628,7 @@
1762 -#ifdef LVM_GET_INODE
1763 - struct inode *inode_sav;
1765 + struct block_device *bd;
1767 if (vg_ptr == NULL) return -ENXIO;
1768 if (copy_from_user(&pv_change_req, arg,
1769 @@ -2479,20 +2640,17 @@
1770 if (pv_ptr != NULL &&
1771 strcmp(pv_ptr->pv_name,
1772 pv_change_req.pv_name) == 0) {
1773 -#ifdef LVM_GET_INODE
1774 - inode_sav = pv_ptr->inode;
1778 if (copy_from_user(pv_ptr,
1784 /* We don't need the PE list
1785 in kernel space as with LVs pe_t list */
1787 -#ifdef LVM_GET_INODE
1788 - pv_ptr->inode = inode_sav;
1793 @@ -2535,8 +2693,7 @@
1795 pv_flush_req_t pv_flush_req;
1797 - if (copy_from_user(&pv_flush_req, arg,
1798 - sizeof(pv_flush_req)) != 0)
1799 + if (copy_from_user(&pv_flush_req, arg, sizeof(pv_flush_req)) != 0)
1802 fsync_dev(pv_flush_req.pv_dev);
1803 @@ -2571,5 +2728,82 @@
1804 } /* lvm_gen_init() */
1808 +/* Must have down_write(_pe_lock) when we enqueue buffers */
1809 +static void _queue_io(struct buffer_head *bh, int rw) {
1810 + if (bh->b_reqnext) BUG();
1811 + bh->b_reqnext = _pe_requests;
1812 + _pe_requests = bh;
1815 +/* Must have down_write(_pe_lock) when we dequeue buffers */
1816 +static struct buffer_head *_dequeue_io(void)
1818 + struct buffer_head *bh = _pe_requests;
1819 + _pe_requests = NULL;
1824 + * We do not need to hold _pe_lock to flush buffers. bh should be taken from
1825 + * _pe_requests under down_write(_pe_lock), and then _pe_requests can be set
1826 + * NULL and we drop _pe_lock. Any new buffers deferred at this time will be
1827 + * added to a new list, and the old buffers can have their I/O restarted
1830 + * If, for some reason, the same PE is locked again before all of these writes
1831 + * have finished, then these buffers will just be re-queued (i.e. no danger).
1833 +static void _flush_io(struct buffer_head *bh)
1836 + struct buffer_head *next = bh->b_reqnext;
1837 + bh->b_reqnext = NULL;
1838 + /* resubmit this buffer head */
1839 + generic_make_request(WRITE, bh);
1846 + * we must open the pv's before we use them
1848 +static int _open_pv(pv_t *pv) {
1850 + struct block_device *bd;
1852 + if (!(bd = bdget(kdev_t_to_nr(pv->pv_dev))))
1855 + err = blkdev_get(bd, FMODE_READ|FMODE_WRITE, 0, BDEV_FILE);
1865 +static void _close_pv(pv_t *pv) {
1866 + if(!pv || !pv->bd)
1869 + blkdev_put(pv->bd, BDEV_FILE);
1875 +static unsigned long _sectors_to_k(unsigned long sect)
1877 + if(SECTOR_SIZE > 1024) {
1878 + return sect * (SECTOR_SIZE / 1024);
1881 + return sect / (1024 / SECTOR_SIZE);
1884 module_init(lvm_init);
1885 module_exit(lvm_cleanup);
1886 --- linux/drivers/md/lvm-internal.h.orig Tue Oct 23 13:55:17 2001
1887 +++ linux/drivers/md/lvm-internal.h Tue Oct 23 13:55:51 2001
1891 * kernel/lvm_internal.h
1895 /* global variables, defined in lvm.c */
1896 extern char *lvm_version;
1897 -extern char *lvm_short_version;
1898 extern ushort lvm_iop_version;
1899 extern int loadtime;
1900 extern const char *const lvm_name;
1903 +extern uint vg_count;
1905 extern struct file_operations lvm_chr_fops;
1907 extern struct block_device_operations lvm_blk_dops;
1909 +#define lvm_sectsize(dev) get_hardsect_size(dev)
1911 +/* 2.4.8 had no global min/max macros, and 2.4.9's were flawed */
1917 void lvm_fs_create_vg(vg_t *vg_ptr);
1918 void lvm_fs_remove_vg(vg_t *vg_ptr);
1919 -void lvm_fs_create_lv(vg_t *vg_ptr, lv_t *lv);
1920 +devfs_handle_t lvm_fs_create_lv(vg_t *vg_ptr, lv_t *lv);
1921 void lvm_fs_remove_lv(vg_t *vg_ptr, lv_t *lv);
1922 void lvm_fs_create_pv(vg_t *vg_ptr, pv_t *pv);
1923 void lvm_fs_remove_pv(vg_t *vg_ptr, pv_t *pv);
1924 --- linux/drivers/md/lvm-snap.c.orig Tue Oct 23 13:55:17 2001
1925 +++ linux/drivers/md/lvm-snap.c Tue Oct 23 13:55:51 2001
1927 * o pv number is returned in new uint * arg
1928 * o -1 returned on error
1929 * lvm_snapshot_fill_COW_table has a return value too.
1930 + * 15/10/2001 - fix snapshot alignment problem [CM]
1931 + * - fix snapshot full oops (always check lv_block_exception) [CM]
1936 #include <linux/types.h>
1937 #include <linux/iobuf.h>
1938 #include <linux/lvm.h>
1939 +#include <linux/devfs_fs_kernel.h>
1942 #include "lvm-internal.h"
1943 @@ -140,10 +143,20 @@
1944 unsigned long mask = lv->lv_snapshot_hash_mask;
1945 int chunk_size = lv->lv_chunk_size;
1949 hash_table = &hash_table[hashfn(org_dev, org_start, mask, chunk_size)];
1950 list_add(&exception->hash, hash_table);
1954 + * Determine if we already have a snapshot chunk for this block.
1955 + * Return: 1 if the chunk already exists
1956 + * 0 if we need to COW this block and allocate a new chunk
1957 + * -1 if the snapshot was disabled because it ran out of space
1959 + * We need to be holding at least a read lock on lv->lv_lock.
1961 int lvm_snapshot_remap_block(kdev_t * org_dev, unsigned long * org_sector,
1962 unsigned long pe_start, lv_t * lv)
1965 int chunk_size = lv->lv_chunk_size;
1966 lv_block_exception_t * exception;
1968 + if (!lv->lv_block_exception)
1971 pe_off = pe_start % chunk_size;
1972 pe_adjustment = (*org_sector-pe_off) % chunk_size;
1973 __org_start = *org_sector - pe_adjustment;
1974 @@ -196,19 +212,25 @@
1978 -static inline void lvm_snapshot_prepare_blocks(unsigned long * blocks,
1979 +static inline int lvm_snapshot_prepare_blocks(unsigned long *blocks,
1980 unsigned long start,
1984 int i, sectors_per_block, nr_blocks;
1986 - sectors_per_block = blocksize >> 9;
1987 + sectors_per_block = blocksize / SECTOR_SIZE;
1989 + if(start & (sectors_per_block - 1))
1992 nr_blocks = nr_sectors / sectors_per_block;
1993 start /= sectors_per_block;
1995 for (i = 0; i < nr_blocks; i++)
1996 blocks[i] = start++;
2001 inline int lvm_get_blksize(kdev_t dev)
2005 * writes a COW exception table sector to disk (HM)
2007 + * We need to hold a write lock on lv_snap->lv_lock.
2009 int lvm_write_COW_table_block(vg_t * vg, lv_t *lv_snap)
2011 @@ -309,6 +333,10 @@
2012 * if there is no exception storage space free any longer --> release snapshot.
2014 * this routine gets called for each _first_ write to a physical chunk.
2016 + * We need to hold a write lock on lv_snap->lv_lock. It is assumed that
2017 + * lv->lv_block_exception is non-NULL (checked by lvm_snapshot_remap_block())
2018 + * when this function is called.
2020 int lvm_snapshot_COW(kdev_t org_phys_dev,
2021 unsigned long org_phys_sector,
2024 const char * reason;
2025 unsigned long org_start, snap_start, snap_phys_dev, virt_start, pe_off;
2026 + unsigned long phys_start;
2027 int idx = lv_snap->lv_remap_ptr, chunk_size = lv_snap->lv_chunk_size;
2028 struct kiobuf * iobuf;
2029 unsigned long blocks[KIO_MAX_SECTORS];
2032 iobuf = lv_snap->lv_iobuf;
2034 - blksize_org = lvm_get_blksize(org_phys_dev);
2035 - blksize_snap = lvm_get_blksize(snap_phys_dev);
2036 + blksize_org = lvm_sectsize(org_phys_dev);
2037 + blksize_snap = lvm_sectsize(snap_phys_dev);
2038 max_blksize = max(blksize_org, blksize_snap);
2039 min_blksize = min(blksize_org, blksize_snap);
2040 max_sectors = KIO_MAX_SECTORS * (min_blksize>>9);
2042 if (chunk_size % (max_blksize>>9))
2045 + /* Don't change org_start, we need it to fill in the exception table */
2046 + phys_start = org_start;
2050 nr_sectors = min(chunk_size, max_sectors);
2051 @@ -368,17 +400,24 @@
2053 iobuf->length = nr_sectors << 9;
2055 - lvm_snapshot_prepare_blocks(blocks, org_start,
2056 - nr_sectors, blksize_org);
2057 + if (!lvm_snapshot_prepare_blocks(blocks, phys_start,
2058 + nr_sectors, blksize_org))
2059 + goto fail_prepare;
2061 if (__brw_kiovec(READ, 1, &iobuf, org_phys_dev, blocks,
2062 blksize_org, lv_snap) != (nr_sectors<<9))
2065 - lvm_snapshot_prepare_blocks(blocks, snap_start,
2066 - nr_sectors, blksize_snap);
2067 + if (!lvm_snapshot_prepare_blocks(blocks, snap_start,
2068 + nr_sectors, blksize_snap))
2069 + goto fail_prepare;
2071 if (__brw_kiovec(WRITE, 1, &iobuf, snap_phys_dev, blocks,
2072 blksize_snap, lv_snap) != (nr_sectors<<9))
2073 goto fail_raw_write;
2075 + phys_start += nr_sectors;
2076 + snap_start += nr_sectors;
2079 #ifdef DEBUG_SNAPSHOT
2080 @@ -418,6 +457,11 @@
2082 reason = "blocksize error";
2086 + reason = "couldn't prepare kiovec blocks "
2087 + "(start probably isn't block aligned)";
2091 int lvm_snapshot_alloc_iobuf_pages(struct kiobuf * iobuf, int sectors)
2093 snap_phys_dev = lv_snap->lv_block_exception[idx].rdev_new;
2094 snap_pe_start = lv_snap->lv_block_exception[idx - (idx % COW_entries_per_pe)].rsector_new - lv_snap->lv_chunk_size;
2096 - blksize_snap = lvm_get_blksize(snap_phys_dev);
2097 + blksize_snap = lvm_sectsize(snap_phys_dev);
2099 COW_entries_per_block = blksize_snap / sizeof(lv_COW_table_disk_t);
2100 idx_COW_table = idx % COW_entries_per_pe % COW_entries_per_block;
2103 snap_phys_dev = lv_snap->lv_block_exception[idx].rdev_new;
2104 snap_pe_start = lv_snap->lv_block_exception[idx - (idx % COW_entries_per_pe)].rsector_new - lv_snap->lv_chunk_size;
2105 - blksize_snap = lvm_get_blksize(snap_phys_dev);
2106 + blksize_snap = lvm_sectsize(snap_phys_dev);
2107 blocks[0] = snap_pe_start >> (blksize_snap >> 10);
2110 --- linux/drivers/md/lvm-fs.c.orig Tue Oct 23 13:55:17 2001
2111 +++ linux/drivers/md/lvm-fs.c Tue Oct 23 13:55:51 2001
2114 * Copyright (C) 2001 Sistina Software
2116 - * January,February 2001
2117 + * January-April 2001
2119 * LVM driver is free software; you can redistribute it and/or modify
2120 * it under the terms of the GNU General Public License as published by
2124 * 11/01/2001 - First version (Joe Thornber)
2125 + * 21/03/2001 - added display of stripes and stripe size (HM)
2126 + * 04/10/2001 - corrected devfs_register() call in lvm_init_fs()
2127 + * 11/04/2001 - don't devfs_register("lvm") as user-space always does it
2128 + * 10/05/2001 - show more of PV name in /proc/lvm/global
2134 #include <linux/devfs_fs_kernel.h>
2135 #include <linux/proc_fs.h>
2136 +#include <linux/init.h>
2137 #include <linux/lvm.h>
2139 #include "lvm-internal.h"
2142 static void _show_uuid(const char *src, char *b, char *e);
2145 static devfs_handle_t lvm_devfs_handle;
2147 static devfs_handle_t vg_devfs_handle[MAX_VG];
2148 static devfs_handle_t ch_devfs_handle[MAX_VG];
2149 static devfs_handle_t lv_devfs_handle[MAX_LV];
2151 /* inline functions */
2153 /* public interface */
2154 -void lvm_init_fs() {
2155 +void __init lvm_init_fs() {
2156 struct proc_dir_entry *pde;
2158 +/* User-space has already registered this */
2160 lvm_devfs_handle = devfs_register(
2161 - 0 , "lvm", 0, 0, LVM_CHAR_MAJOR,
2162 + 0 , "lvm", 0, LVM_CHAR_MAJOR, 0,
2163 S_IFCHR | S_IRUSR | S_IWUSR | S_IRGRP,
2164 &lvm_chr_fops, NULL);
2167 lvm_proc_dir = create_proc_entry(LVM_DIR, S_IFDIR, &proc_root);
2169 lvm_proc_vg_subdir = create_proc_entry(LVM_VG_SUBDIR, S_IFDIR,
2175 devfs_unregister (lvm_devfs_handle);
2178 remove_proc_entry(LVM_GLOBAL, lvm_proc_dir);
2179 remove_proc_entry(LVM_VG_SUBDIR, lvm_proc_dir);
2180 remove_proc_entry(LVM_DIR, &proc_root);
2181 @@ -137,8 +147,14 @@
2183 if(vg_ptr->vg_dir_pde) {
2184 remove_proc_entry(LVM_LV_SUBDIR, vg_ptr->vg_dir_pde);
2185 + vg_ptr->lv_subdir_pde = NULL;
2187 remove_proc_entry(LVM_PV_SUBDIR, vg_ptr->vg_dir_pde);
2188 + vg_ptr->pv_subdir_pde = NULL;
2190 remove_proc_entry("group", vg_ptr->vg_dir_pde);
2191 + vg_ptr->vg_dir_pde = NULL;
2193 remove_proc_entry(vg_ptr->vg_name, lvm_proc_vg_subdir);
2200 -void lvm_fs_create_lv(vg_t *vg_ptr, lv_t *lv) {
2201 +devfs_handle_t lvm_fs_create_lv(vg_t *vg_ptr, lv_t *lv) {
2202 struct proc_dir_entry *pde;
2203 const char *name = _basename(lv->lv_name);
2206 pde->read_proc = _proc_read_lv;
2209 + return lv_devfs_handle[MINOR(lv->lv_dev)];
2212 void lvm_fs_remove_lv(vg_t *vg_ptr, lv_t *lv) {
2213 @@ -256,6 +273,12 @@
2214 sz += sprintf(page + sz, "number: %u\n", lv->lv_number);
2215 sz += sprintf(page + sz, "open: %u\n", lv->lv_open);
2216 sz += sprintf(page + sz, "allocation: %u\n", lv->lv_allocation);
2217 + if(lv->lv_stripes > 1) {
2218 + sz += sprintf(page + sz, "stripes: %u\n",
2220 + sz += sprintf(page + sz, "stripesize: %u\n",
2221 + lv->lv_stripesize);
2223 sz += sprintf(page + sz, "device: %02u:%02u\n",
2224 MAJOR(lv->lv_dev), MINOR(lv->lv_dev));
2228 #ifdef DEBUG_LVM_PROC_GET_INFO
2230 - "%s - lvm_proc_get_global_info CALLED pos: %lu count: %d whence: %d\n",
2231 - lvm_name, pos, count, whence);
2232 + "%s - lvm_proc_get_global_info CALLED pos: %lu count: %d\n",
2233 + lvm_name, pos, count);
2236 if(pos != 0 && buf != NULL)
2240 "Total: %d VG%s %d PV%s %d LV%s ",
2241 - lvm_short_version,
2243 vg_counter, vg_counter == 1 ? "" : "s",
2244 pv_counter, pv_counter == 1 ? "" : "s",
2245 lv_counter, lv_counter == 1 ? "" : "s");
2247 allocation_flag = 'A';
2248 if (!(pv->pv_allocatable & PV_ALLOCATABLE))
2249 allocation_flag = 'N';
2250 - pv_name = strrchr(pv->pv_name+1,'/');
2251 + pv_name = strchr(pv->pv_name+1,'/');
2252 if ( pv_name == 0) pv_name = pv->pv_name;
2255 diff -ruN -X /home/joe/packages/dontdiff linux_2.4.1/drivers/md/lvm-snap.h linux/drivers/md/lvm-snap.h
2256 --- linux_2.4.1/drivers/md/lvm-snap.h Fri Feb 16 14:51:26 2001
2257 +++ linux/drivers/md/lvm-snap.h Thu Jan 1 01:00:00 1970
2260 - * kernel/lvm-snap.h
2262 - * Copyright (C) 2001 Sistina Software
2265 - * LVM driver is free software; you can redistribute it and/or modify
2266 - * it under the terms of the GNU General Public License as published by
2267 - * the Free Software Foundation; either version 2, or (at your option)
2268 - * any later version.
2270 - * LVM driver is distributed in the hope that it will be useful,
2271 - * but WITHOUT ANY WARRANTY; without even the implied warranty of
2272 - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
2273 - * GNU General Public License for more details.
2275 - * You should have received a copy of the GNU General Public License
2276 - * along with GNU CC; see the file COPYING. If not, write to
2277 - * the Free Software Foundation, 59 Temple Place - Suite 330,
2278 - * Boston, MA 02111-1307, USA.
2285 - * 05/01/2001:Joe Thornber - Factored this file out of lvm.c
2292 -/* external snapshot calls */
2293 -extern inline int lvm_get_blksize(kdev_t);
2294 -extern int lvm_snapshot_alloc(lv_t *);
2295 -extern void lvm_snapshot_fill_COW_page(vg_t *, lv_t *);
2296 -extern int lvm_snapshot_COW(kdev_t, ulong, ulong, ulong, lv_t *);
2297 -extern int lvm_snapshot_remap_block(kdev_t *, ulong *, ulong, lv_t *);
2298 -extern void lvm_snapshot_release(lv_t *);
2299 -extern int lvm_write_COW_table_block(vg_t *, lv_t *);
2300 -extern inline void lvm_hash_link(lv_block_exception_t *,
2301 - kdev_t, ulong, lv_t *);
2302 -extern int lvm_snapshot_alloc_hash_table(lv_t *);
2303 -extern void lvm_drop_snapshot(lv_t *, const char *);