git.pld-linux.org Git - packages/kernel.git/blob - linux-2.4.14-lvm-1.0.1rc4cvs.patch
- added CONFIG_PDC202XXX_FORCE, for new ide drivers
[packages/kernel.git] / linux-2.4.14-lvm-1.0.1rc4cvs.patch
1 --- linux/include/linux/lvm.h.orig      Mon Sep 17 22:25:26 2001
2 +++ linux/include/linux/lvm.h   Tue Nov 13 09:46:51 2001
3 @@ -3,28 +3,28 @@
4   * kernel/lvm.h
5   * tools/lib/lvm.h
6   *
7 - * Copyright (C) 1997 - 2000  Heinz Mauelshagen, Sistina Software
8 + * Copyright (C) 1997 - 2001  Heinz Mauelshagen, Sistina Software
9   *
10   * February-November 1997
11   * May-July 1998
12   * January-March,July,September,October,Dezember 1999
13   * January,February,July,November 2000
14 - * January 2001
15 + * January-March,June,July 2001
16   *
17   * lvm is free software; you can redistribute it and/or modify
18   * it under the terms of the GNU General Public License as published by
19   * the Free Software Foundation; either version 2, or (at your option)
20   * any later version.
21 - * 
22 + *
23   * lvm is distributed in the hope that it will be useful,
24   * but WITHOUT ANY WARRANTY; without even the implied warranty of
25   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
26   * GNU General Public License for more details.
27 - * 
28 + *
29   * You should have received a copy of the GNU General Public License
30   * along with GNU CC; see the file COPYING.  If not, write to
31   * the Free Software Foundation, 59 Temple Place - Suite 330,
32 - * Boston, MA 02111-1307, USA. 
33 + * Boston, MA 02111-1307, USA.
34   *
35   */
36  
37 @@ -52,7 +52,7 @@
38   *    08/12/1999 - changed LVM_LV_SIZE_MAX macro to reflect current 1TB limit
39   *    01/01/2000 - extended lv_v2 core structure by wait_queue member
40   *    12/02/2000 - integrated Andrea Arcagnelli's snapshot work
41 - *    18/02/2000 - seperated user and kernel space parts by 
42 + *    18/02/2000 - separated user and kernel space parts by
43   *                 #ifdef them with __KERNEL__
44   *    08/03/2000 - implemented cluster/shared bits for vg_access
45   *    26/06/2000 - implemented snapshot persistency and resizing support
46 @@ -60,6 +60,17 @@
47   *    12/11/2000 - removed unneeded timestamp definitions
48   *    24/12/2000 - removed LVM_TO_{CORE,DISK}*, use cpu_{from, to}_le*
49   *                 instead - Christoph Hellwig
50 + *    22/01/2001 - Change ulong to uint32_t
51 + *    14/02/2001 - changed LVM_SNAPSHOT_MIN_CHUNK to 1 page
52 + *    20/02/2001 - incremented IOP version to 11 because of incompatible
53 + *                 change in VG activation (in order to support devfs better)
54 + *    01/03/2001 - Revert to IOP10 and add VG_CREATE_OLD call for compatibility
55 + *    08/03/2001 - new lv_t (in core) version number 5: changed page member
56 + *                 to (struct kiobuf *) to use for COW exception table io
57 + *    26/03/2001 - changed lv_v4 to lv_v5 in structure definition (HM)
58 + *    21/06/2001 - changed BLOCK_SIZE back to 1024 for non S/390
59 + *    22/06/2001 - added Andreas Dilger's PE on 4k boundary alignment enhancements
60 + *    19/07/2001 - added rwsem compatibility macros for 2.2 kernels
61   *
62   */
63  
64 @@ -67,9 +78,11 @@
65  #ifndef _LVM_H_INCLUDE
66  #define _LVM_H_INCLUDE
67  
68 -#define        _LVM_KERNEL_H_VERSION   "LVM 0.9.1_beta2 (18/01/2001)"
69 +#define LVM_RELEASE_NAME "1.0.1-rc4"
70 +#define LVM_RELEASE_DATE "03/10/2001"
71 +
72 +#define        _LVM_KERNEL_H_VERSION   "LVM "LVM_RELEASE_NAME" ("LVM_RELEASE_DATE")"
73  
74 -#include <linux/config.h>
75  #include <linux/version.h>
76  
77  /*
78 @@ -90,13 +103,18 @@
79     #define DEBUG_READ
80     #define DEBUG_GENDISK
81     #define DEBUG_VG_CREATE
82 -   #define DEBUG_LVM_BLK_OPEN
83 +   #define DEBUG_DEVICE
84     #define DEBUG_KFREE
85   */
86 -#endif                         /* #ifdef __KERNEL__ */
87  
88  #include <linux/kdev_t.h>
89  #include <linux/list.h>
90 +#else
91 +#define __KERNEL__
92 +#include <linux/kdev_t.h>
93 +#include <linux/list.h>
94 +#undef __KERNEL__
95 +#endif                         /* #ifndef __KERNEL__ */
96  
97  #include <asm/types.h>
98  #include <linux/major.h>
99 @@ -107,6 +125,7 @@
100  #include <asm/semaphore.h>
101  #endif                         /* #ifdef __KERNEL__ */
102  
103 +
104  #include <asm/page.h>
105  
106  #if !defined ( LVM_BLK_MAJOR) || !defined ( LVM_CHAR_MAJOR)
107 @@ -117,7 +136,7 @@
108  #undef BLOCK_SIZE
109  #endif
110  
111 -#ifdef CONFIG_ARCH_S390 
112 +#ifdef CONFIG_ARCH_S390
113  #define BLOCK_SIZE     4096
114  #else
115  #define BLOCK_SIZE     1024
116 @@ -127,24 +146,11 @@
117  #define SECTOR_SIZE    512
118  #endif
119  
120 -#define LVM_STRUCT_VERSION     1       /* structure version */
121 +/* structure version */
122 +#define LVM_STRUCT_VERSION 1
123  
124  #define        LVM_DIR_PREFIX  "/dev/"
125  
126 -/* set the default structure version */
127 -#if ( LVM_STRUCT_VERSION == 1)
128 -#define pv_t pv_v2_t
129 -#define lv_t lv_v4_t
130 -#define vg_t vg_v3_t
131 -#define pv_disk_t pv_disk_v2_t
132 -#define lv_disk_t lv_disk_v3_t
133 -#define vg_disk_t vg_disk_v2_t
134 -#define lv_block_exception_t lv_block_exception_v1_t
135 -#define lv_COW_table_disk_t lv_COW_table_disk_v1_t
136 -#endif
137 -
138 -
139 -
140  /*
141   * i/o protocol version
142   *
143 @@ -218,40 +224,11 @@
144   */
145  
146  /* DONT TOUCH THESE !!! */
147 -/* base of PV structure in disk partition */
148 -#define        LVM_PV_DISK_BASE        0L
149  
150 -/* size reserved for PV structure on disk */
151 -#define        LVM_PV_DISK_SIZE        1024L
152  
153 -/* base of VG structure in disk partition */
154 -#define        LVM_VG_DISK_BASE        LVM_PV_DISK_SIZE
155  
156 -/* size reserved for VG structure */
157 -#define        LVM_VG_DISK_SIZE        ( 9 * 512L)
158 -
159 -/* size reserved for timekeeping */
160 -#define        LVM_TIMESTAMP_DISK_BASE ( LVM_VG_DISK_BASE +  LVM_VG_DISK_SIZE)
161 -#define        LVM_TIMESTAMP_DISK_SIZE 512L    /* reserved for timekeeping */
162 -
163 -/* name list of physical volumes on disk */
164 -#define        LVM_PV_UUIDLIST_DISK_BASE ( LVM_TIMESTAMP_DISK_BASE + \
165 -                                    LVM_TIMESTAMP_DISK_SIZE)
166 -
167 -/* now for the dynamically calculated parts of the VGDA */
168 -#define        LVM_LV_DISK_OFFSET(a, b) ( (a)->lv_on_disk.base + \
169 -                                   sizeof ( lv_disk_t) * b)
170 -#define        LVM_DISK_SIZE(pv)        ( (pv)->pe_on_disk.base + \
171 -                                   (pv)->pe_on_disk.size)
172 -#define        LVM_PE_DISK_OFFSET(pe, pv)      ( pe * pv->pe_size + \
173 -                                         ( LVM_DISK_SIZE ( pv) / SECTOR_SIZE))
174 -#define        LVM_PE_ON_DISK_BASE(pv) \
175 -   { int rest; \
176 -     pv->pe_on_disk.base = pv->lv_on_disk.base + pv->lv_on_disk.size; \
177 -     if ( ( rest = pv->pe_on_disk.base % SECTOR_SIZE) != 0) \
178 -        pv->pe_on_disk.base += ( SECTOR_SIZE - rest); \
179 -   }
180 -/* END default disk spaces and offsets for PVs */
181 +
182 +
183  
184  
185  /*
186 @@ -284,7 +261,7 @@
187  #define        LVM_MAX_SIZE            ( 1024LU * 1024 / SECTOR_SIZE * 1024 * 1024)    /* 1TB[sectors] */
188  #define        LVM_MAX_MIRRORS         2       /* future use */
189  #define        LVM_MIN_READ_AHEAD      0       /* minimum read ahead sectors */
190 -#define        LVM_DEFAULT_READ_AHEAD  1024    /* default read ahead sectors for 512k scsi segments */
191 +#define        LVM_DEFAULT_READ_AHEAD  1024    /* sectors for 512k scsi segments */
192  #define        LVM_MAX_READ_AHEAD      10000   /* maximum read ahead sectors */
193  #define        LVM_MAX_LV_IO_TIMEOUT   60      /* seconds I/O timeout (future use) */
194  #define        LVM_PARTITION           0xfe    /* LVM partition id */
195 @@ -296,28 +273,15 @@
196  #define        LVM_SNAPSHOT_MIN_CHUNK  (PAGE_SIZE/1024)        /* 4 or 8 KB */
197  
198  #define        UNDEF   -1
199 -#define FALSE  0
200 -#define TRUE   1
201 -
202 -
203 -#define LVM_GET_COW_TABLE_CHUNKS_PER_PE(vg, lv) ( \
204 -       vg->pe_size / lv->lv_chunk_size)
205 -
206 -#define LVM_GET_COW_TABLE_ENTRIES_PER_PE(vg, lv) ( \
207 -{ \
208 -       int COW_table_entries_per_PE; \
209 -       int COW_table_chunks_per_PE; \
210 -\
211 -       COW_table_entries_per_PE = LVM_GET_COW_TABLE_CHUNKS_PER_PE(vg, lv); \
212 -       COW_table_chunks_per_PE = ( COW_table_entries_per_PE * sizeof(lv_COW_table_disk_t) / SECTOR_SIZE + lv->lv_chunk_size - 1) / lv->lv_chunk_size; \
213 -       COW_table_entries_per_PE - COW_table_chunks_per_PE;})
214 -
215  
216  /*
217   * ioctls
218 + * FIXME: the last parameter to _IO{W,R,WR} is a data type.  The macro will
219 + *       expand this using sizeof(), so putting "1" there is misleading
220 + *       because sizeof(1) = sizeof(int) = sizeof(2) = 4 on a 32-bit machine!
221   */
222  /* volume group */
223 -#define        VG_CREATE               _IOW ( 0xfe, 0x00, 1)
224 +#define        VG_CREATE_OLD           _IOW ( 0xfe, 0x00, 1)
225  #define        VG_REMOVE               _IOW ( 0xfe, 0x01, 1)
226  
227  #define        VG_EXTEND               _IOW ( 0xfe, 0x03, 1)
228 @@ -330,6 +294,8 @@
229  #define        VG_SET_EXTENDABLE       _IOW ( 0xfe, 0x08, 1)
230  #define        VG_RENAME               _IOW ( 0xfe, 0x09, 1)
231  
232 +/* Since 0.9beta6 */
233 +#define        VG_CREATE               _IOW ( 0xfe, 0x0a, 1)
234  
235  /* logical volume */
236  #define        LV_CREATE               _IOW ( 0xfe, 0x20, 1)
237 @@ -376,7 +342,12 @@
238  #endif
239  
240  /* lock the logical volume manager */
241 +#if LVM_DRIVER_IOP_VERSION > 11
242 +#define        LVM_LOCK_LVM            _IO ( 0xfe, 0x9A)
243 +#else
244 +/* This is actually the same as _IO ( 0xff, 0x00), oops.  Remove for IOP 12+ */
245  #define        LVM_LOCK_LVM            _IO ( 0xfe, 0x100)
246 +#endif
247  /* END ioctls */
248  
249  
250 @@ -412,6 +383,9 @@
251  #define        PV_ALLOCATABLE       0x02       /* pv_allocatable */
252  
253  
254 +/* misc */
255 +#define LVM_SNAPSHOT_DROPPED_SECTOR 1
256 +
257  /*
258   * Structure definitions core/disk follow
259   *
260 @@ -424,21 +398,21 @@
261  #define        UUID_LEN                32      /* don't change!!! */
262  
263  /* copy on write tables in disk format */
264 -typedef struct {
265 +typedef struct lv_COW_table_disk_v1 {
266         uint64_t pv_org_number;
267         uint64_t pv_org_rsector;
268         uint64_t pv_snap_number;
269         uint64_t pv_snap_rsector;
270 -} lv_COW_table_disk_v1_t;
271 +} lv_COW_table_disk_t;
272  
273  /* remap physical sector/rdev pairs including hash */
274 -typedef struct {
275 +typedef struct lv_block_exception_v1 {
276         struct list_head hash;
277 -       ulong rsector_org;
278 -       kdev_t rdev_org;
279 -       ulong rsector_new;
280 -       kdev_t rdev_new;
281 -} lv_block_exception_v1_t;
282 +       uint32_t rsector_org;
283 +       kdev_t   rdev_org;
284 +       uint32_t rsector_new;
285 +       kdev_t   rdev_new;
286 +} lv_block_exception_t;
287  
288  /* disk stored pe information */
289  typedef struct {
290 @@ -454,37 +428,11 @@
291  
292  
293  /*
294 - * Structure Physical Volume (PV) Version 1
295 + * physical volume structures
296   */
297  
298  /* core */
299 -typedef struct {
300 -       char id[2];             /* Identifier */
301 -       unsigned short version; /* HM lvm version */
302 -       lvm_disk_data_t pv_on_disk;
303 -       lvm_disk_data_t vg_on_disk;
304 -       lvm_disk_data_t pv_namelist_on_disk;
305 -       lvm_disk_data_t lv_on_disk;
306 -       lvm_disk_data_t pe_on_disk;
307 -       char pv_name[NAME_LEN];
308 -       char vg_name[NAME_LEN];
309 -       char system_id[NAME_LEN];       /* for vgexport/vgimport */
310 -       kdev_t pv_dev;
311 -       uint pv_number;
312 -       uint pv_status;
313 -       uint pv_allocatable;
314 -       uint pv_size;           /* HM */
315 -       uint lv_cur;
316 -       uint pe_size;
317 -       uint pe_total;
318 -       uint pe_allocated;
319 -       uint pe_stale;          /* for future use */
320 -       pe_disk_t *pe;          /* HM */
321 -       struct inode *inode;    /* HM */
322 -} pv_v1_t;
323 -
324 -/* core */
325 -typedef struct {
326 +typedef struct pv_v2 {
327         char id[2];             /* Identifier */
328         unsigned short version; /* HM lvm version */
329         lvm_disk_data_t pv_on_disk;
330 @@ -506,36 +454,17 @@
331         uint pe_allocated;
332         uint pe_stale;          /* for future use */
333         pe_disk_t *pe;          /* HM */
334 -       struct inode *inode;    /* HM */
335 +       struct block_device *bd;
336         char pv_uuid[UUID_LEN+1];
337 -} pv_v2_t;
338  
339 +#ifndef __KERNEL__
340 +       uint32_t pe_start;      /* in sectors */
341 +#endif
342 +} pv_t;
343  
344 -/* disk */
345 -typedef struct {
346 -       uint8_t id[2];          /* Identifier */
347 -       uint16_t version;               /* HM lvm version */
348 -       lvm_disk_data_t pv_on_disk;
349 -       lvm_disk_data_t vg_on_disk;
350 -       lvm_disk_data_t pv_namelist_on_disk;
351 -       lvm_disk_data_t lv_on_disk;
352 -       lvm_disk_data_t pe_on_disk;
353 -       uint8_t pv_name[NAME_LEN];
354 -       uint8_t vg_name[NAME_LEN];
355 -       uint8_t system_id[NAME_LEN];    /* for vgexport/vgimport */
356 -       uint32_t pv_major;
357 -       uint32_t pv_number;
358 -       uint32_t pv_status;
359 -       uint32_t pv_allocatable;
360 -       uint32_t pv_size;               /* HM */
361 -       uint32_t lv_cur;
362 -       uint32_t pe_size;
363 -       uint32_t pe_total;
364 -       uint32_t pe_allocated;
365 -} pv_disk_v1_t;
366  
367  /* disk */
368 -typedef struct {
369 +typedef struct pv_disk_v2 {
370         uint8_t id[2];          /* Identifier */
371         uint16_t version;               /* HM lvm version */
372         lvm_disk_data_t pv_on_disk;
373 @@ -555,7 +484,11 @@
374         uint32_t pe_size;
375         uint32_t pe_total;
376         uint32_t pe_allocated;
377 -} pv_disk_v2_t;
378 +       
379 +       /* new in struct version 2 */
380 +       uint32_t pe_start;              /* in sectors */
381 +
382 +} pv_disk_t;
383  
384  
385  /*
386 @@ -565,17 +498,17 @@
387  /* core PE information */
388  typedef struct {
389         kdev_t dev;
390 -       ulong pe;               /* to be changed if > 2TB */
391 -       ulong reads;
392 -       ulong writes;
393 +       uint32_t pe;            /* to be changed if > 2TB */
394 +       uint32_t reads;
395 +       uint32_t writes;
396  } pe_t;
397  
398  typedef struct {
399         char lv_name[NAME_LEN];
400         kdev_t old_dev;
401         kdev_t new_dev;
402 -       ulong old_pe;
403 -       ulong new_pe;
404 +       uint32_t old_pe;
405 +       uint32_t new_pe;
406  } le_remap_req_t;
407  
408  typedef struct lv_bmap {
409 @@ -588,7 +521,7 @@
410   */
411  
412  /* core */
413 -typedef struct lv_v4 {
414 +typedef struct lv_v5 {
415         char lv_name[NAME_LEN];
416         char vg_name[NAME_LEN];
417         uint lv_access;
418 @@ -611,9 +544,9 @@
419         uint lv_read_ahead;
420  
421         /* delta to version 1 starts here */
422 -       struct lv_v4 *lv_snapshot_org;
423 -       struct lv_v4 *lv_snapshot_prev;
424 -       struct lv_v4 *lv_snapshot_next;
425 +       struct lv_v5 *lv_snapshot_org;
426 +       struct lv_v5 *lv_snapshot_prev;
427 +       struct lv_v5 *lv_snapshot_next;
428         lv_block_exception_t *lv_block_exception;
429         uint lv_remap_ptr;
430         uint lv_remap_end;
431 @@ -621,23 +554,23 @@
432         uint lv_snapshot_minor;
433  #ifdef __KERNEL__
434         struct kiobuf *lv_iobuf;
435 -       struct semaphore lv_snapshot_sem;
436 +       struct kiobuf *lv_COW_table_iobuf;
437 +       struct rw_semaphore lv_lock;
438         struct list_head *lv_snapshot_hash_table;
439 -       ulong lv_snapshot_hash_table_size;
440 -       ulong lv_snapshot_hash_mask;
441 -       struct page *lv_COW_table_page;
442 +       uint32_t lv_snapshot_hash_table_size;
443 +       uint32_t lv_snapshot_hash_mask;
444         wait_queue_head_t lv_snapshot_wait;
445         int     lv_snapshot_use_rate;
446 -       void    *vg;
447 +       struct vg_v3    *vg;
448  
449         uint lv_allocated_snapshot_le;
450  #else
451         char dummy[200];
452  #endif
453 -} lv_v4_t;
454 +} lv_t;
455  
456  /* disk */
457 -typedef struct {
458 +typedef struct lv_disk_v3 {
459         uint8_t lv_name[NAME_LEN];
460         uint8_t vg_name[NAME_LEN];
461         uint32_t lv_access;
462 @@ -659,36 +592,14 @@
463         uint32_t lv_allocation;
464         uint32_t lv_io_timeout; /* for future use */
465         uint32_t lv_read_ahead; /* HM */
466 -} lv_disk_v3_t;
467 +} lv_disk_t;
468  
469  /*
470   * Structure Volume Group (VG) Version 1
471   */
472  
473  /* core */
474 -typedef struct {
475 -       char vg_name[NAME_LEN]; /* volume group name */
476 -       uint vg_number;         /* volume group number */
477 -       uint vg_access;         /* read/write */
478 -       uint vg_status;         /* active or not */
479 -       uint lv_max;            /* maximum logical volumes */
480 -       uint lv_cur;            /* current logical volumes */
481 -       uint lv_open;           /* open    logical volumes */
482 -       uint pv_max;            /* maximum physical volumes */
483 -       uint pv_cur;            /* current physical volumes FU */
484 -       uint pv_act;            /* active physical volumes */
485 -       uint dummy;             /* was obsolete max_pe_per_pv */
486 -       uint vgda;              /* volume group descriptor arrays FU */
487 -       uint pe_size;           /* physical extent size in sectors */
488 -       uint pe_total;          /* total of physical extents */
489 -       uint pe_allocated;      /* allocated physical extents */
490 -       uint pvg_total;         /* physical volume groups FU */
491 -       struct proc_dir_entry *proc;
492 -       pv_t *pv[ABS_MAX_PV + 1];       /* physical volume struct pointers */
493 -       lv_t *lv[ABS_MAX_LV + 1];       /* logical  volume struct pointers */
494 -} vg_v1_t;
495 -
496 -typedef struct {
497 +typedef struct vg_v3 {
498         char vg_name[NAME_LEN]; /* volume group name */
499         uint vg_number;         /* volume group number */
500         uint vg_access;         /* read/write */
501 @@ -716,30 +627,11 @@
502  #else
503         char dummy1[200];
504  #endif
505 -} vg_v3_t;
506 +} vg_t;
507  
508  
509  /* disk */
510 -typedef struct {
511 -       uint8_t vg_name[NAME_LEN];      /* volume group name */
512 -       uint32_t vg_number;     /* volume group number */
513 -       uint32_t vg_access;     /* read/write */
514 -       uint32_t vg_status;     /* active or not */
515 -       uint32_t lv_max;                /* maximum logical volumes */
516 -       uint32_t lv_cur;                /* current logical volumes */
517 -       uint32_t lv_open;               /* open    logical volumes */
518 -       uint32_t pv_max;                /* maximum physical volumes */
519 -       uint32_t pv_cur;                /* current physical volumes FU */
520 -       uint32_t pv_act;                /* active physical volumes */
521 -       uint32_t dummy;
522 -       uint32_t vgda;          /* volume group descriptor arrays FU */
523 -       uint32_t pe_size;               /* physical extent size in sectors */
524 -       uint32_t pe_total;              /* total of physical extents */
525 -       uint32_t pe_allocated;  /* allocated physical extents */
526 -       uint32_t pvg_total;     /* physical volume groups FU */
527 -} vg_disk_v1_t;
528 -
529 -typedef struct {
530 +typedef struct vg_disk_v2 {
531         uint8_t vg_uuid[UUID_LEN];      /* volume group UUID */
532         uint8_t vg_name_dummy[NAME_LEN-UUID_LEN];       /* rest of v1 VG name */
533         uint32_t vg_number;     /* volume group number */
534 @@ -757,7 +649,7 @@
535         uint32_t pe_total;              /* total of physical extents */
536         uint32_t pe_allocated;  /* allocated physical extents */
537         uint32_t pvg_total;     /* physical volume groups FU */
538 -} vg_disk_v2_t;
539 +} vg_disk_t;
540  
541  
542  /*
543 @@ -785,7 +677,7 @@
544         struct {
545                 kdev_t lv_dev;
546                 kdev_t pv_dev;
547 -               ulong pv_offset;
548 +               uint32_t pv_offset;
549         } data;
550  } pe_lock_req_t;
551  
552 @@ -798,7 +690,7 @@
553  
554  /* Request structure LV_STATUS_BYINDEX */
555  typedef struct {
556 -       ulong lv_index;
557 +       uint32_t lv_index;
558         lv_t *lv;
559         /* Transfer size because user space and kernel space differ */
560         ushort size;
561 @@ -807,7 +699,7 @@
562  /* Request structure LV_STATUS_BYDEV... */
563  typedef struct {
564         dev_t dev;
565 -       pv_t *lv;
566 +       lv_t *lv;
567  } lv_status_bydev_req_t;
568  
569  
570 @@ -817,4 +709,41 @@
571         int     rate;
572  } lv_snapshot_use_rate_req_t;
573  
574 +
575 +
576 +/* useful inlines; round_up() rounds n up to a multiple of size (size must be a power of two) */
577 +static inline ulong round_up(ulong n, ulong size) {
578 +       size--;         /* size now serves as a mask of the low-order bits */
579 +       return (n + size) & ~size;      /* add the mask, then clear the low bits */
580 +}
581 +/* n / size rounded up; built on round_up(), so size must be a power of two */
582 +static inline ulong div_up(ulong n, ulong size) {
583 +       return round_up(n, size) / size;
584 +}
585 +
586 +/* FIXME: nasty capital letters (name kept from the macro this inline replaces) */
587 +static int inline LVM_GET_COW_TABLE_CHUNKS_PER_PE(vg_t *vg, lv_t *lv) {
588 +       return vg->pe_size / lv->lv_chunk_size;  /* whole chunks per physical extent */
589 +}
590 +/* Largest entry count such that the COW table plus one chunk per entry fit in one PE */
591 +static int inline LVM_GET_COW_TABLE_ENTRIES_PER_PE(vg_t *vg, lv_t *lv) {
592 +       ulong chunks = vg->pe_size / lv->lv_chunk_size;  /* whole chunks per PE */
593 +       ulong entry_size = sizeof(lv_COW_table_disk_t);  /* bytes per table entry */
594 +       ulong chunk_size = lv->lv_chunk_size * SECTOR_SIZE;      /* bytes per chunk */
595 +       ulong entries = (vg->pe_size * SECTOR_SIZE) /
596 +               (entry_size + chunk_size);      /* starting guess, refined below */
597 +       /* with fewer than two chunks per PE, report zero entries */
598 +       if(chunks < 2)
599 +               return 0;
600 +       /* lower the guess until (chunks holding the table) + entries <= chunks */
601 +       for(; entries; entries--)
602 +               if((div_up(entries * entry_size, chunk_size) + entries) <=
603 +                  chunks)
604 +                       break;
605 +       /* 0 here means the loop ran out without finding a fit */
606 +       return entries;
607 +}
608 +
609 +
610  #endif                         /* #ifndef _LVM_H_INCLUDE */
611 +
612 --- linux/drivers/md/lvm.c.orig Thu Oct 25 22:58:35 2001
613 +++ linux/drivers/md/lvm.c      Tue Nov 13 09:46:52 2001
614 @@ -1,13 +1,13 @@
615  /*
616   * kernel/lvm.c
617   *
618 - * Copyright (C) 1997 - 2000  Heinz Mauelshagen, Sistina Software
619 + * Copyright (C) 1997 - 2001  Heinz Mauelshagen, Sistina Software
620   *
621   * February-November 1997
622   * April-May,July-August,November 1998
623   * January-March,May,July,September,October 1999
624   * January,February,July,September-November 2000
625 - * January 2001
626 + * January-April 2001
627   *
628   *
629   * LVM driver is free software; you can redistribute it and/or modify
630 @@ -43,7 +43,8 @@
631   *                 support for free (eg. longer) logical volume names
632   *    12/05/1998 - added spin_locks (thanks to Pascal van Dam
633   *                 <pascal@ramoth.xs4all.nl>)
634 - *    25/05/1998 - fixed handling of locked PEs in lvm_map() and lvm_chr_ioctl()
635 + *    25/05/1998 - fixed handling of locked PEs in lvm_map() and
636 + *                 lvm_chr_ioctl()
637   *    26/05/1998 - reactivated verify_area by access_ok
638   *    07/06/1998 - used vmalloc/vfree instead of kmalloc/kfree to go
639   *                 beyond 128/256 KB max allocation limit per call
640 @@ -125,7 +126,8 @@
641   *    14/02/2000 - support for 2.3.43
642   *               - integrated Andrea Arcagneli's snapshot code
643   *    25/06/2000 - james (chip) , IKKHAYD! roffl
644 - *    26/06/2000 - enhanced lv_extend_reduce for snapshot logical volume support
645 + *    26/06/2000 - enhanced lv_extend_reduce for snapshot logical volume
646 + *                 support
647   *    06/09/2000 - added devfs support
648   *    07/09/2000 - changed IOP version to 9
649   *               - started to add new char ioctl LV_STATUS_BYDEV_T to support
650 @@ -148,28 +150,87 @@
651   *                 procfs is always supported now. (JT)
652   *    12/01/2001 - avoided flushing logical volume in case of shrinking
653   *                 because of unecessary overhead in case of heavy updates
654 - *    05/04/2001 - lvm_map bugs: don't use b_blocknr/b_dev in lvm_map, it
655 - *                destroys stacking devices. call b_end_io on failed maps.
656 - *                (Jens Axboe)
657 + *    25/01/2001 - Allow RO open of an inactive LV so it can be reactivated.
658 + *    31/01/2001 - removed blk_init_queue/blk_cleanup_queue; queueing will be
659 + *                 handled by the proper devices.
660 + *               - If you try and BMAP a snapshot you now get an -EPERM
661 + *    01/01/2001 - lvm_map() now calls buffer_IO_error on error for 2.4
662 + *               - factored __remap_snapshot out of lvm_map
663 + *    12/02/2001 - move devfs code to create VG before LVs
664 + *    13/02/2001 - allow VG_CREATE on /dev/lvm
665 + *    14/02/2001 - removed modversions.h
666 + *               - tidied device defines for blk.h
667 + *               - tidied debug statements
668 + *               - bug: vg[] member not set back to NULL if activation fails
669 + *               - more lvm_map tidying
670 + *    15/02/2001 - register /dev/lvm with devfs correctly (major/minor
671 + *                 were swapped)
672 + *    19/02/2001 - preallocated buffer_heads for rawio when using
673 + *                 snapshots [JT]
674 + *    28/02/2001 - introduced the P_DEV macro and changed some internal
675 + *                 functions to be static [AD]
676 + *    28/02/2001 - factored lvm_get_snapshot_use_rate out of blk_ioctl [AD]
677 + *               - fixed user address accessing bug in lvm_do_lv_create()
678 + *                 where the check for an existing LV takes place right at
679 + *                 the beginning
680 + *    01/03/2001 - Add VG_CREATE_OLD for IOP 10 compatibility
681 + *    02/03/2001 - Don't destroy usermode pointers in lv_t structures during
682 + *                 LV_STATUS_BYxxx
683 + *                 and remove redundant lv_t variables from same.
684 + *               - avoid compilation of lvm_dummy_device_request in case of
685 + *                 Linux >= 2.3.0 to avoid a warning
686 + *               - added lvm_name argument to printk in buffer allocation
687 + *                 in order to avoid a warning
688 + *    04/03/2001 - moved linux/version.h above first use of KERNEL_VERSION
689 + *                 macros
690 + *    05/03/2001 - restore copying pe_t array in lvm_do_lv_status_byname. For
691 + *                 lvdisplay -v (PC)
692 + *               - restore copying pe_t array in lvm_do_lv_status_byindex (HM)
693 + *               - added copying pe_t array in lvm_do_lv_status_bydev (HM)
694 + *               - enhanced lvm_do_lv_status_by{name,index,dev} to be capable
695 + *                 to copy the lv_block_exception_t array to userspace (HM)
696 + *    08/03/2001 - initialize new lv_ptr->lv_COW_table_iobuf for snapshots;
697 + *                 removed obsolete lv_ptr->lv_COW_table_page initialization
698 + *               - factored lvm_do_pv_flush out of lvm_chr_ioctl (HM)
699 + *    09/03/2001 - Added _lock_open_count to ensure we only drop the lock
700 + *                 when the locking process closes.
701 + *    05/04/2001 - Defer writes to an extent that is being moved [JT]
702 + *    05/04/2001 - use b_rdev and b_rsector rather than b_dev and b_blocknr in
703 + *                 lvm_map() in order to make stacking devices more happy (HM)
704 + *    11/04/2001 - cleaned up the pvmove queue code. I no longer retain the
705 + *                 rw flag, instead WRITEA's are just dropped [JT]
706 + *    30/04/2001 - added KERNEL_VERSION > 2.4.3 get_hardsect_size() rather
707 + *                 than get_hardblocksize() call
708 + *    03/05/2001 - Use copy_to/from_user to preserve pointers in
709 + *                 lvm_do_status_by*
710 + *    11/05/2001 - avoid accesses to inactive snapshot data in
711 + *                 __update_hardsectsize() and lvm_do_lv_extend_reduce() (JW)
712 + *    28/05/2001 - implemented missing BLKSSZGET ioctl
713 + *    05/06/2001 - Move _pe_lock out of fast path for lvm_map when no PEs
714 + *                 locked.  Make buffer queue flush not need locking.
715 + *                 Fix lvm_user_bmap() to set b_rsector for new lvm_map(). [AED]
716 + *    30/06/2001 - Speed up __update_hardsectsize() by checking if PVs have
717 + *                 the same hardsectsize (very likely) before scanning all LEs
718 + *                 in the LV each time.  [AED]
719 + *    12/10/2001 - Use add/del_gendisk() routines in 2.4.10+
720 + *    01/11/2001 - Backport read_ahead change from Linus kernel [AED]
721   *
722   */
723  
724 +#include <linux/version.h>
725  
726 -static char *lvm_version = "LVM version 0.9.1_beta2  by Heinz Mauelshagen  (18/01/2001)\n";
727 -static char *lvm_short_version = "version 0.9.1_beta2 (18/01/2001)";
728 -
729 -#define MAJOR_NR       LVM_BLK_MAJOR
730 -#define        DEVICE_OFF(device)
731 +#define MAJOR_NR LVM_BLK_MAJOR
732 +#define DEVICE_OFF(device)
733 +#define LOCAL_END_REQUEST
734  
735  /* lvm_do_lv_create calls fsync_dev_lockfs()/unlockfs() */
736  /* #define     LVM_VFS_ENHANCEMENT */
737  
738  #include <linux/config.h>
739 -#include <linux/version.h>
740  #include <linux/module.h>
741 -
742  #include <linux/kernel.h>
743  #include <linux/vmalloc.h>
744 +
745  #include <linux/slab.h>
746  #include <linux/init.h>
747  
748 @@ -180,6 +241,9 @@
749  #include <linux/blkdev.h>
750  #include <linux/genhd.h>
751  #include <linux/locks.h>
752 +
753 +
754 +#include <linux/devfs_fs_kernel.h>
755  #include <linux/smp_lock.h>
756  #include <asm/ioctl.h>
757  #include <asm/segment.h>
758 @@ -195,7 +259,7 @@
759  #include <linux/errno.h>
760  #include <linux/lvm.h>
761  
762 -#include "lvm-snap.h"
763 +#include "lvm-internal.h"
764  
765  #define        LVM_CORRECT_READ_AHEAD(a)               \
766  do {                                           \
767 @@ -209,24 +273,6 @@
768  #  define WRITEA WRITE
769  #endif
770  
771 -/* debug macros */
772 -#ifdef DEBUG_IOCTL
773 -#define P_IOCTL(fmt, args...) printk(KERN_DEBUG "lvm ioctl: " fmt, ## args)
774 -#else
775 -#define P_IOCTL(fmt, args...)
776 -#endif
777 -
778 -#ifdef DEBUG_MAP
779 -#define P_MAP(fmt, args...) printk(KERN_DEBUG "lvm map: " fmt, ## args)
780 -#else
781 -#define P_MAP(fmt, args...)
782 -#endif
783 -
784 -#ifdef DEBUG_KFREE
785 -#define P_KFREE(fmt, args...) printk(KERN_DEBUG "lvm kfree: " fmt, ## args)
786 -#else
787 -#define P_KFREE(fmt, args...)
788 -#endif
789  
790  /*
791   * External function prototypes
792 @@ -236,27 +282,14 @@
793  static int lvm_blk_ioctl(struct inode *, struct file *, uint, ulong);
794  static int lvm_blk_open(struct inode *, struct file *);
795  
796 -static int lvm_chr_open(struct inode *, struct file *);
797 -
798 -static int lvm_chr_close(struct inode *, struct file *);
799  static int lvm_blk_close(struct inode *, struct file *);
800 +static int lvm_get_snapshot_use_rate(lv_t *lv_ptr, void *arg);
801  static int lvm_user_bmap(struct inode *, struct lv_bmap *);
802  
803 +static int lvm_chr_open(struct inode *, struct file *);
804 +static int lvm_chr_close(struct inode *, struct file *);
805  static int lvm_chr_ioctl(struct inode *, struct file *, uint, ulong);
806  
807 -int lvm_proc_read_vg_info(char *, char **, off_t, int, int *, void *);
808 -int lvm_proc_read_lv_info(char *, char **, off_t, int, int *, void *);
809 -int lvm_proc_read_pv_info(char *, char **, off_t, int, int *, void *);
810 -static int lvm_proc_get_global_info(char *, char **, off_t, int, int *, void *);
811 -
812 -void lvm_do_create_devfs_entry_of_vg ( vg_t *);
813 -
814 -void lvm_do_create_proc_entry_of_vg ( vg_t *);
815 -void lvm_do_remove_proc_entry_of_vg ( vg_t *);
816 -void lvm_do_create_proc_entry_of_lv ( vg_t *, lv_t *);
817 -void lvm_do_remove_proc_entry_of_lv ( vg_t *, lv_t *);
818 -void lvm_do_create_proc_entry_of_pv ( vg_t *, pv_t *);
819 -void lvm_do_remove_proc_entry_of_pv ( vg_t *, pv_t *);
820  
821  /* End external function prototypes */
822  
823 @@ -288,34 +321,41 @@
824  
825  static int lvm_do_pv_change(vg_t*, void*);
826  static int lvm_do_pv_status(vg_t *, void *);
827 +static int lvm_do_pv_flush(void *);
828  
829 -static int lvm_do_vg_create(int, void *);
830 +static int lvm_do_vg_create(void *, int minor);
831  static int lvm_do_vg_extend(vg_t *, void *);
832  static int lvm_do_vg_reduce(vg_t *, void *);
833  static int lvm_do_vg_rename(vg_t *, void *);
834  static int lvm_do_vg_remove(int);
835  static void lvm_geninit(struct gendisk *);
836 -static char *lvm_show_uuid ( char *);
837 +static void __update_hardsectsize(lv_t *lv);
838 +
839 +
840 +static void _queue_io(struct buffer_head *bh, int rw);
841 +static struct buffer_head *_dequeue_io(void);
842 +static void _flush_io(struct buffer_head *bh);
843 +
844 +static int _open_pv(pv_t *pv);
845 +static void _close_pv(pv_t *pv);
846 +
847 +static unsigned long _sectors_to_k(unsigned long sect);
848 +
849  #ifdef LVM_HD_NAME
850  void lvm_hd_name(char *, int);
851  #endif
852  /* END Internal function prototypes */
853  
854  
855 -/* volume group descriptor area pointers */
856 -static vg_t *vg[ABS_MAX_VG];
857 +/* variables */
858 +char *lvm_version = "LVM version "LVM_RELEASE_NAME"("LVM_RELEASE_DATE")";
859 +ushort lvm_iop_version = LVM_DRIVER_IOP_VERSION;
860 +int loadtime = 0;
861 +const char *const lvm_name = LVM_NAME;
862  
863 -static devfs_handle_t lvm_devfs_handle;
864 -static devfs_handle_t vg_devfs_handle[MAX_VG];
865 -static devfs_handle_t ch_devfs_handle[MAX_VG];
866 -static devfs_handle_t lv_devfs_handle[MAX_LV];
867 -
868 -static pv_t *pvp = NULL;
869 -static lv_t *lvp = NULL;
870 -static pe_t *pep = NULL;
871 -static pe_t *pep1 = NULL;
872 -static char *basename = NULL;
873  
874 +/* volume group descriptor area pointers */
875 +vg_t *vg[ABS_MAX_VG];
876  
877  /* map from block minor number to VG and LV numbers */
878  typedef struct {
879 @@ -327,9 +367,8 @@
880  
881  /* Request structures (lvm_chr_ioctl()) */
882  static pv_change_req_t pv_change_req;
883 -static pv_flush_req_t pv_flush_req;
884  static pv_status_req_t pv_status_req;
885 -static pe_lock_req_t pe_lock_req;
886 +volatile static pe_lock_req_t pe_lock_req;
887  static le_remap_req_t le_remap_req;
888  static lv_req_t lv_req;
889  
890 @@ -339,36 +378,29 @@
891  
892  static char pv_name[NAME_LEN];
893  /* static char rootvg[NAME_LEN] = { 0, }; */
894 -const char *const lvm_name = LVM_NAME;
895  static int lock = 0;
896 -static int loadtime = 0;
897 +static int _lock_open_count = 0;
898  static uint vg_count = 0;
899  static long lvm_chr_open_count = 0;
900 -static ushort lvm_iop_version = LVM_DRIVER_IOP_VERSION;
901  static DECLARE_WAIT_QUEUE_HEAD(lvm_wait);
902 -static DECLARE_WAIT_QUEUE_HEAD(lvm_map_wait);
903  
904  static spinlock_t lvm_lock = SPIN_LOCK_UNLOCKED;
905  static spinlock_t lvm_snapshot_lock = SPIN_LOCK_UNLOCKED;
906  
907 -static struct proc_dir_entry *lvm_proc_dir = NULL;
908 -static struct proc_dir_entry *lvm_proc_vg_subdir = NULL;
909 -struct proc_dir_entry *pde = NULL;
910 +static struct buffer_head *_pe_requests;
911 +static DECLARE_RWSEM(_pe_lock);
912  
913 -static struct file_operations lvm_chr_fops =
914 -{
915 -       owner:          THIS_MODULE,
916 +
917 +struct file_operations lvm_chr_fops = {
918         open:           lvm_chr_open,
919         release:        lvm_chr_close,
920         ioctl:          lvm_chr_ioctl,
921  };
922  
923 -
924  /* block device operations structure needed for 2.3.38? and above */
925 -static struct block_device_operations lvm_blk_dops =
926 +struct block_device_operations lvm_blk_dops =
927  {
928 -       owner:          THIS_MODULE,
929 -       open:           lvm_blk_open,
930 +       open:           lvm_blk_open,
931         release:        lvm_blk_close,
932         ioctl:          lvm_blk_ioctl,
933  };
934 @@ -376,10 +408,10 @@
935  
936  /* gendisk structures */
937  static struct hd_struct lvm_hd_struct[MAX_LV];
938 -static int lvm_blocksizes[MAX_LV] =
939 -{0,};
940 -static int lvm_size[MAX_LV] =
941 -{0,};
942 +static int lvm_blocksizes[MAX_LV];
943 +static int lvm_hardsectsizes[MAX_LV];
944 +static int lvm_size[MAX_LV];
945 +
946  static struct gendisk lvm_gendisk =
947  {
948         major:          MAJOR_NR,
949 @@ -391,38 +423,33 @@
950         nr_real:        MAX_LV,
951  };
952  
953 +
954  /*
955   * Driver initialization...
956   */
957  int lvm_init(void)
958  {
959 -       if (register_chrdev(LVM_CHAR_MAJOR, lvm_name, &lvm_chr_fops) < 0) {
960 -               printk(KERN_ERR "%s -- register_chrdev failed\n", lvm_name);
961 +       if (devfs_register_chrdev(LVM_CHAR_MAJOR,
962 +                                 lvm_name, &lvm_chr_fops) < 0) {
963 +               printk(KERN_ERR "%s -- devfs_register_chrdev failed\n",
964 +                      lvm_name);
965                 return -EIO;
966         }
967 -       if (register_blkdev(MAJOR_NR, lvm_name, &lvm_blk_dops) < 0)
968 +       if (devfs_register_blkdev(MAJOR_NR, lvm_name, &lvm_blk_dops) < 0)
969         {
970 -               printk("%s -- register_blkdev failed\n", lvm_name);
971 -               if (unregister_chrdev(LVM_CHAR_MAJOR, lvm_name) < 0)
972 -                       printk(KERN_ERR "%s -- unregister_chrdev failed\n", lvm_name);
973 +               printk("%s -- devfs_register_blkdev failed\n", lvm_name);
974 +               if (devfs_unregister_chrdev(LVM_CHAR_MAJOR, lvm_name) < 0)
975 +                       printk(KERN_ERR
976 +                              "%s -- devfs_unregister_chrdev failed\n",
977 +                              lvm_name);
978                 return -EIO;
979         }
980  
981 -       lvm_devfs_handle = devfs_register(
982 -               0 , "lvm", 0, 0, LVM_CHAR_MAJOR,
983 -               S_IFCHR | S_IRUSR | S_IWUSR | S_IRGRP,
984 -               &lvm_chr_fops, NULL);
985 -
986 -       lvm_proc_dir = create_proc_entry (LVM_DIR, S_IFDIR, &proc_root);
987 -       if (lvm_proc_dir != NULL) {
988 -               lvm_proc_vg_subdir = create_proc_entry (LVM_VG_SUBDIR, S_IFDIR, lvm_proc_dir);
989 -               pde = create_proc_entry(LVM_GLOBAL, S_IFREG, lvm_proc_dir);
990 -               if ( pde != NULL) pde->read_proc = &lvm_proc_get_global_info;
991 -       }
992 -
993 +       lvm_init_fs();
994         lvm_init_vars();
995         lvm_geninit(&lvm_gendisk);
996  
997 +       /* insert our gendisk at the corresponding major */
998         add_gendisk(&lvm_gendisk);
999  
1000  #ifdef LVM_HD_NAME
1001 @@ -433,65 +460,64 @@
1002         blk_queue_make_request(BLK_DEFAULT_QUEUE(MAJOR_NR), lvm_make_request_fn);
1003  
1004  
1005 +       /* initialise the pe lock */
1006 +       pe_lock_req.lock = UNLOCK_PE;
1007 +
1008         /* optional read root VGDA */
1009  /*
1010     if ( *rootvg != 0) vg_read_with_pv_and_lv ( rootvg, &vg);
1011  */
1012  
1013 -       printk(KERN_INFO
1014 -              "%s%s -- "
1015  #ifdef MODULE
1016 -              "Module"
1017 +       printk(KERN_INFO "%s module loaded\n", lvm_version);
1018  #else
1019 -              "Driver"
1020 +       printk(KERN_INFO "%s\n", lvm_version);
1021  #endif
1022 -              " successfully initialized\n",
1023 -              lvm_version, lvm_name);
1024  
1025         return 0;
1026  } /* lvm_init() */
1027  
1028 -
1029  /*
1030   * cleanup...
1031   */
1032 +
1033  static void lvm_cleanup(void)
1034  {
1035 -       devfs_unregister (lvm_devfs_handle);
1036 +       if (devfs_unregister_chrdev(LVM_CHAR_MAJOR, lvm_name) < 0)
1037 +               printk(KERN_ERR "%s -- devfs_unregister_chrdev failed\n",
1038 +                      lvm_name);
1039 +       if (devfs_unregister_blkdev(MAJOR_NR, lvm_name) < 0)
1040 +               printk(KERN_ERR "%s -- devfs_unregister_blkdev failed\n",
1041 +                      lvm_name);
1042  
1043 -       if (unregister_chrdev(LVM_CHAR_MAJOR, lvm_name) < 0) {
1044 -               printk(KERN_ERR "%s -- unregister_chrdev failed\n", lvm_name);
1045 -       }
1046 -       if (unregister_blkdev(MAJOR_NR, lvm_name) < 0) {
1047 -               printk(KERN_ERR "%s -- unregister_blkdev failed\n", lvm_name);
1048 -       }
1049  
1050  
1051 +       /* delete our gendisk from chain */
1052         del_gendisk(&lvm_gendisk);
1053  
1054         blk_size[MAJOR_NR] = NULL;
1055         blksize_size[MAJOR_NR] = NULL;
1056         hardsect_size[MAJOR_NR] = NULL;
1057  
1058 -       remove_proc_entry(LVM_GLOBAL, lvm_proc_dir);
1059 -       remove_proc_entry(LVM_VG_SUBDIR, lvm_proc_dir);
1060 -       remove_proc_entry(LVM_DIR, &proc_root);
1061 -
1062  #ifdef LVM_HD_NAME
1063         /* reference from linux/drivers/block/genhd.c */
1064         lvm_hd_name_ptr = NULL;
1065  #endif
1066  
1067 +       /* unregister with procfs and devfs */
1068 +       lvm_fin_fs();
1069 +
1070 +#ifdef MODULE
1071         printk(KERN_INFO "%s -- Module successfully deactivated\n", lvm_name);
1072 +#endif
1073  
1074         return;
1075  }      /* lvm_cleanup() */
1076  
1077 -
1078  /*
1079   * support function to initialize lvm variables
1080   */
1081 -void __init lvm_init_vars(void)
1082 +static void __init lvm_init_vars(void)
1083  {
1084         int v;
1085  
1086 @@ -500,8 +526,8 @@
1087         lvm_lock = lvm_snapshot_lock = SPIN_LOCK_UNLOCKED;
1088  
1089         pe_lock_req.lock = UNLOCK_PE;
1090 -       pe_lock_req.data.lv_dev = \
1091 -       pe_lock_req.data.pv_dev = \
1092 +       pe_lock_req.data.lv_dev = 0;
1093 +       pe_lock_req.data.pv_dev = 0;
1094         pe_lock_req.data.pv_offset = 0;
1095  
1096         /* Initialize VG pointers */
1097 @@ -524,19 +550,18 @@
1098   *
1099   ********************************************************************/
1100  
1101 +#define MODE_TO_STR(mode) (mode) & FMODE_READ ? "READ" : "", \
1102 +                         (mode) & FMODE_WRITE ? "WRITE" : ""
1103 +
1104  /*
1105   * character device open routine
1106   */
1107 -static int lvm_chr_open(struct inode *inode,
1108 -                       struct file *file)
1109 +static int lvm_chr_open(struct inode *inode, struct file *file)
1110  {
1111         int minor = MINOR(inode->i_rdev);
1112  
1113 -#ifdef DEBUG
1114 -       printk(KERN_DEBUG
1115 -        "%s -- lvm_chr_open MINOR: %d  VG#: %d  mode: 0x%X  lock: %d\n",
1116 -              lvm_name, minor, VG_CHR(minor), file->f_mode, lock);
1117 -#endif
1118 +       P_DEV("chr_open MINOR: %d  VG#: %d  mode: %s%s  lock: %d\n",
1119 +             minor, VG_CHR(minor), MODE_TO_STR(file->f_mode), lock);
1120  
1121         /* super user validation */
1122         if (!capable(CAP_SYS_ADMIN)) return -EACCES;
1123 @@ -544,8 +569,15 @@
1124         /* Group special file open */
1125         if (VG_CHR(minor) > MAX_VG) return -ENXIO;
1126  
1127 +       spin_lock(&lvm_lock);
1128 +       if(lock == current->pid)
1129 +               _lock_open_count++;
1130 +       spin_unlock(&lvm_lock);
1131 +
1132         lvm_chr_open_count++;
1133  
1134 +       MOD_INC_USE_COUNT;
1135 +
1136         return 0;
1137  } /* lvm_chr_open() */
1138  
1139 @@ -569,9 +601,8 @@
1140         /* otherwise cc will complain about unused variables */
1141         (void) lvm_lock;
1142  
1143 -       P_IOCTL("%s -- lvm_chr_ioctl: command: 0x%X  MINOR: %d  "
1144 -               "VG#: %d  mode: 0x%X\n",
1145 -               lvm_name, command, minor, VG_CHR(minor), file->f_mode);
1146 +       P_IOCTL("chr MINOR: %d  command: 0x%X  arg: %p  VG#: %d  mode: %s%s\n",
1147 +               minor, command, arg, VG_CHR(minor), MODE_TO_STR(file->f_mode));
1148  
1149  #ifdef LVM_TOTAL_RESET
1150         if (lvm_reset_spindown > 0) return -EACCES;
1151 @@ -619,9 +650,13 @@
1152                    physical volume (move's done in user space's pvmove) */
1153                 return lvm_do_pe_lock_unlock(vg_ptr,arg);
1154  
1155 -       case VG_CREATE:
1156 +       case VG_CREATE_OLD:
1157                 /* create a VGDA */
1158 -               return lvm_do_vg_create(minor, arg);
1159 +               return lvm_do_vg_create(arg, minor);
1160 +
1161 +       case VG_CREATE:
1162 +               /* create a VGDA, assume VG number is filled in */
1163 +               return lvm_do_vg_create(arg, -1);
1164  
1165         case VG_EXTEND:
1166                 /* extend a volume group */
1167 @@ -672,7 +707,7 @@
1168  
1169  
1170         case VG_STATUS_GET_NAMELIST:
1171 -               /* get volume group count */
1172 +               /* get volume group names */
1173                 for (l = v = 0; v < ABS_MAX_VG; v++) {
1174                         if (vg[v] != NULL) {
1175                                 if (copy_to_user(arg + l * NAME_LEN,
1176 @@ -727,6 +762,7 @@
1177  
1178  
1179         case LV_STATUS_BYDEV:
1180 +               /* get status of a logical volume by device */
1181                 return lvm_do_lv_status_bydev(vg_ptr, arg);
1182  
1183  
1184 @@ -742,18 +778,12 @@
1185  
1186         case PV_FLUSH:
1187                 /* physical volume buffer flush/invalidate */
1188 -               if (copy_from_user(&pv_flush_req, arg,
1189 -                                  sizeof(pv_flush_req)) != 0)
1190 -                       return -EFAULT;
1191 -
1192 -               fsync_dev(pv_flush_req.pv_dev);
1193 -               invalidate_buffers(pv_flush_req.pv_dev);
1194 -               return 0;
1195 +               return lvm_do_pv_flush(arg);
1196  
1197  
1198         default:
1199                 printk(KERN_WARNING
1200 -                      "%s -- lvm_chr_ioctl: unknown command %x\n",
1201 +                      "%s -- lvm_chr_ioctl: unknown command 0x%x\n",
1202                        lvm_name, command);
1203                 return -EINVAL;
1204         }
1205 @@ -767,11 +797,8 @@
1206   */
1207  static int lvm_chr_close(struct inode *inode, struct file *file)
1208  {
1209 -#ifdef DEBUG
1210 -       int minor = MINOR(inode->i_rdev);
1211 -       printk(KERN_DEBUG
1212 -            "%s -- lvm_chr_close   VG#: %d\n", lvm_name, VG_CHR(minor));
1213 -#endif
1214 +       P_DEV("chr_close MINOR: %d  VG#: %d\n",
1215 +             MINOR(inode->i_rdev), VG_CHR(MINOR(inode->i_rdev)));
1216  
1217  #ifdef LVM_TOTAL_RESET
1218         if (lvm_reset_spindown > 0) {
1219 @@ -781,10 +808,19 @@
1220  #endif
1221  
1222         if (lvm_chr_open_count > 0) lvm_chr_open_count--;
1223 -       if (lock == current->pid) {
1224 -               lock = 0;       /* release lock */
1225 -               wake_up_interruptible(&lvm_wait);
1226 +
1227 +       spin_lock(&lvm_lock);
1228 +       if(lock == current->pid) {
1229 +               if(!_lock_open_count) {
1230 +                       P_DEV("chr_close: unlocking LVM for pid %d\n", lock);
1231 +                       lock = 0;
1232 +                       wake_up_interruptible(&lvm_wait);
1233 +               } else
1234 +                       _lock_open_count--;
1235         }
1236 +       spin_unlock(&lvm_lock);
1237 +
1238 +       MOD_DEC_USE_COUNT;
1239  
1240         return 0;
1241  } /* lvm_chr_close() */
1242 @@ -806,11 +842,8 @@
1243         lv_t *lv_ptr;
1244         vg_t *vg_ptr = vg[VG_BLK(minor)];
1245  
1246 -#ifdef DEBUG_LVM_BLK_OPEN
1247 -       printk(KERN_DEBUG
1248 -         "%s -- lvm_blk_open MINOR: %d  VG#: %d  LV#: %d  mode: 0x%X\n",
1249 -           lvm_name, minor, VG_BLK(minor), LV_BLK(minor), file->f_mode);
1250 -#endif
1251 +       P_DEV("blk_open MINOR: %d  VG#: %d  LV#: %d  mode: %s%s\n",
1252 +             minor, VG_BLK(minor), LV_BLK(minor), MODE_TO_STR(file->f_mode));
1253  
1254  #ifdef LVM_TOTAL_RESET
1255         if (lvm_reset_spindown > 0)
1256 @@ -827,8 +860,12 @@
1257                 if (lv_ptr->lv_status & LV_SPINDOWN) return -EPERM;
1258  
1259                 /* Check inactive LV and open for read/write */
1260 -               if (!(lv_ptr->lv_status & LV_ACTIVE))
1261 -                       return -EPERM;
1262 +               /* We need to be able to "read" an inactive LV
1263 +                  to re-activate it */
1264 +               if ((file->f_mode & FMODE_WRITE) &&
1265 +                   (!(lv_ptr->lv_status & LV_ACTIVE)))
1266 +                   return -EPERM;
1267 +
1268                 if (!(lv_ptr->lv_access & LV_WRITE) &&
1269                     (file->f_mode & FMODE_WRITE))
1270                         return -EACCES;
1271 @@ -838,12 +875,9 @@
1272                 if (lv_ptr->lv_open == 0) vg_ptr->lv_open++;
1273                 lv_ptr->lv_open++;
1274  
1275 -#ifdef DEBUG_LVM_BLK_OPEN
1276 -               printk(KERN_DEBUG
1277 -                      "%s -- lvm_blk_open MINOR: %d  VG#: %d  LV#: %d  size: %d\n",
1278 -                      lvm_name, minor, VG_BLK(minor), LV_BLK(minor),
1279 -                      lv_ptr->lv_size);
1280 -#endif
1281 +               MOD_INC_USE_COUNT;
1282 +
1283 +               P_DEV("blk_open OK, LV size %d\n", lv_ptr->lv_size);
1284  
1285                 return 0;
1286         }
1287 @@ -863,31 +897,34 @@
1288         void *arg = (void *) a;
1289         struct hd_geometry *hd = (struct hd_geometry *) a;
1290  
1291 -       P_IOCTL("%s -- lvm_blk_ioctl MINOR: %d  command: 0x%X  arg: %X  "
1292 -               "VG#: %dl  LV#: %d\n",
1293 -               lvm_name, minor, command, (ulong) arg,
1294 -               VG_BLK(minor), LV_BLK(minor));
1295 +       P_IOCTL("blk MINOR: %d  command: 0x%X  arg: %p  VG#: %d  LV#: %d  "
1296 +               "mode: %s%s\n", minor, command, arg, VG_BLK(minor),
1297 +               LV_BLK(minor), MODE_TO_STR(file->f_mode));
1298  
1299         switch (command) {
1300 +       case BLKSSZGET:
1301 +               /* get block device sector size as needed e.g. by fdisk */
1302 +               return put_user(lvm_sectsize(inode->i_rdev), (int *) arg);
1303 +
1304         case BLKGETSIZE:
1305                 /* return device size */
1306 -               P_IOCTL("%s -- lvm_blk_ioctl -- BLKGETSIZE: %u\n",
1307 -                       lvm_name, lv_ptr->lv_size);
1308 +               P_IOCTL("BLKGETSIZE: %u\n", lv_ptr->lv_size);
1309                 if (put_user(lv_ptr->lv_size, (unsigned long *)arg))
1310                         return -EFAULT;
1311                 break;
1312  
1313 +#ifdef BLKGETSIZE64
1314         case BLKGETSIZE64:
1315                 if (put_user((u64)lv_ptr->lv_size << 9, (u64 *)arg))
1316                         return -EFAULT;
1317                 break;
1318 -
1319 +#endif
1320  
1321         case BLKFLSBUF:
1322                 /* flush buffer cache */
1323                 if (!capable(CAP_SYS_ADMIN)) return -EACCES;
1324  
1325 -               P_IOCTL("%s -- lvm_blk_ioctl -- BLKFLSBUF\n", lvm_name);
1326 +               P_IOCTL("BLKFLSBUF\n");
1327  
1328                 fsync_dev(inode->i_rdev);
1329                 invalidate_buffers(inode->i_rdev);
1330 @@ -898,8 +935,8 @@
1331                 /* set read ahead for block device */
1332                 if (!capable(CAP_SYS_ADMIN)) return -EACCES;
1333  
1334 -               P_IOCTL("%s -- lvm_blk_ioctl -- BLKRASET: %d sectors for %02X:%02X\n",
1335 -                       lvm_name, (long) arg, MAJOR(inode->i_rdev), minor);
1336 +               P_IOCTL("BLKRASET: %ld sectors for %s\n",
1337 +                       (long) arg, kdevname(inode->i_rdev));
1338  
1339                 if ((long) arg < LVM_MIN_READ_AHEAD ||
1340                     (long) arg > LVM_MAX_READ_AHEAD)
1341 @@ -911,7 +948,7 @@
1342  
1343         case BLKRAGET:
1344                 /* get current read ahead setting */
1345 -               P_IOCTL("%s -- lvm_blk_ioctl -- BLKRAGET\n", lvm_name);
1346 +               P_IOCTL("BLKRAGET %d\n", lv_ptr->lv_read_ahead);
1347                 if (put_user(lv_ptr->lv_read_ahead, (long *)arg))
1348                         return -EFAULT;
1349                 break;
1350 @@ -937,10 +974,10 @@
1351                             copy_to_user((long *) &hd->start, &start,
1352                                          sizeof(start)) != 0)
1353                                 return -EFAULT;
1354 -               }
1355  
1356 -               P_IOCTL("%s -- lvm_blk_ioctl -- cylinders: %d\n",
1357 -                       lvm_name, lv_ptr->lv_size / heads / sectors);
1358 +                       P_IOCTL("%s -- lvm_blk_ioctl -- cylinders: %d\n",
1359 +                               lvm_name, cylinders);
1360 +               }
1361                 break;
1362  
1363  
1364 @@ -979,40 +1016,11 @@
1365                 break;
1366  
1367         case LV_SNAPSHOT_USE_RATE:
1368 -               if (!(lv_ptr->lv_access & LV_SNAPSHOT)) return -EPERM;
1369 -               {
1370 -                       lv_snapshot_use_rate_req_t      lv_snapshot_use_rate_req;
1371 -
1372 -                       if (copy_from_user(&lv_snapshot_use_rate_req, arg,
1373 -                                          sizeof(lv_snapshot_use_rate_req_t)))
1374 -                               return -EFAULT;
1375 -                       if (lv_snapshot_use_rate_req.rate < 0 ||
1376 -                           lv_snapshot_use_rate_req.rate  > 100) return -EFAULT;
1377 -
1378 -                       switch (lv_snapshot_use_rate_req.block)
1379 -                       {
1380 -                       case 0:
1381 -                               lv_ptr->lv_snapshot_use_rate = lv_snapshot_use_rate_req.rate;
1382 -                               if (lv_ptr->lv_remap_ptr * 100 / lv_ptr->lv_remap_end < lv_ptr->lv_snapshot_use_rate)
1383 -                                       interruptible_sleep_on (&lv_ptr->lv_snapshot_wait);
1384 -                               break;
1385 -
1386 -                       case O_NONBLOCK:
1387 -                               break;
1388 -
1389 -                       default:
1390 -                               return -EFAULT;
1391 -                       }
1392 -                       lv_snapshot_use_rate_req.rate = lv_ptr->lv_remap_ptr * 100 / lv_ptr->lv_remap_end;
1393 -                       if (copy_to_user(arg, &lv_snapshot_use_rate_req,
1394 -                                        sizeof(lv_snapshot_use_rate_req_t)))
1395 -                               return -EFAULT;
1396 -               }
1397 -               break;
1398 +               return lvm_get_snapshot_use_rate(lv_ptr, arg);
1399  
1400         default:
1401                 printk(KERN_WARNING
1402 -                      "%s -- lvm_blk_ioctl: unknown command %d\n",
1403 +                      "%s -- lvm_blk_ioctl: unknown command 0x%x\n",
1404                        lvm_name, command);
1405                 return -EINVAL;
1406         }
1407 @@ -1030,18 +1038,49 @@
1408         vg_t *vg_ptr = vg[VG_BLK(minor)];
1409         lv_t *lv_ptr = vg_ptr->lv[LV_BLK(minor)];
1410  
1411 -#ifdef DEBUG
1412 -       printk(KERN_DEBUG
1413 -              "%s -- lvm_blk_close MINOR: %d  VG#: %d  LV#: %d\n",
1414 -              lvm_name, minor, VG_BLK(minor), LV_BLK(minor));
1415 -#endif
1416 +       P_DEV("blk_close MINOR: %d  VG#: %d  LV#: %d\n",
1417 +             minor, VG_BLK(minor), LV_BLK(minor));
1418  
1419         if (lv_ptr->lv_open == 1) vg_ptr->lv_open--;
1420         lv_ptr->lv_open--;
1421  
1422 +       MOD_DEC_USE_COUNT;
1423 +
1424         return 0;
1425  } /* lvm_blk_close() */
1426  
1427 +static int lvm_get_snapshot_use_rate(lv_t *lv, void *arg)
1428 +{
1429 +       lv_snapshot_use_rate_req_t lv_rate_req;
1430 +
1431 +       if (!(lv->lv_access & LV_SNAPSHOT))
1432 +               return -EPERM;
1433 +
1434 +       if (copy_from_user(&lv_rate_req, arg, sizeof(lv_rate_req)))
1435 +               return -EFAULT;
1436 +
1437 +       if (lv_rate_req.rate < 0 || lv_rate_req.rate > 100)
1438 +               return -EINVAL;
1439 +
1440 +       switch (lv_rate_req.block) {
1441 +       case 0:
1442 +               lv->lv_snapshot_use_rate = lv_rate_req.rate;
1443 +               if (lv->lv_remap_ptr * 100 / lv->lv_remap_end <
1444 +                   lv->lv_snapshot_use_rate)
1445 +                       interruptible_sleep_on(&lv->lv_snapshot_wait);
1446 +               break;
1447 +
1448 +       case O_NONBLOCK:
1449 +               break;
1450 +
1451 +       default:
1452 +               return -EINVAL;
1453 +       }
1454 +       lv_rate_req.rate = lv->lv_remap_ptr * 100 / lv->lv_remap_end;
1455 +
1456 +       return copy_to_user(arg, &lv_rate_req,
1457 +                           sizeof(lv_rate_req)) ? -EFAULT : 0;
1458 +}
1459  
1460  static int lvm_user_bmap(struct inode *inode, struct lv_bmap *user_result)
1461  {
1462 @@ -1056,6 +1095,7 @@
1463         bh.b_blocknr = block;
1464         bh.b_dev = bh.b_rdev = inode->i_rdev;
1465         bh.b_size = lvm_get_blksize(bh.b_dev);
1466 +       bh.b_rsector = block * (bh.b_size >> 9);
1467         if ((err=lvm_map(&bh, READ)) < 0)  {
1468                 printk("lvm map failed: %d\n", err);
1469                 return -EINVAL;
1470 @@ -1063,562 +1103,206 @@
1471  
1472         return put_user(kdev_t_to_nr(bh.b_rdev), &user_result->lv_dev) ||
1473                put_user(bh.b_rsector/(bh.b_size>>9), &user_result->lv_block) ?
1474 -              -EFAULT : 0;
1475 +               -EFAULT : 0;
1476  }
1477  
1478  
1479  /*
1480 - * provide VG info for proc filesystem use (global)
1481 + * block device support function for /usr/src/linux/drivers/block/ll_rw_blk.c
1482 + * (see init_module/lvm_init)
1483   */
1484 -int lvm_vg_info(vg_t *vg_ptr, char *buf) {
1485 -       int sz = 0;
1486 -       char inactive_flag = ' ';
1487 -
1488 -       if (!(vg_ptr->vg_status & VG_ACTIVE)) inactive_flag = 'I';
1489 -       sz = sprintf(buf,
1490 -                    "\nVG: %c%s  [%d PV, %d LV/%d open] "
1491 -                    " PE Size: %d KB\n"
1492 -                    "  Usage [KB/PE]: %d /%d total  "
1493 -                    "%d /%d used  %d /%d free",
1494 -                    inactive_flag,
1495 -                    vg_ptr->vg_name,
1496 -                    vg_ptr->pv_cur,
1497 -                    vg_ptr->lv_cur,
1498 -                    vg_ptr->lv_open,
1499 -                    vg_ptr->pe_size >> 1,
1500 -                    vg_ptr->pe_size * vg_ptr->pe_total >> 1,
1501 -                    vg_ptr->pe_total,
1502 -                    vg_ptr->pe_allocated * vg_ptr->pe_size >> 1,
1503 -                    vg_ptr->pe_allocated,
1504 -                    (vg_ptr->pe_total - vg_ptr->pe_allocated) *
1505 -                    vg_ptr->pe_size >> 1,
1506 -                    vg_ptr->pe_total - vg_ptr->pe_allocated);
1507 -       return sz;
1508 -}
1509 +static void __remap_snapshot(kdev_t rdev, ulong rsector,
1510 +                                   ulong pe_start, lv_t *lv, vg_t *vg) {
1511  
1512 +       /* copy a chunk from the origin to a snapshot device */
1513 +       down_write(&lv->lv_lock);
1514  
1515 -/*
1516 - * provide LV info for proc filesystem use (global)
1517 - */
1518 -int lvm_lv_info(vg_t *vg_ptr, lv_t *lv_ptr, char *buf) {
1519 -       int sz = 0;
1520 -       char inactive_flag = 'A', allocation_flag = ' ',
1521 -            stripes_flag = ' ', rw_flag = ' ';
1522 -
1523 -       if (!(lv_ptr->lv_status & LV_ACTIVE))
1524 -               inactive_flag = 'I';
1525 -       rw_flag = 'R';
1526 -       if (lv_ptr->lv_access & LV_WRITE)
1527 -               rw_flag = 'W';
1528 -       allocation_flag = 'D';
1529 -       if (lv_ptr->lv_allocation & LV_CONTIGUOUS)
1530 -               allocation_flag = 'C';
1531 -       stripes_flag = 'L';
1532 -       if (lv_ptr->lv_stripes > 1)
1533 -               stripes_flag = 'S';
1534 -       sz += sprintf(buf+sz,
1535 -                     "[%c%c%c%c",
1536 -                     inactive_flag,
1537 -        rw_flag,
1538 -                     allocation_flag,
1539 -                     stripes_flag);
1540 -       if (lv_ptr->lv_stripes > 1)
1541 -               sz += sprintf(buf+sz, "%-2d",
1542 -                             lv_ptr->lv_stripes);
1543 -       else
1544 -               sz += sprintf(buf+sz, "  ");
1545 -       basename = strrchr(lv_ptr->lv_name, '/');
1546 -       if ( basename == 0) basename = lv_ptr->lv_name;
1547 -       else                basename++;
1548 -       sz += sprintf(buf+sz, "] %-25s", basename);
1549 -       if (strlen(basename) > 25)
1550 -               sz += sprintf(buf+sz,
1551 -                             "\n                              ");
1552 -       sz += sprintf(buf+sz, "%9d /%-6d   ",
1553 -                     lv_ptr->lv_size >> 1,
1554 -                     lv_ptr->lv_size / vg_ptr->pe_size);
1555 +       /* we must redo lvm_snapshot_remap_block in order to avoid a
1556 +          race condition in the gap where no lock was held */
1557 +       if (!lvm_snapshot_remap_block(&rdev, &rsector, pe_start, lv) &&
1558 +           !lvm_snapshot_COW(rdev, rsector, pe_start, rsector, vg, lv))
1559 +               lvm_write_COW_table_block(vg, lv);
1560  
1561 -       if (lv_ptr->lv_open == 0)
1562 -               sz += sprintf(buf+sz, "close");
1563 -       else
1564 -               sz += sprintf(buf+sz, "%dx open",
1565 -                             lv_ptr->lv_open);
1566 -
1567 -       return sz;
1568 +       up_write(&lv->lv_lock);
1569  }
1570  
1571 -
1572 -/*
1573 - * provide PV info for proc filesystem use (global)
1574 - */
1575 -int lvm_pv_info(pv_t *pv_ptr, char *buf) {
1576 -       int sz = 0;
1577 -       char inactive_flag = 'A', allocation_flag = ' ';
1578 -       char *pv_name = NULL;
1579 -
1580 -       if (!(pv_ptr->pv_status & PV_ACTIVE))
1581 -               inactive_flag = 'I';
1582 -       allocation_flag = 'A';
1583 -       if (!(pv_ptr->pv_allocatable & PV_ALLOCATABLE))
1584 -               allocation_flag = 'N';
1585 -       pv_name = strrchr(pv_ptr->pv_name+1,'/');
1586 -       if ( pv_name == 0) pv_name = pv_ptr->pv_name;
1587 -       else               pv_name++;
1588 -       sz = sprintf(buf,
1589 -                    "[%c%c] %-21s %8d /%-6d  "
1590 -                    "%8d /%-6d  %8d /%-6d",
1591 -                    inactive_flag,
1592 -                    allocation_flag,
1593 -                    pv_name,
1594 -                    pv_ptr->pe_total *
1595 -                    pv_ptr->pe_size >> 1,
1596 -                    pv_ptr->pe_total,
1597 -                    pv_ptr->pe_allocated *
1598 -                    pv_ptr->pe_size >> 1,
1599 -                    pv_ptr->pe_allocated,
1600 -                    (pv_ptr->pe_total -
1601 -                     pv_ptr->pe_allocated) *
1602 -                    pv_ptr->pe_size >> 1,
1603 -                    pv_ptr->pe_total -
1604 -                    pv_ptr->pe_allocated);
1605 -       return sz;
1606 +static inline void _remap_snapshot(kdev_t rdev, ulong rsector,
1607 +                                  ulong pe_start, lv_t *lv, vg_t *vg) {
1608 +       int r;
1609 +
1610 +       /* check to see if this chunk is already in the snapshot */
1611 +       down_read(&lv->lv_lock);
1612 +       r = lvm_snapshot_remap_block(&rdev, &rsector, pe_start, lv);
1613 +       up_read(&lv->lv_lock);
1614 +
1615 +       if (!r)
1616 +               /* we haven't yet copied this block to the snapshot */
1617 +               __remap_snapshot(rdev, rsector, pe_start, lv, vg);
1618  }
1619  
1620  
1621  /*
1622 - * Support functions /proc-Filesystem
1623 + * extents destined for a pe that is on the move should be deferred
1624   */
1625 +static inline int _should_defer(kdev_t pv, ulong sector, uint32_t pe_size) {
1626 +       return ((pe_lock_req.lock == LOCK_PE) &&
1627 +               (pv == pe_lock_req.data.pv_dev) &&
1628 +               (sector >= pe_lock_req.data.pv_offset) &&
1629 +               (sector < (pe_lock_req.data.pv_offset + pe_size)));
1630 +}
1631  
1632 -#define  LVM_PROC_BUF   ( i == 0 ? dummy_buf : &buf[sz])
1633 -
1634 -/*
1635 - * provide global LVM information
1636 - */
1637 -static int lvm_proc_get_global_info(char *page, char **start, off_t pos, int count, int *eof, void *data)
1638 +static inline int _defer_extent(struct buffer_head *bh, int rw,
1639 +                               kdev_t pv, ulong sector, uint32_t pe_size)
1640  {
1641 -       int c, i, l, p, v, vg_counter, pv_counter, lv_counter, lv_open_counter,
1642 -        lv_open_total, pe_t_bytes, hash_table_bytes, lv_block_exception_t_bytes, seconds;
1643 -       static off_t sz;
1644 -       off_t sz_last;
1645 -       static char *buf = NULL;
1646 -       static char dummy_buf[160];     /* sized for 2 lines */
1647 -       vg_t *vg_ptr;
1648 -       lv_t *lv_ptr;
1649 -       pv_t *pv_ptr;
1650 -
1651 -
1652 -#ifdef DEBUG_LVM_PROC_GET_INFO
1653 -       printk(KERN_DEBUG
1654 -              "%s - lvm_proc_get_global_info CALLED  pos: %lu  count: %d  whence: %d\n",
1655 -              lvm_name, pos, count, whence);
1656 -#endif
1657 -
1658 -       MOD_INC_USE_COUNT;
1659 -
1660 -       if (pos == 0 || buf == NULL) {
1661 -               sz_last = vg_counter = pv_counter = lv_counter = lv_open_counter = \
1662 -               lv_open_total = pe_t_bytes = hash_table_bytes = \
1663 -               lv_block_exception_t_bytes = 0;
1664 -
1665 -               /* search for activity */
1666 -               for (v = 0; v < ABS_MAX_VG; v++) {
1667 -                       if ((vg_ptr = vg[v]) != NULL) {
1668 -                               vg_counter++;
1669 -                               pv_counter += vg_ptr->pv_cur;
1670 -                               lv_counter += vg_ptr->lv_cur;
1671 -                               if (vg_ptr->lv_cur > 0) {
1672 -                                       for (l = 0; l < vg[v]->lv_max; l++) {
1673 -                                               if ((lv_ptr = vg_ptr->lv[l]) != NULL) {
1674 -                                                       pe_t_bytes += lv_ptr->lv_allocated_le;
1675 -                                                       hash_table_bytes += lv_ptr->lv_snapshot_hash_table_size;
1676 -                                                       if (lv_ptr->lv_block_exception != NULL)
1677 -                                                               lv_block_exception_t_bytes += lv_ptr->lv_remap_end;
1678 -                                                       if (lv_ptr->lv_open > 0) {
1679 -                                                               lv_open_counter++;
1680 -                                                               lv_open_total += lv_ptr->lv_open;
1681 -                                                       }
1682 -                                               }
1683 -                                       }
1684 -                               }
1685 -                       }
1686 -               }
1687 -               pe_t_bytes *= sizeof(pe_t);
1688 -               lv_block_exception_t_bytes *= sizeof(lv_block_exception_t);
1689 -
1690 -               if (buf != NULL) {
1691 -                       P_KFREE("%s -- vfree %d\n", lvm_name, __LINE__);
1692 -                       lock_kernel();
1693 -                       vfree(buf);
1694 -                       unlock_kernel();
1695 -                       buf = NULL;
1696 -               }
1697 -               /* 2 times: first to get size to allocate buffer,
1698 -                  2nd to fill the malloced buffer */
1699 -               for (i = 0; i < 2; i++) {
1700 -                       sz = 0;
1701 -                       sz += sprintf(LVM_PROC_BUF,
1702 -                                     "LVM "
1703 -#ifdef MODULE
1704 -                                     "module"
1705 -#else
1706 -                                     "driver"
1707 -#endif
1708 -                                     " %s\n\n"
1709 -                                   "Total:  %d VG%s  %d PV%s  %d LV%s ",
1710 -                                     lvm_short_version,
1711 -                                 vg_counter, vg_counter == 1 ? "" : "s",
1712 -                                 pv_counter, pv_counter == 1 ? "" : "s",
1713 -                                lv_counter, lv_counter == 1 ? "" : "s");
1714 -                       sz += sprintf(LVM_PROC_BUF,
1715 -                                     "(%d LV%s open",
1716 -                                     lv_open_counter,
1717 -                                     lv_open_counter == 1 ? "" : "s");
1718 -                       if (lv_open_total > 0)
1719 -                               sz += sprintf(LVM_PROC_BUF,
1720 -                                             " %d times)\n",
1721 -                                             lv_open_total);
1722 -                       else
1723 -                               sz += sprintf(LVM_PROC_BUF, ")");
1724 -                       sz += sprintf(LVM_PROC_BUF,
1725 -                                     "\nGlobal: %lu bytes malloced   IOP version: %d   ",
1726 -                                     vg_counter * sizeof(vg_t) +
1727 -                                     pv_counter * sizeof(pv_t) +
1728 -                                     lv_counter * sizeof(lv_t) +
1729 -                                     pe_t_bytes + hash_table_bytes + lv_block_exception_t_bytes + sz_last,
1730 -                                     lvm_iop_version);
1731 -
1732 -                       seconds = CURRENT_TIME - loadtime;
1733 -                       if (seconds < 0)
1734 -                               loadtime = CURRENT_TIME + seconds;
1735 -                       if (seconds / 86400 > 0) {
1736 -                               sz += sprintf(LVM_PROC_BUF, "%d day%s ",
1737 -                                             seconds / 86400,
1738 -                                             seconds / 86400 == 0 ||
1739 -                                        seconds / 86400 > 1 ? "s" : "");
1740 -                       }
1741 -                       sz += sprintf(LVM_PROC_BUF, "%d:%02d:%02d active\n",
1742 -                                     (seconds % 86400) / 3600,
1743 -                                     (seconds % 3600) / 60,
1744 -                                     seconds % 60);
1745 -
1746 -                       if (vg_counter > 0) {
1747 -                               for (v = 0; v < ABS_MAX_VG; v++) {
1748 -                                       /* volume group */
1749 -                                       if ((vg_ptr = vg[v]) != NULL) {
1750 -                                               sz += lvm_vg_info(vg_ptr, LVM_PROC_BUF);
1751 -
1752 -                                               /* physical volumes */
1753 -                                               sz += sprintf(LVM_PROC_BUF,
1754 -                                                             "\n  PV%s ",
1755 -                                                             vg_ptr->pv_cur == 1 ? ": " : "s:");
1756 -                                               c = 0;
1757 -                                               for (p = 0; p < vg_ptr->pv_max; p++) {
1758 -                                                       if ((pv_ptr = vg_ptr->pv[p]) != NULL) {
1759 -                                                               sz += lvm_pv_info(pv_ptr, LVM_PROC_BUF);
1760 -
1761 -                                                               c++;
1762 -                                                               if (c < vg_ptr->pv_cur)
1763 -                                                                       sz += sprintf(LVM_PROC_BUF,
1764 -                                                                                     "\n       ");
1765 -                                                       }
1766 -                                               }
1767 -
1768 -                                               /* logical volumes */
1769 -                                               sz += sprintf(LVM_PROC_BUF,
1770 -                                                          "\n    LV%s ",
1771 -                                                             vg_ptr->lv_cur == 1 ? ": " : "s:");
1772 -                                               c = 0;
1773 -                                               for (l = 0; l < vg_ptr->lv_max; l++) {
1774 -                                                       if ((lv_ptr = vg_ptr->lv[l]) != NULL) {
1775 -                                                               sz += lvm_lv_info(vg_ptr, lv_ptr, LVM_PROC_BUF);
1776 -                                                               c++;
1777 -                                                               if (c < vg_ptr->lv_cur)
1778 -                                                                       sz += sprintf(LVM_PROC_BUF,
1779 -                                                                                     "\n         ");
1780 -                                                       }
1781 -                                               }
1782 -                                               if (vg_ptr->lv_cur == 0) sz += sprintf(LVM_PROC_BUF, "none");
1783 -                                               sz += sprintf(LVM_PROC_BUF, "\n");
1784 -                                       }
1785 -                               }
1786 -                       }
1787 -                       if (buf == NULL) {
1788 -                               lock_kernel();
1789 -                               buf = vmalloc(sz);
1790 -                               unlock_kernel();
1791 -                               if (buf == NULL) {
1792 -                                       sz = 0;
1793 -                                       MOD_DEC_USE_COUNT;
1794 -                                       return sprintf(page, "%s - vmalloc error at line %d\n",
1795 -                                                    lvm_name, __LINE__);
1796 -                               }
1797 -                       }
1798 -                       sz_last = sz;
1799 +       if (pe_lock_req.lock == LOCK_PE) {
1800 +               down_read(&_pe_lock);
1801 +               if (_should_defer(pv, sector, pe_size)) {
1802 +                       up_read(&_pe_lock);
1803 +                       down_write(&_pe_lock);
1804 +                       if (_should_defer(pv, sector, pe_size))
1805 +                               _queue_io(bh, rw);
1806 +                       up_write(&_pe_lock);
1807 +                       return 1;
1808                 }
1809 +               up_read(&_pe_lock);
1810         }
1811 -       MOD_DEC_USE_COUNT;
1812 -       if (pos > sz - 1) {
1813 -               lock_kernel();
1814 -               vfree(buf);
1815 -               unlock_kernel();
1816 -               buf = NULL;
1817 -               return 0;
1818 -       }
1819 -       *start = &buf[pos];
1820 -       if (sz - pos < count)
1821 -               return sz - pos;
1822 -       else
1823 -               return count;
1824 -} /* lvm_proc_get_global_info() */
1825 -
1826 -
1827 -/*
1828 - * provide VG information
1829 - */
1830 -int lvm_proc_read_vg_info(char *page, char **start, off_t off,
1831 -                         int count, int *eof, void *data) {
1832 -       int sz = 0;
1833 -       vg_t *vg = data;
1834 -
1835 -       sz += sprintf ( page+sz, "name:         %s\n", vg->vg_name);
1836 -       sz += sprintf ( page+sz, "size:         %u\n",
1837 -                       vg->pe_total * vg->pe_size / 2);
1838 -       sz += sprintf ( page+sz, "access:       %u\n", vg->vg_access);
1839 -       sz += sprintf ( page+sz, "status:       %u\n", vg->vg_status);
1840 -       sz += sprintf ( page+sz, "number:       %u\n", vg->vg_number);
1841 -       sz += sprintf ( page+sz, "LV max:       %u\n", vg->lv_max);
1842 -       sz += sprintf ( page+sz, "LV current:   %u\n", vg->lv_cur);
1843 -       sz += sprintf ( page+sz, "LV open:      %u\n", vg->lv_open);
1844 -       sz += sprintf ( page+sz, "PV max:       %u\n", vg->pv_max);
1845 -       sz += sprintf ( page+sz, "PV current:   %u\n", vg->pv_cur);
1846 -       sz += sprintf ( page+sz, "PV active:    %u\n", vg->pv_act);
1847 -       sz += sprintf ( page+sz, "PE size:      %u\n", vg->pe_size / 2);
1848 -       sz += sprintf ( page+sz, "PE total:     %u\n", vg->pe_total);
1849 -       sz += sprintf ( page+sz, "PE allocated: %u\n", vg->pe_allocated);
1850 -       sz += sprintf ( page+sz, "uuid:         %s\n", lvm_show_uuid(vg->vg_uuid));
1851 -
1852 -       return sz;
1853 -}
1854 -
1855 -
1856 -/*
1857 - * provide LV information
1858 - */
1859 -int lvm_proc_read_lv_info(char *page, char **start, off_t off,
1860 -                         int count, int *eof, void *data) {
1861 -       int sz = 0;
1862 -       lv_t *lv = data;
1863 -
1864 -       sz += sprintf ( page+sz, "name:         %s\n", lv->lv_name);
1865 -       sz += sprintf ( page+sz, "size:         %u\n", lv->lv_size);
1866 -       sz += sprintf ( page+sz, "access:       %u\n", lv->lv_access);
1867 -       sz += sprintf ( page+sz, "status:       %u\n", lv->lv_status);
1868 -       sz += sprintf ( page+sz, "number:       %u\n", lv->lv_number);
1869 -       sz += sprintf ( page+sz, "open:         %u\n", lv->lv_open);
1870 -       sz += sprintf ( page+sz, "allocation:   %u\n", lv->lv_allocation);
1871 -       sz += sprintf ( page+sz, "device:       %02u:%02u\n",
1872 -                        MAJOR(lv->lv_dev), MINOR(lv->lv_dev));
1873 -
1874 -       return sz;
1875 -}
1876 -
1877 -
1878 -/*
1879 - * provide PV information
1880 - */
1881 -int lvm_proc_read_pv_info(char *page, char **start, off_t off,
1882 -                         int count, int *eof, void *data) {
1883 -       int sz = 0;
1884 -       pv_t *pv = data;
1885 -
1886 -       sz += sprintf ( page+sz, "name:         %s\n", pv->pv_name);
1887 -       sz += sprintf ( page+sz, "size:         %u\n", pv->pv_size);
1888 -       sz += sprintf ( page+sz, "status:       %u\n", pv->pv_status);
1889 -       sz += sprintf ( page+sz, "number:       %u\n", pv->pv_number);
1890 -       sz += sprintf ( page+sz, "allocatable:  %u\n", pv->pv_allocatable);
1891 -       sz += sprintf ( page+sz, "LV current:   %u\n", pv->lv_cur);
1892 -       sz += sprintf ( page+sz, "PE size:      %u\n", pv->pe_size / 2);
1893 -       sz += sprintf ( page+sz, "PE total:     %u\n", pv->pe_total);
1894 -       sz += sprintf ( page+sz, "PE allocated: %u\n", pv->pe_allocated);
1895 -       sz += sprintf ( page+sz, "device:       %02u:%02u\n",
1896 -                        MAJOR(pv->pv_dev), MINOR(pv->pv_dev));
1897 -       sz += sprintf ( page+sz, "uuid:         %s\n", lvm_show_uuid(pv->pv_uuid));
1898 -
1899 -
1900 -       return sz;
1901 +       return 0;
1902  }
1903  
1904  
1905 -/*
1906 - * block device support function for /usr/src/linux/drivers/block/ll_rw_blk.c
1907 - * (see init_module/lvm_init)
1908 - */
1909  static int lvm_map(struct buffer_head *bh, int rw)
1910  {
1911         int minor = MINOR(bh->b_rdev);
1912 -       int ret = 0;
1913         ulong index;
1914         ulong pe_start;
1915         ulong size = bh->b_size >> 9;
1916 -       ulong rsector_tmp = bh->b_rsector;
1917 -       ulong rsector_sav;
1918 -       kdev_t rdev_tmp = bh->b_rdev;
1919 -       kdev_t rdev_sav;
1920 +       ulong rsector_org = bh->b_rsector;
1921 +       ulong rsector_map;
1922 +       kdev_t rdev_map;
1923         vg_t *vg_this = vg[VG_BLK(minor)];
1924         lv_t *lv = vg_this->lv[LV_BLK(minor)];
1925  
1926  
1927 +       down_read(&lv->lv_lock);
1928         if (!(lv->lv_status & LV_ACTIVE)) {
1929                 printk(KERN_ALERT
1930                        "%s - lvm_map: ll_rw_blk for inactive LV %s\n",
1931                        lvm_name, lv->lv_name);
1932 -               return -1;
1933 +               goto bad;
1934         }
1935  
1936         if ((rw == WRITE || rw == WRITEA) &&
1937             !(lv->lv_access & LV_WRITE)) {
1938                 printk(KERN_CRIT
1939 -                   "%s - lvm_map: ll_rw_blk write for readonly LV %s\n",
1940 +                      "%s - lvm_map: ll_rw_blk write for readonly LV %s\n",
1941                        lvm_name, lv->lv_name);
1942 -               return -1;
1943 +               goto bad;
1944         }
1945  
1946 -       P_MAP("%s - lvm_map minor:%d  *rdev: %02d:%02d  *rsector: %lu  "
1947 -             "size:%lu\n",
1948 +       P_MAP("%s - lvm_map minor: %d  *rdev: %s  *rsector: %lu  size:%lu\n",
1949               lvm_name, minor,
1950 -             MAJOR(rdev_tmp),
1951 -             MINOR(rdev_tmp),
1952 -             rsector_tmp, size);
1953 +             kdevname(bh->b_rdev),
1954 +             rsector_org, size);
1955  
1956 -       if (rsector_tmp + size > lv->lv_size) {
1957 +       if (rsector_org + size > lv->lv_size) {
1958                 printk(KERN_ALERT
1959                        "%s - lvm_map access beyond end of device; *rsector: "
1960                         "%lu or size: %lu wrong for minor: %2d\n",
1961 -                       lvm_name, rsector_tmp, size, minor);
1962 -               return -1;
1963 +                       lvm_name, rsector_org, size, minor);
1964 +               goto bad;
1965         }
1966 -       rsector_sav = rsector_tmp;
1967 -       rdev_sav = rdev_tmp;
1968  
1969 -lvm_second_remap:
1970 -       /* linear mapping */
1971 -       if (lv->lv_stripes < 2) {
1972 +
1973 +       if (lv->lv_stripes < 2) { /* linear mapping */
1974                 /* get the index */
1975 -               index = rsector_tmp / vg_this->pe_size;
1976 +               index = rsector_org / vg_this->pe_size;
1977                 pe_start = lv->lv_current_pe[index].pe;
1978 -               rsector_tmp = lv->lv_current_pe[index].pe +
1979 -                   (rsector_tmp % vg_this->pe_size);
1980 -               rdev_tmp = lv->lv_current_pe[index].dev;
1981 -
1982 -               P_MAP("lv_current_pe[%ld].pe: %ld  rdev: %02d:%02d  "
1983 -                     "rsector:%ld\n",
1984 -                      index,
1985 -                      lv->lv_current_pe[index].pe,
1986 -                      MAJOR(rdev_tmp),
1987 -                      MINOR(rdev_tmp),
1988 -                      rsector_tmp);
1989 +               rsector_map = lv->lv_current_pe[index].pe +
1990 +                       (rsector_org % vg_this->pe_size);
1991 +               rdev_map = lv->lv_current_pe[index].dev;
1992 +
1993 +               P_MAP("lv_current_pe[%ld].pe: %d  rdev: %s  rsector:%ld\n",
1994 +                     index, lv->lv_current_pe[index].pe,
1995 +                     kdevname(rdev_map), rsector_map);
1996  
1997 -               /* striped mapping */
1998 -       } else {
1999 +       } else {                /* striped mapping */
2000                 ulong stripe_index;
2001                 ulong stripe_length;
2002  
2003                 stripe_length = vg_this->pe_size * lv->lv_stripes;
2004 -               stripe_index = (rsector_tmp % stripe_length) / lv->lv_stripesize;
2005 -               index = rsector_tmp / stripe_length +
2006 -                   (stripe_index % lv->lv_stripes) *
2007 -                   (lv->lv_allocated_le / lv->lv_stripes);
2008 +               stripe_index = (rsector_org % stripe_length) /
2009 +                       lv->lv_stripesize;
2010 +               index = rsector_org / stripe_length +
2011 +                       (stripe_index % lv->lv_stripes) *
2012 +                       (lv->lv_allocated_le / lv->lv_stripes);
2013                 pe_start = lv->lv_current_pe[index].pe;
2014 -               rsector_tmp = lv->lv_current_pe[index].pe +
2015 -                   (rsector_tmp % stripe_length) -
2016 -                   (stripe_index % lv->lv_stripes) * lv->lv_stripesize -
2017 -                   stripe_index / lv->lv_stripes *
2018 -                   (lv->lv_stripes - 1) * lv->lv_stripesize;
2019 -               rdev_tmp = lv->lv_current_pe[index].dev;
2020 -       }
2021 -
2022 -       P_MAP("lv_current_pe[%ld].pe: %ld  rdev: %02d:%02d  rsector:%ld\n"
2023 -              "stripe_length: %ld  stripe_index: %ld\n",
2024 -              index,
2025 -              lv->lv_current_pe[index].pe,
2026 -              MAJOR(rdev_tmp),
2027 -              MINOR(rdev_tmp),
2028 -              rsector_tmp,
2029 -              stripe_length,
2030 -              stripe_index);
2031 +               rsector_map = lv->lv_current_pe[index].pe +
2032 +                       (rsector_org % stripe_length) -
2033 +                       (stripe_index % lv->lv_stripes) * lv->lv_stripesize -
2034 +                       stripe_index / lv->lv_stripes *
2035 +                       (lv->lv_stripes - 1) * lv->lv_stripesize;
2036 +               rdev_map = lv->lv_current_pe[index].dev;
2037 +
2038 +               P_MAP("lv_current_pe[%ld].pe: %d  rdev: %s  rsector:%ld\n"
2039 +                     "stripe_length: %ld  stripe_index: %ld\n",
2040 +                     index, lv->lv_current_pe[index].pe, kdevname(rdev_map),
2041 +                     rsector_map, stripe_length, stripe_index);
2042 +       }
2043 +
2044 +       /*
2045 +        * Queue writes to physical extents on the move until move completes.
2046 +        * Don't get _pe_lock until there is a reasonable expectation that
2047 +        * we need to queue this request, because this is in the fast path.
2048 +        */
2049 +       if (rw == WRITE || rw == WRITEA) {
2050 +               if(_defer_extent(bh, rw, rdev_map,
2051 +                                rsector_map, vg_this->pe_size)) {
2052  
2053 -       /* handle physical extents on the move */
2054 -       if (pe_lock_req.lock == LOCK_PE) {
2055 -               if (rdev_tmp == pe_lock_req.data.pv_dev &&
2056 -                   rsector_tmp >= pe_lock_req.data.pv_offset &&
2057 -                   rsector_tmp < (pe_lock_req.data.pv_offset +
2058 -                                  vg_this->pe_size)) {
2059 -                       sleep_on(&lvm_map_wait);
2060 -                       rsector_tmp = rsector_sav;
2061 -                       rdev_tmp = rdev_sav;
2062 -                       goto lvm_second_remap;
2063 +                       up_read(&lv->lv_lock);
2064 +                       return 0;
2065                 }
2066 -       }
2067 -       /* statistic */
2068 -       if (rw == WRITE || rw == WRITEA)
2069 -               lv->lv_current_pe[index].writes++;
2070 -       else
2071 -               lv->lv_current_pe[index].reads++;
2072 +
2073 +               lv->lv_current_pe[index].writes++;      /* statistic */
2074 +       } else
2075 +               lv->lv_current_pe[index].reads++;       /* statistic */
2076  
2077         /* snapshot volume exception handling on physical device address base */
2078 -       if (lv->lv_access & (LV_SNAPSHOT|LV_SNAPSHOT_ORG)) {
2079 -               /* original logical volume */
2080 -               if (lv->lv_access & LV_SNAPSHOT_ORG) {
2081 -                       /* Serializes the access to the lv_snapshot_next list */
2082 -                       down(&lv->lv_snapshot_sem);
2083 -                       if (rw == WRITE || rw == WRITEA)
2084 -                       {
2085 -                               lv_t *lv_ptr;
2086 -
2087 -                               /* start with first snapshot and loop thrugh all of them */
2088 -                               for (lv_ptr = lv->lv_snapshot_next;
2089 -                                    lv_ptr != NULL;
2090 -                                    lv_ptr = lv_ptr->lv_snapshot_next) {
2091 -                                       /* Check for inactive snapshot */
2092 -                                       if (!(lv_ptr->lv_status & LV_ACTIVE)) continue;
2093 -                                       /* Serializes the COW with the accesses to the snapshot device */
2094 -                                       down(&lv_ptr->lv_snapshot_sem);
2095 -                                       /* do we still have exception storage for this snapshot free? */
2096 -                                       if (lv_ptr->lv_block_exception != NULL) {
2097 -                                               rdev_sav = rdev_tmp;
2098 -                                               rsector_sav = rsector_tmp;
2099 -                                               if (!lvm_snapshot_remap_block(&rdev_tmp,
2100 -                                                                             &rsector_tmp,
2101 -                                                                             pe_start,
2102 -                                                                             lv_ptr)) {
2103 -                                                       /* create a new mapping */
2104 -                                                       if (!(ret = lvm_snapshot_COW(rdev_tmp,
2105 -                                                                                    rsector_tmp,
2106 -                                                                                    pe_start,
2107 -                                                                                    rsector_sav,
2108 -                                                                                    lv_ptr)))
2109 -                                                               ret = lvm_write_COW_table_block(vg_this,
2110 -                                                                                               lv_ptr);
2111 -                                               }
2112 -                                               rdev_tmp = rdev_sav;
2113 -                                               rsector_tmp = rsector_sav;
2114 -                                       }
2115 -                                       up(&lv_ptr->lv_snapshot_sem);
2116 -                               }
2117 -                       }
2118 -                       up(&lv->lv_snapshot_sem);
2119 -               } else {
2120 -                       /* remap snapshot logical volume */
2121 -                       down(&lv->lv_snapshot_sem);
2122 -                       if (lv->lv_block_exception != NULL)
2123 -                               lvm_snapshot_remap_block(&rdev_tmp, &rsector_tmp, pe_start, lv);
2124 -                       up(&lv->lv_snapshot_sem);
2125 +       if (!(lv->lv_access & (LV_SNAPSHOT|LV_SNAPSHOT_ORG)))
2126 +               goto out;
2127 +
2128 +       if (lv->lv_access & LV_SNAPSHOT) { /* remap snapshot */
2129 +               if (lvm_snapshot_remap_block(&rdev_map, &rsector_map,
2130 +                                            pe_start, lv) < 0)
2131 +                       goto bad;
2132 +
2133 +       } else if (rw == WRITE || rw == WRITEA) { /* snapshot origin */
2134 +               lv_t *snap;
2135 +
2136 +               /* start with first snapshot and loop through all of
2137 +                  them */
2138 +               for (snap = lv->lv_snapshot_next; snap;
2139 +                    snap = snap->lv_snapshot_next) {
2140 +                       /* Check for inactive snapshot */
2141 +                       if (!(snap->lv_status & LV_ACTIVE))
2142 +                               continue;
2143 +
2144 +                       /* Serializes the COW with the accesses to the
2145 +                          snapshot device */
2146 +                       _remap_snapshot(rdev_map, rsector_map,
2147 +                                        pe_start, snap, vg_this);
2148                 }
2149         }
2150 -       bh->b_rdev = rdev_tmp;
2151 -       bh->b_rsector = rsector_tmp;
2152  
2153 -       return ret;
2154 + out:
2155 +       bh->b_rdev = rdev_map;
2156 +       bh->b_rsector = rsector_map;
2157 +       up_read(&lv->lv_lock);
2158 +       return 1;
2159 +
2160 + bad:
2161 +       buffer_IO_error(bh);
2162 +       up_read(&lv->lv_lock);
2163 +       return -1;
2164  } /* lvm_map() */
2165  
2166  
2167 @@ -1646,6 +1330,8 @@
2168  #endif
2169  
2170  
2171 +
2172 +
2173  /*
2174   * make request function
2175   */
2176 @@ -1653,11 +1339,7 @@
2177                                int rw,
2178                                struct buffer_head *bh)
2179  {
2180 -       if (lvm_map(bh, rw) >= 0)
2181 -               return 1;
2182 -
2183 -       buffer_IO_error(bh);
2184 -       return 0;
2185 +       return (lvm_map(bh, rw) <= 0) ? 0 : 1;
2186  }
2187  
2188  
2189 @@ -1674,8 +1356,7 @@
2190  lock_try_again:
2191         spin_lock(&lvm_lock);
2192         if (lock != 0 && lock != current->pid) {
2193 -               P_IOCTL("lvm_do_lock_lvm: %s is locked by pid %d ...\n",
2194 -                       lvm_name, lock);
2195 +               P_DEV("lvm_do_lock_lvm: locked by pid %d ...\n", lock);
2196                 spin_unlock(&lvm_lock);
2197                 interruptible_sleep_on(&lvm_wait);
2198                 if (current->sigpending != 0)
2199 @@ -1687,6 +1368,7 @@
2200                 goto lock_try_again;
2201         }
2202         lock = current->pid;
2203 +       P_DEV("lvm_do_lock_lvm: locking LVM for pid %d\n", lock);
2204         spin_unlock(&lvm_lock);
2205         return 0;
2206  } /* lvm_do_lock_lvm */
2207 @@ -1697,33 +1379,60 @@
2208   */
2209  static int lvm_do_pe_lock_unlock(vg_t *vg_ptr, void *arg)
2210  {
2211 +       pe_lock_req_t new_lock;
2212 +       struct buffer_head *bh;
2213         uint p;
2214  
2215         if (vg_ptr == NULL) return -ENXIO;
2216 -       if (copy_from_user(&pe_lock_req, arg,
2217 -                          sizeof(pe_lock_req_t)) != 0) return -EFAULT;
2218 +       if (copy_from_user(&new_lock, arg, sizeof(new_lock)) != 0)
2219 +               return -EFAULT;
2220  
2221 -       switch (pe_lock_req.lock) {
2222 +       switch (new_lock.lock) {
2223         case LOCK_PE:
2224                 for (p = 0; p < vg_ptr->pv_max; p++) {
2225                         if (vg_ptr->pv[p] != NULL &&
2226 -                           pe_lock_req.data.pv_dev ==
2227 -                           vg_ptr->pv[p]->pv_dev)
2228 +                           new_lock.data.pv_dev == vg_ptr->pv[p]->pv_dev)
2229                                 break;
2230                 }
2231                 if (p == vg_ptr->pv_max) return -ENXIO;
2232  
2233 -               pe_lock_req.lock = UNLOCK_PE;
2234 +               /*
2235 +                * this sync relieves memory pressure to lessen the
2236 +                * likelihood of pvmove being paged out - resulting in
2237 +                * deadlock.
2238 +                *
2239 +                * This method of doing a pvmove is broken
2240 +                */
2241                 fsync_dev(pe_lock_req.data.lv_dev);
2242 +
2243 +               down_write(&_pe_lock);
2244 +               if (pe_lock_req.lock == LOCK_PE) {
2245 +                       up_write(&_pe_lock);
2246 +                       return -EBUSY;
2247 +               }
2248 +
2249 +               /* Should we do to_kdev_t() on the pv_dev and lv_dev??? */
2250                 pe_lock_req.lock = LOCK_PE;
2251 +               pe_lock_req.data.lv_dev = new_lock.data.lv_dev;
2252 +               pe_lock_req.data.pv_dev = new_lock.data.pv_dev;
2253 +               pe_lock_req.data.pv_offset = new_lock.data.pv_offset;
2254 +               up_write(&_pe_lock);
2255 +
2256 +               /* some requests may have got through since the fsync */
2257 +               fsync_dev(pe_lock_req.data.pv_dev);
2258                 break;
2259  
2260         case UNLOCK_PE:
2261 +               down_write(&_pe_lock);
2262                 pe_lock_req.lock = UNLOCK_PE;
2263 -               pe_lock_req.data.lv_dev = \
2264 -               pe_lock_req.data.pv_dev = \
2265 +               pe_lock_req.data.lv_dev = 0;
2266 +               pe_lock_req.data.pv_dev = 0;
2267                 pe_lock_req.data.pv_offset = 0;
2268 -               wake_up(&lvm_map_wait);
2269 +               bh = _dequeue_io();
2270 +               up_write(&_pe_lock);
2271 +
2272 +               /* handle all deferred io for this PE */
2273 +               _flush_io(bh);
2274                 break;
2275  
2276         default:
2277 @@ -1760,6 +1469,8 @@
2278                                             le_remap_req.new_dev;
2279                                         lv_ptr->lv_current_pe[le].pe =
2280                                             le_remap_req.new_pe;
2281 +
2282 +                                       __update_hardsectsize(lv_ptr);
2283                                         return 0;
2284                                 }
2285                         }
2286 @@ -1773,7 +1484,7 @@
2287  /*
2288   * character device support function VGDA create
2289   */
2290 -int lvm_do_vg_create(int minor, void *arg)
2291 +static int lvm_do_vg_create(void *arg, int minor)
2292  {
2293         int ret = 0;
2294         ulong l, ls = 0, p, size;
2295 @@ -1781,8 +1492,6 @@
2296         vg_t *vg_ptr;
2297         lv_t **snap_lv_ptr;
2298  
2299 -       if (vg[VG_CHR(minor)] != NULL) return -EPERM;
2300 -
2301         if ((vg_ptr = kmalloc(sizeof(vg_t),GFP_KERNEL)) == NULL) {
2302                 printk(KERN_CRIT
2303                        "%s -- VG_CREATE: kmalloc error VG at line %d\n",
2304 @@ -1791,35 +1500,51 @@
2305         }
2306         /* get the volume group structure */
2307         if (copy_from_user(vg_ptr, arg, sizeof(vg_t)) != 0) {
2308 +               P_IOCTL("lvm_do_vg_create ERROR: copy VG ptr %p (%d bytes)\n",
2309 +                       arg, sizeof(vg_t));
2310                 kfree(vg_ptr);
2311                 return -EFAULT;
2312         }
2313  
2314 +       /* VG_CREATE now uses minor number in VG structure */
2315 +       if (minor == -1) minor = vg_ptr->vg_number;
2316 +
2317 +       /* Validate it */
2318 +       if (vg[VG_CHR(minor)] != NULL) {
2319 +               P_IOCTL("lvm_do_vg_create ERROR: VG %d in use\n", minor);
2320 +               kfree(vg_ptr);
2321 +               return -EPERM;
2322 +       }
2323 +
2324         /* we are not that active so far... */
2325         vg_ptr->vg_status &= ~VG_ACTIVE;
2326 -       vg[VG_CHR(minor)] = vg_ptr;
2327 -       vg[VG_CHR(minor)]->pe_allocated = 0;
2328 +       vg_ptr->pe_allocated = 0;
2329  
2330         if (vg_ptr->pv_max > ABS_MAX_PV) {
2331                 printk(KERN_WARNING
2332                        "%s -- Can't activate VG: ABS_MAX_PV too small\n",
2333                        lvm_name);
2334                 kfree(vg_ptr);
2335 -               vg[VG_CHR(minor)] = NULL;
2336                 return -EPERM;
2337         }
2338 +
2339         if (vg_ptr->lv_max > ABS_MAX_LV) {
2340                 printk(KERN_WARNING
2341                 "%s -- Can't activate VG: ABS_MAX_LV too small for %u\n",
2342                        lvm_name, vg_ptr->lv_max);
2343                 kfree(vg_ptr);
2344 -               vg_ptr = NULL;
2345                 return -EPERM;
2346         }
2347  
2348 +       /* create devfs and procfs entries */
2349 +       lvm_fs_create_vg(vg_ptr);
2350 +
2351 +       vg[VG_CHR(minor)] = vg_ptr;
2352 +
2353         /* get the physical volume structures */
2354         vg_ptr->pv_act = vg_ptr->pv_cur = 0;
2355         for (p = 0; p < vg_ptr->pv_max; p++) {
2356 +               pv_t *pvp;
2357                 /* user space address */
2358                 if ((pvp = vg_ptr->pv[p]) != NULL) {
2359                         ret = lvm_do_pv_create(pvp, vg_ptr, p);
2360 @@ -1843,9 +1568,12 @@
2361         /* get the logical volume structures */
2362         vg_ptr->lv_cur = 0;
2363         for (l = 0; l < vg_ptr->lv_max; l++) {
2364 +               lv_t *lvp;
2365                 /* user space address */
2366                 if ((lvp = vg_ptr->lv[l]) != NULL) {
2367                         if (copy_from_user(&lv, lvp, sizeof(lv_t)) != 0) {
2368 +                               P_IOCTL("ERROR: copying LV ptr %p (%d bytes)\n",
2369 +                                       lvp, sizeof(lv_t));
2370                                 lvm_do_vg_remove(minor);
2371                                 return -EFAULT;
2372                         }
2373 @@ -1864,12 +1592,10 @@
2374                 }
2375         }
2376  
2377 -       lvm_do_create_devfs_entry_of_vg ( vg_ptr);
2378 -
2379         /* Second path to correct snapshot logical volumes which are not
2380            in place during first path above */
2381         for (l = 0; l < ls; l++) {
2382 -               lvp = snap_lv_ptr[l];
2383 +               lv_t *lvp = snap_lv_ptr[l];
2384                 if (copy_from_user(&lv, lvp, sizeof(lv_t)) != 0) {
2385                         lvm_do_vg_remove(minor);
2386                         return -EFAULT;
2387 @@ -1880,8 +1606,6 @@
2388                 }
2389         }
2390  
2391 -       lvm_do_create_proc_entry_of_vg ( vg_ptr);
2392 -
2393         vfree(snap_lv_ptr);
2394  
2395         vg_count++;
2396 @@ -1913,7 +1637,6 @@
2397                                 if ( ret != 0) return ret;
2398                                 pv_ptr = vg_ptr->pv[p];
2399                                 vg_ptr->pe_total += pv_ptr->pe_total;
2400 -                               lvm_do_create_proc_entry_of_pv(vg_ptr, pv_ptr);
2401                                 return 0;
2402                         }
2403                 }
2404 @@ -1963,10 +1686,13 @@
2405         lv_t *lv_ptr = NULL;
2406         pv_t *pv_ptr = NULL;
2407  
2408 +       /* If the VG doesn't exist in the kernel then just exit */
2409 +       if (!vg_ptr) return 0;
2410 +
2411         if (copy_from_user(vg_name, arg, sizeof(vg_name)) != 0)
2412                 return -EFAULT;
2413  
2414 -       lvm_do_remove_proc_entry_of_vg ( vg_ptr);
2415 +       lvm_fs_remove_vg(vg_ptr);
2416  
2417         strncpy ( vg_ptr->vg_name, vg_name, sizeof ( vg_name)-1);
2418         for ( l = 0; l < vg_ptr->lv_max; l++)
2419 @@ -1988,7 +1714,7 @@
2420                 strncpy(pv_ptr->vg_name, vg_name, NAME_LEN);
2421         }
2422  
2423 -       lvm_do_create_proc_entry_of_vg ( vg_ptr);
2424 +       lvm_fs_create_vg(vg_ptr);
2425  
2426         return 0;
2427  } /* lvm_do_vg_rename */
2428 @@ -2015,6 +1741,9 @@
2429         /* let's go inactive */
2430         vg_ptr->vg_status &= ~VG_ACTIVE;
2431  
2432 +       /* remove from procfs and devfs */
2433 +       lvm_fs_remove_vg(vg_ptr);
2434 +
2435         /* free LVs */
2436         /* first free snapshot logical volumes */
2437         for (i = 0; i < vg_ptr->lv_max; i++) {
2438 @@ -2042,11 +1771,6 @@
2439                 }
2440         }
2441  
2442 -       devfs_unregister (ch_devfs_handle[vg_ptr->vg_number]);
2443 -       devfs_unregister (vg_devfs_handle[vg_ptr->vg_number]);
2444 -
2445 -       lvm_do_remove_proc_entry_of_vg ( vg_ptr);
2446 -
2447         P_KFREE("%s -- kfree %d\n", lvm_name, __LINE__);
2448         kfree(vg_ptr);
2449         vg[VG_CHR(minor)] = NULL;
2450 @@ -2063,66 +1787,138 @@
2451   * character device support function physical volume create
2452   */
2453  static int lvm_do_pv_create(pv_t *pvp, vg_t *vg_ptr, ulong p) {
2454 -       pv_t *pv_ptr = NULL;
2455 +       pv_t *pv;
2456 +       int err;
2457  
2458 -       pv_ptr = vg_ptr->pv[p] = kmalloc(sizeof(pv_t),GFP_KERNEL);
2459 -       if (pv_ptr == NULL) {
2460 +       pv = kmalloc(sizeof(pv_t),GFP_KERNEL);
2461 +       if (pv == NULL) {
2462                 printk(KERN_CRIT
2463 -                      "%s -- VG_CREATE: kmalloc error PV at line %d\n",
2464 +                      "%s -- PV_CREATE: kmalloc error PV at line %d\n",
2465                        lvm_name, __LINE__);
2466                 return -ENOMEM;
2467         }
2468 -       if (copy_from_user(pv_ptr, pvp, sizeof(pv_t)) != 0) {
2469 +
2470 +       memset(pv, 0, sizeof(*pv));
2471 +
2472 +       if (copy_from_user(pv, pvp, sizeof(pv_t)) != 0) {
2473 +               P_IOCTL("lvm_do_pv_create ERROR: copy PV ptr %p (%d bytes)\n",
2474 +                       pvp, sizeof(pv_t));
2475 +               kfree(pv);
2476                 return -EFAULT;
2477         }
2478 +
2479 +       if ((err = _open_pv(pv))) {
2480 +               kfree(pv);
2481 +               return err;
2482 +       }
2483 +
2484         /* We don't need the PE list
2485            in kernel space as with LVs pe_t list (see below) */
2486 -       pv_ptr->pe = NULL;
2487 -       pv_ptr->pe_allocated = 0;
2488 -       pv_ptr->pv_status = PV_ACTIVE;
2489 +       pv->pe = NULL;
2490 +       pv->pe_allocated = 0;
2491 +       pv->pv_status = PV_ACTIVE;
2492         vg_ptr->pv_act++;
2493         vg_ptr->pv_cur++;
2494 +       lvm_fs_create_pv(vg_ptr, pv);
2495  
2496 +       vg_ptr->pv[p] = pv;
2497         return 0;
2498  } /* lvm_do_pv_create() */
2499  
2500  
2501  /*
2502 - * character device support function physical volume create
2503 + * character device support function physical volume remove
2504   */
2505  static int lvm_do_pv_remove(vg_t *vg_ptr, ulong p) {
2506 -       pv_t *pv_ptr = vg_ptr->pv[p];
2507 +       pv_t *pv = vg_ptr->pv[p];
2508  
2509 -       lvm_do_remove_proc_entry_of_pv ( vg_ptr, pv_ptr);
2510 -       vg_ptr->pe_total -= pv_ptr->pe_total;
2511 +       lvm_fs_remove_pv(vg_ptr, pv);
2512 +
2513 +       vg_ptr->pe_total -= pv->pe_total;
2514         vg_ptr->pv_cur--;
2515         vg_ptr->pv_act--;
2516 -#ifdef LVM_GET_INODE
2517 -       lvm_clear_inode(pv_ptr->inode);
2518 -#endif
2519 -       kfree(pv_ptr);
2520 +
2521 +       _close_pv(pv);
2522 +       kfree(pv);
2523 +
2524         vg_ptr->pv[p] = NULL;
2525  
2526         return 0;
2527  }
2528  
2529  
2530 +static void __update_hardsectsize(lv_t *lv)
2531 +{
2532 +       int max_hardsectsize = 0, hardsectsize = 0;
2533 +       int p;
2534 +
2535 +       /* Check PVs first to see if they all have same sector size */
2536 +       for (p = 0; p < lv->vg->pv_cur; p++) {
2537 +               pv_t *pv = lv->vg->pv[p];
2538 +               if (pv && (hardsectsize = lvm_sectsize(pv->pv_dev))) {
2539 +                       if (max_hardsectsize == 0)
2540 +                               max_hardsectsize = hardsectsize;
2541 +                       else if (hardsectsize != max_hardsectsize) {
2542 +                               P_DEV("%s PV[%d] (%s) sector size %d, not %d\n",
2543 +                                     lv->lv_name, p, kdevname(pv->pv_dev),
2544 +                                     hardsectsize, max_hardsectsize);
2545 +                               break;
2546 +                       }
2547 +               }
2548 +       }
2549 +
2550 +       /* PVs have different block size, need to check each LE sector size */
2551 +       if (hardsectsize != max_hardsectsize) {
2552 +               int le;
2553 +               for (le = 0; le < lv->lv_allocated_le; le++) {
2554 +                       hardsectsize = lvm_sectsize(lv->lv_current_pe[le].dev);
2555 +                       if (hardsectsize > max_hardsectsize) {
2556 +                               P_DEV("%s LE[%d] (%s) blocksize %d not %d\n",
2557 +                                     lv->lv_name, le,
2558 +                                     kdevname(lv->lv_current_pe[le].dev),
2559 +                                     hardsectsize, max_hardsectsize);
2560 +                               max_hardsectsize = hardsectsize;
2561 +                       }
2562 +               }
2563 +
2564 +               /* only perform this operation on active snapshots */
2565 +               if ((lv->lv_access & LV_SNAPSHOT) &&
2566 +                   (lv->lv_status & LV_ACTIVE)) {
2567 +                       int e;
2568 +                       for (e = 0; e < lv->lv_remap_end; e++) {
2569 +                               hardsectsize = lvm_sectsize(lv->lv_block_exception[e].rdev_new);
2570 +                               if (hardsectsize > max_hardsectsize)
2571 +                                       max_hardsectsize = hardsectsize;
2572 +                       }
2573 +               }
2574 +       }
2575 +
2576 +       if (max_hardsectsize == 0)
2577 +               max_hardsectsize = SECTOR_SIZE;
2578 +       P_DEV("hardblocksize for LV %s is %d\n",
2579 +             kdevname(lv->lv_dev), max_hardsectsize);
2580 +       lvm_hardsectsizes[MINOR(lv->lv_dev)] = max_hardsectsize;
2581 +}
2582 +
2583  /*
2584   * character device support function logical volume create
2585   */
2586  static int lvm_do_lv_create(int minor, char *lv_name, lv_t *lv)
2587  {
2588 -       int e, ret, l, le, l_new, p, size;
2589 +       int e, ret, l, le, l_new, p, size, activate = 1;
2590         ulong lv_status_save;
2591         lv_block_exception_t *lvbe = lv->lv_block_exception;
2592         vg_t *vg_ptr = vg[VG_CHR(minor)];
2593         lv_t *lv_ptr = NULL;
2594 +       pe_t *pep;
2595  
2596 -       if ((pep = lv->lv_current_pe) == NULL) return -EINVAL;
2597 -       if (lv->lv_chunk_size > LVM_SNAPSHOT_MAX_CHUNK)
2598 +       if (!(pep = lv->lv_current_pe))
2599                 return -EINVAL;
2600  
2601 -       for (l = 0; l < vg_ptr->lv_max; l++) {
2602 +       if (_sectors_to_k(lv->lv_chunk_size) > LVM_SNAPSHOT_MAX_CHUNK)
2603 +               return -EINVAL;
2604 +
2605 +       for (l = 0; l < vg_ptr->lv_cur; l++) {
2606                 if (vg_ptr->lv[l] != NULL &&
2607                     strcmp(vg_ptr->lv[l]->lv_name, lv_name) == 0)
2608                         return -EEXIST;
2609 @@ -2151,23 +1947,26 @@
2610  
2611         lv_status_save = lv_ptr->lv_status;
2612         lv_ptr->lv_status &= ~LV_ACTIVE;
2613 -       lv_ptr->lv_snapshot_org = \
2614 -       lv_ptr->lv_snapshot_prev = \
2615 +       lv_ptr->lv_snapshot_org = NULL;
2616 +       lv_ptr->lv_snapshot_prev = NULL;
2617         lv_ptr->lv_snapshot_next = NULL;
2618         lv_ptr->lv_block_exception = NULL;
2619         lv_ptr->lv_iobuf = NULL;
2620 +       lv_ptr->lv_COW_table_iobuf = NULL;
2621         lv_ptr->lv_snapshot_hash_table = NULL;
2622         lv_ptr->lv_snapshot_hash_table_size = 0;
2623         lv_ptr->lv_snapshot_hash_mask = 0;
2624 -       lv_ptr->lv_COW_table_page = NULL;
2625 -       init_MUTEX(&lv_ptr->lv_snapshot_sem);
2626 +       init_rwsem(&lv_ptr->lv_lock);
2627 +
2628         lv_ptr->lv_snapshot_use_rate = 0;
2629 +
2630         vg_ptr->lv[l] = lv_ptr;
2631  
2632         /* get the PE structures from user space if this
2633 -          is no snapshot logical volume */
2634 +          is not a snapshot logical volume */
2635         if (!(lv_ptr->lv_access & LV_SNAPSHOT)) {
2636                 size = lv_ptr->lv_allocated_le * sizeof(pe_t);
2637 +
2638                 if ((lv_ptr->lv_current_pe = vmalloc(size)) == NULL) {
2639                         printk(KERN_CRIT
2640                                "%s -- LV_CREATE: vmalloc error LV_CURRENT_PE of %d Byte "
2641 @@ -2179,6 +1978,8 @@
2642                         return -ENOMEM;
2643                 }
2644                 if (copy_from_user(lv_ptr->lv_current_pe, pep, size)) {
2645 +                       P_IOCTL("ERROR: copying PE ptr %p (%d bytes)\n",
2646 +                               pep, sizeof(size));
2647                         vfree(lv_ptr->lv_current_pe);
2648                         kfree(lv_ptr);
2649                         vg_ptr->lv[l] = NULL;
2650 @@ -2200,6 +2001,15 @@
2651                             vg_ptr->lv[LV_BLK(lv_ptr->lv_snapshot_minor)];
2652                         if (lv_ptr->lv_snapshot_org != NULL) {
2653                                 size = lv_ptr->lv_remap_end * sizeof(lv_block_exception_t);
2654 +
2655 +                               if (!size) {
2656 +                                       printk(KERN_WARNING
2657 +                                              "%s -- zero length exception table requested\n",
2658 +                                              lvm_name);
2659 +                                       kfree(lv_ptr);
2660 +                                       return -EINVAL;
2661 +                               }
2662 +
2663                                 if ((lv_ptr->lv_block_exception = vmalloc(size)) == NULL) {
2664                                         printk(KERN_CRIT
2665                                                "%s -- lvm_do_lv_create: vmalloc error LV_BLOCK_EXCEPTION "
2666 @@ -2217,6 +2027,16 @@
2667                                         vg_ptr->lv[l] = NULL;
2668                                         return -EFAULT;
2669                                 }
2670 +
2671 +                               if(lv_ptr->lv_block_exception[0].rsector_org ==
2672 +                                  LVM_SNAPSHOT_DROPPED_SECTOR)
2673 +                               {
2674 +                                       printk(KERN_WARNING
2675 +   "%s -- lvm_do_lv_create: snapshot has been dropped and will not be activated\n",
2676 +                                              lvm_name);
2677 +                                       activate = 0;
2678 +                               }
2679 +
2680                                 /* point to the original logical volume */
2681                                 lv_ptr = lv_ptr->lv_snapshot_org;
2682  
2683 @@ -2250,10 +2070,13 @@
2684                                                        lv_ptr->lv_block_exception[e].rsector_org, lv_ptr);
2685                                 /* need to fill the COW exception table data
2686                                    into the page for disk i/o */
2687 -                               lvm_snapshot_fill_COW_page(vg_ptr, lv_ptr);
2688 +                               if(lvm_snapshot_fill_COW_page(vg_ptr, lv_ptr)) {
2689 +                                       kfree(lv_ptr);
2690 +                                       vg_ptr->lv[l] = NULL;
2691 +                                       return -EINVAL;
2692 +                               }
2693                                 init_waitqueue_head(&lv_ptr->lv_snapshot_wait);
2694                         } else {
2695 -                               vfree(lv_ptr->lv_block_exception);
2696                                 kfree(lv_ptr);
2697                                 vg_ptr->lv[l] = NULL;
2698                                 return -EFAULT;
2699 @@ -2274,22 +2097,9 @@
2700         LVM_CORRECT_READ_AHEAD(lv_ptr->lv_read_ahead);
2701         vg_ptr->lv_cur++;
2702         lv_ptr->lv_status = lv_status_save;
2703 +       lv_ptr->vg = vg_ptr;
2704  
2705 -       {
2706 -       char *lv_tmp, *lv_buf = lv->lv_name;
2707 -
2708 -       strtok(lv->lv_name, "/");       /* /dev */
2709 -       while((lv_tmp = strtok(NULL, "/")) != NULL)
2710 -               lv_buf = lv_tmp;
2711 -
2712 -       lv_devfs_handle[lv->lv_number] = devfs_register(
2713 -               vg_devfs_handle[vg_ptr->vg_number], lv_buf,
2714 -               DEVFS_FL_DEFAULT, LVM_BLK_MAJOR, lv->lv_number,
2715 -               S_IFBLK | S_IRUSR | S_IWUSR | S_IRGRP,
2716 -               &lvm_blk_dops, NULL);
2717 -       }
2718 -
2719 -       lvm_do_create_proc_entry_of_lv ( vg_ptr, lv_ptr);
2720 +       __update_hardsectsize(lv_ptr);
2721  
2722         /* optionally add our new snapshot LV */
2723         if (lv_ptr->lv_access & LV_SNAPSHOT) {
2724 @@ -2302,19 +2112,24 @@
2725                 fsync_dev_lockfs(org->lv_dev);
2726  #endif
2727  
2728 -               down(&org->lv_snapshot_sem);
2729 +               down_write(&org->lv_lock);
2730                 org->lv_access |= LV_SNAPSHOT_ORG;
2731                 lv_ptr->lv_access &= ~LV_SNAPSHOT_ORG; /* this can only hide an userspace bug */
2732  
2733 +
2734                 /* Link in the list of snapshot volumes */
2735                 for (last = org; last->lv_snapshot_next; last = last->lv_snapshot_next);
2736                 lv_ptr->lv_snapshot_prev = last;
2737                 last->lv_snapshot_next = lv_ptr;
2738 -               up(&org->lv_snapshot_sem);
2739 +               up_write(&org->lv_lock);
2740         }
2741  
2742         /* activate the logical volume */
2743 -       lv_ptr->lv_status |= LV_ACTIVE;
2744 +       if(activate)
2745 +               lv_ptr->lv_status |= LV_ACTIVE;
2746 +       else
2747 +               lv_ptr->lv_status &= ~LV_ACTIVE;
2748 +
2749         if ( lv_ptr->lv_access & LV_WRITE)
2750                 set_device_ro(lv_ptr->lv_dev, 0);
2751         else
2752 @@ -2322,13 +2137,12 @@
2753  
2754  #ifdef LVM_VFS_ENHANCEMENT
2755  /* VFS function call to unlock the filesystem */
2756 -       if (lv_ptr->lv_access & LV_SNAPSHOT) {
2757 +       if (lv_ptr->lv_access & LV_SNAPSHOT)
2758                 unlockfs(lv_ptr->lv_snapshot_org->lv_dev);
2759 -       }
2760  #endif
2761  
2762 -       lv_ptr->vg = vg_ptr;
2763 -
2764 +       lvm_gendisk.part[MINOR(lv_ptr->lv_dev)].de =
2765 +           lvm_fs_create_lv(vg_ptr, lv_ptr);
2766         return 0;
2767  } /* lvm_do_lv_create() */
2768  
2769 @@ -2366,13 +2180,15 @@
2770             lv_ptr->lv_snapshot_next != NULL)
2771                 return -EPERM;
2772  
2773 +       lvm_fs_remove_lv(vg_ptr, lv_ptr);
2774 +
2775         if (lv_ptr->lv_access & LV_SNAPSHOT) {
2776                 /*
2777                  * Atomically make the the snapshot invisible
2778                  * to the original lv before playing with it.
2779                  */
2780                 lv_t * org = lv_ptr->lv_snapshot_org;
2781 -               down(&org->lv_snapshot_sem);
2782 +               down_write(&org->lv_lock);
2783  
2784                 /* remove this snapshot logical volume from the chain */
2785                 lv_ptr->lv_snapshot_prev->lv_snapshot_next = lv_ptr->lv_snapshot_next;
2786 @@ -2380,11 +2196,13 @@
2787                         lv_ptr->lv_snapshot_next->lv_snapshot_prev =
2788                             lv_ptr->lv_snapshot_prev;
2789                 }
2790 -               up(&org->lv_snapshot_sem);
2791  
2792                 /* no more snapshots? */
2793 -               if (!org->lv_snapshot_next)
2794 +               if (!org->lv_snapshot_next) {
2795                         org->lv_access &= ~LV_SNAPSHOT_ORG;
2796 +               }
2797 +               up_write(&org->lv_lock);
2798 +
2799                 lvm_snapshot_release(lv_ptr);
2800  
2801                 /* Update the VG PE(s) used by snapshot reserve space. */
2802 @@ -2404,6 +2222,7 @@
2803         /* reset generic hd */
2804         lvm_gendisk.part[MINOR(lv_ptr->lv_dev)].start_sect = -1;
2805         lvm_gendisk.part[MINOR(lv_ptr->lv_dev)].nr_sects = 0;
2806 +       lvm_gendisk.part[MINOR(lv_ptr->lv_dev)].de = 0;
2807         lvm_size[MINOR(lv_ptr->lv_dev)] = 0;
2808  
2809         /* reset VG/LV mapping */
2810 @@ -2427,10 +2246,6 @@
2811                 vfree(lv_ptr->lv_current_pe);
2812         }
2813  
2814 -       devfs_unregister(lv_devfs_handle[lv_ptr->lv_number]);
2815 -
2816 -       lvm_do_remove_proc_entry_of_lv ( vg_ptr, lv_ptr);
2817 -
2818         P_KFREE("%s -- kfree %d\n", lvm_name, __LINE__);
2819         kfree(lv_ptr);
2820         vg_ptr->lv[l] = NULL;
2821 @@ -2440,204 +2255,215 @@
2822  
2823  
2824  /*
2825 - * character device support function logical volume extend / reduce
2826 + * logical volume extend / reduce
2827   */
2828 -static int lvm_do_lv_extend_reduce(int minor, char *lv_name, lv_t *lv)
2829 -{
2830 -       ulong end, l, le, p, size, old_allocated_le;
2831 -       vg_t *vg_ptr = vg[VG_CHR(minor)];
2832 -       lv_t *lv_ptr;
2833 -       pe_t *pe;
2834 +static int __extend_reduce_snapshot(vg_t *vg_ptr, lv_t *old_lv, lv_t *new_lv) {
2835 +       ulong size;
2836 +       lv_block_exception_t *lvbe;
2837  
2838 -       if ((pep = lv->lv_current_pe) == NULL) return -EINVAL;
2839 +       if (!new_lv->lv_block_exception)
2840 +               return -ENXIO;
2841  
2842 -       for (l = 0; l < vg_ptr->lv_max; l++) {
2843 -               if (vg_ptr->lv[l] != NULL &&
2844 -                   strcmp(vg_ptr->lv[l]->lv_name, lv_name) == 0)
2845 -                       break;
2846 +       size = new_lv->lv_remap_end * sizeof(lv_block_exception_t);
2847 +       if ((lvbe = vmalloc(size)) == NULL) {
2848 +               printk(KERN_CRIT
2849 +                      "%s -- lvm_do_lv_extend_reduce: vmalloc "
2850 +                      "error LV_BLOCK_EXCEPTION of %lu Byte at line %d\n",
2851 +                      lvm_name, size, __LINE__);
2852 +               return -ENOMEM;
2853         }
2854 -       if (l == vg_ptr->lv_max) return -ENXIO;
2855 -       lv_ptr = vg_ptr->lv[l];
2856  
2857 -       /* check for active snapshot */
2858 -       if (lv->lv_access & LV_SNAPSHOT)
2859 -       {
2860 -               ulong e;
2861 -               lv_block_exception_t *lvbe, *lvbe_old;
2862 -               struct list_head * lvs_hash_table_old;
2863 -
2864 -               if (lv->lv_block_exception == NULL) return -ENXIO;
2865 -               size = lv->lv_remap_end * sizeof ( lv_block_exception_t);
2866 -               if ((lvbe = vmalloc(size)) == NULL)
2867 -               {
2868 -                       printk(KERN_CRIT
2869 -                       "%s -- lvm_do_lv_extend_reduce: vmalloc error LV_BLOCK_EXCEPTION "
2870 -                              "of %lu Byte at line %d\n",
2871 -                              lvm_name, size, __LINE__);
2872 -                       return -ENOMEM;
2873 -               }
2874 -               if (lv->lv_remap_end > lv_ptr->lv_remap_end)
2875 -               {
2876 -                       if (copy_from_user(lvbe, lv->lv_block_exception, size))
2877 -                       {
2878 -                               vfree(lvbe);
2879 -                               return -EFAULT;
2880 -                       }
2881 -               }
2882 -
2883 -               lvbe_old = lv_ptr->lv_block_exception;
2884 -               lvs_hash_table_old = lv_ptr->lv_snapshot_hash_table;
2885 -
2886 -               /* we need to play on the safe side here... */
2887 -               down(&lv_ptr->lv_snapshot_org->lv_snapshot_sem);
2888 -               if (lv_ptr->lv_block_exception == NULL ||
2889 -                   lv_ptr->lv_remap_ptr > lv_ptr->lv_remap_end)
2890 -               {
2891 -                       up(&lv_ptr->lv_snapshot_org->lv_snapshot_sem);
2892 -                       vfree(lvbe);
2893 -                       return -EPERM;
2894 -               }
2895 -               memcpy(lvbe,
2896 -                      lv_ptr->lv_block_exception,
2897 -                      (lv->lv_remap_end > lv_ptr->lv_remap_end ?
2898 -                       lv_ptr->lv_remap_ptr : lv->lv_remap_end) * sizeof(lv_block_exception_t));
2899 -
2900 -               lv_ptr->lv_block_exception = lvbe;
2901 -               lv_ptr->lv_remap_end = lv->lv_remap_end;
2902 -               if (lvm_snapshot_alloc_hash_table(lv_ptr) != 0)
2903 -               {
2904 -                       lvm_drop_snapshot(lv_ptr, "no memory for hash table");
2905 -                       up(&lv_ptr->lv_snapshot_org->lv_snapshot_sem);
2906 -                       vfree(lvbe_old);
2907 -                       vfree(lvs_hash_table_old);
2908 -                       return -ENOMEM;
2909 -               }
2910 -
2911 -               for (e = 0; e < lv_ptr->lv_remap_ptr; e++)
2912 -                       lvm_hash_link (lv_ptr->lv_block_exception + e,
2913 -                                      lv_ptr->lv_block_exception[e].rdev_org,
2914 -                                      lv_ptr->lv_block_exception[e].rsector_org, lv_ptr);
2915 -
2916 -               up(&lv_ptr->lv_snapshot_org->lv_snapshot_sem);
2917 -
2918 -               vfree(lvbe_old);
2919 -               vfree(lvs_hash_table_old);
2920 +       if ((new_lv->lv_remap_end > old_lv->lv_remap_end) &&
2921 +           (copy_from_user(lvbe, new_lv->lv_block_exception, size))) {
2922 +               vfree(lvbe);
2923 +               return -EFAULT;
2924 +       }
2925 +       new_lv->lv_block_exception = lvbe;
2926  
2927 -               return 0;
2928 +       if (lvm_snapshot_alloc_hash_table(new_lv)) {
2929 +               vfree(new_lv->lv_block_exception);
2930 +               return -ENOMEM;
2931         }
2932  
2933 +       return 0;
2934 +}
2935  
2936 -       /* we drop in here in case it is an original logical volume */
2937 -       if ((pe = vmalloc(size = lv->lv_current_le * sizeof(pe_t))) == NULL) {
2938 +static int __extend_reduce(vg_t *vg_ptr, lv_t *old_lv, lv_t *new_lv) {
2939 +       ulong size, l, p, end;
2940 +       pe_t *pe;
2941 +
2942 +       /* allocate space for new pe structures */
2943 +       size = new_lv->lv_current_le * sizeof(pe_t);
2944 +       if ((pe = vmalloc(size)) == NULL) {
2945                 printk(KERN_CRIT
2946 -               "%s -- lvm_do_lv_extend_reduce: vmalloc error LV_CURRENT_PE "
2947 -                      "of %lu Byte at line %d\n",
2948 +                      "%s -- lvm_do_lv_extend_reduce: "
2949 +                      "vmalloc error LV_CURRENT_PE of %lu Byte at line %d\n",
2950                        lvm_name, size, __LINE__);
2951                 return -ENOMEM;
2952         }
2953 +
2954         /* get the PE structures from user space */
2955 -       if (copy_from_user(pe, pep, size)) {
2956 +       if (copy_from_user(pe, new_lv->lv_current_pe, size)) {
2957 +               if(old_lv->lv_access & LV_SNAPSHOT)
2958 +                       vfree(new_lv->lv_snapshot_hash_table);
2959                 vfree(pe);
2960                 return -EFAULT;
2961         }
2962  
2963 +       new_lv->lv_current_pe = pe;
2964 +
2965         /* reduce allocation counters on PV(s) */
2966 -       for (le = 0; le < lv_ptr->lv_allocated_le; le++) {
2967 +       for (l = 0; l < old_lv->lv_allocated_le; l++) {
2968                 vg_ptr->pe_allocated--;
2969                 for (p = 0; p < vg_ptr->pv_cur; p++) {
2970                         if (vg_ptr->pv[p]->pv_dev ==
2971 -                       lv_ptr->lv_current_pe[le].dev) {
2972 +                           old_lv->lv_current_pe[l].dev) {
2973                                 vg_ptr->pv[p]->pe_allocated--;
2974                                 break;
2975                         }
2976                 }
2977         }
2978  
2979 -
2980 -       /* save pointer to "old" lv/pe pointer array */
2981 -       pep1 = lv_ptr->lv_current_pe;
2982 -       end = lv_ptr->lv_current_le;
2983 -
2984 -       /* save open counter... */
2985 -       lv->lv_open = lv_ptr->lv_open;
2986 -       lv->lv_snapshot_prev = lv_ptr->lv_snapshot_prev;
2987 -       lv->lv_snapshot_next = lv_ptr->lv_snapshot_next;
2988 -       lv->lv_snapshot_org  = lv_ptr->lv_snapshot_org;
2989 -
2990 -       lv->lv_current_pe = pe;
2991 -
2992 -       /* save # of old allocated logical extents */
2993 -       old_allocated_le = lv_ptr->lv_allocated_le;
2994 -
2995 -       /* copy preloaded LV */
2996 -       memcpy((char *) lv_ptr, (char *) lv, sizeof(lv_t));
2997 -
2998 -       lvm_gendisk.part[MINOR(lv_ptr->lv_dev)].start_sect = 0;
2999 -       lvm_gendisk.part[MINOR(lv_ptr->lv_dev)].nr_sects = lv_ptr->lv_size;
3000 -       lvm_size[MINOR(lv_ptr->lv_dev)] = lv_ptr->lv_size >> 1;
3001 -       /* vg_lv_map array doesn't have to be changed here */
3002 -
3003 -       LVM_CORRECT_READ_AHEAD(lv_ptr->lv_read_ahead);
3004 +       /* extend the PE count in PVs */
3005 +       for (l = 0; l < new_lv->lv_allocated_le; l++) {
3006 +               vg_ptr->pe_allocated++;
3007 +               for (p = 0; p < vg_ptr->pv_cur; p++) {
3008 +                       if (vg_ptr->pv[p]->pv_dev ==
3009 +                            new_lv->lv_current_pe[l].dev) {
3010 +                               vg_ptr->pv[p]->pe_allocated++;
3011 +                               break;
3012 +                       }
3013 +               }
3014 +       }
3015  
3016         /* save availiable i/o statistic data */
3017 -       /* linear logical volume */
3018 -       if (lv_ptr->lv_stripes < 2) {
3019 -               /* Check what last LE shall be used */
3020 -               if (end > lv_ptr->lv_current_le) end = lv_ptr->lv_current_le;
3021 -               for (le = 0; le < end; le++) {
3022 -                       lv_ptr->lv_current_pe[le].reads  += pep1[le].reads;
3023 -                       lv_ptr->lv_current_pe[le].writes += pep1[le].writes;
3024 +       if (old_lv->lv_stripes < 2) {   /* linear logical volume */
3025 +               end = min(old_lv->lv_current_le, new_lv->lv_current_le);
3026 +               for (l = 0; l < end; l++) {
3027 +                       new_lv->lv_current_pe[l].reads +=
3028 +                               old_lv->lv_current_pe[l].reads;
3029 +
3030 +                       new_lv->lv_current_pe[l].writes +=
3031 +                               old_lv->lv_current_pe[l].writes;
3032                 }
3033 -               /* striped logical volume */
3034 -       } else {
3035 +
3036 +       } else {                /* striped logical volume */
3037                 uint i, j, source, dest, end, old_stripe_size, new_stripe_size;
3038  
3039 -               old_stripe_size = old_allocated_le / lv_ptr->lv_stripes;
3040 -               new_stripe_size = lv_ptr->lv_allocated_le / lv_ptr->lv_stripes;
3041 -               end = old_stripe_size;
3042 -               if (end > new_stripe_size) end = new_stripe_size;
3043 -               for (i = source = dest = 0;
3044 -                    i < lv_ptr->lv_stripes; i++) {
3045 +               old_stripe_size = old_lv->lv_allocated_le / old_lv->lv_stripes;
3046 +               new_stripe_size = new_lv->lv_allocated_le / new_lv->lv_stripes;
3047 +               end = min(old_stripe_size, new_stripe_size);
3048 +
3049 +               for (i = source = dest = 0; i < new_lv->lv_stripes; i++) {
3050                         for (j = 0; j < end; j++) {
3051 -                               lv_ptr->lv_current_pe[dest + j].reads +=
3052 -                                   pep1[source + j].reads;
3053 -                               lv_ptr->lv_current_pe[dest + j].writes +=
3054 -                                   pep1[source + j].writes;
3055 +                               new_lv->lv_current_pe[dest + j].reads +=
3056 +                                   old_lv->lv_current_pe[source + j].reads;
3057 +                               new_lv->lv_current_pe[dest + j].writes +=
3058 +                                   old_lv->lv_current_pe[source + j].writes;
3059                         }
3060                         source += old_stripe_size;
3061                         dest += new_stripe_size;
3062                 }
3063         }
3064  
3065 -       /* extend the PE count in PVs */
3066 -       for (le = 0; le < lv_ptr->lv_allocated_le; le++) {
3067 -               vg_ptr->pe_allocated++;
3068 -               for (p = 0; p < vg_ptr->pv_cur; p++) {
3069 -                       if (vg_ptr->pv[p]->pv_dev ==
3070 -                            lv_ptr->lv_current_pe[le].dev) {
3071 -                               vg_ptr->pv[p]->pe_allocated++;
3072 -                               break;
3073 -                       }
3074 -               }
3075 -       }
3076 +       return 0;
3077 +}
3078 +
3079 +static int lvm_do_lv_extend_reduce(int minor, char *lv_name, lv_t *new_lv)
3080 +{
3081 +       int r;
3082 +       ulong l, e, size;
3083 +       vg_t *vg_ptr = vg[VG_CHR(minor)];
3084 +       lv_t *old_lv;
3085 +       pe_t *pe;
3086  
3087 -       vfree ( pep1);
3088 -       pep1 = NULL;
3089 +       if ((pe = new_lv->lv_current_pe) == NULL)
3090 +               return -EINVAL;
3091  
3092 -       if (lv->lv_access & LV_SNAPSHOT_ORG)
3093 -       {
3094 -               /* Correct the snapshot size information */
3095 -               while ((lv_ptr = lv_ptr->lv_snapshot_next) != NULL)
3096 -               {
3097 -                       lv_ptr->lv_current_pe = lv_ptr->lv_snapshot_org->lv_current_pe;
3098 -                       lv_ptr->lv_allocated_le = lv_ptr->lv_snapshot_org->lv_allocated_le;
3099 -                       lv_ptr->lv_current_le = lv_ptr->lv_snapshot_org->lv_current_le;
3100 -                       lv_ptr->lv_size = lv_ptr->lv_snapshot_org->lv_size;
3101 -                       lvm_gendisk.part[MINOR(lv_ptr->lv_dev)].nr_sects = lv_ptr->lv_size;
3102 -                       lvm_size[MINOR(lv_ptr->lv_dev)] = lv_ptr->lv_size >> 1;
3103 +       for (l = 0; l < vg_ptr->lv_max; l++)
3104 +               if (vg_ptr->lv[l] && !strcmp(vg_ptr->lv[l]->lv_name, lv_name))
3105 +                       break;
3106 +
3107 +       if (l == vg_ptr->lv_max)
3108 +               return -ENXIO;
3109 +
3110 +       old_lv = vg_ptr->lv[l];
3111 +
3112 +       if (old_lv->lv_access & LV_SNAPSHOT) {
3113 +               /* only perform this operation on active snapshots */
3114 +               if (old_lv->lv_status & LV_ACTIVE)
3115 +                       r = __extend_reduce_snapshot(vg_ptr, old_lv, new_lv);
3116 +               else
3117 +                       r = -EPERM;
3118 +
3119 +       } else
3120 +               r = __extend_reduce(vg_ptr, old_lv, new_lv);
3121 +
3122 +       if(r)
3123 +               return r;
3124 +
3125 +       /* copy relevant fields */
3126 +       down_write(&old_lv->lv_lock);
3127 +
3128 +       if(new_lv->lv_access & LV_SNAPSHOT) {
3129 +               size = (new_lv->lv_remap_end > old_lv->lv_remap_end) ?
3130 +                       old_lv->lv_remap_ptr : new_lv->lv_remap_end;
3131 +               size *= sizeof(lv_block_exception_t);
3132 +               memcpy(new_lv->lv_block_exception,
3133 +                      old_lv->lv_block_exception, size);
3134 +
3135 +               old_lv->lv_remap_end = new_lv->lv_remap_end;
3136 +               old_lv->lv_block_exception = new_lv->lv_block_exception;
3137 +               old_lv->lv_snapshot_hash_table =
3138 +                       new_lv->lv_snapshot_hash_table;
3139 +               old_lv->lv_snapshot_hash_table_size =
3140 +                       new_lv->lv_snapshot_hash_table_size;
3141 +               old_lv->lv_snapshot_hash_mask =
3142 +                       new_lv->lv_snapshot_hash_mask;
3143 +
3144 +               for (e = 0; e < new_lv->lv_remap_ptr; e++)
3145 +                       lvm_hash_link(new_lv->lv_block_exception + e,
3146 +                                     new_lv->lv_block_exception[e].rdev_org,
3147 +                                     new_lv->lv_block_exception[e].rsector_org,
3148 +                                     new_lv);
3149 +
3150 +       } else {
3151 +
3152 +               vfree(old_lv->lv_current_pe);
3153 +               vfree(old_lv->lv_snapshot_hash_table);
3154 +
3155 +               old_lv->lv_size = new_lv->lv_size;
3156 +               old_lv->lv_allocated_le = new_lv->lv_allocated_le;
3157 +               old_lv->lv_current_le = new_lv->lv_current_le;
3158 +               old_lv->lv_current_pe = new_lv->lv_current_pe;
3159 +               lvm_gendisk.part[MINOR(old_lv->lv_dev)].nr_sects =
3160 +                       old_lv->lv_size;
3161 +               lvm_size[MINOR(old_lv->lv_dev)] = old_lv->lv_size >> 1;
3162 +
3163 +               if (old_lv->lv_access & LV_SNAPSHOT_ORG) {
3164 +                       lv_t *snap;
3165 +                       for(snap = old_lv->lv_snapshot_next; snap;
3166 +                           snap = snap->lv_snapshot_next) {
3167 +                               down_write(&snap->lv_lock);
3168 +                               snap->lv_current_pe = old_lv->lv_current_pe;
3169 +                               snap->lv_allocated_le =
3170 +                                       old_lv->lv_allocated_le;
3171 +                               snap->lv_current_le = old_lv->lv_current_le;
3172 +                               snap->lv_size = old_lv->lv_size;
3173 +
3174 +                               lvm_gendisk.part[MINOR(snap->lv_dev)].nr_sects
3175 +                                       = old_lv->lv_size;
3176 +                               lvm_size[MINOR(snap->lv_dev)] =
3177 +                                       old_lv->lv_size >> 1;
3178 +                               __update_hardsectsize(snap);
3179 +                               up_write(&snap->lv_lock);
3180 +                       }
3181                 }
3182         }
3183  
3184 +       __update_hardsectsize(old_lv);
3185 +       up_write(&old_lv->lv_lock);
3186 +
3187         return 0;
3188  } /* lvm_do_lv_extend_reduce() */
3189  
3190 @@ -2648,10 +2474,10 @@
3191  static int lvm_do_lv_status_byname(vg_t *vg_ptr, void *arg)
3192  {
3193         uint l;
3194 -       ulong size;
3195 -       lv_t lv;
3196 -       lv_t *lv_ptr;
3197         lv_status_byname_req_t lv_status_byname_req;
3198 +       void *saved_ptr1;
3199 +       void *saved_ptr2;
3200 +       lv_t *lv_ptr;
3201  
3202         if (vg_ptr == NULL) return -ENXIO;
3203         if (copy_from_user(&lv_status_byname_req, arg,
3204 @@ -2659,28 +2485,30 @@
3205                 return -EFAULT;
3206  
3207         if (lv_status_byname_req.lv == NULL) return -EINVAL;
3208 -       if (copy_from_user(&lv, lv_status_byname_req.lv,
3209 -                          sizeof(lv_t)) != 0)
3210 -               return -EFAULT;
3211  
3212         for (l = 0; l < vg_ptr->lv_max; l++) {
3213 -               lv_ptr = vg_ptr->lv[l];
3214 -               if (lv_ptr != NULL &&
3215 +               if ((lv_ptr = vg_ptr->lv[l]) != NULL &&
3216                     strcmp(lv_ptr->lv_name,
3217 -                           lv_status_byname_req.lv_name) == 0) {
3218 -                       if (copy_to_user(lv_status_byname_req.lv,
3219 +                          lv_status_byname_req.lv_name) == 0) {
3220 +                       /* Save usermode pointers */
3221 +                       if (copy_from_user(&saved_ptr1, &lv_status_byname_req.lv->lv_current_pe, sizeof(void*)) != 0)
3222 +                               return -EFAULT;
3223 +                       if (copy_from_user(&saved_ptr2, &lv_status_byname_req.lv->lv_block_exception, sizeof(void*)) != 0)
3224 +                               return -EFAULT;
3225 +                       if (copy_to_user(lv_status_byname_req.lv,
3226                                          lv_ptr,
3227                                          sizeof(lv_t)) != 0)
3228                                 return -EFAULT;
3229 -
3230 -                       if (lv.lv_current_pe != NULL) {
3231 -                               size = lv_ptr->lv_allocated_le *
3232 -                                      sizeof(pe_t);
3233 -                               if (copy_to_user(lv.lv_current_pe,
3234 +                       if (saved_ptr1 != NULL) {
3235 +                               if (copy_to_user(saved_ptr1,
3236                                                  lv_ptr->lv_current_pe,
3237 -                                                size) != 0)
3238 +                                                lv_ptr->lv_allocated_le *
3239 +                                                sizeof(pe_t)) != 0)
3240                                         return -EFAULT;
3241                         }
3242 +                       /* Restore usermode pointers */
3243 +                       if (copy_to_user(&lv_status_byname_req.lv->lv_current_pe, &saved_ptr1, sizeof(void*)) != 0)
3244 +                               return -EFAULT;
3245                         return 0;
3246                 }
3247         }
3248 @@ -2693,34 +2521,41 @@
3249   */
3250  static int lvm_do_lv_status_byindex(vg_t *vg_ptr,void *arg)
3251  {
3252 -       ulong size;
3253 -       lv_t lv;
3254 -       lv_t *lv_ptr;
3255         lv_status_byindex_req_t lv_status_byindex_req;
3256 +       void *saved_ptr1;
3257 +       void *saved_ptr2;
3258 +       lv_t *lv_ptr;
3259  
3260         if (vg_ptr == NULL) return -ENXIO;
3261         if (copy_from_user(&lv_status_byindex_req, arg,
3262                            sizeof(lv_status_byindex_req)) != 0)
3263                 return -EFAULT;
3264  
3265 -       if ((lvp = lv_status_byindex_req.lv) == NULL)
3266 +       if (lv_status_byindex_req.lv == NULL)
3267                 return -EINVAL;
3268         if ( ( lv_ptr = vg_ptr->lv[lv_status_byindex_req.lv_index]) == NULL)
3269                 return -ENXIO;
3270  
3271 -       if (copy_from_user(&lv, lvp, sizeof(lv_t)) != 0)
3272 -               return -EFAULT;
3273 +       /* Save usermode pointers */
3274 +       if (copy_from_user(&saved_ptr1, &lv_status_byindex_req.lv->lv_current_pe, sizeof(void*)) != 0)
3275 +               return -EFAULT;
3276 +       if (copy_from_user(&saved_ptr2, &lv_status_byindex_req.lv->lv_block_exception, sizeof(void*)) != 0)
3277 +               return -EFAULT;
3278  
3279 -       if (copy_to_user(lvp, lv_ptr, sizeof(lv_t)) != 0)
3280 +       if (copy_to_user(lv_status_byindex_req.lv, lv_ptr, sizeof(lv_t)) != 0)
3281                 return -EFAULT;
3282 -
3283 -       if (lv.lv_current_pe != NULL) {
3284 -               size = lv_ptr->lv_allocated_le * sizeof(pe_t);
3285 -               if (copy_to_user(lv.lv_current_pe,
3286 -                                lv_ptr->lv_current_pe,
3287 -                                size) != 0)
3288 +       if (saved_ptr1 != NULL) {
3289 +               if (copy_to_user(saved_ptr1,
3290 +                                lv_ptr->lv_current_pe,
3291 +                                lv_ptr->lv_allocated_le *
3292 +                                sizeof(pe_t)) != 0)
3293                         return -EFAULT;
3294         }
3295 +
3296 +       /* Restore usermode pointers */
3297 +       if (copy_to_user(&lv_status_byindex_req.lv->lv_current_pe, &saved_ptr1, sizeof(void *)) != 0)
3298 +               return -EFAULT;
3299 +
3300         return 0;
3301  } /* lvm_do_lv_status_byindex() */
3302  
3303 @@ -2731,6 +2566,9 @@
3304  static int lvm_do_lv_status_bydev(vg_t * vg_ptr, void * arg) {
3305         int l;
3306         lv_status_bydev_req_t lv_status_bydev_req;
3307 +       void *saved_ptr1;
3308 +       void *saved_ptr2;
3309 +       lv_t *lv_ptr;
3310  
3311         if (vg_ptr == NULL) return -ENXIO;
3312         if (copy_from_user(&lv_status_bydev_req, arg,
3313 @@ -2743,10 +2581,26 @@
3314         }
3315  
3316         if ( l == vg_ptr->lv_max) return -ENXIO;
3317 +       lv_ptr = vg_ptr->lv[l];
3318  
3319 -       if (copy_to_user(lv_status_bydev_req.lv,
3320 -                        vg_ptr->lv[l], sizeof(lv_t)) != 0)
3321 +       /* Save usermode pointers */
3322 +       if (copy_from_user(&saved_ptr1, &lv_status_bydev_req.lv->lv_current_pe, sizeof(void*)) != 0)
3323 +               return -EFAULT;
3324 +       if (copy_from_user(&saved_ptr2, &lv_status_bydev_req.lv->lv_block_exception, sizeof(void*)) != 0)
3325 +               return -EFAULT;
3326 +
3327 +       if (copy_to_user(lv_status_bydev_req.lv, lv_ptr, sizeof(lv_t)) != 0)
3328                 return -EFAULT;
3329 +       if (saved_ptr1 != NULL) {
3330 +               if (copy_to_user(saved_ptr1,
3331 +                                lv_ptr->lv_current_pe,
3332 +                                lv_ptr->lv_allocated_le *
3333 +                                sizeof(pe_t)) != 0)
3334 +                       return -EFAULT;
3335 +       }
3336 +       /* Restore usermode pointers */
3337 +       if (copy_to_user(&lv_status_bydev_req.lv->lv_current_pe, &saved_ptr1, sizeof(void *)) != 0)
3338 +               return -EFAULT;
3339  
3340         return 0;
3341  } /* lvm_do_lv_status_bydev() */
3342 @@ -2766,11 +2620,9 @@
3343                 if ( (lv_ptr = vg_ptr->lv[l]) == NULL) continue;
3344                 if (lv_ptr->lv_dev == lv->lv_dev)
3345                 {
3346 -                       lvm_do_remove_proc_entry_of_lv ( vg_ptr, lv_ptr);
3347 -                       strncpy(lv_ptr->lv_name,
3348 -                               lv_req->lv_name,
3349 -                               NAME_LEN);
3350 -                       lvm_do_create_proc_entry_of_lv ( vg_ptr, lv_ptr);
3351 +                       lvm_fs_remove_lv(vg_ptr, lv_ptr);
3352 +                       strncpy(lv_ptr->lv_name, lv_req->lv_name, NAME_LEN);
3353 +                       lvm_fs_create_lv(vg_ptr, lv_ptr);
3354                         break;
3355                 }
3356         }
3357 @@ -2787,9 +2639,7 @@
3358  {
3359         uint p;
3360         pv_t *pv_ptr;
3361 -#ifdef LVM_GET_INODE
3362 -       struct inode *inode_sav;
3363 -#endif
3364 +       struct block_device *bd;
3365  
3366         if (vg_ptr == NULL) return -ENXIO;
3367         if (copy_from_user(&pv_change_req, arg,
3368 @@ -2801,20 +2651,17 @@
3369                 if (pv_ptr != NULL &&
3370                     strcmp(pv_ptr->pv_name,
3371                                pv_change_req.pv_name) == 0) {
3372 -#ifdef LVM_GET_INODE
3373 -                       inode_sav = pv_ptr->inode;
3374 -#endif
3375 +
3376 +                       bd = pv_ptr->bd;
3377                         if (copy_from_user(pv_ptr,
3378                                            pv_change_req.pv,
3379                                            sizeof(pv_t)) != 0)
3380                                 return -EFAULT;
3381 +                       pv_ptr->bd = bd;
3382  
3383                         /* We don't need the PE list
3384                            in kernel space as with LVs pe_t list */
3385                         pv_ptr->pe = NULL;
3386 -#ifdef LVM_GET_INODE
3387 -                       pv_ptr->inode = inode_sav;
3388 -#endif
3389                         return 0;
3390                 }
3391         }
3392 @@ -2850,160 +2697,27 @@
3393  } /* lvm_do_pv_status() */
3394  
3395  
3396 -
3397  /*
3398 - * create a devfs entry for a volume group
3399 + * character device support function: flush and invalidate all buffers of a PV
3400   */
3401 -void lvm_do_create_devfs_entry_of_vg ( vg_t *vg_ptr) {
3402 -       vg_devfs_handle[vg_ptr->vg_number] = devfs_mk_dir(0, vg_ptr->vg_name, NULL);
3403 -       ch_devfs_handle[vg_ptr->vg_number] = devfs_register(
3404 -               vg_devfs_handle[vg_ptr->vg_number] , "group",
3405 -               DEVFS_FL_DEFAULT, LVM_CHAR_MAJOR, vg_ptr->vg_number,
3406 -               S_IFCHR | S_IRUSR | S_IWUSR | S_IRGRP,
3407 -               &lvm_chr_fops, NULL);
3408 -}
3409 -
3410 -
3411 -/*
3412 - * create a /proc entry for a logical volume
3413 - */
3414 -void lvm_do_create_proc_entry_of_lv ( vg_t *vg_ptr, lv_t *lv_ptr) {
3415 -       char *basename;
3416 -
3417 -       if ( vg_ptr->lv_subdir_pde != NULL) {
3418 -               basename = strrchr(lv_ptr->lv_name, '/');
3419 -               if (basename == NULL) basename = lv_ptr->lv_name;
3420 -               else                  basename++;
3421 -               pde = create_proc_entry(basename, S_IFREG,
3422 -                                       vg_ptr->lv_subdir_pde);
3423 -               if ( pde != NULL) {
3424 -                       pde->read_proc = lvm_proc_read_lv_info;
3425 -                       pde->data = lv_ptr;
3426 -               }
3427 -       }
3428 -}
3429 -
3430 -
3431 -/*
3432 - * remove a /proc entry for a logical volume
3433 - */
3434 -void lvm_do_remove_proc_entry_of_lv ( vg_t *vg_ptr, lv_t *lv_ptr) {
3435 -       char *basename;
3436 -
3437 -       if ( vg_ptr->lv_subdir_pde != NULL) {
3438 -               basename = strrchr(lv_ptr->lv_name, '/');
3439 -               if (basename == NULL) basename = lv_ptr->lv_name;
3440 -               else                  basename++;
3441 -               remove_proc_entry(basename, vg_ptr->lv_subdir_pde);
3442 -       }
3443 -}
3444 -
3445 -
3446 -/*
3447 - * create a /proc entry for a physical volume
3448 - */
3449 -void lvm_do_create_proc_entry_of_pv ( vg_t *vg_ptr, pv_t *pv_ptr) {
3450 -       int offset = 0;
3451 -       char *basename;
3452 -       char buffer[NAME_LEN];
3453 -
3454 -       basename = pv_ptr->pv_name;
3455 -       if (strncmp(basename, "/dev/", 5) == 0) offset = 5;
3456 -       strncpy(buffer, basename + offset, sizeof(buffer));
3457 -       basename = buffer;
3458 -       while ( ( basename = strchr ( basename, '/')) != NULL) *basename = '_';
3459 -       pde = create_proc_entry(buffer, S_IFREG, vg_ptr->pv_subdir_pde);
3460 -       if ( pde != NULL) {
3461 -               pde->read_proc = lvm_proc_read_pv_info;
3462 -               pde->data = pv_ptr;
3463 -       }
3464 -}
3465 -
3466 -
3467 -/*
3468 - * remove a /proc entry for a physical volume
3469 - */
3470 -void lvm_do_remove_proc_entry_of_pv ( vg_t *vg_ptr, pv_t *pv_ptr) {
3471 -       char *basename;
3472 -
3473 -       basename = strrchr(pv_ptr->pv_name, '/');
3474 -       if ( vg_ptr->pv_subdir_pde != NULL) {
3475 -               basename = strrchr(pv_ptr->pv_name, '/');
3476 -               if (basename == NULL) basename = pv_ptr->pv_name;
3477 -               else                  basename++;
3478 -               remove_proc_entry(basename, vg_ptr->pv_subdir_pde);
3479 -       }
3480 -}
3481 -
3482 -
3483 -/*
3484 - * create a /proc entry for a volume group
3485 - */
3486 -void lvm_do_create_proc_entry_of_vg ( vg_t *vg_ptr) {
3487 -       int l, p;
3488 -       pv_t *pv_ptr;
3489 -       lv_t *lv_ptr;
3490 +static int lvm_do_pv_flush(void *arg)
3491 +{
3492 +       pv_flush_req_t pv_flush_req;
3493  
3494 -       pde = create_proc_entry(vg_ptr->vg_name, S_IFDIR,
3495 -                               lvm_proc_vg_subdir);
3496 -       if ( pde != NULL) {
3497 -               vg_ptr->vg_dir_pde = pde;
3498 -               pde = create_proc_entry("group", S_IFREG,
3499 -                                       vg_ptr->vg_dir_pde);
3500 -               if ( pde != NULL) {
3501 -                       pde->read_proc = lvm_proc_read_vg_info;
3502 -                       pde->data = vg_ptr;
3503 -               }
3504 -                pde = create_proc_entry(LVM_LV_SUBDIR, S_IFDIR,
3505 -                                        vg_ptr->vg_dir_pde);
3506 -                if ( pde != NULL) {
3507 -                        vg_ptr->lv_subdir_pde = pde;
3508 -                        for ( l = 0; l < vg_ptr->lv_max; l++) {
3509 -                               if ( ( lv_ptr = vg_ptr->lv[l]) == NULL) continue;
3510 -                               lvm_do_create_proc_entry_of_lv ( vg_ptr, lv_ptr);
3511 -                        }
3512 -                }
3513 -                pde = create_proc_entry(LVM_PV_SUBDIR, S_IFDIR,
3514 -                                        vg_ptr->vg_dir_pde);
3515 -                if ( pde != NULL) {
3516 -                        vg_ptr->pv_subdir_pde = pde;
3517 -                       for ( p = 0; p < vg_ptr->pv_max; p++) {
3518 -                               if ( ( pv_ptr = vg_ptr->pv[p]) == NULL) continue;
3519 -                               lvm_do_create_proc_entry_of_pv ( vg_ptr, pv_ptr);
3520 -                        }
3521 -                }
3522 -        }
3523 -}
3524 +       if (copy_from_user(&pv_flush_req, arg, sizeof(pv_flush_req)) != 0)
3525 +               return -EFAULT;
3526  
3527 -/*
3528 - * remove a /proc entry for a volume group
3529 - */
3530 -void lvm_do_remove_proc_entry_of_vg ( vg_t *vg_ptr) {
3531 -       int l, p;
3532 -       lv_t *lv_ptr;
3533 -       pv_t *pv_ptr;
3534 +       fsync_dev(pv_flush_req.pv_dev);
3535 +       invalidate_buffers(pv_flush_req.pv_dev);
3536  
3537 -       for ( l = 0; l < vg_ptr->lv_max; l++) {
3538 -               if ( ( lv_ptr = vg_ptr->lv[l]) == NULL) continue;
3539 -               lvm_do_remove_proc_entry_of_lv ( vg_ptr, vg_ptr->lv[l]);
3540 -       }
3541 -       for ( p = 0; p < vg_ptr->pv_max; p++) {
3542 -               if ( ( pv_ptr = vg_ptr->pv[p]) == NULL) continue;
3543 -               lvm_do_remove_proc_entry_of_pv ( vg_ptr, vg_ptr->pv[p]);
3544 -       }
3545 -       if ( vg_ptr->vg_dir_pde != NULL) {
3546 -               remove_proc_entry(LVM_LV_SUBDIR, vg_ptr->vg_dir_pde);
3547 -               remove_proc_entry(LVM_PV_SUBDIR, vg_ptr->vg_dir_pde);
3548 -               remove_proc_entry("group", vg_ptr->vg_dir_pde);
3549 -               remove_proc_entry(vg_ptr->vg_name, lvm_proc_vg_subdir);
3550 -       }
3551 +       return 0;
3552  }
3553  
3554  
3555  /*
3556   * support function initialize gendisk variables
3557   */
3558 -void __init lvm_geninit(struct gendisk *lvm_gdisk)
3559 +static void __init lvm_geninit(struct gendisk *lvm_gdisk)
3560  {
3561         int i = 0;
3562  
3563 @@ -3019,36 +2733,94 @@
3564  
3565         blk_size[MAJOR_NR] = lvm_size;
3566         blksize_size[MAJOR_NR] = lvm_blocksizes;
3567 -       hardsect_size[MAJOR_NR] = lvm_blocksizes;
3568 +       hardsect_size[MAJOR_NR] = lvm_hardsectsizes;
3569  
3570         return;
3571  } /* lvm_gen_init() */
3572  
3573  
3574 +
3575 +/* Must have down_write(_pe_lock) when we enqueue buffers */
3576 +static void _queue_io(struct buffer_head *bh, int rw) {
3577 +       if (bh->b_reqnext) BUG();
3578 +       bh->b_reqnext = _pe_requests;
3579 +       _pe_requests = bh;
3580 +}
3581 +
3582 +/* Must have down_write(_pe_lock) when we dequeue buffers */
3583 +static struct buffer_head *_dequeue_io(void)
3584 +{
3585 +       struct buffer_head *bh = _pe_requests;
3586 +       _pe_requests = NULL;
3587 +       return bh;
3588 +}
3589 +
3590  /*
3591 - * return a pointer to a '-' padded uuid
3592 + * We do not need to hold _pe_lock to flush buffers.  bh should be taken from
3593 + * _pe_requests under down_write(_pe_lock), and then _pe_requests can be set
3594 + * NULL and we drop _pe_lock.  Any new buffers deferred at this time will be
3595 + * added to a new list, and the old buffers can have their I/O restarted
3596 + * asynchronously.
3597 + *
3598 + * If, for some reason, the same PE is locked again before all of these writes
3599 + * have finished, then these buffers will just be re-queued (i.e. no danger).
3600   */
3601 -static char *lvm_show_uuid ( char *uuidstr) {
3602 -       int i, j;
3603 -       static char uuid[NAME_LEN] = { 0, };
3604 +static void _flush_io(struct buffer_head *bh)
3605 +{
3606 +       while (bh) {
3607 +               struct buffer_head *next = bh->b_reqnext;
3608 +               bh->b_reqnext = NULL;
3609 +               /* resubmit this buffer head */
3610 +               generic_make_request(WRITE, bh);
3611 +               bh = next;
3612 +       }
3613 +}
3614  
3615 -       memset ( uuid, 0, NAME_LEN);
3616  
3617 -       i = 6;
3618 -       memcpy ( uuid, uuidstr, i);
3619 -       uuidstr += i;
3620 +/*
3621 + * we must open the pv's before we use them
3622 + */
3623 +static int _open_pv(pv_t *pv) {
3624 +       int err;
3625 +       struct block_device *bd;
3626 +
3627 +       if (!(bd = bdget(kdev_t_to_nr(pv->pv_dev))))
3628 +               return -ENOMEM;
3629  
3630 -       for ( j = 0; j < 6; j++) {
3631 -               uuid[i++] = '-';
3632 -               memcpy ( &uuid[i], uuidstr, 4);
3633 -               uuidstr += 4;
3634 -               i += 4;
3635 +       err = blkdev_get(bd, FMODE_READ|FMODE_WRITE, 0, BDEV_FILE);
3636 +       if (err) {
3637 +               bdput(bd);
3638 +               return err;
3639         }
3640  
3641 -       memcpy ( &uuid[i], uuidstr, 2 );
3642 +       pv->bd = bd;
3643 +       return 0;
3644 +}
3645 +
3646 +static void _close_pv(pv_t *pv) {
3647 +       if(!pv || !pv->bd)
3648 +               return;
3649  
3650 -       return uuid;
3651 +       blkdev_put(pv->bd, BDEV_FILE);
3652 +       bdput(pv->bd);
3653 +       pv->bd = 0;
3654  }
3655  
3656 +
3657 +static unsigned long _sectors_to_k(unsigned long sect)
3658 +{
3659 +       if(SECTOR_SIZE > 1024) {
3660 +               return sect * (SECTOR_SIZE / 1024);
3661 +       }
3662 +
3663 +       return sect / (1024 / SECTOR_SIZE);
3664 +}
3665 +
3666 +MODULE_AUTHOR("Heinz Mauelshagen, Sistina Software");
3667 +MODULE_DESCRIPTION("Logical Volume Manager");
3668 +#ifdef MODULE_LICENSE
3669 +MODULE_LICENSE("GPL");
3670 +#endif
3671 +
3672  module_init(lvm_init);
3673  module_exit(lvm_cleanup);
3674 --- linux/drivers/md/lvm-internal.h.orig Tue Nov 13 08:46:52 2001
3675 +++ linux/drivers/md/lvm-internal.h Tue Nov 13 08:46:52 2001
3676 @@ -0,0 +1,108 @@
3677 +
3678 +/*
3679 + * kernel/lvm_internal.h
3680 + *
3681 + * Copyright (C) 2001 Sistina Software
3682 + *
3683 + *
3684 + * LVM driver is free software; you can redistribute it and/or modify
3685 + * it under the terms of the GNU General Public License as published by
3686 + * the Free Software Foundation; either version 2, or (at your option)
3687 + * any later version.
3688 + *
3689 + * LVM driver is distributed in the hope that it will be useful,
3690 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
3691 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
3692 + * GNU General Public License for more details.
3693 + *
3694 + * You should have received a copy of the GNU General Public License
3695 + * along with GNU CC; see the file COPYING.  If not, write to
3696 + * the Free Software Foundation, 59 Temple Place - Suite 330,
3697 + * Boston, MA 02111-1307, USA.
3698 + *
3699 + */
3700 +
3701 +/*
3702 + * Changelog
3703 + *
3704 + *    05/01/2001 - Factored this file out of lvm.c (Joe Thornber)
3705 + *    11/01/2001 - Renamed lvm_internal and added declarations
3706 + *                 for lvm_fs.c stuff
3707 + *
3708 + */
3709 +
3710 +#ifndef LVM_INTERNAL_H
3711 +#define LVM_INTERNAL_H
3712 +
3713 +#include <linux/lvm.h>
3714 +
3715 +#define        _LVM_INTERNAL_H_VERSION "LVM "LVM_RELEASE_NAME" ("LVM_RELEASE_DATE")"
3716 +
3717 +/* global variables, defined in lvm.c */
3718 +extern char *lvm_version;
3719 +extern ushort lvm_iop_version;
3720 +extern int loadtime;
3721 +extern const char *const lvm_name;
3722 +
3723 +
3724 +extern uint vg_count;
3725 +extern vg_t *vg[];
3726 +extern struct file_operations lvm_chr_fops;
3727 +
3728 +extern struct block_device_operations lvm_blk_dops;
3729 +
3730 +#define lvm_sectsize(dev) get_hardsect_size(dev)
3731 +
3732 +/* 2.4.8 had no global min/max macros, and 2.4.9's were flawed */
3733 +
3734 +/* debug macros */
3735 +#ifdef DEBUG_IOCTL
3736 +#define P_IOCTL(fmt, args...) printk(KERN_DEBUG "lvm ioctl: " fmt, ## args)
3737 +#else
3738 +#define P_IOCTL(fmt, args...)
3739 +#endif
3740 +
3741 +#ifdef DEBUG_MAP
3742 +#define P_MAP(fmt, args...) printk(KERN_DEBUG "lvm map: " fmt, ## args)
3743 +#else
3744 +#define P_MAP(fmt, args...)
3745 +#endif
3746 +
3747 +#ifdef DEBUG_KFREE
3748 +#define P_KFREE(fmt, args...) printk(KERN_DEBUG "lvm kfree: " fmt, ## args)
3749 +#else
3750 +#define P_KFREE(fmt, args...)
3751 +#endif
3752 +
3753 +#ifdef DEBUG_DEVICE
3754 +#define P_DEV(fmt, args...) printk(KERN_DEBUG "lvm device: " fmt, ## args)
3755 +#else
3756 +#define P_DEV(fmt, args...)
3757 +#endif
3758 +
3759 +
3760 +/* lvm-snap.c */
3761 +int lvm_get_blksize(kdev_t);
3762 +int lvm_snapshot_alloc(lv_t *);
3763 +int lvm_snapshot_fill_COW_page(vg_t *, lv_t *);
3764 +int lvm_snapshot_COW(kdev_t, ulong, ulong, ulong, vg_t *vg, lv_t *);
3765 +int lvm_snapshot_remap_block(kdev_t *, ulong *, ulong, lv_t *);
3766 +void lvm_snapshot_release(lv_t *);
3767 +int lvm_write_COW_table_block(vg_t *, lv_t *);
3768 +void lvm_hash_link(lv_block_exception_t *, kdev_t, ulong, lv_t *);
3769 +int lvm_snapshot_alloc_hash_table(lv_t *);
3770 +void lvm_drop_snapshot(vg_t *vg, lv_t *, const char *);
3771 +
3772 +
3773 +/* lvm_fs.c */
3774 +void lvm_init_fs(void);
3775 +void lvm_fin_fs(void);
3776 +
3777 +void lvm_fs_create_vg(vg_t *vg_ptr);
3778 +void lvm_fs_remove_vg(vg_t *vg_ptr);
3779 +devfs_handle_t lvm_fs_create_lv(vg_t *vg_ptr, lv_t *lv);
3780 +void lvm_fs_remove_lv(vg_t *vg_ptr, lv_t *lv);
3781 +void lvm_fs_create_pv(vg_t *vg_ptr, pv_t *pv);
3782 +void lvm_fs_remove_pv(vg_t *vg_ptr, pv_t *pv);
3783 +
3784 +#endif
3785 --- linux/drivers/md/lvm-snap.c.orig    Mon Sep 10 17:00:55 2001
3786 +++ linux/drivers/md/lvm-snap.c Tue Nov 13 09:46:52 2001
3787 @@ -2,22 +2,22 @@
3788   * kernel/lvm-snap.c
3789   *
3790   * Copyright (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE
3791 - *                    Heinz Mauelshagen, Sistina Software (persistent snapshots)
3792 + *               2000 - 2001 Heinz Mauelshagen, Sistina Software
3793   *
3794   * LVM snapshot driver is free software; you can redistribute it and/or modify
3795   * it under the terms of the GNU General Public License as published by
3796   * the Free Software Foundation; either version 2, or (at your option)
3797   * any later version.
3798 - * 
3799 + *
3800   * LVM snapshot driver is distributed in the hope that it will be useful,
3801   * but WITHOUT ANY WARRANTY; without even the implied warranty of
3802   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
3803   * GNU General Public License for more details.
3804 - * 
3805 + *
3806   * You should have received a copy of the GNU General Public License
3807   * along with GNU CC; see the file COPYING.  If not, write to
3808   * the Free Software Foundation, 59 Temple Place - Suite 330,
3809 - * Boston, MA 02111-1307, USA. 
3810 + * Boston, MA 02111-1307, USA.
3811   *
3812   */
3813  
3814 @@ -26,6 +26,21 @@
3815   *
3816   *    05/07/2000 - implemented persistent snapshot support
3817   *    23/11/2000 - used cpu_to_le64 rather than my own macro
3818 + *    25/01/2001 - Put LockPage back in
3819 + *    01/02/2001 - A dropped snapshot is now set as inactive
3820 + *    14/02/2001 - tidied debug statements
3821 + *    19/02/2001 - changed rawio calls to pass in preallocated buffer_heads
3822 + *    26/02/2001 - introduced __brw_kiovec to remove a lot of conditional
3823 + *                 compiles.
3824 + *    07/03/2001 - fixed COW exception table not persistent on 2.2 (HM)
3825 + *    12/03/2001 - lvm_pv_get_number changes:
3826 + *                 o made it static
3827 + *                 o renamed it to _pv_get_number
3828 + *                 o pv number is returned in new uint * arg
3829 + *                 o -1 returned on error
3830 + *                 lvm_snapshot_fill_COW_page has a return value too.
3831 + *    15/10/2001 - fix snapshot alignment problem [CM]
3832 + *               - fix snapshot full oops (always check lv_block_exception) [CM]
3833   *
3834   */
3835  
3836 @@ -36,28 +51,52 @@
3837  #include <linux/types.h>
3838  #include <linux/iobuf.h>
3839  #include <linux/lvm.h>
3840 +#include <linux/devfs_fs_kernel.h>
3841 +
3842  
3843 +#include "lvm-internal.h"
3844  
3845 -#include "lvm-snap.h"
3846 +static char *lvm_snap_version __attribute__ ((unused)) = "LVM "LVM_RELEASE_NAME" snapshot code ("LVM_RELEASE_DATE")\n";
3847  
3848 -static char *lvm_snap_version __attribute__ ((unused)) = "LVM 0.9.1_beta2 snapshot code (18/01/2001)\n";
3849  
3850  extern const char *const lvm_name;
3851  extern int lvm_blocksizes[];
3852  
3853  void lvm_snapshot_release(lv_t *);
3854  
3855 -uint lvm_pv_get_number(vg_t * vg, kdev_t rdev)
3856 +static int _write_COW_table_block(vg_t *vg, lv_t *lv, int idx,
3857 +                                 const char **reason);
3858 +static void _disable_snapshot(vg_t *vg, lv_t *lv);
3859 +
3860 +
3861 +static inline int __brw_kiovec(int rw, int nr, struct kiobuf *iovec[],
3862 +                              kdev_t dev, unsigned long b[], int size,
3863 +                              lv_t *lv) {
3864 +       return brw_kiovec(rw, nr, iovec, dev, b, size);
3865 +}
3866 +
3867 +
3868 +static int _pv_get_number(vg_t * vg, kdev_t rdev, uint *pvn)
3869  {
3870         uint p;
3871 +       for (p = 0; p < vg->pv_max; p++) {
3872 +               if (vg->pv[p] == NULL)
3873 +                       continue;
3874  
3875 -       for ( p = 0; p < vg->pv_max; p++)
3876 -       {
3877 -               if ( vg->pv[p] == NULL) continue;
3878 -               if ( vg->pv[p]->pv_dev == rdev) break;
3879 +               if (vg->pv[p]->pv_dev == rdev)
3880 +                       break;
3881         }
3882  
3883 -       return vg->pv[p]->pv_number;
3884 +       if (p >= vg->pv_max) {
3885 +               /* bad news, the snapshot COW table is probably corrupt */
3886 +               printk(KERN_ERR
3887 +                      "%s -- _pv_get_number failed for rdev = %u\n",
3888 +                      lvm_name, rdev);
3889 +               return -1;
3890 +       }
3891 +
3892 +       *pvn = vg->pv[p]->pv_number;
3893 +       return 0;
3894  }
3895  
3896  
3897 @@ -105,10 +144,20 @@
3898         unsigned long mask = lv->lv_snapshot_hash_mask;
3899         int chunk_size = lv->lv_chunk_size;
3900  
3901 +       if (!hash_table)
3902 +               BUG();
3903         hash_table = &hash_table[hashfn(org_dev, org_start, mask, chunk_size)];
3904         list_add(&exception->hash, hash_table);
3905  }
3906  
3907 +/*
3908 + * Determine if we already have a snapshot chunk for this block.
3909 + * Return: 1 if the chunk already exists
3910 + *         0 if we need to COW this block and allocate a new chunk
3911 + *        -1 if the snapshot was disabled because it ran out of space
3912 + *
3913 + * We need to be holding at least a read lock on lv->lv_lock.
3914 + */
3915  int lvm_snapshot_remap_block(kdev_t * org_dev, unsigned long * org_sector,
3916                              unsigned long pe_start, lv_t * lv)
3917  {
3918 @@ -118,6 +167,9 @@
3919         int chunk_size = lv->lv_chunk_size;
3920         lv_block_exception_t * exception;
3921  
3922 +       if (!lv->lv_block_exception)
3923 +               return -1;
3924 +
3925         pe_off = pe_start % chunk_size;
3926         pe_adjustment = (*org_sector-pe_off) % chunk_size;
3927         __org_start = *org_sector - pe_adjustment;
3928 @@ -133,7 +185,7 @@
3929         return ret;
3930  }
3931  
3932 -void lvm_drop_snapshot(lv_t * lv_snap, const char * reason)
3933 +void lvm_drop_snapshot(vg_t *vg, lv_t *lv_snap, const char *reason)
3934  {
3935         kdev_t last_dev;
3936         int i;
3937 @@ -142,6 +194,9 @@
3938            or error on this snapshot --> release it */
3939         invalidate_buffers(lv_snap->lv_dev);
3940  
3941 +       /* wipe the snapshot since it's inconsistent now */
3942 +       _disable_snapshot(vg, lv_snap);
3943 +
3944         for (i = last_dev = 0; i < lv_snap->lv_remap_ptr; i++) {
3945                 if ( lv_snap->lv_block_exception[i].rdev_new != last_dev) {
3946                         last_dev = lv_snap->lv_block_exception[i].rdev_new;
3947 @@ -150,26 +205,33 @@
3948         }
3949  
3950         lvm_snapshot_release(lv_snap);
3951 +       lv_snap->lv_status &= ~LV_ACTIVE;
3952  
3953         printk(KERN_INFO
3954 -              "%s -- giving up to snapshot %s on %s due %s\n",
3955 +              "%s -- giving up to snapshot %s on %s: %s\n",
3956                lvm_name, lv_snap->lv_snapshot_org->lv_name, lv_snap->lv_name,
3957                reason);
3958  }
3959  
3960 -static inline void lvm_snapshot_prepare_blocks(unsigned long * blocks,
3961 +static inline int lvm_snapshot_prepare_blocks(unsigned long *blocks,
3962                                                unsigned long start,
3963                                                int nr_sectors,
3964                                                int blocksize)
3965  {
3966         int i, sectors_per_block, nr_blocks;
3967  
3968 -       sectors_per_block = blocksize >> 9;
3969 +       sectors_per_block = blocksize / SECTOR_SIZE;
3970 +
3971 +       if (start & (sectors_per_block - 1))
3972 +               return 0;
3973 +
3974         nr_blocks = nr_sectors / sectors_per_block;
3975         start /= sectors_per_block;
3976  
3977         for (i = 0; i < nr_blocks; i++)
3978                 blocks[i] = start++;
3979 +
3980 +       return 1;
3981  }
3982  
3983  inline int lvm_get_blksize(kdev_t dev)
3984 @@ -209,128 +271,61 @@
3985  #endif
3986  
3987  
3988 -void lvm_snapshot_fill_COW_page(vg_t * vg, lv_t * lv_snap)
3989 +int lvm_snapshot_fill_COW_page(vg_t * vg, lv_t * lv_snap)
3990  {
3991 -       int     id = 0, is = lv_snap->lv_remap_ptr;
3992 -       ulong   blksize_snap;
3993 -       lv_COW_table_disk_t * lv_COW_table =
3994 -          ( lv_COW_table_disk_t *) page_address(lv_snap->lv_COW_table_page);
3995 +       int id = 0, is = lv_snap->lv_remap_ptr;
3996 +       ulong blksize_snap;
3997 +       lv_COW_table_disk_t * lv_COW_table = (lv_COW_table_disk_t *)
3998 +               page_address(lv_snap->lv_COW_table_iobuf->maplist[0]);
3999 +
4000 +       if (is == 0)
4001 +               return 0;
4002  
4003 -       if (is == 0) return;
4004         is--;
4005 -        blksize_snap = lvm_get_blksize(lv_snap->lv_block_exception[is].rdev_new);
4006 -        is -= is % (blksize_snap / sizeof(lv_COW_table_disk_t));
4007 +       blksize_snap =
4008 +               lvm_get_blksize(lv_snap->lv_block_exception[is].rdev_new);
4009 +       is -= is % (blksize_snap / sizeof(lv_COW_table_disk_t));
4010  
4011         memset(lv_COW_table, 0, blksize_snap);
4012         for ( ; is < lv_snap->lv_remap_ptr; is++, id++) {
4013                 /* store new COW_table entry */
4014 -               lv_COW_table[id].pv_org_number = cpu_to_le64(lvm_pv_get_number(vg, lv_snap->lv_block_exception[is].rdev_org));
4015 -               lv_COW_table[id].pv_org_rsector = cpu_to_le64(lv_snap->lv_block_exception[is].rsector_org);
4016 -               lv_COW_table[id].pv_snap_number = cpu_to_le64(lvm_pv_get_number(vg, lv_snap->lv_block_exception[is].rdev_new));
4017 -               lv_COW_table[id].pv_snap_rsector = cpu_to_le64(lv_snap->lv_block_exception[is].rsector_new);
4018 -       }
4019 -}
4020 +               lv_block_exception_t *be = lv_snap->lv_block_exception + is;
4021 +               uint pvn;
4022  
4023 +               if (_pv_get_number(vg, be->rdev_org, &pvn))
4024 +                       goto bad;
4025  
4026 -/*
4027 - * writes a COW exception table sector to disk (HM)
4028 - *
4029 - */
4030 -
4031 -int lvm_write_COW_table_block(vg_t * vg, lv_t * lv_snap)
4032 -{
4033 -       int blksize_snap;
4034 -       int end_of_table;
4035 -       int idx = lv_snap->lv_remap_ptr, idx_COW_table;
4036 -       int nr_pages_tmp;
4037 -       int length_tmp;
4038 -       ulong snap_pe_start, COW_table_sector_offset,
4039 -             COW_entries_per_pe, COW_chunks_per_pe, COW_entries_per_block;
4040 -       const char * reason;
4041 -       kdev_t snap_phys_dev;
4042 -       struct kiobuf * iobuf = lv_snap->lv_iobuf;
4043 -       struct page * page_tmp;
4044 -       lv_COW_table_disk_t * lv_COW_table =
4045 -          ( lv_COW_table_disk_t *) page_address(lv_snap->lv_COW_table_page);
4046 +               lv_COW_table[id].pv_org_number = cpu_to_le64(pvn);
4047 +               lv_COW_table[id].pv_org_rsector = cpu_to_le64(be->rsector_org);
4048  
4049 -       idx--;
4050 +               if (_pv_get_number(vg, be->rdev_new, &pvn))
4051 +                       goto bad;
4052  
4053 -       COW_chunks_per_pe = LVM_GET_COW_TABLE_CHUNKS_PER_PE(vg, lv_snap);
4054 -       COW_entries_per_pe = LVM_GET_COW_TABLE_ENTRIES_PER_PE(vg, lv_snap);
4055 -
4056 -       /* get physical addresse of destination chunk */
4057 -       snap_phys_dev = lv_snap->lv_block_exception[idx].rdev_new;
4058 -       snap_pe_start = lv_snap->lv_block_exception[idx - (idx % COW_entries_per_pe)].rsector_new - lv_snap->lv_chunk_size;
4059 -
4060 -       blksize_snap = lvm_get_blksize(snap_phys_dev);
4061 -
4062 -        COW_entries_per_block = blksize_snap / sizeof(lv_COW_table_disk_t);
4063 -        idx_COW_table = idx % COW_entries_per_pe % COW_entries_per_block;
4064 -
4065 -       if ( idx_COW_table == 0) memset(lv_COW_table, 0, blksize_snap);
4066 -
4067 -       /* sector offset into the on disk COW table */
4068 -       COW_table_sector_offset = (idx % COW_entries_per_pe) / (SECTOR_SIZE / sizeof(lv_COW_table_disk_t));
4069 -
4070 -        /* COW table block to write next */
4071 -       iobuf->blocks[0] = (snap_pe_start + COW_table_sector_offset) >> (blksize_snap >> 10);
4072 -
4073 -       /* store new COW_table entry */
4074 -       lv_COW_table[idx_COW_table].pv_org_number = cpu_to_le64(lvm_pv_get_number(vg, lv_snap->lv_block_exception[idx].rdev_org));
4075 -       lv_COW_table[idx_COW_table].pv_org_rsector = cpu_to_le64(lv_snap->lv_block_exception[idx].rsector_org);
4076 -       lv_COW_table[idx_COW_table].pv_snap_number = cpu_to_le64(lvm_pv_get_number(vg, snap_phys_dev));
4077 -       lv_COW_table[idx_COW_table].pv_snap_rsector = cpu_to_le64(lv_snap->lv_block_exception[idx].rsector_new);
4078 -
4079 -       length_tmp = iobuf->length;
4080 -       iobuf->length = blksize_snap;
4081 -       page_tmp = iobuf->maplist[0];
4082 -        iobuf->maplist[0] = lv_snap->lv_COW_table_page;
4083 -       nr_pages_tmp = iobuf->nr_pages;
4084 -       iobuf->nr_pages = 1;
4085 -
4086 -       if (brw_kiovec(WRITE, 1, &iobuf, snap_phys_dev,
4087 -                      iobuf->blocks, blksize_snap) != blksize_snap)
4088 -               goto fail_raw_write;
4089 -
4090 -
4091 -       /* initialization of next COW exception table block with zeroes */
4092 -       end_of_table = idx % COW_entries_per_pe == COW_entries_per_pe - 1;
4093 -       if (idx_COW_table % COW_entries_per_block == COW_entries_per_block - 1 || end_of_table)
4094 -       {
4095 -               /* don't go beyond the end */
4096 -               if (idx + 1 >= lv_snap->lv_remap_end) goto good_out;
4097 -
4098 -               memset(lv_COW_table, 0, blksize_snap);
4099 -
4100 -               if (end_of_table)
4101 -               {
4102 -                       idx++;
4103 -                       snap_phys_dev = lv_snap->lv_block_exception[idx].rdev_new;
4104 -                       snap_pe_start = lv_snap->lv_block_exception[idx - (idx % COW_entries_per_pe)].rsector_new - lv_snap->lv_chunk_size;
4105 -                       blksize_snap = lvm_get_blksize(snap_phys_dev);
4106 -                       iobuf->blocks[0] = snap_pe_start >> (blksize_snap >> 10);
4107 -               } else iobuf->blocks[0]++;
4108 -
4109 -               if (brw_kiovec(WRITE, 1, &iobuf, snap_phys_dev,
4110 -                              iobuf->blocks, blksize_snap) != blksize_snap)
4111 -                       goto fail_raw_write;
4112 +               lv_COW_table[id].pv_snap_number = cpu_to_le64(pvn);
4113 +               lv_COW_table[id].pv_snap_rsector = cpu_to_le64(be->rsector_new);
4114         }
4115  
4116 -
4117 - good_out:
4118 -       iobuf->length = length_tmp;
4119 -        iobuf->maplist[0] = page_tmp;
4120 -       iobuf->nr_pages = nr_pages_tmp;
4121         return 0;
4122  
4123 -       /* slow path */
4124 - out:
4125 -       lvm_drop_snapshot(lv_snap, reason);
4126 -       return 1;
4127 + bad:
4128 +       printk(KERN_ERR "%s -- lvm_snapshot_fill_COW_page failed", lvm_name);
4129 +       return -1;
4130 +}
4131  
4132 - fail_raw_write:
4133 -       reason = "write error";
4134 -       goto out;
4135 +
4136 +/*
4137 + * writes a COW exception table sector to disk (HM)
4138 + *
4139 + * We need to hold a write lock on lv_snap->lv_lock.
4140 + */
4141 +int lvm_write_COW_table_block(vg_t * vg, lv_t *lv_snap)
4142 +{
4143 +       int r;
4144 +       const char *err;
4145 +       if((r = _write_COW_table_block(vg, lv_snap,
4146 +                                      lv_snap->lv_remap_ptr - 1, &err)))
4147 +               lvm_drop_snapshot(vg, lv_snap, err);
4148 +       return r;
4149  }
4150  
4151  /*
4152 @@ -340,17 +335,23 @@
4153   * if there is no exception storage space free any longer --> release snapshot.
4154   *
4155   * this routine gets called for each _first_ write to a physical chunk.
4156 + *
4157 + * We need to hold a write lock on lv_snap->lv_lock.  It is assumed that
4158 + * lv->lv_block_exception is non-NULL (checked by lvm_snapshot_remap_block())
4159 + * when this function is called.
4160   */
4161  int lvm_snapshot_COW(kdev_t org_phys_dev,
4162                      unsigned long org_phys_sector,
4163                      unsigned long org_pe_start,
4164                      unsigned long org_virt_sector,
4165 -                    lv_t * lv_snap)
4166 +                    vg_t *vg, lv_t* lv_snap)
4167  {
4168         const char * reason;
4169         unsigned long org_start, snap_start, snap_phys_dev, virt_start, pe_off;
4170 +       unsigned long phys_start;
4171         int idx = lv_snap->lv_remap_ptr, chunk_size = lv_snap->lv_chunk_size;
4172         struct kiobuf * iobuf;
4173 +       unsigned long blocks[KIO_MAX_SECTORS];
4174         int blksize_snap, blksize_org, min_blksize, max_blksize;
4175         int max_sectors, nr_sectors;
4176  
4177 @@ -370,13 +371,11 @@
4178  #ifdef DEBUG_SNAPSHOT
4179         printk(KERN_INFO
4180                "%s -- COW: "
4181 -              "org %02d:%02d faulting %lu start %lu, "
4182 -              "snap %02d:%02d start %lu, "
4183 +              "org %s faulting %lu start %lu, snap %s start %lu, "
4184                "size %d, pe_start %lu pe_off %lu, virt_sec %lu\n",
4185                lvm_name,
4186 -              MAJOR(org_phys_dev), MINOR(org_phys_dev), org_phys_sector,
4187 -              org_start,
4188 -              MAJOR(snap_phys_dev), MINOR(snap_phys_dev), snap_start,
4189 +              kdevname(org_phys_dev), org_phys_sector, org_start,
4190 +              kdevname(snap_phys_dev), snap_start,
4191                chunk_size,
4192                org_pe_start, pe_off,
4193                org_virt_sector);
4194 @@ -384,8 +383,8 @@
4195  
4196         iobuf = lv_snap->lv_iobuf;
4197  
4198 -       blksize_org = lvm_get_blksize(org_phys_dev);
4199 -       blksize_snap = lvm_get_blksize(snap_phys_dev);
4200 +       blksize_org = lvm_sectsize(org_phys_dev);
4201 +       blksize_snap = lvm_sectsize(snap_phys_dev);
4202         max_blksize = max(blksize_org, blksize_snap);
4203         min_blksize = min(blksize_org, blksize_snap);
4204         max_sectors = KIO_MAX_SECTORS * (min_blksize>>9);
4205 @@ -393,6 +392,9 @@
4206         if (chunk_size % (max_blksize>>9))
4207                 goto fail_blksize;
4208  
4209 +       /* Don't change org_start, we need it to fill in the exception table */
4210 +       phys_start = org_start;
4211 +
4212         while (chunk_size)
4213         {
4214                 nr_sectors = min(chunk_size, max_sectors);
4215 @@ -400,17 +402,24 @@
4216  
4217                 iobuf->length = nr_sectors << 9;
4218  
4219 -               lvm_snapshot_prepare_blocks(iobuf->blocks, org_start,
4220 -                                           nr_sectors, blksize_org);
4221 -               if (brw_kiovec(READ, 1, &iobuf, org_phys_dev,
4222 -                              iobuf->blocks, blksize_org) != (nr_sectors<<9))
4223 +               if (!lvm_snapshot_prepare_blocks(blocks, phys_start,
4224 +                                                nr_sectors, blksize_org))
4225 +                       goto fail_prepare;
4226 +
4227 +               if (__brw_kiovec(READ, 1, &iobuf, org_phys_dev, blocks,
4228 +                                blksize_org, lv_snap) != (nr_sectors<<9))
4229                         goto fail_raw_read;
4230  
4231 -               lvm_snapshot_prepare_blocks(iobuf->blocks, snap_start,
4232 -                                           nr_sectors, blksize_snap);
4233 -               if (brw_kiovec(WRITE, 1, &iobuf, snap_phys_dev,
4234 -                              iobuf->blocks, blksize_snap) != (nr_sectors<<9))
4235 +               if (!lvm_snapshot_prepare_blocks(blocks, snap_start,
4236 +                                                nr_sectors, blksize_snap))
4237 +                       goto fail_prepare;
4238 +
4239 +               if (__brw_kiovec(WRITE, 1, &iobuf, snap_phys_dev, blocks,
4240 +                                blksize_snap, lv_snap) != (nr_sectors<<9))
4241                         goto fail_raw_write;
4242 +
4243 +               phys_start += nr_sectors;
4244 +               snap_start += nr_sectors;
4245         }
4246  
4247  #ifdef DEBUG_SNAPSHOT
4248 @@ -434,52 +443,57 @@
4249         return 0;
4250  
4251         /* slow path */
4252 - out:
4253 -       lvm_drop_snapshot(lv_snap, reason);
4254 +out:
4255 +       lvm_drop_snapshot(vg, lv_snap, reason);
4256         return 1;
4257  
4258 - fail_out_of_space:
4259 +fail_out_of_space:
4260         reason = "out of space";
4261         goto out;
4262 - fail_raw_read:
4263 +fail_raw_read:
4264         reason = "read error";
4265         goto out;
4266 - fail_raw_write:
4267 +fail_raw_write:
4268         reason = "write error";
4269         goto out;
4270 - fail_blksize:
4271 +fail_blksize:
4272         reason = "blocksize error";
4273         goto out;
4274 +
4275 +fail_prepare:
4276 +       reason = "couldn't prepare kiovec blocks "
4277 +               "(start probably isn't block aligned)";
4278 +       goto out;
4279  }
4280  
4281  int lvm_snapshot_alloc_iobuf_pages(struct kiobuf * iobuf, int sectors)
4282  {
4283         int bytes, nr_pages, err, i;
4284  
4285 -       bytes = sectors << 9;
4286 +       bytes = sectors * SECTOR_SIZE;
4287         nr_pages = (bytes + ~PAGE_MASK) >> PAGE_SHIFT;
4288         err = expand_kiobuf(iobuf, nr_pages);
4289 -       if (err)
4290 -               goto out;
4291 +       if (err) goto out;
4292  
4293         err = -ENOMEM;
4294 -       iobuf->locked = 0;
4295 +       iobuf->locked = 1;
4296         iobuf->nr_pages = 0;
4297         for (i = 0; i < nr_pages; i++)
4298         {
4299                 struct page * page;
4300  
4301                 page = alloc_page(GFP_KERNEL);
4302 -               if (!page)
4303 -                       goto out;
4304 +               if (!page) goto out;
4305  
4306                 iobuf->maplist[i] = page;
4307 +               LockPage(page);
4308                 iobuf->nr_pages++;
4309         }
4310         iobuf->offset = 0;
4311  
4312         err = 0;
4313 - out:
4314 +
4315 +out:
4316         return err;
4317  }
4318  
4319 @@ -521,40 +535,46 @@
4320         while (buckets--)
4321                 INIT_LIST_HEAD(hash+buckets);
4322         err = 0;
4323 - out:
4324 +out:
4325         return err;
4326  }
4327  
4328  int lvm_snapshot_alloc(lv_t * lv_snap)
4329  {
4330 -       int err, blocksize, max_sectors;
4331 +       int ret, max_sectors;
4332  
4333 -       err = alloc_kiovec(1, &lv_snap->lv_iobuf);
4334 -       if (err)
4335 -               goto out;
4336 +       /* allocate kiovec to do chunk io */
4337 +       ret = alloc_kiovec(1, &lv_snap->lv_iobuf);
4338 +       if (ret) goto out;
4339  
4340 -       blocksize = lvm_blocksizes[MINOR(lv_snap->lv_dev)];
4341         max_sectors = KIO_MAX_SECTORS << (PAGE_SHIFT-9);
4342  
4343 -       err = lvm_snapshot_alloc_iobuf_pages(lv_snap->lv_iobuf, max_sectors);
4344 -       if (err)
4345 -               goto out_free_kiovec;
4346 +       ret = lvm_snapshot_alloc_iobuf_pages(lv_snap->lv_iobuf, max_sectors);
4347 +       if (ret) goto out_free_kiovec;
4348  
4349 -       err = lvm_snapshot_alloc_hash_table(lv_snap);
4350 -       if (err)
4351 -               goto out_free_kiovec;
4352 +       /* allocate kiovec to do exception table io */
4353 +       ret = alloc_kiovec(1, &lv_snap->lv_COW_table_iobuf);
4354 +       if (ret) goto out_free_kiovec;
4355  
4356 +       ret = lvm_snapshot_alloc_iobuf_pages(lv_snap->lv_COW_table_iobuf,
4357 +                                            PAGE_SIZE/SECTOR_SIZE);
4358 +       if (ret) goto out_free_both_kiovecs;
4359  
4360 -               lv_snap->lv_COW_table_page = alloc_page(GFP_KERNEL);
4361 -               if (!lv_snap->lv_COW_table_page)
4362 -                       goto out_free_kiovec;
4363 +       ret = lvm_snapshot_alloc_hash_table(lv_snap);
4364 +       if (ret) goto out_free_both_kiovecs;
4365  
4366 - out:
4367 -       return err;
4368 +out:
4369 +       return ret;
4370 +
4371 +out_free_both_kiovecs:
4372 +       unmap_kiobuf(lv_snap->lv_COW_table_iobuf);
4373 +       free_kiovec(1, &lv_snap->lv_COW_table_iobuf);
4374 +       lv_snap->lv_COW_table_iobuf = NULL;
4375  
4376 - out_free_kiovec:
4377 +out_free_kiovec:
4378         unmap_kiobuf(lv_snap->lv_iobuf);
4379         free_kiovec(1, &lv_snap->lv_iobuf);
4380 +       lv_snap->lv_iobuf = NULL;
4381         vfree(lv_snap->lv_snapshot_hash_table);
4382         lv_snap->lv_snapshot_hash_table = NULL;
4383         goto out;
4384 @@ -580,9 +600,125 @@
4385                 free_kiovec(1, &lv->lv_iobuf);
4386                 lv->lv_iobuf = NULL;
4387         }
4388 -       if (lv->lv_COW_table_page)
4389 +       if (lv->lv_COW_table_iobuf)
4390         {
4391 -               free_page((ulong)lv->lv_COW_table_page);
4392 -               lv->lv_COW_table_page = NULL;
4393 +               kiobuf_wait_for_io(lv->lv_COW_table_iobuf);
4394 +               unmap_kiobuf(lv->lv_COW_table_iobuf);
4395 +               free_kiovec(1, &lv->lv_COW_table_iobuf);
4396 +               lv->lv_COW_table_iobuf = NULL;
4397 +       }
4398 +}
4399 +
4400 +
4401 +static int _write_COW_table_block(vg_t *vg, lv_t *lv_snap,
4402 +                                 int idx, const char **reason) {
4403 +       int blksize_snap;
4404 +       int end_of_table;
4405 +       int idx_COW_table;
4406 +       uint pvn;
4407 +       ulong snap_pe_start, COW_table_sector_offset,
4408 +             COW_entries_per_pe, COW_chunks_per_pe, COW_entries_per_block;
4409 +       ulong blocks[1];
4410 +       kdev_t snap_phys_dev;
4411 +       lv_block_exception_t *be;
4412 +       struct kiobuf *COW_table_iobuf = lv_snap->lv_COW_table_iobuf;
4413 +       lv_COW_table_disk_t * lv_COW_table =
4414 +          ( lv_COW_table_disk_t *) page_address(lv_snap->lv_COW_table_iobuf->maplist[0]);
4415 +
4416 +       COW_chunks_per_pe = LVM_GET_COW_TABLE_CHUNKS_PER_PE(vg, lv_snap);
4417 +       COW_entries_per_pe = LVM_GET_COW_TABLE_ENTRIES_PER_PE(vg, lv_snap);
4418 +
4419 +       /* get physical address of destination chunk */
4420 +       snap_phys_dev = lv_snap->lv_block_exception[idx].rdev_new;
4421 +       snap_pe_start = lv_snap->lv_block_exception[idx - (idx % COW_entries_per_pe)].rsector_new - lv_snap->lv_chunk_size;
4422 +
4423 +       blksize_snap = lvm_sectsize(snap_phys_dev);
4424 +
4425 +        COW_entries_per_block = blksize_snap / sizeof(lv_COW_table_disk_t);
4426 +        idx_COW_table = idx % COW_entries_per_pe % COW_entries_per_block;
4427 +
4428 +       if ( idx_COW_table == 0) memset(lv_COW_table, 0, blksize_snap);
4429 +
4430 +       /* sector offset into the on disk COW table */
4431 +       COW_table_sector_offset = (idx % COW_entries_per_pe) / (SECTOR_SIZE / sizeof(lv_COW_table_disk_t));
4432 +
4433 +        /* COW table block to write next */
4434 +       blocks[0] = (snap_pe_start + COW_table_sector_offset) >> (blksize_snap >> 10);
4435 +
4436 +       /* store new COW_table entry */
4437 +       be = lv_snap->lv_block_exception + idx;
4438 +       if(_pv_get_number(vg, be->rdev_org, &pvn))
4439 +               goto fail_pv_get_number;
4440 +
4441 +       lv_COW_table[idx_COW_table].pv_org_number = cpu_to_le64(pvn);
4442 +       lv_COW_table[idx_COW_table].pv_org_rsector =
4443 +               cpu_to_le64(be->rsector_org);
4444 +       if(_pv_get_number(vg, snap_phys_dev, &pvn))
4445 +               goto fail_pv_get_number;
4446 +
4447 +       lv_COW_table[idx_COW_table].pv_snap_number = cpu_to_le64(pvn);
4448 +       lv_COW_table[idx_COW_table].pv_snap_rsector =
4449 +               cpu_to_le64(be->rsector_new);
4450 +
4451 +       COW_table_iobuf->length = blksize_snap;
4452 +       /* COW_table_iobuf->nr_pages = 1; */
4453 +
4454 +       if (__brw_kiovec(WRITE, 1, &COW_table_iobuf, snap_phys_dev,
4455 +                        blocks, blksize_snap, lv_snap) != blksize_snap)
4456 +               goto fail_raw_write;
4457 +
4458 +       /* initialization of next COW exception table block with zeroes */
4459 +       end_of_table = idx % COW_entries_per_pe == COW_entries_per_pe - 1;
4460 +       if (idx_COW_table % COW_entries_per_block == COW_entries_per_block - 1 || end_of_table)
4461 +       {
4462 +               /* don't go beyond the end */
4463 +               if (idx + 1 >= lv_snap->lv_remap_end) goto out;
4464 +
4465 +               memset(lv_COW_table, 0, blksize_snap);
4466 +
4467 +               if (end_of_table)
4468 +               {
4469 +                       idx++;
4470 +                       snap_phys_dev = lv_snap->lv_block_exception[idx].rdev_new;
4471 +                       snap_pe_start = lv_snap->lv_block_exception[idx - (idx % COW_entries_per_pe)].rsector_new - lv_snap->lv_chunk_size;
4472 +                       blksize_snap = lvm_sectsize(snap_phys_dev);
4473 +                       blocks[0] = snap_pe_start >> (blksize_snap >> 10);
4474 +               } else blocks[0]++;
4475 +
4476 +               if (__brw_kiovec(WRITE, 1, &COW_table_iobuf, snap_phys_dev,
4477 +                                 blocks, blksize_snap, lv_snap) !=
4478 +                    blksize_snap)
4479 +                       goto fail_raw_write;
4480 +       }
4481 +
4482 +out:
4483 +       return 0;
4484 +
4485 +fail_raw_write:
4486 +       *reason = "write error";
4487 +       return 1;
4488 +
4489 +fail_pv_get_number:
4490 +       *reason = "_pv_get_number failed";
4491 +       return 1;
4492 +}
4493 +
4494 +/*
4495 + * FIXME_1.2
4496 + * This function is a bit of a hack; we need to ensure that the
4497 + * snapshot is never made active again, because it will surely be
4498 + * corrupt.  At the moment we do not have access to the LVM metadata
4499 + * from within the kernel.  So we set the first exception to point to
4500 + * sector 1 (which will always be within the metadata, and as such
4501 + * invalid).  User land tools will check for this when they are asked
4502 + * to activate the snapshot and prevent this from happening.
4503 + * A failed COW table write (non-zero return) is only logged.
4504 + */
4505 +static void _disable_snapshot(vg_t *vg, lv_t *lv) {
4506 +       const char *err;
4507 +       lv->lv_block_exception[0].rsector_org = LVM_SNAPSHOT_DROPPED_SECTOR;
4508 +       if (_write_COW_table_block(vg, lv, 0, &err)) {
4509 +               printk(KERN_ERR "%s -- couldn't disable snapshot: %s\n",
4510 +                      lvm_name, err);
4511         }
4512  }
4513 --- linux/drivers/md/lvm-fs.c.orig Tue Nov 13 08:46:52 2001
4514 +++ linux/drivers/md/lvm-fs.c Tue Nov 13 08:46:52 2001
4515 @@ -0,0 +1,619 @@
4516 +/*
4517 + * kernel/lvm-fs.c
4518 + *
4519 + * Copyright (C) 2001 Sistina Software
4520 + *
4521 + * January-April 2001
4522 + *
4523 + * LVM driver is free software; you can redistribute it and/or modify
4524 + * it under the terms of the GNU General Public License as published by
4525 + * the Free Software Foundation; either version 2, or (at your option)
4526 + * any later version.
4527 + *
4528 + * LVM driver is distributed in the hope that it will be useful,
4529 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
4530 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
4531 + * GNU General Public License for more details.
4532 + *
4533 + * You should have received a copy of the GNU General Public License
4534 + * along with GNU CC; see the file COPYING.  If not, write to
4535 + * the Free Software Foundation, 59 Temple Place - Suite 330,
4536 + * Boston, MA 02111-1307, USA.
4537 + *
4538 + */
4539 +
4540 +/*
4541 + * Changelog
4542 + *
4543 + *    11/01/2001 - First version (Joe Thornber)
4544 + *    21/03/2001 - added display of stripes and stripe size (HM)
4545 + *    04/10/2001 - corrected devfs_register() call in lvm_init_fs()
4546 + *    11/04/2001 - don't devfs_register("lvm") as user-space always does it
4547 + *    10/05/2001 - show more of PV name in /proc/lvm/global
4548 + *
4549 + */
4550 +
4551 +#include <linux/config.h>
4552 +#include <linux/version.h>
4553 +
4554 +#include <linux/kernel.h>
4555 +#include <linux/vmalloc.h>
4556 +#include <linux/smp_lock.h>
4557 +
4558 +#include <linux/devfs_fs_kernel.h>
4559 +#include <linux/proc_fs.h>
4560 +#include <linux/init.h>
4561 +#include <linux/lvm.h>
4562 +
4563 +#include "lvm-internal.h"
4564 +
4565 +/* read_proc handlers attached to the proc entries this file creates */
4566 +static int _proc_read_vg(char *page, char **start, off_t off,
4567 +                        int count, int *eof, void *data);
4568 +static int _proc_read_lv(char *page, char **start, off_t off,
4569 +                        int count, int *eof, void *data);
4570 +static int _proc_read_pv(char *page, char **start, off_t off,
4571 +                        int count, int *eof, void *data);
4572 +static int _proc_read_global(char *page, char **start, off_t off,
4573 +                            int count, int *eof, void *data);
4574 +/* per-object formatters called from _proc_read_global() */
4575 +static int _vg_info(vg_t *vg_ptr, char *buf);
4576 +static int _lv_info(vg_t *vg_ptr, lv_t *lv_ptr, char *buf);
4577 +static int _pv_info(pv_t *pv_ptr, char *buf);
4578 +
4579 +static void _show_uuid(const char *src, char *b, char *e);
4580 +/* devfs handles: vg/ch are indexed by VG number, lv by the LV's minor number */
4581 +#if 0
4582 +static devfs_handle_t lvm_devfs_handle;
4583 +#endif
4584 +static devfs_handle_t vg_devfs_handle[MAX_VG];
4585 +static devfs_handle_t ch_devfs_handle[MAX_VG];
4586 +static devfs_handle_t lv_devfs_handle[MAX_LV];
4587 +/* proc directories created by lvm_init_fs() */
4588 +static struct proc_dir_entry *lvm_proc_dir = NULL;
4589 +static struct proc_dir_entry *lvm_proc_vg_subdir = NULL;
4590 +
4591 +/* inline functions */
4592 +
4593 +/* public interface */
4594 +void __init lvm_init_fs() {
4595 +       struct proc_dir_entry *pde;
4596 +       /* create LVM_DIR under proc_root, then its VG subdirectory and global file */
4597 +/* devfs "lvm" registration stays disabled: user-space has already registered this */
4598 +#if 0
4599 +       lvm_devfs_handle = devfs_register(
4600 +               0 , "lvm", 0, LVM_CHAR_MAJOR, 0,
4601 +               S_IFCHR | S_IRUSR | S_IWUSR | S_IRGRP,
4602 +               &lvm_chr_fops, NULL);
4603 +#endif
4604 +       lvm_proc_dir = create_proc_entry(LVM_DIR, S_IFDIR, &proc_root);
4605 +       if (lvm_proc_dir) {     /* children are only created if the directory exists */
4606 +               lvm_proc_vg_subdir = create_proc_entry(LVM_VG_SUBDIR, S_IFDIR,
4607 +                                                      lvm_proc_dir);
4608 +               pde = create_proc_entry(LVM_GLOBAL, S_IFREG, lvm_proc_dir);
4609 +               if ( pde != NULL) pde->read_proc = _proc_read_global;
4610 +       }
4611 +}
4612 +/* Remove the proc entries made by lvm_init_fs(): both children first, then LVM_DIR itself. */
4613 +void lvm_fin_fs() {
4614 +#if 0
4615 +       devfs_unregister (lvm_devfs_handle);
4616 +#endif
4617 +       remove_proc_entry(LVM_GLOBAL, lvm_proc_dir);
4618 +       remove_proc_entry(LVM_VG_SUBDIR, lvm_proc_dir);
4619 +       remove_proc_entry(LVM_DIR, &proc_root);
4620 +}
4621 +/* Register the devfs directory and "group" char node of a VG and build its proc subtree. */
4622 +void lvm_fs_create_vg(vg_t *vg_ptr) {
4623 +       struct proc_dir_entry *pde;
4624 +
4625 +       vg_devfs_handle[vg_ptr->vg_number] =
4626 +               devfs_mk_dir(0, vg_ptr->vg_name, NULL);
4627 +
4628 +       ch_devfs_handle[vg_ptr->vg_number] = devfs_register(
4629 +               vg_devfs_handle[vg_ptr->vg_number] , "group",
4630 +               DEVFS_FL_DEFAULT, LVM_CHAR_MAJOR, vg_ptr->vg_number,
4631 +               S_IFCHR | S_IRUSR | S_IWUSR | S_IRGRP,
4632 +               &lvm_chr_fops, NULL);
4633 +       /* NOTE(review): vg_dir_pde is used as a parent below without a NULL check - confirm */
4634 +       vg_ptr->vg_dir_pde = create_proc_entry(vg_ptr->vg_name, S_IFDIR,
4635 +                                              lvm_proc_vg_subdir);
4636 +
4637 +       if((pde = create_proc_entry("group", S_IFREG, vg_ptr->vg_dir_pde))) {
4638 +               pde->read_proc = _proc_read_vg;
4639 +               pde->data = vg_ptr;     /* _proc_read_vg() treats its 'data' argument as a vg_t */
4640 +       }
4641 +
4642 +       vg_ptr->lv_subdir_pde =
4643 +               create_proc_entry(LVM_LV_SUBDIR, S_IFDIR, vg_ptr->vg_dir_pde);
4644 +
4645 +       vg_ptr->pv_subdir_pde =
4646 +               create_proc_entry(LVM_PV_SUBDIR, S_IFDIR, vg_ptr->vg_dir_pde);
4647 +}
4648 +/* Tear down what lvm_fs_create_vg() set up for a VG, including its LV and PV entries. */
4649 +void lvm_fs_remove_vg(vg_t *vg_ptr) {
4650 +       int i;
4651 +
4652 +       devfs_unregister(ch_devfs_handle[vg_ptr->vg_number]);
4653 +
4654 +       /* remove lv's: their devfs nodes live in the VG directory, so they must go first */
4655 +       for(i = 0; i < vg_ptr->lv_max; i++)
4656 +               if(vg_ptr->lv[i]) lvm_fs_remove_lv(vg_ptr, vg_ptr->lv[i]);
4657 +       devfs_unregister(vg_devfs_handle[vg_ptr->vg_number]);
4658 +
4659 +       /* remove pv's */
4660 +       for(i = 0; i < vg_ptr->pv_max; i++)
4661 +               if(vg_ptr->pv[i]) lvm_fs_remove_pv(vg_ptr, vg_ptr->pv[i]);
4662 +
4663 +       if(vg_ptr->vg_dir_pde) {        /* proc subtree: children first, VG directory last */
4664 +               remove_proc_entry(LVM_LV_SUBDIR, vg_ptr->vg_dir_pde);
4665 +               vg_ptr->lv_subdir_pde = NULL;
4666 +
4667 +               remove_proc_entry(LVM_PV_SUBDIR, vg_ptr->vg_dir_pde);
4668 +               vg_ptr->pv_subdir_pde = NULL;
4669 +
4670 +               remove_proc_entry("group", vg_ptr->vg_dir_pde);
4671 +               vg_ptr->vg_dir_pde = NULL;
4672 +
4673 +               remove_proc_entry(vg_ptr->vg_name, lvm_proc_vg_subdir);
4674 +       }
4675 +}
4676 +
4677 +/* Return the part of str after its last '/', or str itself if it contains none. */
4678 +static inline const char *_basename(const char *str) {
4679 +       const char *slash = strrchr(str, '/');
4680 +       if (slash) return slash + 1;
4681 +       return str;
4682 +}
4683 +/* Register an LV's block node in its VG's devfs directory and add its proc file; returns the devfs handle. */
4684 +devfs_handle_t lvm_fs_create_lv(vg_t *vg_ptr, lv_t *lv) {
4685 +       struct proc_dir_entry *pde;
4686 +       const char *name = _basename(lv->lv_name);      /* entry name: lv_name without its path */
4687 +
4688 +       lv_devfs_handle[MINOR(lv->lv_dev)] = devfs_register(
4689 +               vg_devfs_handle[vg_ptr->vg_number], name,
4690 +               DEVFS_FL_DEFAULT, LVM_BLK_MAJOR, MINOR(lv->lv_dev),
4691 +               S_IFBLK | S_IRUSR | S_IWUSR | S_IRGRP,
4692 +               &lvm_blk_dops, NULL);
4693 +
4694 +       if(vg_ptr->lv_subdir_pde &&
4695 +          (pde = create_proc_entry(name, S_IFREG, vg_ptr->lv_subdir_pde))) {
4696 +               pde->read_proc = _proc_read_lv;
4697 +               pde->data = lv; /* _proc_read_lv() treats its 'data' argument as an lv_t */
4698 +       }
4699 +       return lv_devfs_handle[MINOR(lv->lv_dev)];
4700 +}
4701 +/* Undo lvm_fs_create_lv(): unregister the LV's devfs node and remove its proc file. */
4702 +void lvm_fs_remove_lv(vg_t *vg_ptr, lv_t *lv) {
4703 +       devfs_unregister(lv_devfs_handle[MINOR(lv->lv_dev)]);
4704 +
4705 +       if(vg_ptr->lv_subdir_pde) {     /* proc file only exists if the LV subdirectory does */
4706 +               const char *name = _basename(lv->lv_name);
4707 +               remove_proc_entry(name, vg_ptr->lv_subdir_pde);
4708 +       }
4709 +}
4710 +
4711 +/* Copy src into [b, e) minus a leading LVM_DIR_PREFIX, turning '/' into '_'; NUL-terminates. */
4712 +static inline void _make_pv_name(const char *src, char *b, char *e) {
4713 +       int offset = strlen(LVM_DIR_PREFIX);
4714 +       if(strncmp(src, LVM_DIR_PREFIX, offset))
4715 +               offset = 0;     /* src does not start with the prefix: keep all of it */
4716 +
4717 +       e--;    /* keep the last byte of the buffer for the terminator */
4718 +       src += offset;
4719 +       while(*src && (b != e)) {
4720 +               *b++ = (*src == '/') ? '_' : *src;
4721 +               src++;
4722 +       }
4723 +       *b = '\0';
4724 +}
4725 +/* Add a proc file for a PV under its VG's PV subdirectory; does nothing if that directory is missing. */
4726 +void lvm_fs_create_pv(vg_t *vg_ptr, pv_t *pv) {
4727 +       struct proc_dir_entry *pde;
4728 +       char name[NAME_LEN];
4729 +
4730 +       if(!vg_ptr->pv_subdir_pde)
4731 +               return;
4732 +
4733 +       _make_pv_name(pv->pv_name, name, name + sizeof(name));  /* file name derived from pv_name */
4734 +       if((pde = create_proc_entry(name, S_IFREG, vg_ptr->pv_subdir_pde))) {
4735 +               pde->read_proc = _proc_read_pv;
4736 +               pde->data = pv; /* _proc_read_pv() treats its 'data' argument as a pv_t */
4737 +       }
4738 +}
4739 +/* Remove the proc file lvm_fs_create_pv() made for a PV; the name is rebuilt the same way. */
4740 +void lvm_fs_remove_pv(vg_t *vg_ptr, pv_t *pv) {
4741 +       char name[NAME_LEN];
4742 +
4743 +       if(!vg_ptr->pv_subdir_pde)
4744 +               return;
4745 +
4746 +       _make_pv_name(pv->pv_name, name, name + sizeof(name));
4747 +       remove_proc_entry(name, vg_ptr->pv_subdir_pde);
4748 +}
4749 +
4750 +/* read_proc handler for a VG's "group" file: prints the vg_t given in 'data' into 'page', returns the length. */
4751 +static int _proc_read_vg(char *page, char **start, off_t off,
4752 +                         int count, int *eof, void *data) {
4753 +       int sz = 0;
4754 +       vg_t *vg_ptr = data;
4755 +       char uuid[NAME_LEN];
4756 +       /* start, off, count and eof are not used: the whole text is written on every call */
4757 +       sz += sprintf(page + sz, "name:         %s\n", vg_ptr->vg_name);
4758 +       sz += sprintf(page + sz, "size:         %u\n",
4759 +                     vg_ptr->pe_total * vg_ptr->pe_size / 2);
4760 +       sz += sprintf(page + sz, "access:       %u\n", vg_ptr->vg_access);
4761 +       sz += sprintf(page + sz, "status:       %u\n", vg_ptr->vg_status);
4762 +       sz += sprintf(page + sz, "number:       %u\n", vg_ptr->vg_number);
4763 +       sz += sprintf(page + sz, "LV max:       %u\n", vg_ptr->lv_max);
4764 +       sz += sprintf(page + sz, "LV current:   %u\n", vg_ptr->lv_cur);
4765 +       sz += sprintf(page + sz, "LV open:      %u\n", vg_ptr->lv_open);
4766 +       sz += sprintf(page + sz, "PV max:       %u\n", vg_ptr->pv_max);
4767 +       sz += sprintf(page + sz, "PV current:   %u\n", vg_ptr->pv_cur);
4768 +       sz += sprintf(page + sz, "PV active:    %u\n", vg_ptr->pv_act);
4769 +       sz += sprintf(page + sz, "PE size:      %u\n", vg_ptr->pe_size / 2);
4770 +       sz += sprintf(page + sz, "PE total:     %u\n", vg_ptr->pe_total);
4771 +       sz += sprintf(page + sz, "PE allocated: %u\n", vg_ptr->pe_allocated);
4772 +       /* uuid is printed with a '-' inserted after every 4 characters */
4773 +       _show_uuid(vg_ptr->vg_uuid, uuid, uuid + sizeof(uuid));
4774 +       sz += sprintf(page + sz, "uuid:         %s\n", uuid);
4775 +
4776 +       return sz;
4777 +}
4778 +/* read_proc handler for an LV's proc file: prints the lv_t given in 'data' into 'page', returns the length. */
4779 +static int _proc_read_lv(char *page, char **start, off_t off,
4780 +                         int count, int *eof, void *data) {
4781 +       int sz = 0;
4782 +       lv_t *lv = data;
4783 +       /* start, off, count and eof are not used: the whole text is written on every call */
4784 +       sz += sprintf(page + sz, "name:         %s\n", lv->lv_name);
4785 +       sz += sprintf(page + sz, "size:         %u\n", lv->lv_size);
4786 +       sz += sprintf(page + sz, "access:       %u\n", lv->lv_access);
4787 +       sz += sprintf(page + sz, "status:       %u\n", lv->lv_status);
4788 +       sz += sprintf(page + sz, "number:       %u\n", lv->lv_number);
4789 +       sz += sprintf(page + sz, "open:         %u\n", lv->lv_open);
4790 +       sz += sprintf(page + sz, "allocation:   %u\n", lv->lv_allocation);
4791 +       if(lv->lv_stripes > 1) {        /* stripe lines appear only for striped LVs */
4792 +               sz += sprintf(page + sz, "stripes:      %u\n",
4793 +                             lv->lv_stripes);
4794 +               sz += sprintf(page + sz, "stripesize:   %u\n",
4795 +                             lv->lv_stripesize);
4796 +       }
4797 +       sz += sprintf(page + sz, "device:       %02u:%02u\n",
4798 +                     MAJOR(lv->lv_dev), MINOR(lv->lv_dev));
4799 +
4800 +       return sz;
4801 +}
4802 +/* read_proc handler for a PV's proc file: prints the pv_t given in 'data' into 'page', returns the length. */
4803 +static int _proc_read_pv(char *page, char **start, off_t off,
4804 +                        int count, int *eof, void *data) {
4805 +       int sz = 0;
4806 +       pv_t *pv = data;
4807 +       char uuid[NAME_LEN];
4808 +       /* start, off, count and eof are not used: the whole text is written on every call */
4809 +       sz += sprintf(page + sz, "name:         %s\n", pv->pv_name);
4810 +       sz += sprintf(page + sz, "size:         %u\n", pv->pv_size);
4811 +       sz += sprintf(page + sz, "status:       %u\n", pv->pv_status);
4812 +       sz += sprintf(page + sz, "number:       %u\n", pv->pv_number);
4813 +       sz += sprintf(page + sz, "allocatable:  %u\n", pv->pv_allocatable);
4814 +       sz += sprintf(page + sz, "LV current:   %u\n", pv->lv_cur);
4815 +       sz += sprintf(page + sz, "PE size:      %u\n", pv->pe_size / 2);
4816 +       sz += sprintf(page + sz, "PE total:     %u\n", pv->pe_total);
4817 +       sz += sprintf(page + sz, "PE allocated: %u\n", pv->pe_allocated);
4818 +       sz += sprintf(page + sz, "device:       %02u:%02u\n",
4819 +                      MAJOR(pv->pv_dev), MINOR(pv->pv_dev));
4820 +       /* uuid is printed with a '-' inserted after every 4 characters */
4821 +       _show_uuid(pv->pv_uuid, uuid, uuid + sizeof(uuid));
4822 +       sz += sprintf(page + sz, "uuid:         %s\n", uuid);
4823 +
4824 +       return sz;
4825 +}
4826 +/* read_proc handler for the global file: rebuilds the report when pos == 0 or none is cached, then serves it from buf. */
4827 +static int _proc_read_global(char *page, char **start, off_t pos, int count,
4828 +                            int *eof, void *data) {
4829 +/* pass 0 (i == 0) formats every piece into dummy_buf just to measure it; pass 1 appends to buf */
4830 +#define  LVM_PROC_BUF   ( i == 0 ? dummy_buf : &buf[sz])
4831 +
4832 +       int c, i, l, p, v, vg_counter, pv_counter, lv_counter, lv_open_counter,
4833 +               lv_open_total, pe_t_bytes, hash_table_bytes, lv_block_exception_t_bytes, seconds;
4834 +       static off_t sz;
4835 +       off_t sz_last;
4836 +       static char *buf = NULL;
4837 +       static char dummy_buf[160];     /* sized for 2 lines */
4838 +       vg_t *vg_ptr;
4839 +       lv_t *lv_ptr;
4840 +       pv_t *pv_ptr;
4841 +       /* buf and sz are static: they keep the generated report between successive calls */
4842 +       /* NOTE(review): a single pass-0 piece longer than dummy_buf would overrun it - confirm bound */
4843 +#ifdef DEBUG_LVM_PROC_GET_INFO
4844 +       printk(KERN_DEBUG
4845 +              "%s - lvm_proc_get_global_info CALLED  pos: %lu  count: %d\n",
4846 +              lvm_name, pos, count);
4847 +#endif
4848 +
4849 +       if(pos != 0 && buf != NULL)     /* continuing a read: reuse the cached report */
4850 +               goto out;
4851 +
4852 +       sz_last = vg_counter = pv_counter = lv_counter = lv_open_counter = \
4853 +               lv_open_total = pe_t_bytes = hash_table_bytes = \
4854 +               lv_block_exception_t_bytes = 0;
4855 +
4856 +       /* get some statistics */
4857 +       for (v = 0; v < ABS_MAX_VG; v++) {
4858 +               if ((vg_ptr = vg[v]) != NULL) {
4859 +                       vg_counter++;
4860 +                       pv_counter += vg_ptr->pv_cur;
4861 +                       lv_counter += vg_ptr->lv_cur;
4862 +                       if (vg_ptr->lv_cur > 0) {
4863 +                               for (l = 0; l < vg[v]->lv_max; l++) {
4864 +                                       if ((lv_ptr = vg_ptr->lv[l]) != NULL) {
4865 +                                               pe_t_bytes += lv_ptr->lv_allocated_le;
4866 +                                               hash_table_bytes += lv_ptr->lv_snapshot_hash_table_size;
4867 +                                               if (lv_ptr->lv_block_exception != NULL)
4868 +                                                       lv_block_exception_t_bytes += lv_ptr->lv_remap_end;
4869 +                                               if (lv_ptr->lv_open > 0) {
4870 +                                                       lv_open_counter++;
4871 +                                                       lv_open_total += lv_ptr->lv_open;
4872 +                                               }
4873 +                                       }
4874 +                               }
4875 +                       }
4876 +               }
4877 +       }
4878 +       /* the two sums above are element counts; scale them to bytes */
4879 +       pe_t_bytes *= sizeof(pe_t);
4880 +       lv_block_exception_t_bytes *= sizeof(lv_block_exception_t);
4881 +
4882 +       if (buf != NULL) {      /* drop the previous report before building a new one */
4883 +               P_KFREE("%s -- vfree %d\n", lvm_name, __LINE__);
4884 +               lock_kernel();
4885 +               vfree(buf);
4886 +               unlock_kernel();
4887 +               buf = NULL;
4888 +       }
4889 +       /* 2 times: first to get size to allocate buffer,
4890 +          2nd to fill the malloced buffer */
4891 +       for (i = 0; i < 2; i++) {
4892 +               sz = 0;
4893 +               sz += sprintf(LVM_PROC_BUF,
4894 +                             "LVM "
4895 +#ifdef MODULE
4896 +                             "module"
4897 +#else
4898 +                             "driver"
4899 +#endif
4900 +                             " %s\n\n"
4901 +                             "Total:  %d VG%s  %d PV%s  %d LV%s ",
4902 +                             lvm_version,
4903 +                             vg_counter, vg_counter == 1 ? "" : "s",
4904 +                             pv_counter, pv_counter == 1 ? "" : "s",
4905 +                             lv_counter, lv_counter == 1 ? "" : "s");
4906 +               sz += sprintf(LVM_PROC_BUF,
4907 +                             "(%d LV%s open",
4908 +                             lv_open_counter,
4909 +                             lv_open_counter == 1 ? "" : "s");
4910 +               if (lv_open_total > 0)
4911 +                       sz += sprintf(LVM_PROC_BUF,
4912 +                                     " %d times)\n",
4913 +                                     lv_open_total);
4914 +               else
4915 +                       sz += sprintf(LVM_PROC_BUF, ")");
4916 +               sz += sprintf(LVM_PROC_BUF,
4917 +                             "\nGlobal: %lu bytes malloced   IOP version: %d   ",
4918 +                             vg_counter * sizeof(vg_t) +
4919 +                             pv_counter * sizeof(pv_t) +
4920 +                             lv_counter * sizeof(lv_t) +
4921 +                             pe_t_bytes + hash_table_bytes + lv_block_exception_t_bytes + sz_last,
4922 +                             lvm_iop_version);
4923 +
4924 +               seconds = CURRENT_TIME - loadtime;
4925 +               if (seconds < 0)
4926 +                       loadtime = CURRENT_TIME + seconds;
4927 +               if (seconds / 86400 > 0) {
4928 +                       sz += sprintf(LVM_PROC_BUF, "%d day%s ",
4929 +                                     seconds / 86400,
4930 +                                     seconds / 86400 == 0 ||
4931 +                                     seconds / 86400 > 1 ? "s" : "");
4932 +               }
4933 +               sz += sprintf(LVM_PROC_BUF, "%d:%02d:%02d active\n",
4934 +                             (seconds % 86400) / 3600,
4935 +                             (seconds % 3600) / 60,
4936 +                             seconds % 60);
4937 +
4938 +               if (vg_counter > 0) {
4939 +                       for (v = 0; v < ABS_MAX_VG; v++) {
4940 +                               /* volume group */
4941 +                               if ((vg_ptr = vg[v]) != NULL) {
4942 +                                       sz += _vg_info(vg_ptr, LVM_PROC_BUF);
4943 +
4944 +                                       /* physical volumes */
4945 +                                       sz += sprintf(LVM_PROC_BUF,
4946 +                                                     "\n  PV%s ",
4947 +                                                     vg_ptr->pv_cur == 1 ? ": " : "s:");
4948 +                                       c = 0;
4949 +                                       for (p = 0; p < vg_ptr->pv_max; p++) {
4950 +                                               if ((pv_ptr = vg_ptr->pv[p]) != NULL) {
4951 +                                                       sz += _pv_info(pv_ptr, LVM_PROC_BUF);
4952 +
4953 +                                                       c++;
4954 +                                                       if (c < vg_ptr->pv_cur)
4955 +                                                               sz += sprintf(LVM_PROC_BUF,
4956 +                                                                             "\n       ");
4957 +                                               }
4958 +                                       }
4959 +
4960 +                                       /* logical volumes */
4961 +                                       sz += sprintf(LVM_PROC_BUF,
4962 +                                                     "\n    LV%s ",
4963 +                                                     vg_ptr->lv_cur == 1 ? ": " : "s:");
4964 +                                       c = 0;
4965 +                                       for (l = 0; l < vg_ptr->lv_max; l++) {
4966 +                                               if ((lv_ptr = vg_ptr->lv[l]) != NULL) {
4967 +                                                       sz += _lv_info(vg_ptr, lv_ptr, LVM_PROC_BUF);
4968 +                                                       c++;
4969 +                                                       if (c < vg_ptr->lv_cur)
4970 +                                                               sz += sprintf(LVM_PROC_BUF,
4971 +                                                                             "\n         ");
4972 +                                               }
4973 +                                       }
4974 +                                       if (vg_ptr->lv_cur == 0) sz += sprintf(LVM_PROC_BUF, "none");
4975 +                                       sz += sprintf(LVM_PROC_BUF, "\n");
4976 +                               }
4977 +                       }
4978 +               }
4979 +               if (buf == NULL) {      /* end of pass 0: sz now holds the measured length */
4980 +                       lock_kernel();
4981 +                       buf = vmalloc(sz + 16); /* slack: pass 1 also stores the NUL and may print wider numbers */
4982 +                       unlock_kernel();
4983 +                       if (buf == NULL) {
4984 +                               sz = 0;
4985 +                               return sprintf(page, "%s - vmalloc error at line %d\n",
4986 +                                              lvm_name, __LINE__);
4987 +                       }
4988 +               }
4989 +               sz_last = sz;   /* pass 1 adds the buffer's own size to the "bytes malloced" figure */
4990 +       }
4991 +
4992 + out:
4993 +       if (pos > sz - 1) {     /* read offset is past the report: free the cached copy */
4994 +               lock_kernel();
4995 +               vfree(buf);
4996 +               unlock_kernel();
4997 +               buf = NULL;
4998 +               return 0;
4999 +       }
5000 +       *start = &buf[pos];     /* hand out the slice of the report starting at pos */
5001 +       if (sz - pos < count)
5002 +               return sz - pos;
5003 +       else
5004 +               return count;
5005 +
5006 +#undef LVM_PROC_BUF
5007 +}
5008 +
5009 +/*
5010 + * format one VG's summary line pair for the global proc file into buf; returns sprintf()'s count
5011 + */
5012 +static int _vg_info(vg_t *vg_ptr, char *buf) {
5013 +       int sz = 0;
5014 +       char inactive_flag = ' ';
5015 +
5016 +       if (!(vg_ptr->vg_status & VG_ACTIVE)) inactive_flag = 'I';      /* 'I' marks a VG without VG_ACTIVE */
5017 +       sz = sprintf(buf,
5018 +                    "\nVG: %c%s  [%d PV, %d LV/%d open] "
5019 +                    " PE Size: %d KB\n"
5020 +                    "  Usage [KB/PE]: %d /%d total  "
5021 +                    "%d /%d used  %d /%d free",
5022 +                    inactive_flag,
5023 +                    vg_ptr->vg_name,
5024 +                    vg_ptr->pv_cur,
5025 +                    vg_ptr->lv_cur,
5026 +                    vg_ptr->lv_open,
5027 +                    vg_ptr->pe_size >> 1,      /* pe_size halved is what gets labelled KB */
5028 +                    vg_ptr->pe_size * vg_ptr->pe_total >> 1,
5029 +                    vg_ptr->pe_total,
5030 +                    vg_ptr->pe_allocated * vg_ptr->pe_size >> 1,
5031 +                    vg_ptr->pe_allocated,
5032 +                    (vg_ptr->pe_total - vg_ptr->pe_allocated) *
5033 +                    vg_ptr->pe_size >> 1,
5034 +                    vg_ptr->pe_total - vg_ptr->pe_allocated);
5035 +       return sz;
5036 +}
5037 +
5038 +
5039 +/*
5040 + * format one LV's flags, name, size and open count for the global proc file into buf; returns the length
5041 + */
5042 +static int _lv_info(vg_t *vg_ptr, lv_t *lv_ptr, char *buf) {
5043 +       int sz = 0;
5044 +       char inactive_flag = 'A', allocation_flag = ' ',
5045 +               stripes_flag = ' ', rw_flag = ' ', *basename;
5046 +       /* flag letters: A/I from LV_ACTIVE, W/R from LV_WRITE, C/D from LV_CONTIGUOUS, S/L from lv_stripes > 1 */
5047 +       if (!(lv_ptr->lv_status & LV_ACTIVE))
5048 +               inactive_flag = 'I';
5049 +       rw_flag = 'R';
5050 +       if (lv_ptr->lv_access & LV_WRITE)
5051 +               rw_flag = 'W';
5052 +       allocation_flag = 'D';
5053 +       if (lv_ptr->lv_allocation & LV_CONTIGUOUS)
5054 +               allocation_flag = 'C';
5055 +       stripes_flag = 'L';
5056 +       if (lv_ptr->lv_stripes > 1)
5057 +               stripes_flag = 'S';
5058 +       sz += sprintf(buf+sz,
5059 +                     "[%c%c%c%c",
5060 +                     inactive_flag,
5061 +        rw_flag,
5062 +                     allocation_flag,
5063 +                     stripes_flag);
5064 +       if (lv_ptr->lv_stripes > 1)
5065 +               sz += sprintf(buf+sz, "%-2d",
5066 +                             lv_ptr->lv_stripes);
5067 +       else
5068 +               sz += sprintf(buf+sz, "  ");    /* keeps the column width of the stripe count */
5069 +
5070 +       /* FIXME: use _basename */
5071 +       basename = strrchr(lv_ptr->lv_name, '/');
5072 +       if ( basename == 0) basename = lv_ptr->lv_name;
5073 +       else                basename++;
5074 +       sz += sprintf(buf+sz, "] %-25s", basename);
5075 +       if (strlen(basename) > 25)      /* name overflows its column: continue on a fresh, indented line */
5076 +               sz += sprintf(buf+sz,
5077 +                             "\n                              ");
5078 +       sz += sprintf(buf+sz, "%9d /%-6d   ",
5079 +                     lv_ptr->lv_size >> 1,
5080 +                     lv_ptr->lv_size / vg_ptr->pe_size);
5081 +
5082 +       if (lv_ptr->lv_open == 0)
5083 +               sz += sprintf(buf+sz, "close");
5084 +       else
5085 +               sz += sprintf(buf+sz, "%dx open",
5086 +                             lv_ptr->lv_open);
5087 +
5088 +       return sz;
5089 +}
5090 +
5091 +
5092 +/*
5093 + * format one PV's flags, name and PE usage for the global proc file into buf; returns sprintf()'s count
5094 + */
5095 +static int _pv_info(pv_t *pv, char *buf) {
5096 +       int sz = 0;
5097 +       char inactive_flag = 'A', allocation_flag = ' ';
5098 +       char *pv_name = NULL;
5099 +       /* flag letters: A/I from PV_ACTIVE, A/N from PV_ALLOCATABLE */
5100 +       if (!(pv->pv_status & PV_ACTIVE))
5101 +               inactive_flag = 'I';
5102 +       allocation_flag = 'A';
5103 +       if (!(pv->pv_allocatable & PV_ALLOCATABLE))
5104 +               allocation_flag = 'N';
5105 +       pv_name = strchr(pv->pv_name+1,'/');    /* first '/' after the leading character */
5106 +       if ( pv_name == 0) pv_name = pv->pv_name;
5107 +       else               pv_name++;
5108 +       sz = sprintf(buf,
5109 +                    "[%c%c] %-21s %8d /%-6d  "
5110 +                    "%8d /%-6d  %8d /%-6d",
5111 +                    inactive_flag,
5112 +                    allocation_flag,
5113 +                    pv_name,
5114 +                    pv->pe_total * pv->pe_size >> 1,
5115 +                    pv->pe_total,
5116 +                    pv->pe_allocated * pv->pe_size >> 1,
5117 +                    pv->pe_allocated,
5118 +                    (pv->pe_total - pv->pe_allocated) *
5119 +                    pv->pe_size >> 1,
5120 +                    pv->pe_total - pv->pe_allocated);
5121 +       return sz;
5122 +}
5123 +/* Copy src into [b, e) with a '-' after every 4 characters; never writes past e - 1 and always NUL-terminates. */
5124 +static void _show_uuid(const char *src, char *b, char *e) {
5125 +       int i;
5126 +
5127 +       e--;    /* keep the last byte of the buffer for the terminator */
5128 +       for(i = 0; *src && (b < e); i++) {
5129 +               if(i && !(i & 0x3))
5130 +                       *b++ = '-';
5131 +               if(b < e) *b++ = *src++;        /* the dash may have used the last free byte */
5132 +       }
5133 +       *b = '\0';
5134 +}
5135 diff -ruN -X /home/joe/packages/dontdiff linux_2.4.1/drivers/md/lvm-snap.h linux/drivers/md/lvm-snap.h
5136 --- linux_2.4.1/drivers/md/lvm-snap.h   Fri Feb 16 14:51:26 2001
5137 +++ linux/drivers/md/lvm-snap.h Thu Jan  1 01:00:00 1970
5138 @@ -1,47 +0,0 @@
5139 -/*
5140 - * kernel/lvm-snap.h
5141 - *
5142 - * Copyright (C) 2001 Sistina Software
5143 - *
5144 - *
5145 - * LVM driver is free software; you can redistribute it and/or modify
5146 - * it under the terms of the GNU General Public License as published by
5147 - * the Free Software Foundation; either version 2, or (at your option)
5148 - * any later version.
5149 - *
5150 - * LVM driver is distributed in the hope that it will be useful,
5151 - * but WITHOUT ANY WARRANTY; without even the implied warranty of
5152 - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
5153 - * GNU General Public License for more details.
5154 - *
5155 - * You should have received a copy of the GNU General Public License
5156 - * along with GNU CC; see the file COPYING.  If not, write to
5157 - * the Free Software Foundation, 59 Temple Place - Suite 330,
5158 - * Boston, MA 02111-1307, USA.
5159 - *
5160 - */
5161 -
5162 -/*
5163 - * Changelog
5164 - *
5165 - *    05/01/2001:Joe Thornber - Factored this file out of lvm.c
5166 - *
5167 - */
5168 -
5169 -#ifndef LVM_SNAP_H
5170 -#define LVM_SNAP_H
5171 -
5172 -/* external snapshot calls */
5173 -extern inline int lvm_get_blksize(kdev_t);
5174 -extern int lvm_snapshot_alloc(lv_t *);
5175 -extern void lvm_snapshot_fill_COW_page(vg_t *, lv_t *);
5176 -extern int lvm_snapshot_COW(kdev_t, ulong, ulong, ulong, lv_t *);
5177 -extern int lvm_snapshot_remap_block(kdev_t *, ulong *, ulong, lv_t *);
5178 -extern void lvm_snapshot_release(lv_t *); 
5179 -extern int lvm_write_COW_table_block(vg_t *, lv_t *);
5180 -extern inline void lvm_hash_link(lv_block_exception_t *, 
5181 -                                kdev_t, ulong, lv_t *);
5182 -extern int lvm_snapshot_alloc_hash_table(lv_t *);
5183 -extern void lvm_drop_snapshot(lv_t *, const char *);
5184 -
5185 -#endif
This page took 0.428058 seconds and 3 git commands to generate.