1 diff -urN linux-2.4.22/drivers/md/Config.in linux-2.4.22-evms/drivers/md/Config.in
2 --- linux-2.4.22/drivers/md/Config.in   2003-09-15 17:07:45.000000000 +0200
3 +++ linux-2.4.22-evms/drivers/md/Config.in      2003-09-15 17:09:48.000000000 +0200
4 @@ -16,5 +16,9 @@
5  dep_tristate ' Logical volume manager (LVM) support' CONFIG_BLK_DEV_LVM $CONFIG_MD
6  dep_tristate ' Device-mapper support' CONFIG_BLK_DEV_DM $CONFIG_MD
7  dep_tristate '  Mirror (RAID-1) support' CONFIG_BLK_DEV_DM_MIRROR $CONFIG_BLK_DEV_DM
8 +if [ "$CONFIG_EXPERIMENTAL" = "y" ]; then
9 +   dep_tristate '   Bad Block Relocation Device Target' CONFIG_BLK_DEV_DM_BBR $CONFIG_BLK_DEV_DM
10 +   dep_tristate '   Sparse Device Target' CONFIG_BLK_DEV_DM_SPARSE $CONFIG_BLK_DEV_DM
11 +fi
12  
13  endmenu
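Both new options are gated on CONFIG_EXPERIMENTAL and, like the existing mirror target, depend on device-mapper (CONFIG_BLK_DEV_DM). As an illustration only (not part of the patch), a .config that builds the new targets as modules would carry lines such as:

    CONFIG_EXPERIMENTAL=y
    CONFIG_BLK_DEV_DM=m
    CONFIG_BLK_DEV_DM_BBR=m
    CONFIG_BLK_DEV_DM_SPARSE=m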
14 diff -urN linux-2.4.22/drivers/md/Makefile linux-2.4.22-evms/drivers/md/Makefile
15 --- linux-2.4.22/drivers/md/Makefile    2003-09-15 17:07:45.000000000 +0200
16 +++ linux-2.4.22-evms/drivers/md/Makefile       2003-09-15 17:09:48.000000000 +0200
17 @@ -30,6 +30,8 @@
18  
19  obj-$(CONFIG_BLK_DEV_DM)               += dm-mod.o
20  obj-$(CONFIG_BLK_DEV_DM_MIRROR)                += dm-mirror.o
21 +obj-$(CONFIG_BLK_DEV_DM_BBR)           += dm-bbr.o
22 +obj-$(CONFIG_BLK_DEV_DM_SPARSE)                += dm-sparse.o
23  
24  include $(TOPDIR)/Rules.make
25  
26 diff -urN linux-2.4.22/drivers/md/dm-bbr.c linux-2.4.22-evms/drivers/md/dm-bbr.c
27 --- linux-2.4.22/drivers/md/dm-bbr.c    1970-01-01 01:00:00.000000000 +0100
28 +++ linux-2.4.22-evms/drivers/md/dm-bbr.c       2003-09-15 17:08:42.000000000 +0200
29 @@ -0,0 +1,1228 @@
30 +/*
31 + *   Copyright (c) International Business Machines  Corp., 2002-2003
32 + *
33 + *   This program is free software;  you can redistribute it and/or modify
34 + *   it under the terms of the GNU General Public License as published by
35 + *   the Free Software Foundation; either version 2 of the License, or
36 + *   (at your option) any later version.
37 + *
38 + *   This program is distributed in the hope that it will be useful,
39 + *   but WITHOUT ANY WARRANTY;  without even the implied warranty of
40 + *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
41 + *   the GNU General Public License for more details.
42 + *
43 + *   You should have received a copy of the GNU General Public License
44 + *   along with this program;  if not, write to the Free Software
45 + *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
46 + *
47 + * linux/drivers/md/dm-bbr.c
48 + *
49 + * Bad-block-relocation (BBR) target for device-mapper.
50 + *
51 + * The BBR target is designed to remap I/O write failures to another safe
52 + * location on disk. Note that most disk drives have BBR built into them,
53 + * this means that our software BBR will be only activated when all hardware
54 + * BBR replacement sectors have been used.
55 + */
56 +
57 +#include <linux/kernel.h>
58 +#include <linux/module.h>
59 +#include <linux/init.h>
60 +#include <linux/blkdev.h>
61 +#include <linux/spinlock.h>
62 +#include <linux/smp_lock.h>
63 +#include <linux/slab.h>
64 +#include <linux/mempool.h>
65 +#include "dm.h"
66 +#include "dm-bbr.h"
67 +#include "dm-daemon.h"
68 +#include "dm-io.h"
69 +
70 +/* Number of active BBR devices. */
71 +static int bbr_instances = 0;
72 +static DECLARE_MUTEX(bbr_instances_lock);
73 +
74 +/* Data pertaining to the I/O thread. */
75 +static struct dm_daemon * bbr_io_thread = NULL;
76 +static spinlock_t bbr_io_list_lock = SPIN_LOCK_UNLOCKED;
77 +static LIST_HEAD(bbr_io_list);
78 +static void bbr_io_handler(void);
79 +
80 +/* Global pools for bbr_io_buf's and bbr_remap's. */
81 +static kmem_cache_t * bbr_io_buf_cache;
82 +static mempool_t * bbr_io_buf_pool;
83 +static kmem_cache_t * bbr_remap_cache;
84 +static mempool_t * bbr_remap_pool;
85 +
86 +static void bbr_free_remap(struct bbr_private * bbr_id);
87 +
88 +/**
89 + * destroy_pools
90 + *
91 + * Delete the pools for the remap list and I/O anchors.
92 + **/
93 +static void destroy_pools(void)
94 +{
95 +       if (bbr_io_buf_pool) {
96 +               mempool_destroy(bbr_io_buf_pool);
97 +               bbr_io_buf_pool = NULL;
98 +       }
99 +       if (bbr_io_buf_cache) {
100 +               kmem_cache_destroy(bbr_io_buf_cache);
101 +               bbr_io_buf_cache = NULL;
102 +       }
103 +       if (bbr_remap_pool) {
104 +               mempool_destroy(bbr_remap_pool);
105 +               bbr_remap_pool = NULL;
106 +       }
107 +       if (bbr_remap_cache) {
108 +               kmem_cache_destroy(bbr_remap_cache);
109 +               bbr_remap_cache = NULL;
110 +       }
111 +}
112 +
113 +/**
114 + * create_pools
115 + *
116 + * Create mempools for the remap list and I/O anchors.
117 + **/
118 +static int create_pools(void)
119 +{
120 +       if (!bbr_remap_cache) {
121 +               bbr_remap_cache = kmem_cache_create("BBR_Remap_Cache",
122 +                                                   sizeof(struct bbr_runtime_remap),
123 +                                                   0, SLAB_HWCACHE_ALIGN,
124 +                                                   NULL, NULL);
125 +               if (!bbr_remap_cache) {
126 +                       DMERR("Unable to create BBR remap cache.");
127 +                       goto out;
128 +               }
129 +       }
130 +       if (!bbr_remap_pool) {
131 +               bbr_remap_pool = mempool_create(64, mempool_alloc_slab,
132 +                                               mempool_free_slab,
133 +                                               bbr_remap_cache);
134 +               if (!bbr_remap_pool) {
135 +                       DMERR("Unable to create BBR remap mempool.");
136 +                       goto out;
137 +               }
138 +       }
139 +
140 +       if (!bbr_io_buf_cache) {
141 +               bbr_io_buf_cache = kmem_cache_create("BBR_IO_Buf_Cache",
142 +                                                    sizeof(struct bbr_io_buffer),
143 +                                                    0, SLAB_HWCACHE_ALIGN,
144 +                                                    NULL, NULL);
145 +               if (!bbr_io_buf_cache) {
146 +                       DMERR("Unable to create BBR I/O buffer cache.");
147 +                       goto out;
148 +               }
149 +       }
150 +       if (!bbr_io_buf_pool) {
151 +               bbr_io_buf_pool = mempool_create(256, mempool_alloc_slab,
152 +                                                mempool_free_slab,
153 +                                                bbr_io_buf_cache);
154 +               if (!bbr_io_buf_pool) {
155 +                       DMERR("Unable to create BBR I/O buffer mempool.");
156 +                       goto out;
157 +               }
158 +       }
159 +
160 +out:
161 +       if (!bbr_remap_cache  || !bbr_remap_pool ||
162 +           !bbr_io_buf_cache || !bbr_io_buf_pool ) {
163 +               destroy_pools();
164 +               return -ENOMEM;
165 +       }
166 +
167 +       return 0;
168 +}
169 +
170 +/**
171 + * stop_io_thread
172 + *
173 + * Use the dm-daemon services to stop the BBR I/O thread.
174 + **/
175 +static void stop_io_thread(void)
176 +{
177 +       if (bbr_io_thread) {
178 +               dm_daemon_stop(bbr_io_thread);
179 +               kfree(bbr_io_thread);
180 +               bbr_io_thread = NULL;
181 +       }
182 +}
183 +
184 +/**
185 + * start_io_thread
186 + *
187 + * Use the dm-daemon services to start the BBR I/O thread.
188 + **/
189 +static int start_io_thread(void)
190 +{
191 +       int rc;
192 +
193 +       if (!bbr_io_thread) {
194 +               bbr_io_thread = kmalloc(sizeof(*bbr_io_thread), GFP_KERNEL);
195 +               if (!bbr_io_thread) {
196 +                       return -ENOMEM;
197 +               }
198 +
199 +               rc = dm_daemon_start(bbr_io_thread, "bbr_io", bbr_io_handler);
200 +               if (rc) {
201 +                       kfree(bbr_io_thread);
202 +                       return rc;
203 +               }
204 +       }
205 +
206 +       return 0;
207 +}
208 +
209 +/**
210 + * bbr_global_init
211 + *
212 + * Set up the mempools, I/O thread, and sync-I/O service. This should
213 + * be called only when the first bbr device is created.
214 + **/
215 +static int bbr_global_init(void)
216 +{
217 +       int rc;
218 +
219 +       rc = create_pools();
220 +       if (rc) {
221 +               goto out;
222 +       }
223 +
224 +       rc = start_io_thread();
225 +       if (rc) {
226 +               destroy_pools();
227 +               goto out;
228 +       }
229 +
230 +       rc = dm_io_get(1);
231 +       if (rc) {
232 +               destroy_pools();
233 +               stop_io_thread();
234 +               goto out;
235 +       }
236 +
237 +out:
238 +       return rc;
239 +}
240 +
241 +/**
242 + * bbr_global_cleanup
243 + *
244 + * Clean up the mempools, I/O thread, and sync-I/O service. This should
245 + * be called only when the last bbr device is removed.
246 + **/
247 +static void bbr_global_cleanup(void)
248 +{
249 +       destroy_pools();
250 +       stop_io_thread();
251 +       dm_io_put(1);
252 +}
253 +
254 +static struct bbr_private * bbr_alloc_private(void)
255 +{
256 +       struct bbr_private * bbr_id;
257 +
258 +       bbr_id = kmalloc(sizeof(*bbr_id), GFP_KERNEL);
259 +       if (bbr_id) {
260 +               memset(bbr_id, 0, sizeof(*bbr_id));
261 +               bbr_id->in_use_replacement_blks = (atomic_t)ATOMIC_INIT(0);
262 +               bbr_id->bbr_id_lock = SPIN_LOCK_UNLOCKED;
263 +       }
264 +       
265 +       return bbr_id;
266 +}
267 +
268 +static void bbr_free_private(struct bbr_private * bbr_id)
269 +{
270 +       if (bbr_id->bbr_table) {
271 +               kfree(bbr_id->bbr_table);
272 +       }
273 +       bbr_free_remap(bbr_id);
274 +       kfree(bbr_id);
275 +}
276 +
277 +static u32 crc_table[256];
278 +static u32 crc_table_built = 0;
279 +
280 +static void build_crc_table(void)
281 +{
282 +       u32 i, j, crc;
283 +
284 +       for (i = 0; i <= 255; i++) {
285 +               crc = i;
286 +               for (j = 8; j > 0; j--) {
287 +                       if (crc & 1)
288 +                               crc = (crc >> 1) ^ CRC_POLYNOMIAL;
289 +                       else
290 +                               crc >>= 1;
291 +               }
292 +               crc_table[i] = crc;
293 +       }
294 +       crc_table_built = 1;
295 +}
296 +
297 +static u32 calculate_crc(u32 crc, void * buffer, u32 buffersize)
298 +{
299 +       unsigned char * current_byte;
300 +       u32 temp1, temp2, i;
301 +
302 +       current_byte = (unsigned char *) buffer;
303 +       /* Make sure the crc table is available */
304 +       if (!crc_table_built)
305 +               build_crc_table();
306 +       /* Process each byte in the buffer. */
307 +       for (i = 0; i < buffersize; i++) {
308 +               temp1 = (crc >> 8) & 0x00FFFFFF;
309 +               temp2 = crc_table[(crc ^ (u32) * current_byte) &
310 +                                 (u32) 0xff];
311 +               current_byte++;
312 +               crc = temp1 ^ temp2;
313 +       }
314 +       return crc;
315 +}
316 +
317 +/**
318 + * le_bbr_table_sector_to_cpu
319 + *
320 + * Convert bbr metadata from on-disk (LE) format
321 + * to the native cpu endian format.
322 + **/
323 +static void le_bbr_table_sector_to_cpu(struct bbr_table * p)
324 +{
325 +       int i;
326 +       p->signature            = le32_to_cpup(&p->signature);
327 +       p->crc                  = le32_to_cpup(&p->crc);
328 +       p->sequence_number      = le32_to_cpup(&p->sequence_number);
329 +       p->in_use_cnt           = le32_to_cpup(&p->in_use_cnt);
330 +       for (i = 0; i < BBR_ENTRIES_PER_SECT; i++) {
331 +               p->entries[i].bad_sect =
332 +                       le64_to_cpup(&p->entries[i].bad_sect);
333 +               p->entries[i].replacement_sect =
334 +                       le64_to_cpup(&p->entries[i].replacement_sect);
335 +       }
336 +}
337 +
338 +/**
339 + * cpu_bbr_table_sector_to_le
340 + *
341 + * Convert bbr metadata from cpu-endian format to on-disk (LE) format.
342 + **/
343 +static void cpu_bbr_table_sector_to_le(struct bbr_table * p,
344 +                                      struct bbr_table * le)
345 +{
346 +       int i;
347 +       le->signature           = cpu_to_le32p(&p->signature);
348 +       le->crc                 = cpu_to_le32p(&p->crc);
349 +       le->sequence_number     = cpu_to_le32p(&p->sequence_number);
350 +       le->in_use_cnt          = cpu_to_le32p(&p->in_use_cnt);
351 +       for (i = 0; i < BBR_ENTRIES_PER_SECT; i++) {
352 +               le->entries[i].bad_sect =
353 +                       cpu_to_le64p(&p->entries[i].bad_sect);
354 +               le->entries[i].replacement_sect =
355 +                       cpu_to_le64p(&p->entries[i].replacement_sect);
356 +       }
357 +}
358 +
359 +/**
360 + * validate_bbr_table_sector
361 + *
362 + * Check the specified BBR table sector for a valid signature and CRC. If it's
363 + * valid, endian-convert the table sector.
364 + **/
365 +static int validate_bbr_table_sector(struct bbr_table * p)
366 +{
367 +       int rc = 0;
368 +       int org_crc, final_crc;
369 +
370 +       if (le32_to_cpup(&p->signature) != BBR_TABLE_SIGNATURE) {
371 +               DMERR("BBR table signature doesn't match!");
372 +               DMERR("Found 0x%x. Expecting 0x%x",
373 +                     le32_to_cpup(&p->signature), BBR_TABLE_SIGNATURE);
374 +               rc = -EINVAL;
375 +               goto out;
376 +       }
377 +
378 +       if (!p->crc) {
379 +               DMERR("BBR table sector has no CRC!");
380 +               rc = -EINVAL;
381 +               goto out;
382 +       }
383 +
384 +       org_crc = le32_to_cpup(&p->crc);
385 +       p->crc = 0;
386 +       final_crc = calculate_crc(INITIAL_CRC, (void *)p, sizeof(*p));
387 +       if (final_crc != org_crc) {
388 +               DMERR("CRC failed!");
389 +               DMERR("Found 0x%x. Expecting 0x%x",
390 +                     org_crc, final_crc);
391 +               rc = -EINVAL;
392 +               goto out;
393 +       }
394 +
395 +       p->crc = cpu_to_le32p(&org_crc);
396 +       le_bbr_table_sector_to_cpu(p);
397 +
398 +out:
399 +       return rc;
400 +}
401 +
402 +/**
403 + * bbr_binary_tree_insert
404 + *
405 + * Insert a node into the binary tree.
406 + **/
407 +static void bbr_binary_tree_insert(struct bbr_runtime_remap ** root,
408 +                                  struct bbr_runtime_remap * newnode)
409 +{
410 +       struct bbr_runtime_remap ** node = root;
411 +       while (node && *node) {
412 +               if (newnode->remap.bad_sect > (*node)->remap.bad_sect) {
413 +                       node = &((*node)->right);
414 +               } else {
415 +                       node = &((*node)->left);
416 +               }
417 +       }
418 +       
419 +       newnode->left = newnode->right = NULL;
420 +       *node = newnode;
421 +}
422 +
423 +/**
424 + * bbr_binary_search
425 + *
426 + * Search for a node that contains bad_sect == lsn.
427 + **/
428 +static struct bbr_runtime_remap * bbr_binary_search(
429 +       struct bbr_runtime_remap * root,
430 +       u64 lsn)
431 +{
432 +       struct bbr_runtime_remap * node = root;
433 +       while (node) {
434 +               if (node->remap.bad_sect == lsn) {
435 +                       break;
436 +               }
437 +               if (lsn > node->remap.bad_sect) {
438 +                       node = node->right;
439 +               } else {
440 +                       node = node->left;
441 +               }
442 +       }
443 +       return node;
444 +}
445 +
446 +/**
447 + * bbr_binary_tree_destroy
448 + *
449 + * Destroy the binary tree.
450 + **/
451 +static void bbr_binary_tree_destroy(struct bbr_runtime_remap * root,
452 +                                   struct bbr_private * bbr_id)
453 +{
454 +       struct bbr_runtime_remap ** link = NULL;
455 +       struct bbr_runtime_remap * node = root;
456 +
457 +       while (node) {
458 +               if (node->left) {
459 +                       link = &(node->left);
460 +                       node = node->left;
461 +                       continue;
462 +               }
463 +               if (node->right) {
464 +                       link = &(node->right);
465 +                       node = node->right;
466 +                       continue;
467 +               }
468 +
469 +               mempool_free(node, bbr_remap_pool);
470 +               if (node == root) {
471 +                       /* If root is deleted, we're done. */
472 +                       break;
473 +               }
474 +
475 +               /* Back to root. */
476 +               node = root;
477 +               *link = NULL;
478 +       }
479 +}
480 +
481 +static void bbr_free_remap(struct bbr_private * bbr_id)
482 +{
483 +       spin_lock_irq(&bbr_id->bbr_id_lock);   
484 +       bbr_binary_tree_destroy(bbr_id->remap_root, bbr_id);
485 +       bbr_id->remap_root = NULL;
486 +       spin_unlock_irq(&bbr_id->bbr_id_lock);
487 +}
488 +
489 +/**
490 + * bbr_insert_remap_entry
491 + *
492 + * Create a new remap entry and add it to the binary tree for this node.
493 + **/
494 +static int bbr_insert_remap_entry(struct bbr_private * bbr_id,
495 +                                 struct bbr_table_entry * new_bbr_entry)
496 +{
497 +       struct bbr_runtime_remap * newnode;
498 +
499 +       newnode = mempool_alloc(bbr_remap_pool, GFP_NOIO);
500 +       if (!newnode) {
501 +               DMERR("Could not allocate from remap mempool!");
502 +               return -ENOMEM;
503 +       }
504 +       newnode->remap.bad_sect  = new_bbr_entry->bad_sect;
505 +       newnode->remap.replacement_sect = new_bbr_entry->replacement_sect;
506 +       spin_lock_irq(&bbr_id->bbr_id_lock);
507 +       bbr_binary_tree_insert(&bbr_id->remap_root, newnode);
508 +       spin_unlock_irq(&bbr_id->bbr_id_lock);
509 +       return 0;
510 +}
511 +
512 +/**
513 + * bbr_table_to_remap_list
514 + *
515 + * The on-disk bbr table is sorted by the replacement sector LBA. In order to
516 + * improve run-time performance, the in-memory remap list must be sorted by
517 + * the bad sector LBA. This function is called at discovery time to initialize
518 + * the remap list. This function assumes that at least one copy of the metadata
519 + * is valid.
520 + **/
521 +static u32 bbr_table_to_remap_list(struct bbr_private * bbr_id)
522 +{
523 +       u32 in_use_blks = 0;
524 +       int i, j;
525 +       struct bbr_table * p;
526 +       
527 +
528 +       for (i = 0, p = bbr_id->bbr_table;
529 +            i < bbr_id->nr_sects_bbr_table;
530 +            i++, p++ ) {
531 +               if (!p->in_use_cnt) {
532 +                       break;
533 +               }
534 +               in_use_blks += p->in_use_cnt;
535 +               for (j = 0; j < p->in_use_cnt; j++) {
536 +                       bbr_insert_remap_entry(bbr_id, &p->entries[j]);
537 +               }
538 +       }
539 +       if (in_use_blks)
540 +               DMWARN("There are %u BBR entries for device %u:%u",
541 +                      in_use_blks, MAJOR(bbr_id->dev->dev),
542 +                      MINOR(bbr_id->dev->dev));
543 +
544 +       return in_use_blks;
545 +}
546 +
547 +/**
548 + * bbr_search_remap_entry
549 + *
550 + * Search for a remap entry for the specified sector. If found, return a pointer to
551 + * the table entry. Otherwise, return NULL.
552 + **/
553 +static struct bbr_table_entry * bbr_search_remap_entry(
554 +       struct bbr_private * bbr_id,
555 +       u64 lsn)
556 +{
557 +       struct bbr_runtime_remap * p;
558 +
559 +       spin_lock_irq(&bbr_id->bbr_id_lock);
560 +       p = bbr_binary_search(bbr_id->remap_root, lsn);
561 +       spin_unlock_irq(&bbr_id->bbr_id_lock);
562 +       if (p) {
563 +               return (&p->remap);
564 +       } else {
565 +               return NULL;
566 +       }
567 +}
568 +
569 +/**
570 + * bbr_remap
571 + *
572 + * If *lsn is in the remap table, return TRUE and modify *lsn.
573 + * Otherwise, return FALSE.
574 + **/
575 +static inline int bbr_remap(struct bbr_private * bbr_id,
576 +                           u64 * lsn)
577 +{
578 +       struct bbr_table_entry * e;
579 +
580 +       if (atomic_read(&bbr_id->in_use_replacement_blks)) {
581 +               e = bbr_search_remap_entry(bbr_id, *lsn);
582 +               if (e) {
583 +                       *lsn = e->replacement_sect;
584 +                       return 1;
585 +               }
586 +       }
587 +       return 0;
588 +}
589 +
590 +/**
591 + * bbr_remap_probe
592 + *
593 + * If any of the sectors in the range [lsn, lsn+nr_sects] are in the remap
594 + * table, return TRUE. Otherwise, return FALSE.
595 + **/
596 +static inline int bbr_remap_probe(struct bbr_private * bbr_id,
597 +                                 u64 lsn, u64 nr_sects)
598 +{
599 +       u64 tmp, cnt;
600 +
601 +       if (atomic_read(&bbr_id->in_use_replacement_blks)) {
602 +               for (cnt = 0, tmp = lsn;
603 +                    cnt < nr_sects;
604 +                    cnt += bbr_id->blksize_in_sects, tmp = lsn + cnt) {
605 +                       if (bbr_remap(bbr_id,&tmp)) {
606 +                               return 1;
607 +                       }
608 +               }
609 +       }
610 +       return 0;
611 +}
612 +
613 +/**
614 + * bbr_setup
615 + *
616 + * Read the remap tables from disk and set up the initial remap tree.
617 + **/
618 +static int bbr_setup(struct bbr_private * bbr_id)
619 +{
620 +       struct bbr_table * table = bbr_id->bbr_table;
621 +       struct page * page;
622 +       struct io_region job;
623 +       unsigned int error, offset;
624 +       int i, rc = 0;
625 +
626 +       job.dev = bbr_id->dev->dev;
627 +       job.count = 1;
628 +
629 +       /* Read and verify each BBR table sector individually. */
630 +       for (i = 0; i < bbr_id->nr_sects_bbr_table; i++, table++) {
631 +               job.sector = bbr_id->lba_table1 + i;
632 +               page = virt_to_page(table);
633 +               offset = (unsigned long)table & ~PAGE_MASK;
634 +               rc = dm_io_sync(1, &job, READ, page, offset, &error);
635 +               if (rc && bbr_id->lba_table2) {
636 +                       job.sector = bbr_id->lba_table2 + i;
637 +                       rc = dm_io_sync(1, &job, READ, page, offset, &error);
638 +               }
639 +               if (rc) {
640 +                       goto out;
641 +               }
642 +
643 +               rc = validate_bbr_table_sector(table);
644 +               if (rc) {
645 +                       goto out;
646 +               }
647 +       }
648 +       atomic_set(&bbr_id->in_use_replacement_blks,
649 +                  bbr_table_to_remap_list(bbr_id));
650 +
651 +out:
652 +       if (rc) {
653 +               DMERR("dm-bbr: error during device setup: %d", rc);
654 +       }
655 +       return rc;
656 +}
657 +
658 +static struct bbr_io_buffer * allocate_bbr_io_buf(struct bbr_private * bbr_id,
659 +                                                 struct buffer_head * bh,
660 +                                                 int rw)
661 +{
662 +       struct bbr_io_buffer * bbr_io_buf;
663 +
664 +       bbr_io_buf = mempool_alloc(bbr_io_buf_pool, GFP_NOIO);
665 +       if (bbr_io_buf) {
666 +               memset(bbr_io_buf, 0, sizeof(struct bbr_io_buffer));
667 +               INIT_LIST_HEAD(&bbr_io_buf->bbr_io_list);
668 +               bbr_io_buf->bbr_id = bbr_id;
669 +               bbr_io_buf->sector = bh->b_rsector;
670 +               bbr_io_buf->bh = bh;
671 +               bbr_io_buf->rw = rw;
672 +       } else {
673 +               DMWARN("Could not allocate from BBR I/O buffer pool!");
674 +       }
675 +       return bbr_io_buf;
676 +}
677 +
678 +static void free_bbr_io_buf(struct bbr_io_buffer * bbr_io_buf)
679 +{
680 +       mempool_free(bbr_io_buf, bbr_io_buf_pool);
681 +}
682 +
683 +/**
684 + * bbr_io_remap_error
685 + * @bbr_id:            Private data for the BBR node.
686 + * @rw:                        READ or WRITE.
687 + * @starting_lsn:      Starting sector of request to remap.
688 + * @count:             Number of sectors in the request.
689 + * @buffer:            Data buffer for the request.
690 + *
691 + * For the requested range, try to write each sector individually. For each
692 + * sector that fails, find the next available remap location and write the
693 + * data to that new location. Then update the table and write both copies
694 + * of the table to disk. Finally, update the in-memory mapping and do any
695 + * other necessary bookkeeping.
696 + **/
697 +static int bbr_io_remap_error(struct bbr_private * bbr_id,
698 +                             int rw,
699 +                             u64 starting_lsn,
700 +                             u64 count,
701 +                             char * buffer)
702 +{
703 +       struct bbr_table * bbr_table;
704 +       struct io_region job;
705 +       struct page * page;
706 +       unsigned long table_sector_index;
707 +       unsigned long table_sector_offset;
708 +       unsigned long index;
709 +       unsigned int offset_in_page, error;
710 +       u64 lsn, new_lsn;
711 +       int rc;
712 +
713 +       if (rw == READ) {
714 +               /* Nothing can be done about read errors. */
715 +               return -EIO;
716 +       }
717 +
718 +       job.dev = bbr_id->dev->dev;
719 +
720 +       /* For each sector in the request. */
721 +       for (lsn = 0; lsn < count; lsn++, buffer += SECTOR_SIZE) {
722 +               job.sector = starting_lsn + lsn;
723 +               job.count = 1;
724 +               page = virt_to_page(buffer);
725 +               offset_in_page = (unsigned long)buffer & ~PAGE_MASK;
726 +               rc = dm_io_sync(1, &job, rw, page, offset_in_page, &error);
727 +               while (rc) {
728 +                       /* Find the next available relocation sector. */
729 +                       new_lsn = atomic_read(&bbr_id->in_use_replacement_blks);
730 +                       if (new_lsn >= bbr_id->nr_replacement_blks) {
731 +                               /* No more replacement sectors available. */
732 +                               return -EIO;
733 +                       }
734 +                       new_lsn += bbr_id->start_replacement_sect;
735 +
736 +                       /* Write the data to its new location. */
737 +                       DMWARN("dm-bbr: device %u:%u: Trying to remap bad sector "PFU64" to sector "PFU64,
738 +                              MAJOR(bbr_id->dev->dev), MINOR(bbr_id->dev->dev),
739 +                              starting_lsn + lsn, new_lsn);
740 +                       job.sector = new_lsn;
741 +                       rc = dm_io_sync(1, &job, rw, page, offset_in_page, &error);
742 +                       if (rc) {
743 +                               /* This replacement sector is bad.
744 +                                * Try the next one.
745 +                                */
746 +                               DMERR("dm-bbr: device %u:%u: replacement sector "PFU64" is bad. Skipping.",
747 +                                     MAJOR(bbr_id->dev->dev), MINOR(bbr_id->dev->dev), new_lsn);
748 +                               atomic_inc(&bbr_id->in_use_replacement_blks);
749 +                               continue;
750 +                       }
751 +
752 +                       /* Add this new entry to the on-disk table. */
753 +                       table_sector_index = new_lsn -
754 +                                            bbr_id->start_replacement_sect;
755 +                       table_sector_offset = table_sector_index /
756 +                                             BBR_ENTRIES_PER_SECT;
757 +                       index = table_sector_index % BBR_ENTRIES_PER_SECT;
758 +
759 +                       bbr_table = &bbr_id->bbr_table[table_sector_offset];
760 +                       bbr_table->entries[index].bad_sect = starting_lsn + lsn;
761 +                       bbr_table->entries[index].replacement_sect = new_lsn;
762 +                       bbr_table->in_use_cnt++;
763 +                       bbr_table->sequence_number++;
764 +                       bbr_table->crc = 0;
765 +                       bbr_table->crc = calculate_crc(INITIAL_CRC,
766 +                                                      bbr_table,
767 +                                                      sizeof(struct bbr_table));
768 +
769 +                       /* Write the table to disk. */
770 +                       cpu_bbr_table_sector_to_le(bbr_table, bbr_table);
771 +                       page = virt_to_page(bbr_table);
772 +                       offset_in_page = (unsigned long)bbr_table & ~PAGE_MASK;
773 +                       if (bbr_id->lba_table1) {
774 +                               job.sector = bbr_id->lba_table1 + table_sector_offset;
775 +                               job.count = 1;
776 +                               rc = dm_io_sync(1, &job, WRITE, page, offset_in_page, &error);
777 +                       }
778 +                       if (bbr_id->lba_table2) {
779 +                               job.sector = bbr_id->lba_table2 + table_sector_offset;
780 +                               rc |= dm_io_sync(1, &job, WRITE, page, offset_in_page, &error);
781 +                       }
782 +                       le_bbr_table_sector_to_cpu(bbr_table);
783 +
784 +                       if (rc) {
785 +                               /* Error writing one of the tables to disk. */
786 +                               DMERR("dm-bbr: device %u:%u: error updating BBR tables on disk.",
787 +                                     MAJOR(bbr_id->dev->dev), MINOR(bbr_id->dev->dev));
788 +                               return rc;
789 +                       }
790 +
791 +                       /* Insert a new entry in the remapping binary-tree. */
792 +                       rc = bbr_insert_remap_entry(bbr_id,
793 +                                                   &bbr_table->entries[index]);
794 +                       if (rc) {
795 +                               DMERR("dm-bbr: device %u:%u: error adding new entry to remap tree.",
796 +                                     MAJOR(bbr_id->dev->dev), MINOR(bbr_id->dev->dev));
797 +                               return rc;
798 +                       }
799 +
800 +                       atomic_inc(&bbr_id->in_use_replacement_blks);
801 +               }
802 +       }
803 +
804 +       return 0;
805 +}
806 +
807 +/**
808 + * bbr_io_process_request
809 + *
810 + * For each sector in this request, check if the sector has already
811 + * been remapped. If so, process all previous sectors in the request,
812 + * followed by the remapped sector. Then reset the starting lsn and
813 + * count, and keep going with the rest of the request as if it were
814 + * a whole new request. If any of the sync_io's return an error,
815 + * call the remapper to relocate the bad sector(s).
816 + **/
817 +static int bbr_io_process_request(struct bbr_io_buffer * bbr_io_buf)
818 +{
819 +       struct bbr_private * bbr_id = bbr_io_buf->bbr_id;
820 +       struct io_region job;
821 +       u64 starting_lsn = bbr_io_buf->sector;
822 +       u64 count = bbr_io_buf->bh->b_size >> SECTOR_SHIFT;
823 +       u64 lsn, remapped_lsn;
824 +       char * buffer = bbr_io_buf->bh->b_data;
825 +       struct page * page = virt_to_page(buffer);
826 +       unsigned int offset_in_page = (unsigned long)buffer & ~PAGE_MASK;
827 +       unsigned int error;
828 +       int rw = bbr_io_buf->rw;
829 +       int rc = 0;
830 +
831 +       job.dev = bbr_id->dev->dev;
832 +
833 +       /* For each sector in this request, check if this sector has already
834 +        * been remapped. If so, process all previous sectors in this request,
835 +        * followed by the remapped sector. Then reset the starting lsn and
836 +        * count and keep going with the rest of the request as if it were
837 +        * a whole new request.
838 +        */
839 +       for (lsn = 0; lsn < count; lsn++) {
840 +               remapped_lsn = starting_lsn + lsn;
841 +               rc = bbr_remap(bbr_id, &remapped_lsn);
842 +               if (!rc) {
843 +                       /* This sector is fine. */
844 +                       continue;
845 +               }
846 +
847 +               /* Process all sectors in the request up to this one. */
848 +               if (lsn > 0) {
849 +                       job.sector = starting_lsn;
850 +                       job.count = lsn;
851 +                       rc = dm_io_sync(1, &job, rw, page, offset_in_page, &error);
852 +                       if (rc) {
853 +                               /* If this I/O failed, then one of the sectors
854 +                                * in this request needs to be relocated.
855 +                                */
856 +                               rc = bbr_io_remap_error(bbr_id, bbr_io_buf->rw, starting_lsn,
857 +                                                       lsn, buffer);
858 +                               if (rc) {
859 +                                       return rc;
860 +                               }
861 +                       }
862 +                       buffer += (lsn << SECTOR_SHIFT);
863 +                       page = virt_to_page(buffer);
864 +                       offset_in_page = (unsigned long)buffer & ~PAGE_MASK;
865 +               }
866 +
867 +               /* Process the remapped sector. */
868 +               job.sector = remapped_lsn;
869 +               job.count = 1;
870 +               rc = dm_io_sync(1, &job, rw, page, offset_in_page, &error);
871 +               if (rc) {
872 +                       /* BUGBUG - Need more processing if this caused an
873 +                        * error. If this I/O failed, then the existing
874 +                        * remap is now bad, and we need to find a new remap.
875 +                        * Can't use bbr_io_remap_error(), because the existing
876 +                        * map entry needs to be changed, not added again, and
877 +                        * the original table entry also needs to be changed.
878 +                        */
879 +                       return rc;
880 +               }
881 +
882 +               buffer          += SECTOR_SIZE;
883 +               starting_lsn    += (lsn + 1);
884 +               count           -= (lsn + 1);
885 +               lsn             = -1;
886 +               page            = virt_to_page(buffer);
887 +               offset_in_page  = (unsigned long)buffer & ~PAGE_MASK;
888 +       }
889 +
890 +       /* Check for any remaining sectors after the last split. This could
891 +        * potentially be the whole request, but that should be a rare case
892 +        * because requests should only be processed by the thread if we know
893 +        * an error occurred or they contained one or more remapped sectors.
894 +        */
895 +       if (count) {
896 +               job.sector = starting_lsn;
897 +               job.count = count;
898 +               rc = dm_io_sync(1, &job, rw, page, offset_in_page, &error);
899 +               if (rc) {
900 +                       /* If this I/O failed, then one of the sectors in this
901 +                        * request needs to be relocated.
902 +                        */
903 +                       rc = bbr_io_remap_error(bbr_id, bbr_io_buf->rw, starting_lsn,
904 +                                               count, buffer);
905 +                       if (rc) {
906 +                               return rc;
907 +                       }
908 +               }
909 +       }
910 +
911 +       return 0;
912 +}
913 +
914 +/**
915 + * bbr_io_handler
916 + *
917 + * This is the handler for the bbr_io_thread. It continuously loops,
918 + * taking I/O requests off its list and processing them. If nothing
919 + * is on the list, the thread goes back to sleep until specifically
920 + * woken up.
921 + *
922 + * I/O requests should only be sent to this thread if we know that:
923 + * a) the request contains at least one remapped sector.
924 + *   or
925 + * b) the request caused an error on the normal I/O path.
926 + * This function uses synchronous I/O, so sending a request to this
927 + * thread that doesn't need special processing will cause severe
928 + * performance degradation.
929 + **/
930 +static void bbr_io_handler(void)
931 +{
932 +       struct bbr_io_buffer * bbr_io_buf;
933 +       struct buffer_head * bh;
934 +       unsigned long flags;
935 +       int rc;
936 +
937 +       while (1) {
938 +               /* Process bbr_io_list, one entry at a time. */
939 +               spin_lock_irqsave(&bbr_io_list_lock, flags);
940 +               if (list_empty(&bbr_io_list)) {
941 +                       /* No more items on the list. */
942 +                       spin_unlock_irqrestore(&bbr_io_list_lock, flags);
943 +                       break;
944 +               }
945 +               bbr_io_buf = list_entry(bbr_io_list.next,
946 +                                       struct bbr_io_buffer, bbr_io_list);
947 +               list_del_init(&bbr_io_buf->bbr_io_list);
948 +               spin_unlock_irqrestore(&bbr_io_list_lock, flags);
949 +
950 +               rc = bbr_io_process_request(bbr_io_buf);
951 +
952 +               /* Clean up and complete the original I/O. */
953 +               bbr_io_buf->flags |= BBR_IO_HANDLED;
954 +               bh = bbr_io_buf->bh;
955 +               if (bh->b_end_io) {
956 +                       /* If this was the bbr_io_buf for an error on the
957 +                        * normal WRITE, don't free it here. It will be
958 +                        * freed later in bbr_callback()
959 +                        */
960 +                       if (!(bbr_io_buf->flags & BBR_IO_RELOCATE))
961 +                               free_bbr_io_buf(bbr_io_buf);
962 +                       bh->b_end_io(bh, rc ? 0 : 1);
963 +               }
964 +       }
965 +}
966 +
967 +/**
968 + * bbr_schedule_io
969 + *
970 + * Place the specified bbr_io_buf on the thread's processing list.
971 + **/
972 +static void bbr_schedule_io(struct bbr_io_buffer * bbr_io_buf)
973 +{
974 +       unsigned long flags;
975 +       spin_lock_irqsave(&bbr_io_list_lock, flags);
976 +       list_add_tail(&bbr_io_buf->bbr_io_list, &bbr_io_list);
977 +       spin_unlock_irqrestore(&bbr_io_list_lock, flags);
978 +       dm_daemon_wake(bbr_io_thread);
979 +}
980 +
981 +/**
982 + * bbr_read
983 + *
984 + * If there are any remapped sectors on this object, send this request over
985 + * to the thread for processing. Otherwise send it down the stack normally.
986 + **/
987 +static int bbr_read(struct bbr_private * bbr_id,
988 +                   struct buffer_head * bh)
989 +{
990 +       struct bbr_io_buffer * bbr_io_buf;
991 +
992 +
993 +       if (atomic_read(&bbr_id->in_use_replacement_blks) == 0 ||
994 +           !bbr_remap_probe(bbr_id, bh->b_rsector,
995 +                            bh->b_size >> SECTOR_SHIFT)) {
996 +               /* No existing remaps or this request doesn't
997 +                * contain any remapped sectors.
998 +                */
999 +               bh->b_rdev = bbr_id->dev->dev;
1000 +               return 1;
1001 +       }
1002 +
1003 +       /* This request has at least one remapped sector. */
1004 +       bbr_io_buf = allocate_bbr_io_buf(bbr_id, bh, READ);
1005 +       if (!bbr_io_buf) {
1006 +               /* Can't get memory to track the I/O. */
1007 +               bh->b_end_io(bh, 0);
1008 +               return -ENOMEM;
1009 +       }
1010 +
1011 +       bbr_schedule_io(bbr_io_buf);
1012 +       return 0;
1013 +}
1014 +
1015 +/**
1016 + * bbr_callback
1017 + *
1018 + * This is the callback for normal write requests. Check for an error
1019 + * during the I/O, and send to the thread for processing if necessary.
1020 + **/
1021 +static int bbr_callback(struct dm_target * ti,
1022 +                       struct buffer_head * bh,
1023 +                       int rw,
1024 +                       int error,
1025 +                       union map_info * map_context)
1026 +{
1027 +       struct bbr_io_buffer * bbr_io_buf = (struct bbr_io_buffer *) map_context->ptr;
1028 +
1029 +       if (!bbr_io_buf)
1030 +               return error;
1031 +
1032 +       /* Try to relocate the WRITE if:
1033 +        * - the I/O returned an error, and
1034 +        * - the error was not on a BBR relocation I/O (BBR_IO_HANDLED not set).
1035 +        */
1036 +       if (error && !(bbr_io_buf->flags & BBR_IO_HANDLED)) {
1037 +               DMERR("dm-bbr: device %u:%u: Write failure on sector %lu. Scheduling for retry.",
1038 +                     MAJOR(bh->b_rdev), MINOR(bh->b_rdev),
1039 +                     (unsigned long)bbr_io_buf->sector);
1040 +               /* Indicate this bbr_io_buf is for an error on normal WRITE */
1041 +               bbr_io_buf->flags |= BBR_IO_RELOCATE;
1042 +               bbr_schedule_io(bbr_io_buf);
1043 +               /* Returns >0 so that DM will let us retry the I/O */
1044 +               return 1;
1045 +       }
1046 +
1047 +       free_bbr_io_buf(bbr_io_buf);
1048 +       return error;
1049 +}
1050 +
1051 +/**
1052 + * bbr_write
1053 + *
1054 + * If there are any remapped sectors on this object, send the request over
1055 + * to the thread for processing. Otherwise, register for callback
1056 + * notification, and send the request down normally.
1057 + **/
1058 +static int bbr_write(struct bbr_private * bbr_id,
1059 +                    struct buffer_head * bh,
1060 +                    union map_info * map_context)
1061 +{
1062 +       struct bbr_io_buffer * bbr_io_buf;
1063 +
1064 +       bbr_io_buf = allocate_bbr_io_buf(bbr_id, bh, WRITE);
1065 +       if (!bbr_io_buf) {
1066 +               /* Can't get memory to track the I/O. */
1067 +               bh->b_end_io(bh, 0);
1068 +               return -ENOMEM;
1069 +       }
1070 +
1071 +       if (atomic_read(&bbr_id->in_use_replacement_blks) == 0 ||
1072 +           !bbr_remap_probe(bbr_id, bh->b_rsector,
1073 +                            bh->b_size >> SECTOR_SHIFT)) {
1074 +               /* No existing remaps or this request
1075 +                * contains no remapped sectors.
1076 +                */
1077 +               bh->b_rdev = bbr_id->dev->dev;
1078 +               map_context->ptr = bbr_io_buf;
1079 +               return 1;
1080 +       } else {
1081 +               /* This request contains at least one remapped sector. */
1082 +               map_context->ptr = NULL;
1083 +               bbr_schedule_io(bbr_io_buf);
1084 +       }
1085 +       return 0;
1086 +}
1087 +
1088 +/**
1089 + * Construct a bbr mapping
1090 + **/
1091 +static int bbr_ctr(struct dm_target * ti, unsigned int argc, char ** argv)
1092 +{
1093 +       struct bbr_private * bbr_id;
1094 +       u32 block_size;
1095 +       char * end;
1096 +       int rc = -EINVAL;
1097 +
1098 +       if (argc != 8) {
1099 +               ti->error = "dm-bbr requires exactly 8 arguments: "
1100 +                           "device offset table1_lsn table2_lsn table_size start_replacement nr_replacement_blks block_size";
1101 +               goto out1;
1102 +       }
1103 +
1104 +       bbr_id = bbr_alloc_private();
1105 +       if (!bbr_id) {
1106 +               ti->error = "dm-bbr: Error allocating bbr private data.";
1107 +               goto out1;
1108 +       }
1109 +
1110 +       bbr_id->offset = simple_strtoull(argv[1], &end, 10);
1111 +       bbr_id->lba_table1 = simple_strtoull(argv[2], &end, 10);
1112 +       bbr_id->lba_table2 = simple_strtoull(argv[3], &end, 10);
1113 +       bbr_id->nr_sects_bbr_table = simple_strtoull(argv[4], &end, 10);
1114 +       bbr_id->start_replacement_sect = simple_strtoull(argv[5], &end, 10);
1115 +       bbr_id->nr_replacement_blks = simple_strtoull(argv[6], &end, 10);
1116 +       block_size = simple_strtoul(argv[7], &end, 10);
1117 +       bbr_id->blksize_in_sects = (block_size >> SECTOR_SHIFT);
1118 +
1119 +       bbr_id->bbr_table = kmalloc(bbr_id->nr_sects_bbr_table << SECTOR_SHIFT,
1120 +                                   GFP_KERNEL);
1121 +       if (!bbr_id->bbr_table) {
1122 +               ti->error = "dm-bbr: Error allocating bbr table.";
1123 +               goto out2;
1124 +       }
1125 +
1126 +       if (dm_get_device(ti, argv[0], 0, ti->len,
1127 +                         dm_table_get_mode(ti->table), &bbr_id->dev)) {
1128 +               ti->error = "dm-bbr: Device lookup failed";
1129 +               goto out2;
1130 +       }
1131 +
1132 +       /* Using a semaphore here is probably overkill,
1133 +        * but at least it will be correct.
1134 +        */
1135 +       down(&bbr_instances_lock);
1136 +       if (bbr_instances == 0) {
1137 +               rc = bbr_global_init();
1138 +               if (rc) {
1139 +                       up(&bbr_instances_lock);
1140 +                       goto out3;
1141 +               }
1142 +       }
1143 +       bbr_instances++;
1144 +       up(&bbr_instances_lock);
1145 +
1146 +       rc = bbr_setup(bbr_id);
1147 +       if (rc) {
1148 +               ti->error = "dm-bbr: Device setup failed";
1149 +               goto out4;
1150 +       }
1151 +
1152 +       ti->private = bbr_id;
1153 +       return 0;
1154 +
1155 +out4:
1156 +       down(&bbr_instances_lock);
1157 +       bbr_instances--;
1158 +       if (bbr_instances == 0) {
1159 +               bbr_global_cleanup();
1160 +       }
1161 +       up(&bbr_instances_lock);
1162 +
1163 +out3:
1164 +       dm_put_device(ti, bbr_id->dev);
1165 +out2:
1166 +       bbr_free_private(bbr_id);
1167 +out1:
1168 +       return rc;
1169 +}
1170 +
1171 +static void bbr_dtr(struct dm_target * ti)
1172 +{
1173 +       struct bbr_private * bbr_id = (struct bbr_private *) ti->private;
1174 +
1175 +       dm_put_device(ti, bbr_id->dev);
1176 +       bbr_free_private(bbr_id);
1177 +
1178 +       down(&bbr_instances_lock);
1179 +       bbr_instances--;
1180 +       if (bbr_instances == 0) {
1181 +               bbr_global_cleanup();
1182 +       }
1183 +       up(&bbr_instances_lock);
1184 +}
1185 +
1186 +static int bbr_map(struct dm_target * ti, struct buffer_head * bh, int rw,
1187 +                  union map_info * map_context)
1188 +{
1189 +       struct bbr_private * bbr_id = (struct bbr_private *) ti->private;
1190 +
1191 +       bh->b_rsector += bbr_id->offset;
1192 +       switch (rw) {
1193 +               case READ:
1194 +               case READA:
1195 +                       map_context->ptr = NULL;
1196 +                       return bbr_read(bbr_id, bh);
1197 +               case WRITE:
1198 +                       return bbr_write(bbr_id, bh, map_context);
1199 +               default:
1200 +                       return -EIO;
1201 +       }
1202 +}
1203 +
1204 +static int bbr_status(struct dm_target * ti, status_type_t type,
1205 +                     char * result, unsigned int maxlen)
1206 +{
1207 +       struct bbr_private * bbr_id = (struct bbr_private *) ti->private;
1208 +
1209 +       switch (type) {
1210 +       case STATUSTYPE_INFO:
1211 +               result[0] = '\0';
1212 +               break;
1213 +
1214 +       case STATUSTYPE_TABLE:
1215 +               snprintf(result, maxlen, "%s "PFU64" "PFU64" "PFU64" "PFU64" "PFU64" "PFU64" %u",
1216 +                        dm_kdevname(bbr_id->dev->dev), bbr_id->offset,
1217 +                        bbr_id->lba_table1, bbr_id->lba_table2,
1218 +                        bbr_id->nr_sects_bbr_table,
1219 +                        bbr_id->start_replacement_sect,
1220 +                        bbr_id->nr_replacement_blks,
1221 +                        bbr_id->blksize_in_sects << SECTOR_SHIFT);
1222 +                break;
1223 +       }
1224 +       return 0;
1225 +}
1226 +
1227 +static struct target_type bbr_target = {
1228 +       name:   "bbr",
1229 +       module: THIS_MODULE,
1230 +       ctr:    bbr_ctr,
1231 +       dtr:    bbr_dtr,
1232 +       map:    bbr_map,
1233 +       end_io: bbr_callback,
1234 +       status: bbr_status,
1235 +};
1236 +
1237 +int __init dm_bbr_init(void)
1238 +{
1239 +       int r = dm_register_target(&bbr_target);
1240 +
1241 +       if (r < 0)
1242 +               DMERR("dm-bbr: register failed %d", r);
1243 +
1244 +       return r;
1245 +}
1246 +
1247 +void __exit dm_bbr_exit(void)
1248 +{
1249 +       int r = dm_unregister_target(&bbr_target);
1250 +
1251 +       if (r < 0)
1252 +               DMERR("dm-bbr: unregister failed %d", r);
1253 +}
1254 +
1255 +module_init(dm_bbr_init);
1256 +module_exit(dm_bbr_exit);
1257 +MODULE_LICENSE("GPL");
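For reference, the integrity check on each BBR table sector (validate_bbr_table_sector() above) recomputes a table-driven CRC-32 over the whole sector with the crc field zeroed and compares the result to the stored value. The fragment below is a user-space sketch, not part of the patch; it mirrors build_crc_table()/calculate_crc() so a table sector dumped from disk can be checked by hand. Note that the kernel code keeps the raw CRC register (initial value 0xFFFFFFFF, no final XOR), so the result is not the usual zlib-style CRC-32 value.

#include <stdio.h>
#include <stdint.h>
#include <string.h>

#define INITIAL_CRC     0xFFFFFFFFu
#define CRC_POLYNOMIAL  0xEDB88320u

static uint32_t crc_table[256];

static void build_crc_table(void)
{
        uint32_t i, j, crc;

        for (i = 0; i < 256; i++) {
                crc = i;
                for (j = 8; j > 0; j--)
                        crc = (crc & 1) ? (crc >> 1) ^ CRC_POLYNOMIAL : crc >> 1;
                crc_table[i] = crc;
        }
}

/* Same byte-at-a-time update step as calculate_crc() in dm-bbr.c. */
static uint32_t calculate_crc(uint32_t crc, const void *buffer, uint32_t size)
{
        const unsigned char *p = buffer;
        uint32_t i;

        for (i = 0; i < size; i++)
                crc = ((crc >> 8) & 0x00FFFFFF) ^ crc_table[(crc ^ p[i]) & 0xff];
        return crc;
}

int main(void)
{
        unsigned char sector[512];

        /* Stand-in for one on-disk BBR table sector with its crc field zeroed. */
        memset(sector, 0, sizeof(sector));
        build_crc_table();
        printf("crc = 0x%08lx\n",
               (unsigned long)calculate_crc(INITIAL_CRC, sector, sizeof(sector)));
        return 0;
}

The table-driven form trades a 1 KB lookup table for processing one byte per step instead of one bit, which is why build_crc_table() is run once and cached behind crc_table_built.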
1258 diff -urN linux-2.4.22/drivers/md/dm-bbr.h linux-2.4.22-evms/drivers/md/dm-bbr.h
1259 --- linux-2.4.22/drivers/md/dm-bbr.h    1970-01-01 01:00:00.000000000 +0100
1260 +++ linux-2.4.22-evms/drivers/md/dm-bbr.h       2003-09-15 17:08:42.000000000 +0200
1261 @@ -0,0 +1,148 @@
1262 +/*
1263 + *   Copyright (c) International Business Machines  Corp., 2002-2003
1264 + *
1265 + *   This program is free software;  you can redistribute it and/or modify
1266 + *   it under the terms of the GNU General Public License as published by
1267 + *   the Free Software Foundation; either version 2 of the License, or
1268 + *   (at your option) any later version.
1269 + *
1270 + *   This program is distributed in the hope that it will be useful,
1271 + *   but WITHOUT ANY WARRANTY;  without even the implied warranty of
1272 + *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
1273 + *   the GNU General Public License for more details.
1274 + *
1275 + *   You should have received a copy of the GNU General Public License
1276 + *   along with this program;  if not, write to the Free Software
1277 + *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
1278 + *
1279 + * linux/drivers/md/dm-bbr.h
1280 + *
1281 + * Bad-block-relocation (BBR) target for device-mapper.
1282 + *
1283 + * The BBR target is designed to remap I/O write failures to another safe
1284 + * location on disk. Note that most disk drives have BBR built into them;
1285 + * this means that our software BBR will only be activated when all hardware
1286 + * BBR replacement sectors have been used.
1287 + */
1288 +
1289 +#ifndef _DM_BBR_H_
1290 +#define _DM_BBR_H_
1291 +
1292 +#define BBR_TABLE_SIGNATURE            0x42627254 /* BbrT */
1293 +#define BBR_ENTRIES_PER_SECT           31
1294 +#define BBR_NR_BUFS                    128
1295 +#define INITIAL_CRC                    0xFFFFFFFF
1296 +#define CRC_POLYNOMIAL                 0xEDB88320L
1297 +
1298 +/**
1299 + * Macros to cleanly print 64-bit numbers on both 32-bit and 64-bit machines.
1300 + * Use these in place of %Ld, %Lu, and %Lx.
1301 + **/
1302 +#if BITS_PER_LONG > 32
1303 +#define PFU64 "%lu"
1304 +#else
1305 +#define PFU64 "%Lu"
1306 +#endif
1307 +
1308 +/**
1309 + * struct bbr_table_entry
1310 + * @bad_sect:          LBA of bad location.
1311 + * @replacement_sect:  LBA of new location.
1312 + *
1313 + * Structure to describe one BBR remap.
1314 + **/
1315 +struct bbr_table_entry {
1316 +       u64 bad_sect;
1317 +       u64 replacement_sect;
1318 +};
1319 +
1320 +/**
1321 + * struct bbr_table
1322 + * @signature:         Signature on each BBR table sector.
1323 + * @crc:               CRC for this table sector.
1324 + * @sequence_number:   Used to resolve conflicts when primary and secondary
1325 + *                     tables do not match.
1326 + * @in_use_cnt:                Number of in-use table entries.
1327 + * @entries:           Actual table of remaps.
1328 + *
1329 + * Structure to describe each sector of the metadata table. Each sector in this
1330 + * table can describe 31 remapped sectors.
1331 + **/
1332 +struct bbr_table {
1333 +       u32                     signature;
1334 +       u32                     crc;
1335 +       u32                     sequence_number;
1336 +       u32                     in_use_cnt;
1337 +       struct bbr_table_entry  entries[BBR_ENTRIES_PER_SECT];
1338 +};
1339 +
1340 +/**
1341 + * struct bbr_runtime_remap
1342 + *
1343 + * Node in the binary tree used to keep track of remaps.
1344 + **/
1345 +struct bbr_runtime_remap {
1346 +       struct bbr_table_entry          remap;
1347 +       struct bbr_runtime_remap        *left;
1348 +       struct bbr_runtime_remap        *right;
1349 +};
1350 +
1351 +/**
1352 + * struct bbr_private
1353 + * @dev:                       Info about underlying device.
1354 + * @bbr_table:                 Copy of metadata table.
1355 + * @offset:                    LBA of data area.
1356 + * @lba_table1:                        LBA of primary BBR table.
1357 + * @lba_table2:                        LBA of secondary BBR table.
1358 + * @nr_sects_bbr_table:                Size of each BBR table.
1359 + * @nr_replacement_blks:       Number of replacement blocks.
1360 + * @start_replacement_sect:    LBA of start of replacement blocks.
1361 + * @blksize_in_sects:          Size of each block.
1362 + * @in_use_replacement_blks:   Current number of remapped blocks.
1363 + * @remap_root:                        Binary tree containing all remaps.
1364 + * @bbr_id_lock:               Lock for the binary tree.
1365 + *
1366 + * Private data for each BBR target.
1367 + **/
1368 +struct bbr_private {
1369 +       struct dm_dev                   * dev;
1370 +       struct bbr_table                * bbr_table;
1371 +       struct bbr_runtime_remap        * remap_root;
1372 +       u64                             offset;
1373 +       u64                             lba_table1;
1374 +       u64                             lba_table2;
1375 +       u64                             nr_sects_bbr_table;
1376 +       u64                             start_replacement_sect;
1377 +       u64                             nr_replacement_blks;
1378 +       u32                             blksize_in_sects;
1379 +       atomic_t                        in_use_replacement_blks;
1380 +       spinlock_t                      bbr_id_lock;
1381 +};
1382 +
1383 +#define BBR_IO_HANDLED (1<<0)
1384 +#define BBR_IO_RELOCATE        (1<<1)
1385 +
1386 +/**
1387 + * struct bbr_io_buffer
1388 + * @bbr_io_list:       Thread's list of bbr_io_buf's.
1389 + * @bbr_id:            Object for this request.
1390 + * @bh:                        Original buffer_head.
1391 + * @sector:            Original sector.
1392 + * @flags:             Operation flags (BBR_IO_*).
1393 + * @rw:                        READ or WRITE.
1394 + * @rc:                        Return code from bbr_io_handler.
1395 + *
1396 + * Structure used to track each write request.
1397 + **/
1398 +struct bbr_io_buffer {
1399 +       struct list_head        bbr_io_list;
1400 +       struct bbr_private      *bbr_id;
1401 +       struct buffer_head      *bh;
1402 +       u64                     sector;
1403 +       u32                     flags;
1404 +       s32                     rw;
1405 +       s32                     rc;
1406 +};
1407 +
1408 +#endif
1409 +
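A quick cross-check of the on-disk layout declared above: each table sector carries four 32-bit header fields (16 bytes) plus BBR_ENTRIES_PER_SECT remap entries of two u64 each (31 * 16 = 496 bytes), i.e. exactly 512 bytes, one sector. The sketch below is a user-space mirror of those declarations (an assumption, not part of the patch) that verifies the arithmetic:

#include <stdio.h>
#include <stdint.h>

#define BBR_ENTRIES_PER_SECT    31

struct bbr_table_entry {
        uint64_t bad_sect;
        uint64_t replacement_sect;
};

struct bbr_table {
        uint32_t signature;
        uint32_t crc;
        uint32_t sequence_number;
        uint32_t in_use_cnt;
        struct bbr_table_entry entries[BBR_ENTRIES_PER_SECT];
};

int main(void)
{
        /* 4 * 4 header bytes + 31 * 16 entry bytes = 512 = one sector. */
        printf("sizeof(struct bbr_table) = %lu\n",
               (unsigned long)sizeof(struct bbr_table));
        return 0;
}

This is also why bbr_setup() reads the table one sector at a time with dm_io_sync(): each struct bbr_table in the kmalloc'd array maps onto exactly one 512-byte sector of the on-disk table.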
1410 diff -urN linux-2.4.22/drivers/md/dm-snapshot.c linux-2.4.22-evms/drivers/md/dm-snapshot.c
1411 --- linux-2.4.22/drivers/md/dm-snapshot.c       2003-09-15 17:07:45.000000000 +0200
1412 +++ linux-2.4.22-evms/drivers/md/dm-snapshot.c  2003-09-15 17:08:35.000000000 +0200
1413 @@ -92,6 +92,9 @@
1414  
1415         /* List of snapshots for this origin */
1416         struct list_head snapshots;
1417 +
1418 +       /* Count of snapshots and origins referencing this structure. */
1419 +       unsigned int count;
1420  };
1421  
1422  /*
1423 @@ -155,6 +158,35 @@
1424  }
1425  
1426  /*
1427 + * Allocate and initialize an origin structure.
1428 + */
1429 +static struct origin * __alloc_origin(kdev_t dev)
1430 +{
1431 +       struct origin *o = kmalloc(sizeof(*o), GFP_KERNEL);
1432 +       if (o) {
1433 +               o->dev = dev;
1434 +               INIT_LIST_HEAD(&o->hash_list);
1435 +               INIT_LIST_HEAD(&o->snapshots);
1436 +               __insert_origin(o);
1437 +       }
1438 +       return o;
1439 +}
1440 +
1441 +static void __get_origin(struct origin *o)
1442 +{
1443 +       o->count++;
1444 +}
1445 +
1446 +static void __put_origin(struct origin *o)
1447 +{
1448 +       o->count--;
1449 +       if (o->count == 0) {
1450 +               list_del(&o->hash_list);
1451 +               kfree(o);
1452 +       }
1453 +}
1454 +
1455 +/*
1456   * Make a note of the snapshot and its origin so we can look it
1457   * up when the origin has a write on it.
1458   */
1459 @@ -168,20 +200,37 @@
1460  
1461         if (!o) {
1462                 /* New origin */
1463 -               o = kmalloc(sizeof(*o), GFP_KERNEL);
1464 +               o = __alloc_origin(dev);
1465                 if (!o) {
1466                         up_write(&_origins_lock);
1467                         return -ENOMEM;
1468                 }
1469 +       }
1470  
1471 -               /* Initialise the struct */
1472 -               INIT_LIST_HEAD(&o->snapshots);
1473 -               o->dev = dev;
1474 +       __get_origin(o);
1475 +       list_add_tail(&snap->list, &o->snapshots);
1476  
1477 -               __insert_origin(o);
1478 +       up_write(&_origins_lock);
1479 +       return 0;
1480 +}
1481 +
1482 +static int register_origin(kdev_t dev)
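+/*
+ * Take a reference on the origin structure for a device without attaching a
+ * snapshot.  Used by origin_ctr() below so that plain origin targets also
+ * pin the origin; the reference is dropped via unregister_origin() in
+ * origin_dtr().
+ */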
1483 +{
1484 +       struct origin *o;
1485 +
1486 +       down_write(&_origins_lock);
1487 +       o = __lookup_origin(dev);
1488 +
1489 +       if (!o) {
1490 +               /* New origin */
1491 +               o = __alloc_origin(dev);
1492 +               if (!o) {
1493 +                       up_write(&_origins_lock);
1494 +                       return -ENOMEM;
1495 +               }
1496         }
1497  
1498 -       list_add_tail(&snap->list, &o->snapshots);
1499 +       __get_origin(o);
1500  
1501         up_write(&_origins_lock);
1502         return 0;
1503 @@ -195,11 +244,18 @@
1504         o = __lookup_origin(s->origin->dev);
1505  
1506         list_del(&s->list);
1507 -       if (list_empty(&o->snapshots)) {
1508 -               list_del(&o->hash_list);
1509 -               kfree(o);
1510 -       }
1511 +       __put_origin(o);
1512 +
1513 +       up_write(&_origins_lock);
1514 +}
1515 +
1516 +static void unregister_origin(kdev_t dev)
1517 +{
1518 +       struct origin *o;
1519  
1520 +       down_write(&_origins_lock);
1521 +       o = __lookup_origin(dev);
1522 +       __put_origin(o);
1523         up_write(&_origins_lock);
1524  }
1525  
1526 @@ -1090,6 +1146,13 @@
1527                 return r;
1528         }
1529  
1530 +       r = register_origin(dev->dev);
1531 +       if (r) {
1532 +               ti->error = "Cannot register origin";
1533 +               dm_put_device(ti, dev);
1534 +               return r;
1535 +       }
1536 +
1537         ti->private = dev;
1538         return 0;
1539  }
1540 @@ -1097,6 +1160,7 @@
1541  static void origin_dtr(struct dm_target *ti)
1542  {
1543         struct dm_dev *dev = (struct dm_dev *) ti->private;
1544 +       unregister_origin(dev->dev);
1545         dm_put_device(ti, dev);
1546  }
1547  
1548 diff -urN linux-2.4.22/drivers/md/dm-sparse.c linux-2.4.22-evms/drivers/md/dm-sparse.c
1549 --- linux-2.4.22/drivers/md/dm-sparse.c 1970-01-01 01:00:00.000000000 +0100
1550 +++ linux-2.4.22-evms/drivers/md/dm-sparse.c    2003-09-15 17:09:48.000000000 +0200
1551 @@ -0,0 +1,713 @@
1552 +/* -*- linux-c -*- */
1553 +
1554 +/*
1555 + *   Copyright (c) International Business Machines  Corp., 2002
1556 + *
1557 + *   This program is free software;  you can redistribute it and/or modify
1558 + *   it under the terms of the GNU General Public License as published by
1559 + *   the Free Software Foundation; either version 2 of the License, or
1560 + *   (at your option) any later version.
1561 + *
1562 + *   This program is distributed in the hope that it will be useful,
1563 + *   but WITHOUT ANY WARRANTY;  without even the implied warranty of
1564 + *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
1565 + *   the GNU General Public License for more details.
1566 + *
1567 + *   You should have received a copy of the GNU General Public License
1568 + *   along with this program;  if not, write to the Free Software
1569 + *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
1570 + *
1571 + * linux/drivers/md/dm-sparse.c
1572 + *
1573 + * Sparse target for device-mapper.
1574 + *
1575 + * This target provides the ability to create a sparse device: the device
1576 + * appears larger than the storage actually backing it.  Chunks are only
+ * allocated from the backing store the first time they are written, and
+ * reads of never-written chunks return zeros.
1577 + */
1578 +
1579 +#include <linux/module.h>
1580 +#include <linux/init.h>
1581 +#include <linux/blkdev.h>
1582 +#include <linux/slab.h>
1583 +#include <linux/mempool.h>
1584 +#include <linux/vmalloc.h>
1585 +
1586 +#include "dm.h"
1587 +#include "dm-io.h"
1588 +
1589 +#define MAX_HASH_CHAIN_ENTRIES 10
1590 +#define NAME_SIZE 127
1591 +
1592 +/* Sparse Ioctl
1593 +   device
1594 +   start
1595 +   chunk_size
1596 +   chunks
1597 + */
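+/*
+ * Illustrative note (not part of the original patch): the four values above
+ * are the constructor arguments parsed by sparse_ctr() below, so a
+ * device-mapper table line for this target would look roughly like
+ *
+ *     0 <num_sectors> sparse <device> <first_chunk_lba> <chunk_size> <chunks>
+ *
+ * where <chunk_size> is in sectors and is assumed to be a power of two
+ * (see log2() / chunk_shift).
+ */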
1598 +
1599 +// Entries in the sparse remapping structure
1600 +struct sparse_hash_entry {
1601 +    u64 org_chunk; // Chunk number, not LBA.
1602 +    u64 sparse_chunk; // Chunk number, not LBA.
1603 +    struct sparse_hash_entry * next;
1604 +    struct sparse_hash_entry * prev;
1605 +};
1606 +
1607 +//Private data structure
1608 +struct sparse_volume {
1609 +    struct dm_dev *dev;
1610 +    struct rw_semaphore sparse_semaphore;
1611 +    struct sparse_hash_entry ** sparse_map; // Hash table of remappings
1612 +    struct sparse_hash_entry * free_hash_list;
1613 +    kmem_cache_t * hash_slab;
1614 +    mempool_t * hash_pool;
1615 +    u32 dm_io_flag;
1616 +    u32 chunk_size;    // Sectors.
1617 +    u32 chunk_shift; // Shift value for chunk size.
1618 +    u32 num_chunks;    // In this volume.
1619 +    u32 next_cow_entry; // Index into current COW table.
1620 +    u64 current_cow_sector;    // LOGICAL sector of current COW table.
1621 +    u32 next_free_chunk; // Index of next free chunk (not LBA!).
1622 +    u32 hash_table_size; // Size of the hash table for the remap.
1623 +    u64 start;
1624 +    u64 cow_table[64]; // One sector's worth of COW tables.
1625 +};
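+
+/*
+ * Editorial note (not part of the original patch): the on-disk metadata
+ * layout implied by build_sparse_maps() and sparse_cow_write() below is an
+ * append-only COW table starting at logical sector 1 of the underlying
+ * device (current_cow_sector is initialized to 1 in sparse_ctr()).  Each
+ * metadata sector holds SECTOR_SIZE/sizeof(u64) little-endian entries (64
+ * with 512-byte sectors); the i-th valid entry, counted from the start of
+ * the table, records the original chunk number that has been remapped to
+ * sparse chunk i, and 0xffffffffffffffff marks the first unused entry.
+ */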
1626 +
1627 +/*************************** OLD SERVICES ****************************/
1628 +
1629 +/* computes log base 2 of value; returns -1 if value is 0, -2 if value is not a power of 2 */
1630 +inline int log2(u32 value) //ok to change to u32?
1631 +{
1632 +    int result = -1;
1633 +    long tmp;              //ok to change to long?
1634 +    
1635 +    if (value) {
1636 +       tmp = value;
1637 +       result++;
1638 +       while (!(tmp & 1)) {
1639 +           result++;
1640 +           tmp >>= 1;
1641 +       }
1642 +       if (tmp != 1) {
1643 +           result = -2;
1644 +       }
1645 +    }
1646 +    return result;
1647 +}
1648 +
1649 +/********************************* Functions *********************************/
1650 +
1651 +/***************************** Hash Functions *****************************/
1652 +
1653 +/* Take and initialize from the free hash list */
1654 +static struct sparse_hash_entry * 
1655 +allocate_sparse_hash_entry( struct sparse_volume * volume,     
1656 +                           u64 org_chunk,
1657 +                           u64 sparse_chunk )
1658 +{
1659 +    struct sparse_hash_entry * hash_entry;
1660 +    
1661 +       hash_entry = volume->free_hash_list;
1662 +       if ( hash_entry ) { // should always be the case because these are preallocated
1663 +           volume->free_hash_list = hash_entry->next;
1664 +           hash_entry->org_chunk = org_chunk;
1665 +           hash_entry->sparse_chunk = sparse_chunk;
1666 +           hash_entry->next = NULL;
1667 +           hash_entry->prev = NULL;
1668 +       }
1669 +       
1670 +       return hash_entry;
1671 +}
1672 +
1673 +/*
1674 + *     This function inserts a new entry into a sparse hash chain, immediately
1675 + *     following the specified entry. This function should not be used to add
1676 + *     an entry into an empty list, or as the first entry in an existing list.
1677 + *     For that case, use insert_sparse_map_entry_at_head().
1678 + */
1679 +static int insert_sparse_hash_entry( struct sparse_hash_entry * entry,
1680 +                                    struct sparse_hash_entry * base )
1681 +{
1682 +       entry->next = base->next;
1683 +       entry->prev = base;
1684 +       base->next = entry;
1685 +       if ( entry->next ) {
1686 +               entry->next->prev = entry;
1687 +       }
1688 +       return 0;
1689 +}
1690 +
1691 +/*
1692 + *     This function inserts a new entry into a sparse chain as the first
1693 + *     entry in the chain.
1694 + */
1695 +static int insert_sparse_hash_entry_at_head( struct sparse_hash_entry * entry,
1696 +                                            struct sparse_hash_entry ** head )
1697 +{
1698 +       entry->next = *head;
1699 +       entry->prev = NULL;
1700 +       *head = entry;
1701 +       if ( entry->next ) {
1702 +           entry->next->prev = entry;
1703 +       }
1704 +       return 0;
1705 +}
1706 +
1707 +/*
1708 + *     Delete all items in a single chain in the hash table.
1709 + */
1710 +static int delete_sparse_hash_chain( struct sparse_volume * vol, 
1711 +                                    struct sparse_hash_entry * head )
1712 +{
1713 +    struct sparse_hash_entry * next;
1714 +    
1715 +    while ( head ) {
1716 +       next = head->next;
1717 +       mempool_free( head, vol->hash_pool );
1718 +       head = next;
1719 +    }
1720 +    return 0;
1721 +}
1722 +
1723 +/*
1724 + *     This function will search the hash chain that is anchored at the
1725 + *     specified head pointer. If the chunk number is found, a pointer to that
1726 + *     entry in the chain is set, and a 1 is returned. If the chunk is not
1727 + *     found, a pointer to the previous entry is set and 0 is returned. If the
1728 + *     return pointer is NULL, this means either the list is empty, or the
1729 + *     specified sector should become the first list item.
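+ *
+ *     Example (illustrative): searching for chunk 7 in a chain whose
+ *     org_chunk values are 3 -> 5 -> 9 returns 0 with *result pointing at
+ *     the chunk-5 entry, i.e. the entry after which a new chunk-7 entry
+ *     would be inserted.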
1730 + */
1731 +static int search_sparse_hash_chain( u64 chunk,
1732 +                                    struct sparse_hash_entry * head,
1733 +                                    struct sparse_hash_entry ** result )
1734 +{
1735 +    struct sparse_hash_entry * curr = head;
1736 +    struct sparse_hash_entry * prev = head;
1737 +    while ( curr && curr->org_chunk < chunk ) {
1738 +       prev = curr;
1739 +       curr = curr->next;
1740 +    }
1741 +    if (!curr) { // Either an empty chain or went off the end of the chain.
1742 +       *result = prev;
1743 +       return 0;
1744 +    }
1745 +    else if ( curr->org_chunk != chunk ) {
1746 +       *result = curr->prev;
1747 +       return 0;
1748 +    }
1749 +    else {
1750 +       *result = curr;
1751 +       return 1;
1752 +    }
1753 +}
1754 +
1755 +/*
1756 + *     This function takes a cow table entry (from the on-disk data), and
1757 + *     converts it into an appropriate entry for the sparse map, and
1758 + *     inserts it into the appropriate map for the specified volume.
1759 + */
1760 +static int add_cow_entry_to_sparse_map( u64 org_chunk,
1761 +                                       u64 sparse_chunk,
1762 +                                       struct sparse_volume * volume )
1763 +{
1764 +    struct sparse_hash_entry * new_entry;
1765 +    struct sparse_hash_entry * target_entry;
1766 +    u32 hash_value;
1767 +    int rc = -EINVAL;
1768 +
1769 +    new_entry = allocate_sparse_hash_entry(volume, org_chunk, sparse_chunk);
1770 +    if (!new_entry) {
1771 +       return -ENOMEM;
1772 +    }
1773 +    
1774 +    hash_value = (long)org_chunk % volume->hash_table_size;
1775 +    
1776 +    if (! search_sparse_hash_chain( org_chunk, 
1777 +                                   volume->sparse_map[hash_value], 
1778 +                                   &target_entry ) ) { 
1779 +       //should always take this path
1780 +
1781 +       if ( target_entry ) {
1782 +           insert_sparse_hash_entry( new_entry, target_entry );
1783 +       }
1784 +       else {
1785 +           insert_sparse_hash_entry_at_head
1786 +               ( new_entry, &(volume->sparse_map[hash_value]) );
1787 +       }
1788 +       rc = 0;
1789 +    }
1790 +    return rc;
1791 +}
1792 +
1793 +/*
1794 + *     Construct the initial hash table state based on 
1795 + *     existing COW tables on the disk.
1796 + */
1797 +static int build_sparse_maps(struct sparse_volume * volume)
1798 +{
1799 +    int rc = 0, done = 0;
1800 +    struct io_region job;
1801 +    struct page * page;
1802 +    unsigned int error, offset;
1803 +  
1804 +    while (!done) {
1805 +       
1806 +       // Read in one sector's worth of COW tables.
1807 +        job.dev = volume->dev->dev;
1808 +        job.sector = volume->current_cow_sector;
1809 +        job.count = 1;
1810 +        page = virt_to_page(volume->cow_table);
1811 +        offset = (unsigned long)volume->cow_table & ~PAGE_MASK;
1812 +        rc = dm_io_sync(1, &job, READ, page, offset, &error);
1813 +        if (rc) {
1814 +            return rc;
1815 +       }
1816 +
1817 +       // Translate every valid COW table entry into
1818 +       // a sparse map entry.
1819 +       for ( volume->next_cow_entry = 0;
1820 +
1821 +             volume->next_cow_entry < (SECTOR_SIZE/sizeof(u64)) &&
1822 +                 volume->cow_table[volume->next_cow_entry] != 
1823 +                 0xffffffffffffffff;
1824 +
1825 +             volume->next_cow_entry++, volume->next_free_chunk++ ) {
1826 +
1827 +           if ( (rc = add_cow_entry_to_sparse_map
1828 +                 ( le64_to_cpu( volume->cow_table[volume->next_cow_entry] ),
1829 +                   volume->next_free_chunk, volume ))) {
1830 +               return( rc );
1831 +           }
1832 +       }
1833 +       // Move on to the next sector if necessary.
1834 +       if ( volume->next_cow_entry == (SECTOR_SIZE/sizeof(u64)) ) {
1835 +           volume->current_cow_sector++;
1836 +       }
1837 +       else {
1838 +           done = 1;
1839 +       }
1840 +    }
1841 +    return 0;
1842 +}
1843 +
1844 +/************************* Other Functions ************************/
1845 +
1846 +/*
1847 + * Function: sparse_remap_chunk
1848 + *
1849 + *     This function performs a sector remap on a sparse volume. This should
1850 + *     be called from the I/O path, It first determines the base sector
1851 + *     be called from the I/O path. It first determines the base sector
1852 + *     Then it performs a search through the sparse map for the specified 
1853 + *     volume. If a match is found, the sector number is changed to the new 
1854 + *     value. If no match is found, the value is left the same, meaning the 
1855 + *     chunk has not been remapped.
1856 + */
1857 +static int sparse_remap_chunk( struct sparse_volume * sparse_volume,
1858 +                              u64 * sector )
1859 +{
1860 +    struct sparse_hash_entry * result;
1861 +    u64 chunk;
1862 +    u32 hash_value;
1863 +    u32 remainder;
1864 +    int rc = 1;
1865 +    
1866 +    down_read(&sparse_volume->sparse_semaphore);
1867 +    
1868 +    remainder = *sector & (u64)(sparse_volume->chunk_size - 1);
1869 +    chunk = *sector >> sparse_volume->chunk_shift;
1870 +    hash_value = ((u32)chunk) % sparse_volume->hash_table_size;
1871 +    
1872 +    if ( search_sparse_hash_chain( chunk, 
1873 +                                  sparse_volume->sparse_map[hash_value], 
1874 +                                  &result) ) {
1875 +       *sector = ( result->sparse_chunk << sparse_volume->chunk_shift ) 
1876 +           + remainder;
1877 +       rc =  0;
1878 +    }
1879 +    up_read(&sparse_volume->sparse_semaphore);
1880 +    return rc;
1881 +}
1882 +
1883 +/* Function: sparse_cow_write
1884 + *
1885 + *     Check this sparse node to see if the given sector/chunk has been
1886 + *     remapped yet. If it hasn't, create a new hash table entry, update the
1887 + *     in-memory COW table, and write the COW table to disk.
1888 + */
1889 +
1890 +static int sparse_cow_write( struct sparse_volume * sparse_volume,
1891 +                            u64 * sector )
1892 +{
1893 +    struct sparse_hash_entry * target_entry, * new_map_entry;
1894 +    struct io_region job;
1895 +    struct page * page;
1896 +    char * cow = NULL;
1897 +    unsigned int error, offset;
1898 +    u64 chunk;
1899 +    u32 hash_value = 0;
1900 +    u32 remainder;
1901 +    int rc;
1902 +    
1903 +    down_write(&sparse_volume->sparse_semaphore);
1904 +    
1905 +    remainder = *sector & (u64)(sparse_volume->chunk_size - 1);
1906 +    chunk = *sector >> sparse_volume->chunk_shift;
1907 +    hash_value = ((u32)chunk) % sparse_volume->hash_table_size;
1908 +    
1909 +    if ( search_sparse_hash_chain( chunk, 
1910 +                                  sparse_volume->sparse_map[hash_value], 
1911 +                                  &target_entry) ) {
1912 +       *sector = 
1913 +           ( target_entry->sparse_chunk << sparse_volume->chunk_shift ) 
1914 +           + remainder;
1915 +       rc = 0;
1916 +       goto out;
1917 +    }
1918 +    
1919 +    // Is there enough room left on this sparse to remap this chunk?
1920 +    if ( sparse_volume->next_free_chunk >= sparse_volume->num_chunks ) {
1921 +       DMERR("dm-sparse: full no new remaps allowed\n");
1922 +       rc = -ENOSPC;
1923 +       goto out;
1924 +    }
1925 +    
1926 +    // Create and initialize a new hash table entry for the new remap.
1927 +    new_map_entry = allocate_sparse_hash_entry
1928 +       (sparse_volume, chunk, sparse_volume->next_free_chunk);
1929 +    if ( ! new_map_entry ) {
1930 +       // Can't get memory for map entry. Disable this sparse.
1931 +       DMERR("dm-sparse: memory error allocating hash entry\n");
1932 +       rc = -ENOMEM;
1933 +       goto out;
1934 +    }
1935 +    
1936 +    // Always write the COW table so it's safe
1937 +    cow = kmalloc( SECTOR_SIZE, GFP_KERNEL );
1938 +    if (! cow ) {
1939 +       // Can't get I/O buffer. Disable this sparse.
1940 +       DMERR("dm-sparse: memory error allocating COW table buffer");
1941 +       rc = -ENOMEM;
1942 +       goto out;       
1943 +    }
1944 +
1945 +    // Add the entry to the hash table.
1946 +    if ( target_entry ) {      
1947 +       insert_sparse_hash_entry( new_map_entry, target_entry );
1948 +    }
1949 +    else {
1950 +       insert_sparse_hash_entry_at_head
1951 +           ( new_map_entry, 
1952 +             &(sparse_volume->sparse_map[hash_value]) );
1953 +    }
1954 +    
1955 +    sparse_volume->next_free_chunk++;
1956 +    
1957 +    // Update the appropriate entry in the COW table. 
1958 +    sparse_volume->cow_table[sparse_volume->next_cow_entry] = 
1959 +       cpu_to_le64(chunk);
1960 +    sparse_volume->next_cow_entry++;
1961 +    
1962 +    memcpy(cow, sparse_volume->cow_table, SECTOR_SIZE);
1963 +
1964 +    // Because of ordering issues, this write needs to be synchronous
1965 +    job.dev = sparse_volume->dev->dev;
1966 +    job.sector = sparse_volume->current_cow_sector;
1967 +    job.count = 1;
1968 +    page = virt_to_page(cow);
1969 +    offset = (unsigned long)cow & ~PAGE_MASK;
1970 +    dm_io_sync(1, &job, WRITE, page, offset, &error);
1971 +    
1972 +    // Update the in-memory COW table values.
1973 +    if ( sparse_volume->next_cow_entry >= (SECTOR_SIZE/sizeof(u64)) )
1974 +       {
1975 +           sparse_volume->next_cow_entry = 0;
1976 +           sparse_volume->current_cow_sector++;
1977 +           memset(sparse_volume->cow_table, 0xff, SECTOR_SIZE);
1978 +       }
1979 +    
1980 +    *sector = ( new_map_entry->sparse_chunk << sparse_volume->chunk_shift )
1981 +       + remainder;
1982 +    
1983 +    rc = 0;
1984 +    
1985 + out:
1986 +    up_write(&sparse_volume->sparse_semaphore);
1987 +    if ( cow ) {
1988 +       kfree( cow );
1989 +    }
1990 +
1991 +    return rc;
1992 +}
1993 +
1994 +/************************ EXPORT FUNCTIONS ************************/
1995 +
1996 +/*
1997 + * Function: sparse_dtr
1998 + */
1999 +static void sparse_dtr( struct dm_target *ti )
2000 +{
2001 +    struct sparse_volume * vol = (struct sparse_volume *)ti->private;
2002 +    int i;
2003 +
2004 +    if (vol) {
2005 +
2006 +       if (vol->sparse_map) {
2007 +           for ( i = 0; i < vol->hash_table_size; i++ ) {
2008 +               delete_sparse_hash_chain( vol, vol->sparse_map[i] );
2009 +           }
2010 +           delete_sparse_hash_chain( vol, vol->free_hash_list );
2011 +           vfree(vol->sparse_map);
2012 +       }
2013 +
2014 +       if (vol->hash_pool)
2015 +           mempool_destroy(vol->hash_pool);
2016 +       
2017 +       if (vol->hash_slab) 
2018 +           kmem_cache_destroy(vol->hash_slab);
2019 +
2020 +       dm_put_device(ti, vol->dev);
2021 +
2022 +        if (vol->dm_io_flag) {
2023 +           dm_io_put(1);
2024 +       }
2025 +    
2026 +       kfree( vol );
2027 +    }
2028 +}
2029 +
2030 +/*
2031 + * Function: sparse_ctr
2032 + */
2033 +static int sparse_ctr( struct dm_target *ti, unsigned int argc, char** argv )
2034 +{
2035 +    int i, rc = -EINVAL;
2036 +    struct sparse_hash_entry *new_entry;
2037 +    struct sparse_volume *vol;
2038 +    struct dm_dev *dev;
2039 +    u32 chunk_size, chunks;
2040 +    u64 start;
2041 +    char* end, slab_name[NAME_SIZE+1];
2042 +
2043 +    if ( argc != 4 ) {
2044 +       ti->error="dm-sparse: wrong number of arguments";
2045 +       return rc;
2046 +    }
2047 +
2048 +    start = simple_strtoull(argv[1], &end, 10);
2049 +    if (*end) {
2050 +       ti->error="dm-sparse: Invalid first chunk lba";
2051 +       return rc;
2052 +    }
2053 +
2054 +    chunk_size = simple_strtoul(argv[2], &end, 10);    
2055 +    if (*end) {
2056 +       ti->error="dm-sparse: Invalid chunk_size";
2057 +       return rc;
2058 +    }
2059 +
2060 +    chunks = simple_strtoul(argv[3], &end, 10);
2061 +    if (*end) {
2062 +       ti->error="dm-sparse: Invalid number of chunks";
2063 +       return rc;
2064 +    }
2065 +
2066 +    if ( dm_get_device( ti, argv[0], ti->begin, start + chunks * chunk_size,
2067 +                       dm_table_get_mode(ti->table), &dev ) ) {
2068 +       ti->error = "dm-sparse: Device lookup failed";
2069 +       return rc;
2070 +    }
2071 +
2072 +    vol = kmalloc(sizeof(struct sparse_volume), GFP_KERNEL);
2073 +    if ( !vol ) {
2074 +       ti->error = "dm-sparse: Memory allocation for private-data failed";
2075 +        rc = -ENOMEM;
2076 +       goto out;
2077 +    }
2078 +
2079 +    memset( vol, 0, sizeof(struct sparse_volume) );
2080 +
2081 +    rc = dm_io_get(1);
2082 +    if (rc) {
2083 +           ti->error = "dm-sparse: failed to initialize dm-io.";
2084 +           sparse_dtr(ti);
2085 +           return rc;
2086 +    }
2087 +    
2088 +    // Initialize
2089 +    vol->dm_io_flag = 1;
2090 +    vol->chunk_size = chunk_size;
2091 +    vol->chunk_shift = log2(chunk_size);
2092 +    vol->num_chunks = chunks;
2093 +    vol->current_cow_sector = 1;
2094 +    vol->hash_table_size = chunks / MAX_HASH_CHAIN_ENTRIES + 1;
2095 +    vol->start = start;
2096 +    vol->dev = dev;
2097 +    init_rwsem(&vol->sparse_semaphore);
2098 +
2099 +    snprintf(slab_name, NAME_SIZE, "sparse-%p", vol);
2100 +    vol->hash_slab = kmem_cache_create(slab_name,
2101 +                                      sizeof(struct sparse_hash_entry),
2102 +                                      0, SLAB_HWCACHE_ALIGN,
2103 +                                      NULL, NULL);
2104 +    if ( ! vol->hash_slab ) {
2105 +       ti->error = "dm-sparse: memory allocation error in hash slab create";
2106 +       sparse_dtr(ti);
2107 +       return -ENOMEM;
2108 +    }
2109 +    vol->hash_pool = mempool_create(1, mempool_alloc_slab,
2110 +                                   mempool_free_slab,
2111 +                                   vol->hash_slab);    
2112 +    if ( ! vol->hash_pool ) {
2113 +       ti->error = "dm-sparse: memory allocation error in hash pool create";
2114 +       sparse_dtr(ti);
2115 +       return -ENOMEM;
2116 +    }
2117 +
2118 +    // Sparse hash table
2119 +    vol->sparse_map = vmalloc( vol->hash_table_size * 
2120 +                              sizeof( struct sparse_hash_entry * ) );
2121 +    if ( ! vol->sparse_map ) {
2122 +       ti->error = "dm-sparse: Memory allocation error in sparse_map create";
2123 +       sparse_dtr(ti);
2124 +       return -ENOMEM;
2125 +    }
2126 +
2127 +    memset( vol->sparse_map, 0, vol->hash_table_size * 
2128 +           sizeof( struct sparse_hash_entry * ) );
2129 +    
2130 +    for ( i = 0; i < chunks; i++ ) {
2131 +
2132 +       new_entry = mempool_alloc(vol->hash_pool, GFP_KERNEL );
2133 +       if ( ! new_entry ) {
2134 +           ti->error="dm-sparse: memory allocation error in hash table setup";
2135 +           sparse_dtr(ti);
2136 +           return -ENOMEM;
2137 +       }
2138 +
2139 +       new_entry->next = vol->free_hash_list;
2140 +       vol->free_hash_list = new_entry;
2141 +    }
2142 +    
2143 +    rc = build_sparse_maps(vol);
2144 +    if (rc) {
2145 +       ti->error = "dm-sparse: error building hash tables";
2146 +       sparse_dtr(ti);
2147 +       return rc;
2148 +    }
2149 +
2150 +    ti->private = vol;
2151 +    return rc;
2152 +
2153 + out:
2154 +    dm_put_device(ti, dev);
2155 +    return rc;
2156 +}
2157 +
2158 +/*
2159 + * Function: sparse_map
2160 + */
2161 +static int sparse_map( struct dm_target * ti, struct buffer_head * bh, int rw,
2162 +                      union map_info *map_context )
2163 +{
2164 +    struct sparse_volume * volume = (struct sparse_volume*)ti->private;
2165 +    u64 sector = bh->b_rsector;
2166 +    int rc;
2167 +
2168 +
2171 +    rc = sparse_remap_chunk( volume, &sector );
2172 +    
2173 +    if ( rc < 0 ) { //Error
2174 +       bh->b_end_io(bh, 0);
2175 +       return rc;
2176 +    }
2177 +    
2178 +    if ( rc == 0 ) { // Remapped I/O : read or write same logic
2179 +       bh->b_rsector = volume->start + sector;
2180 +       bh->b_rdev = volume->dev->dev;
2181 +       return 1;
2182 +    }
2183 +    
2184 +    // (Previously) un-mapped: read / write different logic
2185 +    
2186 +    if ( rw ) { //write :
2187 +       rc = sparse_cow_write( volume, &sector );
2188 +       
2189 +       if ( rc < 0 ) { //Error
2190 +           bh->b_end_io(bh, 0);
2191 +           return rc;
2192 +       }                   
2193 +       //Send write on
2194 +       bh->b_rsector = volume->start + sector;
2195 +       bh->b_rdev = volume->dev->dev;
2196 +       return 1;
2197 +    }
2198 +    
2199 +    // Reading something that was never written:
2200 +    // return zeros and indicate completion.
2201 +    memset(bh->b_data, 0x0, bh->b_size);
2202 +    bh->b_end_io(bh, 1);
2203 +    return 0;
2204 +}
2205 +
2206 +static int sparse_status( struct dm_target *ti, status_type_t type, 
2207 +                         char *result, unsigned int maxlen )
2208 +{
2209 +    struct sparse_volume * vol = (struct sparse_volume * )ti->private;
2210 +    
2211 +    switch(type) {
2212 +
2213 +    case STATUSTYPE_INFO:
2214 +       snprintf( result, maxlen, "%d%%", 
2215 +                 ( vol->next_free_chunk * 100 ) / vol->num_chunks );   
2216 +       break;
2217 +       
2218 +    case STATUSTYPE_TABLE:
2219 +       snprintf( result, maxlen, "%s %Lu %u %u", 
2220 +                 dm_kdevname(vol->dev->dev), vol->start, 
2221 +                 vol->chunk_size, vol->num_chunks ); 
2222 +       break;
2223 +
2224 +    default:
2225 +       break;
2226 +    }
2227 +    
2228 +    return 0;
2229 +}
2230 +
2231 +/****************** FUNCTION TABLE **********************/
2232 +
2233 +static struct target_type sparse_target = {
2234 +    .name = "sparse",
2235 +    .module = THIS_MODULE,
2236 +    .ctr = sparse_ctr,
2237 +    .dtr = sparse_dtr,
2238 +    .map = sparse_map,
2239 +    .status = sparse_status,
2240 +};
2241 +
2242 +/********************* REGISTRATION *****************/
2243 +
2244 +int __init sparse_init(void)
2245 +{
2246 +    int rc = dm_register_target(&sparse_target);
2247 +
2248 +    if ( rc < 0 )
2249 +       DMWARN("sparse target registration failed");
2250 +
2251 +    return rc;
2252 +}
2253 +
2254 +void __exit sparse_exit(void)
2255 +{
2256 +    if (dm_unregister_target(&sparse_target) )
2257 +       DMWARN("sparse target unregistration failed");
2258 +
2259 +    return;
2260 +}
2261 +
2262 +module_init(sparse_init);
2263 +module_exit(sparse_exit);
2264 +MODULE_LICENSE("GPL");
2265 diff -urN linux-2.4.22/drivers/md/multipath.c linux-2.4.22-evms/drivers/md/multipath.c
2266 --- linux-2.4.22/drivers/md/multipath.c 2003-06-13 16:51:34.000000000 +0200
2267 +++ linux-2.4.22-evms/drivers/md/multipath.c    2003-09-15 17:09:36.000000000 +0200
2268 @@ -139,15 +139,16 @@
2269  static int multipath_map (mddev_t *mddev, kdev_t *rdev)
2270  {
2271         multipath_conf_t *conf = mddev_to_conf(mddev);
2272 -       int i, disks = MD_SB_DISKS;
2273 +       int i;
2274  
2275         /*
2276          * Later we do read balancing on the read side 
2277          * now we use the first available disk.
2278          */
2279  
2280 -       for (i = 0; i < disks; i++) {
2281 +       for (i = 0; i < conf->nr_disks; i++) {
2282                 if (conf->multipaths[i].operational) {
2283 +                       /* first operational is winner! */
2284                         *rdev = conf->multipaths[i].dev;
2285                         return (0);
2286                 }
2287 @@ -191,6 +192,8 @@
2288  {
2289         struct multipath_bh * mp_bh = (struct multipath_bh *)(bh->b_private);
2290  
2291 +       atomic_dec(&mp_bh->multipath->nr_pending);
2292 +
2293         /*
2294          * this branch is our 'one multipath IO has finished' event handler:
2295          */
2296 @@ -223,19 +226,39 @@
2297  }
2298  
2299  /*
2300 - * This routine returns the disk from which the requested read should
2301 - * be done.
2302 + * Multipath read balance ...
2303 + *
2304 + * Returns:
2305 + *
2306 + *     If no active paths
2307 + *
2308 + *             - Error ( -1 )
2309 + *
2310 + *     If active paths == 1
2311 + *
2312 + *             - 1st active path encountered
2313 + *
2314 + *     If active paths > 1
2315 + *
2316 + *             - 1st idle active path encountered
2317 + *             - else ... the active path doing the least amount of work.
2318   */
2319 -
2320  static int multipath_read_balance (multipath_conf_t *conf)
2321  {
2322 -       int disk;
2323 -
2324 -       for (disk = 0; disk < conf->raid_disks; disk++) 
2325 -               if (conf->multipaths[disk].operational)
2326 -                       return disk;
2327 -       BUG();
2328 -       return 0;
2329 +       int i, disk=-1, nr_pending, least_pending=0;
2330 +       
2331 +       for (i=0; i<conf->nr_disks; i++) {
2332 +               if (conf->multipaths[i].operational) {
2333 +                       nr_pending = atomic_read(&conf->multipaths[i].nr_pending);
2334 +                       if (nr_pending==0 || conf->working_disks==1)
2335 +                               return i;
2336 +                       if (least_pending==0 || nr_pending<least_pending) {
2337 +                               disk = i;
2338 +                               least_pending = nr_pending;
2339 +                       }
2340 +               }
2341 +       }
2342 +       return disk;
2343  }
2344  
2345  static int multipath_make_request (mddev_t *mddev, int rw,
2346 @@ -245,6 +268,7 @@
2347         struct buffer_head *bh_req;
2348         struct multipath_bh * mp_bh;
2349         struct multipath_info *multipath;
2350 +       int disk;
2351  
2352         if (!buffer_locked(bh))
2353                 BUG();
2354 @@ -267,7 +291,16 @@
2355         /*
2356          * read balancing logic:
2357          */
2358 -       multipath = conf->multipaths + multipath_read_balance(conf);
2359 +       disk = multipath_read_balance(conf);
2360 +       if (disk==-1) {
2361 +               printk (KERN_ERR "multipath_make_request: no more operational IO paths.\n");
2362 +               buffer_IO_error(bh);
2363 +               return 0;
2364 +       }
2365 +
2366 +       multipath = conf->multipaths + disk;
2367 +       mp_bh->multipath = multipath;
2368 +       atomic_inc(&multipath->nr_pending);
2369  
2370         bh_req = &mp_bh->bh_req;
2371         memcpy(bh_req, bh, sizeof(*bh));
2372 @@ -331,13 +364,14 @@
2373  {
2374         multipath_conf_t *conf = mddev_to_conf(mddev);
2375         struct multipath_info * multipaths = conf->multipaths;
2376 -       int disks = MD_SB_DISKS;
2377         int other_paths = 1;
2378 -       int i;
2379 +       int i, first = 1;
2380 +       mdk_rdev_t *rdev;
2381 +       struct md_list_head *tmp;
2382  
2383         if (conf->working_disks == 1) {
2384                 other_paths = 0;
2385 -               for (i = 0; i < disks; i++) {
2386 +               for (i = 0; i < MD_SB_DISKS; i++) {
2387                         if (multipaths[i].spare) {
2388                                 other_paths = 1;
2389                                 break;
2390 @@ -351,16 +385,17 @@
2391                  * first check if this is a queued request for a device
2392                  * which has just failed.
2393                  */
2394 -               for (i = 0; i < disks; i++) {
2395 +               for (i = 0; i < MD_SB_DISKS; i++) {
2396                         if (multipaths[i].dev==dev && !multipaths[i].operational)
2397                                 return 0;
2398                 }
2399                 printk (LAST_DISK);
2400         } else {
2401 +               mdp_super_t *sb = mddev->sb;
2402                 /*
2403                  * Mark disk as unusable
2404                  */
2405 -               for (i = 0; i < disks; i++) {
2406 +               for (i = 0; i < MD_SB_DISKS; i++) {
2407                         if (multipaths[i].dev==dev && multipaths[i].operational) {
2408                                 mark_disk_bad(mddev, i);
2409                                 break;
2410 @@ -369,7 +404,6 @@
2411                 if (!conf->working_disks) {
2412                         int err = 1;
2413                         mdp_disk_t *spare;
2414 -                       mdp_super_t *sb = mddev->sb;
2415  
2416                         spare = get_spare(mddev);
2417                         if (spare) {
2418 @@ -384,6 +418,21 @@
2419                                 sb->spare_disks--;
2420                         }
2421                 }
2422 +               /* prevent unnecessary work in md_do_recovery() */
2423 +               if (conf->working_disks) {
2424 +                       conf->raid_disks = conf->working_disks
2425 +                                        = sb->raid_disks = sb->active_disks;
2426 +               }
2427 +               /* update alias disk info to ensure we can do sb commit. */
2428 +               ITERATE_RDEV(mddev,rdev,tmp) {
2429 +                       if (first && disk_active(&sb->disks[rdev->desc_nr])) {
2430 +                               rdev->alias_device = 0;
2431 +                               first = 0;
2432 +                       } else {
2433 +                               if (!disk_faulty(&sb->disks[rdev->desc_nr]))
2434 +                                       rdev->alias_device = 1;
2435 +                       }
2436 +               }
2437         }
2438         return 0;
2439  }
2440 @@ -677,9 +726,8 @@
2441  /*
2442   * This is a kernel thread which:
2443   *
2444 - *     1.      Retries failed read operations on working multipaths.
2445 + *     1.      Retries failed operations on working multipaths.
2446   *     2.      Updates the raid superblock when problems encounter.
2447 - *     3.      Performs writes following reads for array syncronising.
2448   */
2449  
2450  static void multipathd (void *data)
2451 @@ -833,6 +881,7 @@
2452         mdk_rdev_t *rdev, *def_rdev = NULL;
2453         struct md_list_head *tmp;
2454         int num_rdevs = 0;
2455 +       int active_disks = 0, spare_disks = 0, faulty_disks = 0;
2456  
2457         MOD_INC_USE_COUNT;
2458  
2459 @@ -881,9 +930,7 @@
2460                         printk(NOT_IN_SYNC, partition_name(rdev->dev));
2461  
2462                 /*
2463 -                * Mark all disks as spare to start with, then pick our
2464 -                * active disk.  If we have a disk that is marked active
2465 -                * in the sb, then use it, else use the first rdev.
2466 +                * Mark all disks as spare to start with.
2467                  */
2468                 disk->number = desc->number;
2469                 disk->raid_disk = desc->raid_disk;
2470 @@ -894,20 +941,21 @@
2471                 mark_disk_sync(desc);
2472  
2473                 if (disk_active(desc)) {
2474 -                       if(!conf->working_disks) {
2475 -                               printk(OPERATIONAL, partition_name(rdev->dev),
2476 -                                       desc->raid_disk);
2477 -                               disk->operational = 1;
2478 -                               disk->spare = 0;
2479 -                               conf->working_disks++;
2480 -                               def_rdev = rdev;
2481 -                       } else {
2482 -                               mark_disk_spare(desc);
2483 -                       }
2484 -               } else
2485 -                       mark_disk_spare(desc);
2486 +                       printk(OPERATIONAL, partition_name(rdev->dev),
2487 +                               desc->raid_disk);
2488 +                       disk->operational = 1;
2489 +                       disk->spare = 0;
2490 +                       conf->working_disks++;
2491 +                       def_rdev = rdev;
2492 +                       active_disks++;
2493 +               } else if (disk_faulty(desc)) {
2494 +                       disk->spare = 0;
2495 +                       faulty_disks++;
2496 +               } else {
2497 +                       spare_disks++;
2498 +               }
2499  
2500 -               if(!num_rdevs++) def_rdev = rdev;
2501 +               num_rdevs++;
2502         }
2503         if(!conf->working_disks && num_rdevs) {
2504                 desc = &sb->disks[def_rdev->desc_nr];
2505 @@ -918,11 +966,12 @@
2506                 disk->spare = 0;
2507                 conf->working_disks++;
2508                 mark_disk_active(desc);
2509 +               active_disks++;
2510         }
2511         /*
2512 -        * Make sure our active path is in desc spot 0
2513 +        * If there is only 1 active path ... make sure it is in desc spot 0
2514          */
2515 -       if(def_rdev->desc_nr != 0) {
2516 +       if (active_disks == 1 && def_rdev->desc_nr != 0) {
2517                 rdev = find_rdev_nr(mddev, 0);
2518                 desc = &sb->disks[def_rdev->desc_nr];
2519                 desc2 = sb->disks;
2520 @@ -940,10 +989,10 @@
2521                         def_rdev->desc_nr = 0;
2522                 }
2523         }
2524 -       conf->raid_disks = sb->raid_disks = sb->active_disks = 1;
2525 +       conf->raid_disks = sb->raid_disks = sb->active_disks = active_disks;
2526         conf->nr_disks = sb->nr_disks = sb->working_disks = num_rdevs;
2527 -       sb->failed_disks = 0;
2528 -       sb->spare_disks = num_rdevs - 1;
2529 +       sb->failed_disks = faulty_disks;
2530 +       sb->spare_disks = spare_disks;
2531         mddev->sb_dirty = 1;
2532         conf->mddev = mddev;
2533         conf->device_lock = MD_SPIN_LOCK_UNLOCKED;
2534 diff -urN linux-2.4.22/include/linux/raid/multipath.h linux-2.4.22-evms/include/linux/raid/multipath.h
2535 --- linux-2.4.22/include/linux/raid/multipath.h 2001-11-12 18:51:56.000000000 +0100
2536 +++ linux-2.4.22-evms/include/linux/raid/multipath.h    2003-09-15 17:09:36.000000000 +0200
2537 @@ -15,6 +15,7 @@
2538         int             spare;
2539  
2540         int             used_slot;
2541 +       atomic_t        nr_pending;     /* number of pending requests */
2542  };
2543  
2544  struct multipath_private_data {
2545 @@ -63,6 +64,7 @@
2546         struct buffer_head      *master_bh;
2547         struct buffer_head      bh_req;
2548         struct multipath_bh     *next_mp; /* next for retry or in free list */
2549 +       struct multipath_info   *multipath; /* allows end_request to easily dec pending buffer count */
2550  };
2551  /* bits for multipath_bh.state */
2552  #define        MPBH_Uptodate   1